# Data Analytics Module 3 - Delta Lake

### Loading a CSV file into DBFS (Databricks File System)

In [None]:
%sh
# Start from a clean lab directory. -f keeps rm quiet when the directory does
# not exist yet (first run), and -p keeps mkdir from failing if it already does,
# so this cell can be re-run safely.
rm -rf /dbfs/delta_lab
mkdir -p /dbfs/delta_lab
# Download the sample products data into the lab directory.
wget -O /dbfs/delta_lab/products.csv https://raw.githubusercontent.com/kuljotSB/DatabricksUdemyCourse/refs/heads/main/DataAnalytics/products.csv

### Loading data into a dataframe

In [None]:
# Read the downloaded CSV (first row is the header) into a Spark DataFrame.
# The shell cell wrote the file under /dbfs/...; Spark addresses the same
# DBFS location without the /dbfs prefix.
products_csv_path = '/delta_lab/products.csv'
df = spark.read.format('csv').option('header', True).load(products_csv_path)

# Preview the first 10 rows.
display(df.limit(10))

### Load the data into a delta table

#### Storing in DBFS (Databricks File System)

In [None]:
# DBFS location where the Delta table files are stored.
delta_table_path = "/delta/products-delta"

# The default save mode raises an error when the target path already holds
# data, which breaks every re-run of this notebook. Overwrite instead so the
# cell is repeatable and the table always starts from the freshly loaded CSV.
df.write.format("delta").mode("overwrite").save(delta_table_path)

### Manipulating the Delta Table by creating a DeltaTable Object

In [None]:
# NOTE(review): the star import from pyspark.sql.functions shadows Python
# builtins such as sum/min/max/round in the notebook namespace. Kept as-is
# because other cells may rely on the imported names.
from delta.tables import *
from pyspark.sql.functions import *

# Bind a DeltaTable handle to the files written at delta_table_path.
deltaTable = DeltaTable.forPath(spark, delta_table_path)

# Reduce the price of product 771 by 10%. Both the row filter and the new
# column value are SQL expression strings evaluated by Delta.
price_cut_filter = "ProductID == 771"
price_cut_assignments = {"ListPrice": "ListPrice * 0.9"}
deltaTable.update(condition=price_cut_filter, set=price_cut_assignments)

# Show the first 10 rows of the table after the update.
deltaTable.toDF().show(10)

### Creating a dataframe from the delta dataset

In [None]:
# Load the Delta table files at delta_table_path back into a fresh DataFrame
# and print its first 10 rows.
new_df = spark.read.load(delta_table_path, format="delta")
new_df.show(10)

### Exploring the transaction history (log) of the Delta table

In [None]:
# Fetch the 10 most recent history entries of the Delta table and print them
# untruncated, one field per line (vertical layout).
deltaTable.history(10).show(n=10, truncate=False, vertical=True)

### Creating a Data Catalog Table

In [None]:
# Register the data as a table in the catalog's `default` schema.
# The default save mode raises an error when the table already exists, so a
# second run of the notebook would fail here; overwrite makes the cell repeatable.
df.write.format("delta").mode("overwrite").saveAsTable("default.ProductsManaged")


### Accessing the Data Catalog Table

In [None]:
%sql
-- Make `default` the current schema so the table can be referenced unqualified.
USE default;

-- Return every row and column of the catalog table created above.
SELECT *
FROM ProductsManaged;