In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *


## **Data Reading**

In [0]:
# Load the product data stored as Parquet in the bronze container into a DataFrame.
df = spark.read.parquet(
    "abfss://bronze@sadec25etlproject.dfs.core.windows.net/product"
)

In [0]:
# Preview the freshly loaded product data.
display(df)

In [0]:
# Remove the `_rescued_data` column (presumably added during ingestion; not needed
# downstream) and preview the result.
df = df.drop("_rescued_data")
display(df)

In [0]:
# Register the DataFrame as a temporary view named "products" so that the SQL
# cells below can query it like a table.
df.createOrReplaceTempView("products")


## **Functions**

In [0]:
%sql
-- Scalar SQL UDF: returns the given price multiplied by 0.90 (i.e. a 10% discount).
CREATE OR REPLACE FUNCTION dec25etlproject.bronze.discount_func(p_price DOUBLE)
  RETURNS DOUBLE
  LANGUAGE SQL
  RETURN 0.90 * p_price

In [0]:
%sql
-- Apply the discount UDF to every row of the `products` temp view.
SELECT
  product_id,
  price,
  dec25etlproject.bronze.discount_func(price) AS discounted_price
FROM products

In [0]:
# The SQL UDF can also be called from PySpark through expr(), so no temp view
# is needed for this path.
discounted_price_col = expr("dec25etlproject.bronze.discount_func(price)")
df = df.withColumn("discounted_price", discounted_price_col)
display(df)

In [0]:
%skip
%sql
-- Example: defining a scalar UDF whose body is written in Python.
-- Fix: the return-type clause keyword is RETURNS, not RETURN. With `RETURN STRING`
-- the statement is most likely parsed as a SQL-bodied function returning `STRING`,
-- and then fails on the LANGUAGE keyword -- the "error at language" that led to
-- this cell being skipped (it is not a Scala version issue).
CREATE OR REPLACE FUNCTION dec25etlproject.bronze.upper_func(p_brand STRING)
RETURNS STRING
LANGUAGE PYTHON
AS
$$
  # NULL inputs are expected to arrive as None; pass them through instead of
  # raising AttributeError on None.upper().
  return p_brand.upper() if p_brand is not None else None
$$

-- NOTE(review): %skip is left in place so the notebook's behavior is unchanged;
-- remove it once the corrected statement has been confirmed on this workspace's runtime.

## **Data Writing**

In [0]:
# The silver layer is treated as transient, so each run overwrites the previous
# output rather than appending to it.
silver_products_path = "abfss://silver@sadec25etlproject.dfs.core.windows.net/products"

(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("path", silver_products_path)
    .save()
)

In [0]:
%sql
-- Register the Delta data at the silver `products` path as a catalog table,
-- unless a table with this name already exists.
CREATE TABLE IF NOT EXISTS dec25etlproject.silver.products_silver
USING DELTA
LOCATION 'abfss://silver@sadec25etlproject.dfs.core.windows.net/products'

In [0]:
%sql
-- Read the silver products table back to check the result of the write.
SELECT *
FROM dec25etlproject.silver.products_silver;