In [0]:
%sql
-- Point the session at the catalog/schema that owns the landing volume.
use catalog ecommerce;
use schema raw_data;

-- Volume that holds the raw CSV drops read by the bronze ingestion cells
-- (/Volumes/ecommerce/raw_data/raw_data_vol/...).
-- IF NOT EXISTS keeps this cell re-runnable: without it a second run
-- (e.g. Restart & Run All) fails because the volume already exists.
CREATE VOLUME IF NOT EXISTS raw_data_vol
COMMENT 'Raw CSV files for ecommerce project';

In [0]:
from pyspark.sql.types  import StructType , StructField , StringType , IntegerType , DataType, TimestampType , FloatType
import pyspark.sql.functions as F

# Bronze ingestion for brands: read the raw CSV files from the volume with an
# explicit schema, stamp each row with load metadata, and replace the bronze
# Delta table.

# Explicit schema for the brands CSV files (no inference); every column is a string.
# NOTE(review): nullable=False may not be enforced when reading files — confirm
# if non-null guarantees are relied on downstream.
brand_schema = (
    StructType()
    .add("brand_code", StringType(), False)
    .add("brand_name", StringType(), False)
    .add("category_code", StringType(), False)
)

file_location = "/Volumes/ecommerce/raw_data/raw_data_vol/brands/*.csv"

# Read every matching CSV (first line treated as a header) and append two
# audit columns: the load timestamp and a generated row id.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(brand_schema)
    .load(file_location)
    .withColumn("ingested_at", F.current_timestamp())
    .withColumn("id", F.uuid())
)

# Full refresh: the existing table contents are replaced on every run.
# NOTE(review): the sibling cells write to "<entity>_base_table"; this one
# writes to "bronze_base_table" — confirm whether "brand_base_table" was intended
# before renaming, since downstream readers may depend on the current name.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce.bronze.bronze_base_table")
)

In [0]:
from pyspark.sql.types  import StructType , StructField , StringType , IntegerType , DataType, TimestampType , FloatType
import pyspark.sql.functions as F

# Bronze ingestion for categories: read the raw CSV files from the volume with
# an explicit schema, stamp each row with load metadata, and replace the bronze
# Delta table.

# Explicit schema for the category CSV files (no inference).
# Named for what it describes: the original reused the name `brand_schema`
# here, which silently rebound the brands schema to a category layout.
# NOTE(review): nullable=False may not be enforced when reading files — confirm.
category_schema = (
    StructType()
    .add("category_name", StringType(), False)
    .add("category_code", StringType(), False)
)

file_location = "/Volumes/ecommerce/raw_data/raw_data_vol/category/*.csv"

# Read every matching CSV (first line treated as a header) and append two
# audit columns: the load timestamp and a generated row id.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(category_schema)
    .load(file_location)
    .withColumn("ingested_at", F.current_timestamp())
    .withColumn("id", F.uuid())
)

# Full refresh: the existing table contents are replaced on every run.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce.bronze.category_base_table")
)

In [0]:
from pyspark.sql.types  import StructType , StructField , StringType , IntegerType , DataType, TimestampType , FloatType
import pyspark.sql.functions as F

# Bronze ingestion for customers: read the raw CSV files from the volume with
# an explicit schema, stamp each row with load metadata, and replace the bronze
# Delta table.

# Explicit schema for the customer CSV files (no inference); every column is a
# string, with phone/country/state declared nullable.
# Named for what it describes: the original reused the name `brand_schema`
# here, which silently rebound the brands schema to a customer layout.
# NOTE(review): nullable=False may not be enforced when reading files — confirm.
customer_schema = (
    StructType()
    .add("customer_id", StringType(), False)
    .add("phone", StringType(), True)
    .add("country_code", StringType(), False)
    .add("country", StringType(), True)
    .add("state", StringType(), True)
)

file_location = "/Volumes/ecommerce/raw_data/raw_data_vol/customers/*.csv"

# Read every matching CSV (first line treated as a header) and append two
# audit columns: the load timestamp and a generated row id.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(customer_schema)
    .load(file_location)
    .withColumn("ingested_at", F.current_timestamp())
    .withColumn("id", F.uuid())
)

# Full refresh: the existing table contents are replaced on every run.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce.bronze.customer_base_table")
)

In [0]:
from pyspark.sql.types  import StructType , StructField , StringType , IntegerType , DataType, TimestampType , FloatType
import pyspark.sql.functions as F

# Bronze ingestion for the date dimension: read the raw CSV files from the
# volume with an explicit schema, stamp each row with load metadata, and
# replace the bronze Delta table.

# Explicit schema for the date CSV files (no inference). The `date` column is
# kept as a string at this layer; year/quarter/week_of_year are integers.
# Named for what it describes: the original reused the name `brand_schema`
# here, which silently rebound the brands schema to a date layout.
# NOTE(review): nullable=False may not be enforced when reading files — confirm.
date_schema = (
    StructType()
    .add("date", StringType(), False)
    .add("year", IntegerType(), False)
    .add("day_name", StringType(), False)
    .add("quarter", IntegerType(), False)
    .add("week_of_year", IntegerType(), False)
)

file_location = "/Volumes/ecommerce/raw_data/raw_data_vol/date/*.csv"

# Read every matching CSV (first line treated as a header) and append two
# audit columns: the load timestamp and a generated row id.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(date_schema)
    .load(file_location)
    .withColumn("ingested_at", F.current_timestamp())
    .withColumn("id", F.uuid())
)

# Full refresh: the existing table contents are replaced on every run.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce.bronze.date_base_table")
)

In [0]:
from pyspark.sql.types  import StructType , StructField , StringType , IntegerType , DataType, TimestampType , FloatType
import pyspark.sql.functions as F

# Bronze ingestion for products: read the raw CSV files from the volume with
# an explicit schema, stamp each row with load metadata, and replace the bronze
# Delta table.

# Explicit schema for the product CSV files (no inference).
# Named for what it describes: the original reused the name `brand_schema`
# here, which silently rebound the brands schema to a product layout.
# NOTE(review): weight_grams and length_cm are read as strings while
# width_cm/height_cm are floats — confirm this asymmetry is deliberate
# (e.g. raw values that need cleaning before casting).
# NOTE(review): nullable=False may not be enforced when reading files — confirm.
product_schema = (
    StructType()
    .add("product_id", StringType(), False)
    .add("sku", StringType(), False)
    .add("category_code", StringType(), False)
    .add("brand_code", StringType(), False)
    .add("color", StringType(), False)
    .add("size", StringType(), False)
    .add("material", StringType(), False)
    .add("weight_grams", StringType(), False)
    .add("length_cm", StringType(), False)
    .add("width_cm", FloatType(), False)
    .add("height_cm", FloatType(), False)
    .add("rating_count", IntegerType(), False)
)

file_location = "/Volumes/ecommerce/raw_data/raw_data_vol/products/*.csv"

# Read every matching CSV (first line treated as a header) and append two
# audit columns: the load timestamp and a generated row id.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(product_schema)
    .load(file_location)
    .withColumn("ingested_at", F.current_timestamp())
    .withColumn("id", F.uuid())
)

# Full refresh: the existing table contents are replaced on every run.
(
    df.write.format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce.bronze.product_base_table")
)