In [0]:
# Mount the ADLS Gen2 landing-zone container on DBFS using a service principal
# (OAuth client-credentials flow). The cell is safe to re-run: an existing mount
# is reported instead of being mounted a second time.

# Secret scope and key holding the service principal's client secret.
# TODO(review): placeholder names - point these at the workspace's real secret scope/key.
SECRET_SCOPE = "ecommerce-scope"
CLIENT_SECRET_KEY = "client-secret"

# Non-secret OAuth settings. The client secret is deliberately absent here so that it
# never appears in the notebook source; it is added below only when a mount is performed.
configs = {
  "fs.azure.account.auth.type": "OAuth",
  "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
  "fs.azure.account.oauth2.client.id": "client-id",
  "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/tenant-id/oauth2/token"
}

mount_point = "/mnt/ecommerce"
if any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
    print(f"{mount_point} is already mounted")
else:
    # Resolve the secret lazily: an already-mounted workspace does not need access
    # to the secret scope just to re-run this cell.
    configs["fs.azure.account.oauth2.client.secret"] = dbutils.secrets.get(
        scope=SECRET_SCOPE, key=CLIENT_SECRET_KEY
    )
    dbutils.fs.mount(
        source="abfss://landing-zone-2@ecommercemedal.dfs.core.windows.net/",
        mount_point=mount_point,
        extra_configs=configs
    )


/mnt/ecommerce is already mounted


In [0]:
%fs ls "mnt/ecommerce"

path,name,size,modificationTime
dbfs:/mnt/ecommerce/buyers-raw-2/,buyers-raw-2/,0,1741745778000
dbfs:/mnt/ecommerce/countries-raw-2/,countries-raw-2/,0,1741745753000
dbfs:/mnt/ecommerce/sellers-raw-2/,sellers-raw-2/,0,1741745764000
dbfs:/mnt/ecommerce/users-raw-2/,users-raw-2/,0,1741745718000


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Obtain the Spark session for this notebook: getOrCreate() returns the active
# session when one exists, otherwise it starts one named "BronzeLayer".
spark = (
    SparkSession.builder
    .appName("BronzeLayer")
    .getOrCreate()
)

In [0]:
# Bare expression as the last line of the cell: the notebook renders the session object.
spark

## Users

In [0]:
# Load the raw users dataset from the mounted landing zone.
# Parquet files carry their own column names and types, so no schema options are
# passed; `header` / `inferSchema` are CSV reader options and have no effect here.
userDF = spark.read.format("parquet").load("/mnt/ecommerce/users-raw-2")

In [0]:
# Sanity check of the load: print the first five rows...
userDF.show(n=5)
# ...followed by the column names and types of the DataFrame.
userDF.printSchema()

+--------------------+----+----------+--------+-----------------+---------------+-------------------+--------------+------------+----------------+--------------+--------------+------+----------------+-------------+---------+-------------+---------+-----------------+------------------+---------+-----------------+----------------+-----------+
|      identifierHash|type|   country|language|socialNbFollowers|socialNbFollows|socialProductsLiked|productsListed|productsSold|productsPassRate|productsWished|productsBought|gender|civilityGenderId|civilityTitle|hasAnyApp|hasAndroidApp|hasIosApp|hasProfilePicture|daysSinceLastLogin|seniority|seniorityAsMonths|seniorityAsYears|countryCode|
+--------------------+----+----------+--------+-----------------+---------------+-------------------+--------------+------------+----------------+--------------+--------------+------+----------------+-------------+---------+-------------+---------+-----------------+------------------+---------+-----------------+-

In [0]:
# Save the users DataFrame in Delta format under the bronze tables path;
# "overwrite" mode replaces whatever was previously stored at that location.
(
    userDF.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/bronze/users")
)

## Countries

In [0]:
# Load the raw countries dataset from the mounted landing zone.
# Parquet files carry their own column names and types, so no schema options are
# passed; `header` / `inferSchema` are CSV reader options and have no effect here.
countryDF = spark.read.format("parquet").load("/mnt/ecommerce/countries-raw-2")

In [0]:
# Sanity check of the load: print the first five rows...
countryDF.show(n=5)
# ...followed by the column names and types of the DataFrame.
countryDF.printSchema()

+---------+-------+----------+--------------+------------------+---------------------+-------------+-----------+----------------+--------------+----------------+-------------+--------------------+-----------------+----------------------+-------------------+-------------------+---------------------+--------------------+--------------------+---------------+------------------+-------------+-------------+----------------+----------------+
|  country|sellers|topsellers|topsellerratio|femalesellersratio|topfemalesellersratio|femalesellers|malesellers|topfemalesellers|topmalesellers|countrysoldratio|bestsoldratio|toptotalproductssold|totalproductssold|toptotalproductslisted|totalproductslisted|topmeanproductssold|topmeanproductslisted|    meanproductssold|  meanproductslisted|meanofflinedays|topmeanofflinedays|meanfollowers|meanfollowing|topmeanfollowers|topmeanfollowing|
+---------+-------+----------+--------------+------------------+---------------------+-------------+-----------+----------

In [0]:
# Save the countries DataFrame in Delta format under the bronze tables path;
# "overwrite" mode replaces whatever was previously stored at that location.
(
    countryDF.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/bronze/countries")
)

## Sellers

In [0]:
# Load the raw sellers dataset from the mounted landing zone.
# Parquet files carry their own column names and types, so no schema options are
# passed; `header` / `inferSchema` are CSV reader options and have no effect here.
sellerDF = spark.read.format("parquet").load("/mnt/ecommerce/sellers-raw-2")

In [0]:
# Sanity check of the load: print the first five rows...
sellerDF.show(n=5)
# ...followed by the column names and types of the DataFrame.
sellerDF.printSchema()

+---------+------+---------+----------------+------------------+------------------+-----------------+-------------------+------------------+------------------+-----------------+-----------+-----------+------------------+-------------+-----------+-----------------+-----------------+------------------+
|  country|   sex|nbsellers|meanproductssold|meanproductslisted|meansellerpassrate|totalproductssold|totalproductslisted|meanproductsbought|meanproductswished|meanproductsliked|totalbought|totalwished|totalproductsliked|meanfollowers|meanfollows|percentofappusers|percentofiosusers|     meanseniority|
+---------+------+---------+----------------+------------------+------------------+-----------------+-------------------+------------------+------------------+-----------------+-----------+-----------+------------------+-------------+-----------+-----------------+-----------------+------------------+
|Allemagne|Female|      116|            4.03|              2.72|             27.33|           

In [0]:
# Save the sellers DataFrame in Delta format under the bronze tables path;
# "overwrite" mode replaces whatever was previously stored at that location.
(
    sellerDF.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/bronze/sellers")
)

## Buyers

In [0]:
# Load the raw buyers dataset from the mounted landing zone.
# Parquet files carry their own column names and types, so no schema options are
# passed; `header` / `inferSchema` are CSV reader options and have no effect here.
buyerDF = spark.read.format("parquet").load("/mnt/ecommerce/buyers-raw-2")

In [0]:
# Sanity check of the load: print the first five rows...
buyerDF.show(n=5)
# ...followed by the column names and types of the DataFrame.
buyerDF.printSchema()

+-----------+------+---------+-------------+------------+----------+---------------+-------------+-----------------+--------------------+----------------------+------------------+-------------------------+---------------------+-------------------+-------------------+------------------+----------------------+----------------------+---------------------+------------------+------------------+-----------------+---------------------+---------------------+--------------------+---------------+------------------+-------------+-------------+----------------+----------------+
|    country|buyers|topbuyers|topbuyerratio|femalebuyers|malebuyers|topfemalebuyers|topmalebuyers|femalebuyersratio|topfemalebuyersratio|boughtperwishlistratio|boughtperlikeratio|topboughtperwishlistratio|topboughtperlikeratio|totalproductsbought|totalproductswished|totalproductsliked|toptotalproductsbought|toptotalproductswished|toptotalproductsliked|meanproductsbought|meanproductswished|meanproductsliked|topmeanproductsbo

In [0]:
# Save the buyers DataFrame in Delta format under the bronze tables path;
# "overwrite" mode replaces whatever was previously stored at that location.
(
    buyerDF.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/bronze/buyers")
)