In [0]:
# Root of the silver layer mount; every table below lives in a sub-folder of it.
SILVER_ROOT = 'dbfs:/mnt/walmartsilver'


def read_silver_table(table_folder):
    """Load one Delta table from the silver mount.

    Args:
        table_folder: Name of the sub-folder under ``SILVER_ROOT`` that
            holds the Delta table (e.g. ``'Train Silver'``).

    Returns:
        The DataFrame produced by ``spark.read.format('delta').load(...)``.
    """
    # No 'inferSchema' / 'header' options here: those are CSV reader options.
    # A Delta table stores its schema in the transaction log, so they have no
    # effect on a Delta read and only suggest the source is CSV.
    return spark.read.format('delta').load(f'{SILVER_ROOT}/{table_folder}/')


train_df = read_silver_table('Train Silver')
test_df = read_silver_table('Test Silver')
features_df = read_silver_table('Features Silver')
stores_df = read_silver_table('Stores Silver')

In [0]:
from pyspark.sql.functions import col

# Step 1 (train and test): attach store attributes by "Store".
# Left joins, so every row of the sales-side DataFrame is kept.
train_stores_df = train_df.join(stores_df, ["Store"], "left")
test_stores_df = test_df.join(stores_df, ["Store"], "left")

# Step 2 (train and test): attach the features that match on
# "Store", "Date" and "IsHoliday", again with a left join.
final_train_df = train_stores_df.join(
    features_df, ["Store", "Date", "IsHoliday"], "left"
)
final_test_df = test_stores_df.join(
    features_df, ["Store", "Date", "IsHoliday"], "left"
)



In [0]:
# Print the schema of the joined training DataFrame to check the columns
# contributed by the stores and features joins.
final_train_df.printSchema()

In [0]:
# Print the schema of the joined test DataFrame to check the columns
# contributed by the stores and features joins.
final_test_df.printSchema()

In [0]:
# List what currently exists under the "Final Silver" output folder; the
# returned listing is this cell's displayed result.
# NOTE(review): presumably this raises if the folder has not been created yet — confirm.
dbutils.fs.ls("dbfs:/mnt/walmartsilver/Final Silver/")

In [0]:
base_path = "/mnt/walmartsilver/Final Silver/"


def write_clean_delta(df, folder_name):
    """Replace the Delta table stored at ``base_path/folder_name`` with ``df``.

    Args:
        df: DataFrame to persist.
        folder_name: Sub-folder of ``base_path`` that holds the table.

    The replace is done by Delta's own ``overwrite`` mode rather than by
    deleting the folder contents first:

    * the target folder does not have to exist beforehand, so the first run
      no longer fails while listing a missing directory;
    * the old table is not removed before the new write happens, so a failed
      write does not leave the folder empty, and the ``_delta_log`` history
      is kept.

    ``overwriteSchema`` is enabled so a DataFrame whose schema differs from
    the stored table still replaces it, as the delete-then-write approach did.
    """
    # Tolerate base_path with or without a trailing slash.
    path = f"{base_path.rstrip('/')}/{folder_name}"
    (
        df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .save(path)
    )

# Persist the joined train and test DataFrames, each into its own folder.
write_clean_delta(df=final_train_df, folder_name="Train Silver")
write_clean_delta(df=final_test_df, folder_name="Test Silver")