In [0]:
import sys
import os

# Dynamically set up the project root & src path.
# Both directories are registered on sys.path:
#   * the project root, so that package-qualified imports of the form
#     `src.<module>` can resolve;
#   * the `src` directory itself, so modules inside it can also be imported
#     by their bare names.
project_root = os.getcwd()
src_path = os.path.join(project_root, "src")

# The root is inserted first so that, when neither entry is present yet,
# `src_path` still ends up at index 0 of sys.path.
for _import_dir in (project_root, src_path):
    if _import_dir not in sys.path:
        sys.path.insert(0, _import_dir)

# Import the DataCleaner class
from src.wt_cleaning import DataCleaner  # Adjust if your file is named differently

# ---------------------------------------------------------------------------
# Bronze -> Silver pipeline for the wind-turbine datasets.
# `spark` is expected to be provided by the notebook runtime.
# ---------------------------------------------------------------------------

# Fully qualified names of the two Bronze inputs and the Silver output.
ORIGINAL_BRONZE_TABLE = "hive_metastore.bronze_data.original_turbine_bronze"
NEW_BRONZE_TABLE = "hive_metastore.bronze_data.new_turbine_bronze"
SILVER_TABLE_NAME = "wind_turbine_silver"
COMPLETION_MESSAGE = "Cleaning & merging process completed successfully!"

# Step 1: load both Bronze sources as DataFrames.
df_original_turbine_data = spark.read.table(ORIGINAL_BRONZE_TABLE)
df_new_turbine_data = spark.read.table(NEW_BRONZE_TABLE)

# Step 2: build the cleaner around the active Spark session.
cleaner = DataCleaner(spark)

# Step 3: bring the new turbine feed into shape before merging.
# NOTE(review): per the original note this covers power scaling, renaming,
# etc. -- confirm against DataCleaner.transform_turbine_data.
df_new_turbine_transformed = cleaner.transform_turbine_data(df_new_turbine_data)

# Step 4: combine the original feed with the transformed new feed.
# NOTE(review): cleaning is presumably applied inside merge_bronze_data.
df_merged = cleaner.merge_bronze_data(
    df_original_turbine_data,
    df_new_turbine_transformed,
)

# Step 5: hand the merged result to the cleaner for persistence under the
# Silver table name.
cleaner.save_silver_table(df_merged, SILVER_TABLE_NAME)

# Reached only if every step above ran without raising.
print(COMPLETION_MESSAGE)
