In [0]:
%run "/Users/ovidiumtoma@gmail.com/wind_turbine_project/src/wt_transformations"

In [0]:
# Build the transformer around the active Spark session. DataTransformer is
# expected to be in scope from the notebook pulled in by the %run cell above.
data_transformer = DataTransformer(spark)

# Read the Silver-layer source table.
input_df = spark.table("silver_data.wind_turbine_silver")

# Record-level stages, applied strictly in this order through DataFrame.transform.
# Each stage receives the output of the previous one.
df = input_df
for _stage in (
    data_transformer.compute_expected_power,
    data_transformer.detect_zscore_anomalies,  # Standard-deviation-based anomalies.
    data_transformer.detect_record_anomalies,  # Isolation Forest anomalies.
    data_transformer.combine_anomalies,        # Merge the individual anomaly flags.
):
    df = df.transform(_stage)

# Turbine-level anomaly detection; the resulting statistics are persisted
# before they are used for filtering.
turbine_anomaly_df = data_transformer.detect_turbine_anomalies(df)
data_transformer.save_turbine_analysis(turbine_anomaly_df, "gold_turbine_stats")

# Filter the record-level frame using the turbine-level results, then write
# the filtered analysis to the Gold layer.
df_filtered = data_transformer.apply_smart_filtering(df, turbine_anomaly_df)
data_transformer.save_turbine_analysis(df_filtered, "gold_turbine_analysis")

# Summarise the filtered data and write the summary to the Gold layer.
df_summary = data_transformer.calculate_summary_statistics(df_filtered)
data_transformer.save_summary_table(df_summary, "gold_turbine_summary")

print("Data processing pipeline completed successfully!")
