In [0]:
%run "/Users/ovidiumtoma@gmail.com/wind_turbine_project/src/wt_transformations"

In [0]:
# Build the project's transformer helper from the notebook's `spark` handle.
# `DataTransformer` is expected to be in scope from the `%run` cell above.
data_transformer = DataTransformer(spark)

# Source of the whole pipeline: the silver-layer wind turbine table.
input_df = spark.table("silver_data.wind_turbine_silver")

# Record-level stages, applied strictly in this order. Each step rebinds `df`
# to the output of the previous one, starting fresh from `input_df`, so
# re-running the cell does not stack transformations.
df = input_df.transform(data_transformer.compute_expected_power)
df = df.transform(data_transformer.detect_zscore_anomalies)    # standard dev
df = df.transform(data_transformer.detect_record_anomalies)    # isolation forest
df = df.transform(data_transformer.combine_anomalies)          # OR logic

# Turbine-level anomaly results are derived from the enriched records and
# written out before any filtering happens.
# NOTE(review): the captured log under this cell reports the first save going to
# gold_data.gold_turbine_anomalies, not "gold_turbine_stats" -- the stored
# output looks stale (or the saver overrides the name); re-run to confirm.
turbine_anomaly_df = data_transformer.detect_turbine_anomalies(df)
data_transformer.save_turbine_analysis(turbine_anomaly_df, "gold_turbine_stats")

# Filter the enriched records using the turbine-level results.
df_filtered = data_transformer.apply_smart_filtering(df, turbine_anomaly_df)

# Persist the filtered records; this reuses the same saver as the stats table,
# only the target name differs.
data_transformer.save_turbine_analysis(df_filtered, "gold_turbine_analysis")

# Aggregate the filtered records into summary statistics.
df_summary = data_transformer.calculate_summary_statistics(df_filtered)

# Persist the summary through its dedicated saver.
data_transformer.save_summary_table(df_summary, "gold_turbine_summary")

# NOTE(review): the captured output ends with a differently worded completion
# message, another sign the stored output predates this version of the cell.
print("Data processing pipeline completed successfully!")

2025-02-21 22:48:28 - INFO - DataTransformer initialized.


INFO:__main__:DataTransformer initialized.


2025-02-21 22:48:29 - INFO - Computing expected power output...


INFO:__main__:Computing expected power output...


2025-02-21 22:48:30 - INFO - Expected power computation complete.


INFO:__main__:Expected power computation complete.


2025-02-21 22:48:30 - INFO - Detecting anomalies using Isolation Forest...


INFO:__main__:Detecting anomalies using Isolation Forest...


2025-02-21 22:48:41 - INFO - Anomaly detection complete.


INFO:__main__:Anomaly detection complete.


2025-02-21 22:48:41 - INFO - Detecting turbines with high anomaly rates...


INFO:__main__:Detecting turbines with high anomaly rates...


2025-02-21 22:48:42 - INFO - Turbine anomaly detection complete.


INFO:__main__:Turbine anomaly detection complete.


2025-02-21 22:48:42 - INFO - Saving results to gold_data.gold_turbine_anomalies


INFO:__main__:Saving results to gold_data.gold_turbine_anomalies


2025-02-21 22:48:48 - INFO - Successfully saved to gold_data.gold_turbine_anomalies


INFO:__main__:Successfully saved to gold_data.gold_turbine_anomalies


2025-02-21 22:48:48 - INFO - Applying smart filtering...


INFO:__main__:Applying smart filtering...


2025-02-21 22:48:49 - INFO - Smart filtering complete.


INFO:__main__:Smart filtering complete.


2025-02-21 22:48:49 - INFO - Saving results to gold_data.gold_turbine_analysis


INFO:__main__:Saving results to gold_data.gold_turbine_analysis


2025-02-21 22:49:01 - INFO - Successfully saved to gold_data.gold_turbine_analysis


INFO:__main__:Successfully saved to gold_data.gold_turbine_analysis


Data transformation completed successfully!
