In [23]:
import sys
import os
import pandas as pd

# Make the parent of the current working directory importable so the 'src'
# package resolves. The membership check keeps this cell idempotent:
# re-running it no longer appends the same entry to sys.path again.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.preprocessing import load_data, preprocess_data
from src.visualization import (plot_aqi_trend, plot_monthly_avg, 
                               plot_correlation, plot_seasonal_distribution, 
                               plot_forecast_results)
from src.model import train_model, generate_forecast

# Define paths (relative to the current working directory)
DATA_PATH = '../data/final_dataset.csv'
OUTPUT_DIR = '../outputs/figures/'

In [24]:
# Load Data
# A missing dataset is fatal for every later cell, so print the friendly hint
# and then re-raise. Without the re-raise, execution would fall through to
# preprocess_data() with raw_df undefined (NameError on a fresh kernel) or
# with a stale raw_df left over from an earlier run of this cell.
try:
    raw_df = load_data(DATA_PATH)
except FileNotFoundError:
    print(f"Error: Please ensure '{DATA_PATH}' exists.")
    raise
else:
    print("Data loaded successfully.")

# Clean & Preprocess
df = preprocess_data(raw_df)
print(f"Data processed. Shape: {df.shape}")
df.head()

Data loaded successfully.
Data processed. Shape: (1461, 15)


Unnamed: 0,Date,Month,Year,Holidays_Count,Days,PM2.5,PM10,NO2,SO2,CO,Ozone,AQI,Datetime,Season,AQI_Category
0,1,1,2021,0,5,408.8,442.42,160.61,12.95,2.77,43.19,462,2021-01-01,Winter,Severe
1,2,1,2021,0,6,404.04,561.95,52.85,5.18,2.6,16.43,482,2021-01-02,Winter,Severe
2,3,1,2021,1,7,225.07,239.04,170.95,10.93,1.4,44.29,263,2021-01-03,Winter,Poor
3,4,1,2021,0,1,89.55,132.08,153.98,10.42,1.01,49.19,207,2021-01-04,Winter,Poor
4,5,1,2021,0,2,54.06,55.54,122.66,9.7,0.64,48.88,149,2021-01-05,Winter,Moderate


In [25]:
# Produce every exploratory figure; each plot function receives the processed
# frame and the directory in which to save its image.
print("Generating visualizations...")

for make_plot in (plot_aqi_trend,
                  plot_monthly_avg,
                  plot_correlation,
                  plot_seasonal_distribution):
    make_plot(df, OUTPUT_DIR)

print(f"All plots saved to {OUTPUT_DIR}")

Generating visualizations...
Saved plot: ../outputs/figures/aqi_trend.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=monthly_aqi, x='Month', y='AQI', palette='Blues_d')


Saved plot: ../outputs/figures/monthly_aqi.png
Saved plot: ../outputs/figures/correlation_heatmap.png
Saved plot: ../outputs/figures/seasonal_aqi.png
All plots saved to ../outputs/figures/



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='Season', y='AQI', order=['Winter', 'Summer', 'Monsoon', 'Post-Monsoon'], palette='Set2')


In [26]:
# Generate and save the exploratory figures.
# NOTE(review): this cell runs the same four plot calls as the visualization
# cell before it and writes to the same files under OUTPUT_DIR; it looks
# redundant and can probably be removed once confirmed unneeded.
print("Generating visualizations...")

plotters = [plot_aqi_trend, plot_monthly_avg, plot_correlation, plot_seasonal_distribution]
for plotter in plotters:
    plotter(df, OUTPUT_DIR)

print(f"All plots saved to {OUTPUT_DIR}")

Generating visualizations...
Saved plot: ../outputs/figures/aqi_trend.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=monthly_aqi, x='Month', y='AQI', palette='Blues_d')


Saved plot: ../outputs/figures/monthly_aqi.png
Saved plot: ../outputs/figures/correlation_heatmap.png
Saved plot: ../outputs/figures/seasonal_aqi.png
All plots saved to ../outputs/figures/



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='Season', y='AQI', order=['Winter', 'Summer', 'Monsoon', 'Post-Monsoon'], palette='Set2')


In [27]:
# Fit the model on the processed frame; train_model also hands back the
# held-out split and its predictions alongside the metrics dict.
model, metrics, X_test, y_test, y_pred = train_model(df)

# Summarise test-set performance, two decimals per metric
report_lines = [
    "\nModel Performance on Test Set:",
    f"RMSE: {metrics['RMSE']:.2f}",
    f"MAE:  {metrics['MAE']:.2f}",
    f"R2 Score: {metrics['R2']:.2f}",
]
print("\n".join(report_lines))

Training Random Forest Model...

Model Performance on Test Set:
RMSE: 68.70
MAE:  53.91
R2 Score: 0.52


In [28]:
# Forecast one year ahead, starting from the latest Datetime value in df
FORECAST_DAYS = 365  # horizon length handed to generate_forecast
last_date = df['Datetime'].max()
forecast_df = generate_forecast(model, last_date, days=FORECAST_DAYS)

print("Forecast generated for next 12 months.")
forecast_df.head()

Forecast generated for next 12 months.


Unnamed: 0,Datetime,Month,Year,Days,Holidays_Count,Predicted_AQI
0,2025-01-01,1,2025,3,0,281.28029
1,2025-01-02,1,2025,4,0,348.309286
2,2025-01-03,1,2025,5,0,267.296036
3,2025-01-04,1,2025,6,0,319.229905
4,2025-01-05,1,2025,7,0,338.586439


In [29]:
# Plot Forecast
# Passes both the processed history (df) and the forecast frame to the plot
# helper, which saves its figure under OUTPUT_DIR.
plot_forecast_results(df, forecast_df, OUTPUT_DIR)
print("Forecast visualization saved.")

Saved plot: ../outputs/figures/aqi_forecast.png
Forecast visualization saved.


In [30]:
from src.report import generate_pdf_report

# Define paths
REPORT_PATH = '../outputs/reports/Delhi_AQI_Report.pdf'
# Reuse the directory the plotting cells saved into instead of repeating the
# path literal, so the report's figure source cannot drift from OUTPUT_DIR.
FIGURES_DIR = OUTPUT_DIR

# Generate the report
generate_pdf_report(REPORT_PATH, FIGURES_DIR)

PDF Report generated successfully: ../outputs/reports/Delhi_AQI_Report.pdf
