In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose


# Notebook-wide plot styling. The seaborn theme is applied first so the
# figure-size override below is not affected by it.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

In [2]:
# NOTE(review): absolute, machine-specific location; this cell only runs as-is
# on the machine that has this exact directory.
path = r"C:\Users\biana\aqi-predictor-10pearls\pearls-aqi-predictor\data\raw_aqi.csv"

# Load the raw AQI table into the frame used by the rest of the notebook.
df = pd.read_csv(path)

In [3]:
# Preview the first rows to check which columns the CSV actually provides.
df.head()

Unnamed: 0,timestamp,pm25,pm10,no2,so2,co,aqi
0,2026-01-09 00:00:00,56.9,59.0,29.1,13.6,663.0,85.35
1,2026-01-09 01:00:00,55.5,57.7,38.0,14.8,1008.0,83.25
2,2026-01-09 02:00:00,60.3,62.9,49.9,16.6,1582.0,90.45
3,2026-01-09 03:00:00,68.5,71.5,55.0,18.7,1948.0,102.75
4,2026-01-09 04:00:00,68.5,71.9,47.0,21.4,1877.0,102.75


In [4]:
def plot_aqi_trends(df, time_col=None):
    """Plot the AQI series with a rolling mean, then its seasonal decomposition.

    The caller's frame is left untouched, so the function can be re-run on the
    same ``df`` any number of times.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``'aqi'`` column plus either a time column or a
        ``DatetimeIndex``.
    time_col : str, optional
        Column holding the observation times. When omitted, ``'date'`` is used
        if present, otherwise ``'timestamp'``; if neither column exists, an
        existing ``DatetimeIndex`` on ``df`` is used instead.

    Raises
    ------
    KeyError
        If ``time_col`` is not a column of ``df``, or if no time column can be
        found and the index is not a ``DatetimeIndex``.
    """
    if time_col is None:
        time_col = next((c for c in ('date', 'timestamp') if c in df.columns), None)
    elif time_col not in df.columns:
        raise KeyError(f"time column {time_col!r} not found in df")

    if time_col is not None:
        # Build a fresh Series instead of set_index(inplace=True): mutating the
        # caller's frame removed the time column and broke every later call.
        aqi = pd.Series(
            df['aqi'].to_numpy(),
            index=pd.DatetimeIndex(pd.to_datetime(df[time_col])),
            name='aqi',
        )
    elif isinstance(df.index, pd.DatetimeIndex):
        aqi = df['aqi']
    else:
        raise KeyError(
            "no time column found: expected 'date' or 'timestamp', "
            "a DatetimeIndex, or an explicit time_col"
        )

    # Rolling windows and the decomposition both assume chronological order.
    aqi = aqi.sort_index()

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.plot(aqi.index, aqi.to_numpy(), label='Hourly AQI', alpha=0.3)
    # 24 samples per window; the "24h" label assumes one row per hour.
    smoothed = aqi.rolling(window=24).mean()
    ax.plot(smoothed.index, smoothed.to_numpy(), label='24h Rolling Avg', linewidth=2)
    ax.set_title('AQI Temporal Trends')
    ax.set_xlabel('Time')
    ax.set_ylabel('AQI')
    ax.legend()
    plt.show()

    # Additive decomposition with a 24-sample seasonal period.
    result = seasonal_decompose(aqi, model='additive', period=24)
    result.plot()
    plt.show()

In [5]:
def plot_correlation(df):
    """Draw a heatmap of pairwise correlations between the numeric columns of ``df``.

    Non-numeric columns are ignored. The diagonal and the upper triangle are
    masked, so each pair of columns appears once.
    """
    plt.figure(figsize=(10, 8))

    numeric_only = df.select_dtypes(include=[np.number])
    corr_matrix = numeric_only.corr()
    # True marks a hidden cell: everything on or above the diagonal.
    hidden_cells = np.triu(np.ones(corr_matrix.shape, dtype=bool))

    heatmap_options = {
        'mask': hidden_cells,
        'annot': True,
        'fmt': ".2f",
        'cmap': 'coolwarm',
        'center': 0,
    }
    sns.heatmap(corr_matrix, **heatmap_options)
    plt.title('Feature Correlation Matrix')
    plt.show()

In [6]:
def plot_distributions(df, pollutants=None):
    """Draw one box plot per pollutant column, side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one column per pollutant.
    pollutants : sequence of str, optional
        Columns to plot. When omitted, the PM2.5 column (``'pm2_5'`` if that
        column exists, otherwise ``'pm25'``) followed by ``'pm10'``, ``'no2'``
        and ``'so2'``.

    Raises
    ------
    ValueError
        If ``pollutants`` is empty.
    KeyError
        If any requested column is missing from ``df``.
    """
    if pollutants is None:
        # The old default only knew 'pm2_5'; the CSV loaded in this notebook
        # names the column 'pm25', so accept whichever spelling is present.
        pm25_col = 'pm2_5' if 'pm2_5' in df.columns else 'pm25'
        pollutants = [pm25_col, 'pm10', 'no2', 'so2']
    else:
        pollutants = list(pollutants)

    if not pollutants:
        raise ValueError("pollutants must name at least one column")
    missing = [col for col in pollutants if col not in df.columns]
    if missing:
        raise KeyError(f"columns not found in df: {missing}")

    # squeeze=False keeps `axes` two-dimensional even for a single pollutant,
    # where plt.subplots would otherwise hand back a bare Axes object.
    fig, axes = plt.subplots(1, len(pollutants), figsize=(20, 5), squeeze=False)
    for ax, col in zip(axes[0], pollutants):
        sns.boxplot(y=df[col], ax=ax, color='skyblue')
        ax.set_title(f'Distribution of {col}')
    plt.tight_layout()
    plt.show()

In [7]:
import shap
from sklearn.ensemble import RandomForestRegressor

def explain_model(X, y, random_state=42):
    """Fit a random forest on ``(X, y)`` and show a SHAP summary plot for it.

    Parameters
    ----------
    X : feature matrix passed to ``RandomForestRegressor.fit`` and to SHAP.
    y : target values passed to ``RandomForestRegressor.fit``.
    random_state : int or None, optional
        Seed for the forest. A fixed default keeps the fitted model, and so the
        SHAP plot, the same from one run to the next; pass ``None`` for the
        previous unseeded behaviour.
    """
    model = RandomForestRegressor(random_state=random_state).fit(X, y)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # Summary plot shows which features drive the AQI up or down
    shap.summary_plot(shap_values, X)

  from .autonotebook import tqdm as notebook_tqdm
