In [None]:
# =========================
# 02_feature_engineering.ipynb
# =========================

import pandas as pd
import numpy as np

# Read the raw CSV, combine the separate date and time text columns into a
# single "datetime" column, then order the rows by location and timestamp
# and give them a fresh 0..n-1 index.
df = (
    pd.read_csv("delhi-weather-aqi-2025.csv")
    .assign(
        # dayfirst=True makes pandas read ambiguous dates as day/month/year.
        datetime=lambda raw: pd.to_datetime(
            raw["date_ist"] + " " + raw["time_ist"],
            dayfirst=True,
        )
    )
    .sort_values(["location", "datetime"])
    .reset_index(drop=True)
)

# =========================
# Time Features
# =========================

# Calendar components pulled from the combined timestamp. The keyword
# arguments are evaluated in order, so is_weekend can read the dayofweek
# column created just before it.
df = df.assign(
    hour=lambda frame: frame["datetime"].dt.hour,
    dayofweek=lambda frame: frame["datetime"].dt.dayofweek,
    month=lambda frame: frame["datetime"].dt.month,
    # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
    is_weekend=lambda frame: frame["dayofweek"].isin([5, 6]).astype(int),
)

def season(month):
    """Return the season label for a month number.

    Months 12, 1, 2 map to "winter"; 3-5 to "summer"; 6-9 to "monsoon".
    Any other value (including 10 and 11) maps to "post-monsoon".
    """
    labelled_months = (
        ("winter", (12, 1, 2)),
        ("summer", (3, 4, 5)),
        ("monsoon", (6, 7, 8, 9)),
    )
    for label, months in labelled_months:
        if month in months:
            return label
    # Fallback for everything not listed above.
    return "post-monsoon"

# Label every row with its season, derived from the month number.
df["season"] = df["month"].apply(season)

# =========================
# Lag & Rolling Features
# =========================

# Earlier AQI readings (1, 3 and 6 rows back). The shift is done per
# location so values never leak from one location's rows into another's.
df = df.assign(
    **{
        f"aqi_lag_{lag}": df.groupby("location")["aqi_index"].shift(lag)
        for lag in (1, 3, 6)
    }
)

# 3-row rolling means, again computed separately for each location.
# NOTE(review): each window includes the current row's own value; if
# aqi_index is the prediction target, confirm aqi_roll_3 is intended.
for rolled_column, source_column in (
    ("aqi_roll_3", "aqi_index"),
    ("pm25_roll_3", "pm2_5"),
):
    per_location_mean = df.groupby("location")[source_column].rolling(3).mean()
    # The grouped rolling result carries the location key as index level 0;
    # dropping it restores the original row index so the assignment aligns.
    df[rolled_column] = per_location_mean.reset_index(level=0, drop=True)

# =========================
# Interaction Features
# =========================

# Element-wise products of PM2.5 with wind speed and with humidity.
df["wind_pm25"] = df["windspeed_kph"].mul(df["pm2_5"])
df["humidity_pm25"] = df["humidity"].mul(df["pm2_5"])

# =========================
# Encoding
# =========================

# One-hot encode the categorical columns; drop_first=True omits the first
# category of each so the dummy columns are not perfectly collinear.
categorical_columns = ["location", "season"]
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Remove every row that still has a missing value in ANY column. This
# covers the leading rows of each location, where the lag and rolling
# features are undefined, but it is not limited to those columns.
df = df.dropna(axis=0, how="any").reset_index(drop=True)

df.head()

# Persist the engineered dataset (without the index column).
df.to_csv("processed_data.csv", index=False)



Unnamed: 0,date_ist,time_ist,lat,lon,temp_c,humidity,pressure_mb,windspeed_kph,condition_text,description,...,wind_pm25,humidity_pm25,location_Connaught Place,location_Dwarka,location_IGI Airport,location_Okhla Phase III,location_Rohini,season_post-monsoon,season_summer,season_winter
0,01/01/2025,6:00,28.6469,77.316,8.1,99,994.7,4.8,Overcast,WMO Code 3,...,693.12,14295.6,False,False,False,False,False,False,False,True
1,01/01/2025,7:00,28.6469,77.316,8.0,99,995.2,4.0,Overcast,WMO Code 3,...,553.2,13691.7,False,False,False,False,False,False,False,True
2,01/01/2025,8:00,28.6469,77.316,8.4,99,995.7,6.5,Overcast,WMO Code 3,...,886.6,13503.6,False,False,False,False,False,False,False,True
3,01/01/2025,9:00,28.6469,77.316,9.4,97,996.3,8.0,Overcast,WMO Code 3,...,1128.8,13686.7,False,False,False,False,False,False,False,True
4,01/01/2025,10:00,28.6469,77.316,11.3,89,996.3,8.8,Overcast,WMO Code 3,...,1143.12,11561.1,False,False,False,False,False,False,False,True
