### Final Feature Selection for Models

In [None]:
import pandas as pd
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="data.csv")

### Time-based columns

In [None]:

# Interpret the raw 'timestamp' values as Unix epoch seconds.
df['datetime'] = pd.to_datetime(df['timestamp'], unit='s')

# Split the parsed datetimes into calendar parts, one column per part.
# 'dayofweek' counts from Monday (0) through Sunday (6).
dt_parts = df['datetime'].dt
for column, accessor_attr in (
    ('hour', 'hour'),
    ('month', 'month'),
    ('year', 'year'),
    ('day_of_week', 'dayofweek'),
):
    df[column] = getattr(dt_parts, accessor_attr)


### Seasonal classification based on month

In [None]:
def get_season(month):
    """Return the season label for a calendar month number.

    Months 12, 1 and 2 give 'Winter', 3-5 give 'Spring' and 6-8 give
    'Summer'. Any other value (including 9-11) falls through to 'Autumn'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    # No explicit range check: everything not matched above is 'Autumn'.
    return 'Autumn'

# Label every row with the season that its month belongs to.
season_labels = df['month'].apply(get_season)
df['season'] = season_labels

### Time period classification

In [None]:
def classify_time(hour):
    """Return the part-of-day label for an hour of the day.

    Hours 5-11 are 'Morning', 12-16 'Afternoon', 17-20 'Evening', and
    21-23 together with 0-4 are 'Night'. Values outside the half-open
    range [0, 24), as well as NaN, yield None.
    """
    if not (0 <= hour < 24):
        return None
    if hour < 5 or hour >= 21:
        return 'Night'
    if hour < 12:
        return 'Morning'
    if hour < 17:
        return 'Afternoon'
    return 'Evening'

# Label every row with the part of the day that its hour falls into.
day_part_labels = df['hour'].apply(classify_time)
df['time_of_day'] = day_part_labels

### Encoding categorical features

In [None]:
# Integer codes for the season labels (Spring=0 ... Winter=3).
season_map = dict(zip(("Spring", "Summer", "Autumn", "Winter"), range(4)))

# Integer codes for the time-of-day labels (Morning=0 ... Night=3).
time_map = dict(zip(("Morning", "Afternoon", "Evening", "Night"), range(4)))

# Write each encoded column next to its source label column; labels that are
# missing from the mapping become NaN.
for encoded_col, label_col, codes in (
    ("seasons", "season", season_map),
    ("timeof_day", "time_of_day", time_map),
):
    df[encoded_col] = df[label_col].map(codes)

# The text label columns are no longer needed once encoded.
df.drop(["season", "time_of_day"], axis=1, inplace=True)


### Creating lag features

In [None]:
# Order rows chronologically so that shift()/rolling() look backwards in time.
df = df.sort_values("timestamp").reset_index(drop=True)

# Earlier AQI readings, `lag` rows back.
# NOTE(review): these are row offsets, so they only mean "hours ago" if the
# data is hourly with no gaps -- confirm against the data source.
lags = [1, 2, 3, 6, 12, 24]
for lag in lags:
    df[f"aqi_lag_{lag}"] = df["aqi"].shift(lag)

# Mean of the 24 rows *before* the current one. Shifting by one row first keeps
# the current row's own "aqi" out of the window; "aqi" appears to be the
# prediction target (it is exported alongside these features), so including it
# would leak the target into this feature.
df["aqi_24hr_avg"] = df["aqi"].shift(1).rolling(24).mean()

# The first 24 rows lack a complete history (NaN lags/average). dropna()
# removes them, together with any row that has a missing value in any column.
df = df.dropna().reset_index(drop=True)

In [None]:
# Select the final feature columns plus the "aqi" column. The explicit .copy()
# makes df2 an independent DataFrame, so assigning to its columns later does
# not raise SettingWithCopyWarning or depend on df.
df2 = df[
    [
        "timestamp",
        "seasons",
        "hour",
        "month",
        "year",
        "day_of_week",
        "timeof_day",
        "aqi_lag_1",
        "aqi_lag_2",
        "aqi_lag_3",
        "aqi_lag_6",
        "aqi_lag_12",
        "aqi_lag_24",
        "aqi_24hr_avg",
        "aqi",
    ]
].copy()

In [None]:
# Limit the 24-row average to two decimal places.
df2['aqi_24hr_avg'] = df2['aqi_24hr_avg'].round(decimals=2)

In [None]:
# Write the selected feature table to disk, leaving out the row index.
df2.to_csv(path_or_buf="final_features.csv", index=False)