In [31]:
import pandas as pd
import numpy as np
import plotly.express as px


In [32]:
df = pd.read_csv("../data/raw/city_day.csv")
df.head()


Unnamed: 0,City,Date,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI,AQI_Bucket
0,Ahmedabad,2015-01-01,,,0.92,18.22,17.15,,0.92,27.64,133.36,0.0,0.02,0.0,,
1,Ahmedabad,2015-01-02,,,0.97,15.69,16.46,,0.97,24.55,34.06,3.68,5.5,3.77,,
2,Ahmedabad,2015-01-03,,,17.4,19.3,29.7,,17.4,29.07,30.7,6.8,16.4,2.25,,
3,Ahmedabad,2015-01-04,,,1.7,18.48,17.97,,1.7,18.59,36.08,4.43,10.14,1.0,,
4,Ahmedabad,2015-01-05,,,22.1,21.42,37.76,,22.1,39.33,39.31,7.01,18.89,2.78,,


In [33]:
df.columns


Index(['City', 'Date', 'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2',
       'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket'],
      dtype='str')

In [34]:
df = df[df["City"].str.lower() != "ahmedabad"]


In [35]:
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

df = df.dropna(subset=["Date", "City", "AQI"])


In [36]:
df["year"] = df["Date"].dt.year
df["month"] = df["Date"].dt.month


In [37]:
df["year"].value_counts().sort_index()


year
2015    1564
2016    2456
2017    3165
2018    5367
2019    6719
2020    4245
Name: count, dtype: int64

In [38]:
# Pre-COVID baseline: same months (Mar–May), 2015–2019
pre_covid = df[
    (df["year"].between(2015, 2019)) &
    (df["month"].between(3, 5))
]

# COVID lockdown: Mar–May 2020
covid = df[
    (df["year"] == 2020) &
    (df["month"].between(3, 5))
]


In [39]:
pre_covid_city = (
    pre_covid
    .groupby("City", as_index=False)["AQI"]
    .mean()
    .rename(columns={"AQI": "aqi_pre_covid"})
)

covid_city = (
    covid
    .groupby("City", as_index=False)["AQI"]
    .mean()
    .rename(columns={"AQI": "aqi_covid"})
)


In [40]:
city_compare = pre_covid_city.merge(
    covid_city,
    on="City",
    how="inner"
)

city_compare["aqi_change"] = (
    city_compare["aqi_covid"] - city_compare["aqi_pre_covid"]
)

city_compare.sort_values("aqi_change").head()


Unnamed: 0,City,aqi_pre_covid,aqi_covid,aqi_change
5,Delhi,265.295652,132.706522,-132.58913
6,Gurugram,231.615625,126.836957,-104.778668
12,Lucknow,213.255924,120.086957,-93.168968
14,Patna,206.798046,142.826087,-63.971959
4,Chennai,121.362587,70.902174,-50.460413


In [41]:
city_coords = {
    "Delhi": (28.6139, 77.2090),
    "Mumbai": (19.0760, 72.8777),
    "Bengaluru": (12.9716, 77.5946),
    "Chennai": (13.0827, 80.2707),
    "Kolkata": (22.5726, 88.3639),
    "Hyderabad": (17.3850, 78.4867),
    "Pune": (18.5204, 73.8567),
    "Ahmedabad": (23.0225, 72.5714),
    "Jaipur": (26.9124, 75.7873),
    "Lucknow": (26.8467, 80.9462)
}


In [42]:
coords_df = (
    pd.DataFrame.from_dict(city_coords, orient="index",
                           columns=["lat", "lon"])
    .reset_index()
    .rename(columns={"index": "City"})
)

city_compare = city_compare.merge(coords_df, on="City", how="inner")
city_compare.head()


Unnamed: 0,City,aqi_pre_covid,aqi_covid,aqi_change,lat,lon
0,Bengaluru,101.841743,77.576087,-24.265656,12.9716,77.5946
1,Chennai,121.362587,70.902174,-50.460413,13.0827,80.2707
2,Delhi,265.295652,132.706522,-132.58913,28.6139,77.209
3,Hyderabad,115.924706,74.467391,-41.457315,17.385,78.4867
4,Jaipur,141.956522,100.282609,-41.673913,26.9124,75.7873


In [43]:
fig_pre = px.scatter_mapbox(
    city_compare,
    lat="lat",
    lon="lon",
    size="aqi_pre_covid",
    color="aqi_pre_covid",
    color_continuous_scale="RdYlGn_r",
    size_max=40,
    zoom=4,
    hover_name="City",
    hover_data={"aqi_pre_covid": True},
    title="India Air Quality Before COVID (2015–2019 baseline)"
)

fig_pre.update_layout(
    mapbox_style="carto-positron",
    height=600,
    margin={"r":0,"t":50,"l":0,"b":0}
)

fig_pre.show()



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [44]:
fig_covid = px.scatter_mapbox(
    city_compare,
    lat="lat",
    lon="lon",
    size="aqi_covid",
    color="aqi_covid",
    color_continuous_scale="RdYlGn_r",
    size_max=40,
    zoom=4,
    hover_name="City",
    hover_data={"aqi_covid": True},
    title="India Air Quality During COVID Lockdown (Mar–May 2020)"
)

fig_covid.update_layout(
    mapbox_style="carto-positron",
    height=600,
    margin={"r":0,"t":50,"l":0,"b":0}
)

fig_covid.show()



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [45]:
fig_diff = px.scatter_mapbox(
    city_compare,
    lat="lat",
    lon="lon",
    size=abs(city_compare["aqi_change"]),
    color="aqi_change",
    color_continuous_scale="RdBu",
    size_max=40,
    zoom=4,
    hover_name="City",
    hover_data={
        "aqi_pre_covid": True,
        "aqi_covid": True,
        "aqi_change": True
    },
    title="Change in AQI During COVID Lockdown (Negative = Improvement)"
)

fig_diff.update_layout(
    mapbox_style="carto-positron",
    height=600,
    margin={"r":0,"t":50,"l":0,"b":0}
)

fig_diff.show()



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [46]:
fig_pre.write_html(
    "../plots/india_pre_covid_map.html",
    include_plotlyjs="cdn",
    full_html=False
)

fig_covid.write_html(
    "../plots/india_covid_map.html",
    include_plotlyjs="cdn",
    full_html=False
)

fig_diff.write_html(
    "../plots/india_covid_change_map.html",
    include_plotlyjs="cdn",
    full_html=False
)


In [47]:
import pandas as pd
import plotly.express as px

# Ensure Date is datetime
df["Date"] = pd.to_datetime(df["Date"])

# Define periods
pre_covid = df[(df["Date"] >= "2015-01-01") & (df["Date"] <= "2019-12-31")]
covid = df[(df["Date"] >= "2020-03-01") & (df["Date"] <= "2020-05-31")]


In [48]:
pre_avg = (
    pre_covid
    .groupby("City")["AQI"]
    .mean()
    .reset_index(name="aqi_pre")
)

covid_avg = (
    covid
    .groupby("City")["AQI"]
    .mean()
    .reset_index(name="aqi_covid")
)

# Merge
city_change = pre_avg.merge(covid_avg, on="City", how="inner")

# AQI change (positive = improvement)
city_change["aqi_drop"] = city_change["aqi_pre"] - city_change["aqi_covid"]

# Sort for readability
city_change = city_change.sort_values("aqi_drop", ascending=False)


In [49]:
fig = px.bar(
    city_change,
    x="City",
    y="aqi_drop",
    color="aqi_pre",
    color_continuous_scale="RdYlGn_r",
    labels={
        "aqi_drop": "AQI Improvement During Lockdown",
        "aqi_pre": "Pre-COVID Average AQI"
    },
    title="City-wise Improvement in AQI During COVID Lockdown"
)

fig.update_layout(
    height=550,
    xaxis_title="City",
    yaxis_title="AQI Reduction (Higher = Cleaner Air)",
    coloraxis_colorbar=dict(title="Pre-COVID AQI"),
    hovermode="x unified"
)

fig.show()


In [51]:
city_change.to_csv(
    "../data/processed/covid_city_aqi_change_2015_2020.csv",
    index=False
)
