In [7]:
import pandas as pd

df = pd.read_csv("../data/raw/AQI_2020_to_2025_October.csv")

df = df.rename(columns={
    "AQI Value": "AQI"
})

df.head()

Unnamed: 0,Date,S.No,City,Air Quality,AQI,Prominent Pollutant
0,01-01-2020,January,Ahmedabad,Moderate,178,"PM 2.5, PM 10"
1,01-01-2020,January,Panchkula,Moderate,110,"PM 2.5, PM 10"
2,01-01-2020,January,Palwal,Very Poor,385,"PM 2.5, PM 10"
3,01-01-2020,January,Pali,Moderate,154,"PM 2.5, PM 10"
4,01-01-2020,January,Noida,Severe,438,"PM 2.5, PM 10"


In [8]:
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

df = df.dropna(subset=["Date", "City", "AQI"])

df = df[(df["AQI"] > 0) & (df["AQI"] < 1000)]


In [9]:
df["year"] = df["Date"].dt.year
df["month"] = df["Date"].dt.month


In [10]:
df["year"].value_counts().sort_index()
df["City"].nunique()


304

In [12]:
lockdown = df[
    (df["year"] == 2020) &
    (df["month"].between(3,5))
]

post = df[
    df["year"].between(2021, 2022)
]


In [13]:
lock_avg = lockdown.groupby("City")["AQI"].mean().reset_index(name="aqi_lock")

post_avg = post.groupby("City")["AQI"].mean().reset_index(name="aqi_post")

compare = lock_avg.merge(post_avg, on="City")


In [14]:
compare["aqi_change"] = compare["aqi_post"] - compare["aqi_lock"]


In [15]:
compare = compare[compare["City"] != "Ahmedabad"]


In [17]:
counts = df["City"].value_counts()

valid = counts[counts > 100].index

compare = compare[compare["City"].isin(valid)]


In [18]:
compare = compare.sort_values("aqi_change", ascending=False)

compare.head(15)


Unnamed: 0,City,aqi_lock,aqi_post,aqi_change
116,Thane,101.1875,160.158879,58.971379
111,Singrauli,147.441176,194.226148,46.784972
9,Ankleshwar,104.782609,145.37037,40.587762
104,Rohtak,136.393939,176.524715,40.130775
79,Manesar,149.966667,189.008065,39.041398
70,Kota,86.515152,125.453571,38.93842
120,Udaipur,80.457143,119.031802,38.574659
11,Aurangabad,67.636364,104.568182,36.931818
23,Chandigarh,80.555556,117.219512,36.663957
45,Hajipur,108.047619,142.105263,34.057644


In [19]:
import plotly.express as px

fig = px.bar(
    compare.head(15),
    x="City",
    y="aqi_change",
    color="aqi_post",
    title="Post-COVID AQI Rebound by City (Higher = Worse Recovery)"
)

fig.update_layout(height=520)

fig.show()

fig.write_html("plots/city_recovery_post_covid.html")


In [20]:
city_trend = (
    df.groupby(["City","year"])["AQI"]
      .mean()
      .reset_index()
)

top_cities = compare.head(8)["City"]

fig = px.line(
    city_trend[city_trend["City"].isin(top_cities)],
    x="year",
    y="AQI",
    color="City",
    markers=True,
    title="City AQI Trends — Recovery Comparison"
)

fig.show()

fig.write_html("plots/city_year_trend.html")
