In [91]:
import pandas as pd

df = pd.read_csv("../data/raw/city_day.csv")
df.head()


Unnamed: 0,City,Date,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI,AQI_Bucket
0,Ahmedabad,2015-01-01,,,0.92,18.22,17.15,,0.92,27.64,133.36,0.0,0.02,0.0,,
1,Ahmedabad,2015-01-02,,,0.97,15.69,16.46,,0.97,24.55,34.06,3.68,5.5,3.77,,
2,Ahmedabad,2015-01-03,,,17.4,19.3,29.7,,17.4,29.07,30.7,6.8,16.4,2.25,,
3,Ahmedabad,2015-01-04,,,1.7,18.48,17.97,,1.7,18.59,36.08,4.43,10.14,1.0,,
4,Ahmedabad,2015-01-05,,,22.1,21.42,37.76,,22.1,39.33,39.31,7.01,18.89,2.78,,


In [92]:
df.shape


(29531, 16)

In [93]:
df.columns


Index(['City', 'Date', 'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2',
       'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket'],
      dtype='str')

In [94]:
import pandas as pd

df["Date"] = pd.to_datetime(df["Date"])
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month


In [95]:
import plotly.express as px

city_avg = (
    df.groupby("City")["AQI"]
    .mean()
    .reset_index()
    .sort_values("AQI", ascending=False)
)

fig = px.bar(
    city_avg,
    x="City",
    y="AQI",
    title="Average Air Quality Across Indian Cities",
    labels={"AQI": "Average AQI"},
)

fig.update_layout(
    xaxis_tickangle=-45,
    height=550
)

fig.show()


In [96]:
top_cities = city_avg.head(5)["City"]

yearly = (
    df[df["City"].isin(top_cities)]
    .groupby(["Year", "City"])["AQI"]
    .mean()
    .reset_index()
)

fig = px.line(
    yearly,
    x="Year",
    y="AQI",
    color="City",
    markers=True,
    title="Yearly AQI Trends in the Most Polluted Indian Cities"
)

fig.show()


In [97]:
df_no_ahm = df[df["City"] != "Ahmedabad"].copy()


In [98]:
import plotly.express as px

city_avg_no_ahm = (
    df_no_ahm.groupby("City")["AQI"]
    .mean()
    .reset_index()
    .sort_values("AQI", ascending=False)
)

city_avg_no_ahm.head()


Unnamed: 0,City,AQI
9,Delhi,259.487744
20,Patna,240.782042
11,Gurugram,225.123882
18,Lucknow,217.973059
22,Talcher,172.886819


In [99]:
fig = px.bar(
    city_avg_no_ahm,
    x="City",
    y="AQI",
    title="Average Air Quality Across Indian Cities ",
    labels={"AQI": "Average AQI"},
)

fig.update_layout(
    xaxis_tickangle=-45,
    height=550
)

fig.show()
fig.write_html(
    "../plots/major_city.html",
    include_plotlyjs="cdn",
    full_html=False
)


In [100]:
top_cities_no_ahm = city_avg_no_ahm.head(5)["City"]

yearly_no_ahm = (
    df_no_ahm[df_no_ahm["City"].isin(top_cities_no_ahm)]
    .groupby(["Year", "City"])["AQI"]
    .mean()
    .reset_index()
)

fig = px.line(
    yearly_no_ahm,
    x="Year",
    y="AQI",
    color="City",
    markers=True,
    title="Yearly AQI Trends in the Most Polluted Indian Cities "
)

fig.show()
# fig.write_html(
#     "../plots/major_city.html",
#     include_plotlyjs="cdn",
#     full_html=False
# )


In [101]:
fig = px.box(
    df_no_ahm[df_no_ahm["City"].isin(top_cities_no_ahm)],
    x="City",
    y="AQI",
    title="Distribution of AQI Levels Across Major Cities (Ahmedabad Excluded)"
)

fig.show()


In [102]:
fig = px.box(
    df_no_ahm[df_no_ahm["City"].isin(top_cities_no_ahm)],
    x="City",
    y="AQI",
    title="Distribution of AQI Levels Across Major Cities (Ahmedabad Excluded)"
)

fig.show()


In [103]:
fig = px.bar(
    df_no_ahm,
    x="City",
    y="AQI",
    animation_frame="Year",
    range_y=(0, 500),
    title="How Air Quality Changes Across Indian Cities Over Time",
    labels={"AQI": "AQI"}
)

fig.update_layout(
    height=600,
    xaxis_tickangle=-45
)

fig.show()


In [104]:
monthly_wave = (
    df_no_ahm
    .groupby(["Year", "Month"])["AQI"]
    .mean()
    .reset_index()
)

import plotly.express as px

fig = px.line(
    monthly_wave,
    x="Month",
    y="AQI",
    color="Year",
    line_group="Year",
    opacity=0.35,
    title="Seasonal AQI Waves Across Years",
)

fig.update_layout(
    height=550,
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(1, 13)),
        ticktext=["Jan","Feb","Mar","Apr","May","Jun",
                  "Jul","Aug","Sep","Oct","Nov","Dec"]
    ),
    yaxis_title="Average AQI",
    legend_title="Year"
)

fig.show()


TypeError: line() got an unexpected keyword argument 'opacity'

In [None]:
import plotly.express as px

monthly_wave = (
    df_no_ahm
    .groupby(["Year", "Month"])["AQI"]
    .mean()
    .reset_index()
)

fig = px.line(
    monthly_wave,
    x="Month",
    y="AQI",
    color="Year",
    line_group="Year",
    title="Seasonal AQI Waves Across Years"
)


In [None]:
fig.update_traces(opacity=0.35)


In [None]:
monthly_city_wave = (
    df_no_ahm
    .groupby(["City", "Month"])["AQI"]
    .mean()
    .reset_index()
)


In [None]:
import plotly.express as px

fig = px.line(
    monthly_city_wave,
    x="Month",
    y="AQI",
    color="City",
    line_group="City",
    title="Seasonal AQI Wave Patterns Across Indian Cities"
)

fig.update_traces(opacity=0.45)

fig.update_layout(
    height=600,
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(1, 13)),
        ticktext=["Jan","Feb","Mar","Apr","May","Jun",
                  "Jul","Aug","Sep","Oct","Nov","Dec"]
    ),
    yaxis_title="Average AQI",
    legend_title="City"
)

fig.show()


In [None]:
fig.add_vrect(
    x0=10.5, x1=2.5,
    fillcolor="grey",
    opacity=0.08,
    layer="below",
    line_width=0
)



In [None]:
monthly_city_wave = (
    df_no_ahm
    .groupby(["City", "Month"])["AQI"]
    .mean()
    .reset_index()
)


In [None]:
focus_city = "Delhi"

df_focus = monthly_city_wave[monthly_city_wave["City"] == focus_city]
df_context = monthly_city_wave[monthly_city_wave["City"] != focus_city]


In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# Context cities (faded)
for city in df_context["City"].unique():
    city_data = df_context[df_context["City"] == city]
    fig.add_trace(
        go.Scatter(
            x=city_data["Month"],
            y=city_data["AQI"],
            mode="lines",
            line=dict(color="rgba(0,0,0,0.18)", width=1.5),
            hoverinfo="skip",
            showlegend=False
        )
    )

# Focus city (highlighted)
fig.add_trace(
    go.Scatter(
        x=df_focus["Month"],
        y=df_focus["AQI"],
        mode="lines+markers",
        line=dict(color="black", width=3),
        marker=dict(size=6),
        name=focus_city
    )
)


In [None]:
fig.add_vrect(
    x0=10.5, x1=2.5,
    fillcolor="black",
    opacity=0.05,
    layer="below",
    line_width=0
)


In [None]:
fig.update_layout(
    title=dict(
        text="Seasonal Pollution Waves Across Indian Cities<br><span style='font-size:14px;color:#555;'>Delhi highlighted against other urban centres</span>",
        x=0.05
    ),
    height=600,
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        tickmode="array",
        tickvals=list(range(1, 13)),
        ticktext=["Jan","Feb","Mar","Apr","May","Jun",
                  "Jul","Aug","Sep","Oct","Nov","Dec"],
        showgrid=False,
        title=""
    ),
    yaxis=dict(
        title="Average AQI",
        showgrid=True,
        gridcolor="rgba(0,0,0,0.06)"
    ),
    font=dict(
        family="Georgia",
        size=13,
        color="black"
    ),
    margin=dict(l=70, r=40, t=90, b=50)
)

fig.show()
fig.write_html(
    "../visuals/seasonal_aqi_waves.html",
    include_plotlyjs="cdn",
    full_html=False
)



FileNotFoundError: [Errno 2] No such file or directory: 'plots/seasonal_aqi_waves.html'