<h1 align = 'center'>Air Quality</h1>
<br>
<h3 align = 'center'>Author - Naman Talwar</h3>
<br>

In [None]:
# Data handling and numerics.
import pandas as pd
import numpy as np
# Static plotting libraries.
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the air-quality CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer="air-quality-india.csv")

# Preview the first five rows as the cell output.
data.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
data.shape

In [None]:
# Print the column names, non-null counts, dtypes and memory usage.
data.info()

In [None]:
# Summary statistics for the numeric columns, rendered as a styled table whose
# cell backgrounds are shaded with the "Blues" colormap.
(
    data.describe()
    .style
    .background_gradient(cmap="Blues")
)

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Column labels of the DataFrame; the plots below select columns by these names.
data.columns

In [None]:
# Line plot of PM2.5 over time.
#
# The x-axis uses real datetimes rather than the DataFrame's integer row
# index, so that the axis matches its "Timestamp" label and any gaps in the
# record remain visible instead of being closed up.
# pd.to_datetime can assemble datetimes from a frame whose columns are named
# year / month / day / hour, so the existing date-part columns are lower-cased
# and passed in directly.
timestamps = pd.to_datetime(
    data[["Year", "Month", "Day", "Hour"]].rename(columns=str.lower)
)

fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(timestamps, data["PM2.5"], label="PM2.5")

ax.set_xlabel("Timestamp")
ax.set_ylabel("Particulate Matter 2.5")
ax.set_title("PM2.5 With Respect to Time")
ax.legend()

plt.show()

<br>
<h2>Distribution of Particulate Matter by Month and Year</h2>


In [None]:
import plotly.express as px

In [None]:
# 3-D scatter of every reading: Year (x) x Month (y) x PM2.5 (z).
# Marker colour follows the PM2.5 value along a four-stop custom colour scale,
# with the colour-bar range pinned to (-45, 225) instead of being inferred.
fig = px.scatter_3d(
    data,
    x="Year",
    y="Month",
    z="PM2.5",
    color="PM2.5",
    color_continuous_scale=["#00FF00", "#FFC800", "#FF0000", "#B803BF"],
    range_color=(-45, 225),
)

# Scale the markers down and apply the dark theme and font.
fig.update_traces(marker_size=3.5)
fig.update_layout(template="plotly_dark", font_family="PT Sans", font_size=12)
fig.show()

<br>
<h2>Distribution of Particulate Matter by Day and Month</h2>

In [None]:
# 3-D scatter of every reading: Month (x) x Day (y) x PM2.5 (z).
# Marker colour follows the PM2.5 value along a four-stop custom colour scale,
# with the colour-bar range pinned to (-45, 225).
fig = px.scatter_3d(
    data,
    x="Month",
    y="Day",
    z="PM2.5",
    color="PM2.5",
    color_continuous_scale=["#00FF00", "#FFC800", "#FF0000", "#B803BF"],
    range_color=(-45, 225),
)

# Scale the markers down and apply the dark theme and font.
fig.update_traces(marker_size=2.5)
fig.update_layout(template="plotly_dark", font_family="PT Sans", font_size=12)
fig.show()

<br>
<h2>Distribution of Particulate Matter by Day and Month - Hourwise Animation</h2>

In [None]:
# Animated 3-D scatter: Month (x) x Day (y) x PM2.5 (z), one frame per Hour.
# The z-axis is fixed to [0, 250] and the colour range to (-45, 225) so the
# axes and colours stay the same from frame to frame.
fig = px.scatter_3d(
    data,
    x="Month",
    y="Day",
    z="PM2.5",
    range_z=[0, 250],
    color="PM2.5",
    color_continuous_scale=["#00FF00", "#FFC800", "#FF0000", "#B803BF"],
    range_color=(-45, 225),
    animation_frame="Hour",
)

# Scale the markers down and apply the dark theme and font.
fig.update_traces(marker_size=3.5)
fig.update_layout(template="plotly_dark", font_family="PT Sans", font_size=12)
fig.show()

<br>
<h2>Distribution of Particulate Matter by Hour and Day</h2>

In [None]:
# 3-D scatter of every reading: Day (x) x Hour (y) x PM2.5 (z).
# Marker colour follows the PM2.5 value along a four-stop custom colour scale,
# with the colour-bar range pinned to (-45, 225).
fig = px.scatter_3d(
    data,
    x="Day",
    y="Hour",
    z="PM2.5",
    color="PM2.5",
    color_continuous_scale=["#00FF00", "#FFC800", "#FF0000", "#B803BF"],
    range_color=(-45, 225),
)

# Scale the markers down and apply the dark theme and font.
fig.update_traces(marker_size=2.5)
fig.update_layout(template="plotly_dark", font_family="PT Sans", font_size=12)
fig.show()

<br>
<h2>Distribution of Particulate Matter by Day and Year - Hourwise Animation</h2>

In [None]:
# Animated 3-D scatter: Year (x) x Day (y) x PM2.5 (z), one frame per Hour.
# The z-axis is fixed to [0, 250] and the colour-bar range to (-45, 225) so
# the axes and colours stay the same from frame to frame.
fig = px.scatter_3d(
    data,
    x="Year",
    y="Day",
    z="PM2.5",
    range_z=[0, 250],
    color="PM2.5",
    color_continuous_scale=["#00FF00", "#FFC800", "#FF0000", "#B803BF"],
    range_color=(-45, 225),
    animation_frame="Hour",
)

# Scale the markers down and apply the dark theme and font.
fig.update_traces(marker_size=3.5)
fig.update_layout(template="plotly_dark", font_family="PT Sans", font_size=12)
fig.show()

<br>
<h2>Boxplot of Particulate Matter Distribution by Year</h2>


In [None]:
# Notched box plot of PM2.5 for each year, coloured by year, with every
# individual reading drawn alongside its box (points="all").
fig = px.box(
    data,
    x="Year",
    y="PM2.5",
    color="Year",
    points="all",
    notched=True,
    color_discrete_sequence=px.colors.qualitative.Set3,
)

fig.update_layout(template="plotly_dark")
fig.show()

In [None]:
# Histogram of PM2.5 over Year, with bars split by Month colour; the title
# describes the bar heights as sums of PM2.5. Every column is exposed in the
# hover tooltip.
fig = px.histogram(
    data,
    x="Year",
    y="PM2.5",
    color="Month",
    hover_data=data.columns,
    color_discrete_sequence=px.colors.qualitative.Set3,
    title="Sum of PM2.5 Distribution by Year and Month",
)

fig.update_layout(template="plotly_dark")
fig.show()

In [None]:
# Box plot of PM2.5 for each month, coloured by month, with every individual
# reading drawn alongside its box (points="all").
fig = px.box(
    data,
    x="Month",
    y="PM2.5",
    color="Month",
    points="all",
    color_discrete_sequence=px.colors.qualitative.Set3,
    title="PM2.5 Distribution by Month",
)

fig.update_layout(template="plotly_dark")
fig.show()

In [None]:
# Histogram of PM2.5 over Month, with bars split by Year colour; the title
# describes the bar heights as sums of PM2.5. Every column is exposed in the
# hover tooltip.
fig = px.histogram(
    data,
    x="Month",
    y="PM2.5",
    color="Year",
    hover_data=data.columns,
    color_discrete_sequence=px.colors.qualitative.Set3,
    title="Sum of PM2.5 Distribution by Month and Year",
)

fig.update_layout(template="plotly_dark")
fig.show()

In [None]:
# 2-D density contour of PM2.5 readings against Month.
fig = px.density_contour(
    data,
    x="Month",
    y="PM2.5",
    title="PM2.5 Density by Month",
)

# Clip the y-axis to [0, 115] and apply the dark theme in a single layout
# update.
fig.update_layout(yaxis_range=[0, 115], template="plotly_dark")
# Draw filled, labelled contour bands instead of bare contour lines.
fig.update_traces(contours=dict(coloring="fill", showlabels=True))
fig.show()

In [None]:
# Keep only the years 2018-2021 because 2017 and 2022 do not have data for
# every month. df2 is reused by the following cell.
df1 = data.loc[data["Year"].gt(2017)]
df2 = df1.loc[df1["Year"].lt(2022)]

# One figure per calendar month: PM2.5 over Day, with bars split by Year.
for month in range(1, 13):
    month_rows = df2.loc[df2["Month"].eq(month)]

    fig = px.histogram(
        month_rows,
        x="Day",
        y="PM2.5",
        color="Year",
        hover_data=data.columns,
        color_discrete_sequence=px.colors.qualitative.Pastel,
        title=f"Daily Sum of PM2.5 in Month {month} (2018-2021)",
    )

    # The same fixed y-range on every figure keeps the months comparable.
    fig.update_layout(yaxis_range=[0, 11000], template="plotly_dark")

    fig.show()

In [None]:
# One figure per year from 2018 to 2021: PM2.5 over Hour of day, with bars
# split by Month. df2 is the 2018-2021 subset built in the preceding cell.
for year in range(2018, 2022):
    year_rows = df2.loc[df2["Year"].eq(year)]

    fig = px.histogram(
        year_rows,
        x="Hour",
        y="PM2.5",
        color="Month",
        hover_data=data.columns,
        color_discrete_sequence=px.colors.qualitative.Set3,
        title=f"Total Sum of PM2.5 in Year: {year} by Hour",
    )

    # The same fixed y-range on every figure keeps the years comparable.
    fig.update_layout(
        yaxis_range=[0, 21000],
        template="plotly_dark",
        font_family="PT Sans",
        font_size=20,
    )

    fig.show()