<a href="https://colab.research.google.com/github/sid007es/Air-Quality-Data-Analysis-of-Bhopal/blob/main/AQI_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "plotly_white"

data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/datasets/AQI data.csv")
print(data.head())

               Date   PM2.5    PM10    NO    NO2    NOx    NH3    SO2    CO  \
0  01-02-2021 01:00  105.75  294.25  5.82  67.52  40.65  18.90  18.57  1.42   
1  01-02-2021 02:00   75.50  225.25  5.53  67.00  40.12  18.70  19.70  1.25   
2  01-02-2021 03:00   67.00  221.25  4.00  34.85  21.77  16.30  11.12  0.66   
3  01-02-2021 04:00   34.75   75.00  3.25  19.60  13.10  14.07  12.75  0.50   
4  01-02-2021 05:00   28.50   59.25  3.55  22.00  14.57  13.82  22.52  0.56   

   Ozone  
0  30.55  
1  28.92  
2  65.80  
3  81.60  
4  72.02  


In [3]:
data['Date'] = pd.to_datetime(data['Date'])

In [4]:
print(data.describe())

            PM2.5        PM10          NO         NO2         NOx         NH3  \
count  672.000000  672.000000  672.000000  672.000000  672.000000  672.000000   
mean    49.071429  137.916667    8.322411   29.636845   23.528646   12.700967   
std     25.121951   75.663647   10.316222   24.469955   20.168366    6.686693   
min     12.750000    0.000000    0.000000    0.000000    0.000000    0.000000   
25%     31.500000   92.750000    4.450000   14.637500   12.270000    9.027500   
50%     44.250000  121.250000    5.835000   22.675000   17.465000   12.135000   
75%     59.812500  165.187500    8.100000   36.805000   26.120000   16.050000   
max    175.000000  598.250000  134.850000  144.150000  174.220000   59.930000   

              SO2          CO       Ozone  
count  672.000000  672.000000  672.000000  
mean    20.929390    0.994464   73.195372  
std     18.055937    0.712429   33.761936  
min      0.000000    0.000000    4.530000  
25%     13.342500    0.580000   48.957500  
50%   

In [7]:
# time series plot for each air pollutant
fig = go.Figure()

for pollutant in ['CO', 'NO', 'NO2', 'Ozone', 'SO2', 'PM2.5', 'PM10', 'NH3']:
    fig.add_trace(go.Scatter(x=data['Date'], y=data[pollutant], mode='lines',
                             name=pollutant))

fig.update_layout(title='Time Series Analysis of Air Pollutants in Bhopal',
                  xaxis_title='Date', yaxis_title='Concentration (µg/m³)')
fig.show()

In [9]:
# Define AQI breakpoints and corresponding AQI values
aqi_breakpoints = [
    (0, 12.0, 50), (12.1, 35.4, 100), (35.5, 55.4, 150),
    (55.5, 150.4, 200), (150.5, 250.4, 300), (250.5, 350.4, 400),
    (350.5, 500.4, 500)
]

def calculate_aqi(pollutant_name, concentration):
    for low, high, aqi in aqi_breakpoints:
        if low <= concentration <= high:
            return aqi
    return None

def calculate_overall_aqi(row):
    aqi_values = []
    pollutants = ['CO', 'NO', 'NO2', 'Ozone', 'SO2', 'PM2.5', 'PM10', 'NH3']
    for pollutant in pollutants:
        aqi = calculate_aqi(pollutant, row[pollutant])
        if aqi is not None:
            aqi_values.append(aqi)

    if not aqi_values:
        return None
    return max(aqi_values)

# Calculate AQI for each row
data['AQI'] = data.apply(calculate_overall_aqi, axis=1)

# Define AQI categories
aqi_categories = [
    (0, 50, 'Good'), (51, 100, 'Moderate'), (101, 150, 'Unhealthy for Sensitive Groups'),
    (151, 200, 'Unhealthy'), (201, 300, 'Very Unhealthy'), (301, 500, 'Hazardous')
]

def categorize_aqi(aqi_value):
    for low, high, category in aqi_categories:
        if low <= aqi_value <= high:
            return category
    return None

# Categorize AQI
data['AQI Category'] = data['AQI'].apply(categorize_aqi)
print(data.head())

                 Date   PM2.5    PM10    NO    NO2    NOx    NH3    SO2    CO  \
0 2021-01-02 01:00:00  105.75  294.25  5.82  67.52  40.65  18.90  18.57  1.42   
1 2021-01-02 02:00:00   75.50  225.25  5.53  67.00  40.12  18.70  19.70  1.25   
2 2021-01-02 03:00:00   67.00  221.25  4.00  34.85  21.77  16.30  11.12  0.66   
3 2021-01-02 04:00:00   34.75   75.00  3.25  19.60  13.10  14.07  12.75  0.50   
4 2021-01-02 05:00:00   28.50   59.25  3.55  22.00  14.57  13.82  22.52  0.56   

   Ozone    AQI    AQI Category  
0  30.55  400.0       Hazardous  
1  28.92  300.0  Very Unhealthy  
2  65.80  300.0  Very Unhealthy  
3  81.60  200.0       Unhealthy  
4  72.02  200.0       Unhealthy  


In [15]:
# AQI over time
fig = px.bar(data, x="Date", y="AQI",
             title="AQI of Bhopal in January")
fig.update_xaxes(title="Date")
fig.update_yaxes(title="AQI")
fig.show()

In [16]:
fig = px.histogram(data, x="Date",
                    color="AQI Category",
                    title="AQI Category Distribution Over Time")
fig.update_xaxes(title="Date")
fig.update_yaxes(title="Count")
fig.show()

In [17]:
# Define pollutants and their colors
pollutants = ["CO", "NO", "NO2", "Ozone", "SO2", "PM2.5", "PM10", "NH3"]
pollutant_colors = px.colors.qualitative.Plotly

# Calculate the sum of pollutant concentrations
total_concentrations = data[pollutants].sum()

# Create a DataFrame for the concentrations
concentration_data = pd.DataFrame({
    "Pollutant": pollutants,
    "Concentration": total_concentrations
})

# Create a donut plot for pollutant concentrations
fig = px.pie(concentration_data, names="Pollutant", values="Concentration",
             title="Pollutant Concentrations in Bhopal",
             hole=0.4, color_discrete_sequence=pollutant_colors)

# Update layout for the donut plot
fig.update_traces(textinfo="percent+label")
fig.update_layout(legend_title="Pollutant")

# Show the donut plot
fig.show()

In [18]:
# Correlation Between Pollutants
correlation_matrix = data[pollutants].corr()
fig = px.imshow(correlation_matrix, x=pollutants,
                 y=pollutants, title="Correlation Between Pollutants")
fig.show()

In [19]:
# Extract the hour from the date
data['Hour'] = pd.to_datetime(data['Date']).dt.hour

# Calculate hourly average AQI
hourly_avg_aqi = data.groupby('Hour')['AQI'].mean().reset_index()

# Create a line plot for hourly trends in AQI
fig = px.line(hourly_avg_aqi, x='Hour', y='AQI',
              title='Hourly Average AQI Trends in Delhi (Jan 2023)')
fig.update_xaxes(title="Hour of the Day")
fig.update_yaxes(title="Average AQI")
fig.show()

In [20]:
# Average AQI by Day of the Week
data['Day_of_Week'] = data['Date'].dt.day_name()
average_aqi_by_day = data.groupby('Day_of_Week')['AQI'].mean().reindex(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
fig = px.bar(average_aqi_by_day, x=average_aqi_by_day.index, y='AQI',
              title='Average AQI by Day of the Week')
fig.update_xaxes(title="Day of the Week")
fig.update_yaxes(title="Average AQI")
fig.show()