<a href="https://colab.research.google.com/github/kaneshow/AirSense_India/blob/main/AirSense.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import kagglehub

# Step 2: Fetch the dataset through kagglehub.
# The returned value is the local location of the downloaded files; it is
# echoed so the reader can see where the CSVs live.
dataset_path = kagglehub.dataset_download(
    "rohanrao/air-quality-data-in-india"
)
print("Dataset downloaded to:", dataset_path)

# Step 3: Read the daily per-city readings ('city_day.csv') into a DataFrame
# and show the first rows as a quick sanity check.
city_day_csv = "{}/city_day.csv".format(dataset_path)
data = pd.read_csv(city_day_csv)
preview = data.head()
print("First few rows:\n", preview)

# Step 4: Report how many values are missing per column, then impute.
# Every numeric column has its NaNs replaced by that column's mean computed
# over the whole table (all rows pooled, no per-city grouping). Columns that
# are not numeric get no fill value and keep their NaNs.
missing_per_column = data.isna().sum()
print("\nMissing Values:\n", missing_per_column)
column_means = data.mean(numeric_only=True)
data.fillna(value=column_means, inplace=True)

# Step 5: Parse the 'Date' column into datetime values.
raw_dates = data['Date']
data['Date'] = pd.to_datetime(raw_dates)

# Step 6: Select and Rename Relevant Columns
# Rename O3 -> Ozone, keep only the columns used from here on, and take an
# explicit copy. Without the copy, `data` is a frame derived by column
# selection, and adding a column to it (Step 7) is the pattern pandas flags
# with SettingWithCopyWarning. Using the non-inplace rename also leaves the
# loaded frame untouched until `data` is rebound.
features = ['Date', 'PM2.5', 'PM10', 'NO2', 'CO', 'Ozone']
data = data.rename(columns={'O3': 'Ozone'})[features].copy()

# Step 7: Feature Engineering - Create AQI (Simple Average)
# This is the unweighted mean of PM2.5, PM10 and NO2 only; CO and Ozone are
# kept as columns but do not enter the value. The 'AQI' column that came with
# the CSV is not in `features`, so it has already been dropped above and this
# assignment creates a new column.
data['AQI'] = (data['PM2.5'] + data['PM10'] + data['NO2']) / 3

# Step 8: Data Integrity Checks
# Column dtypes: confirms 'Date' is datetime and the readings are numeric.
print("\nData Types:\n", data.dtypes)

# List every row that has at least one negative pollutant reading.
# Rows are filtered with a boolean Series (one flag per row) so the complete
# record is shown. Indexing with a boolean DataFrame instead masks cell by
# cell: every non-negative reading, and the Date/AQI columns that are not in
# the mask, would be blanked to NaN/NaT in the printed result.
pollutant_cols = ['PM2.5', 'PM10', 'NO2', 'CO', 'Ozone']
has_negative = (data[pollutant_cols] < 0).any(axis=1)
print("\nNegative Values Detected:\n", data[has_negative])

# Step 10: Outlier Detection using IQR (PM2.5)
# A row is an outlier when its PM2.5 lies more than 1.5 * IQR below the first
# quartile or above the third quartile.
pm25 = data['PM2.5']
Q1, Q3 = pm25.quantile(0.25), pm25.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = pm25.lt(lower_fence) | pm25.gt(upper_fence)
outliers = data[is_outlier]
print("\nPM2.5 Outliers:\n", outliers)

Dataset downloaded to: /kaggle/input/air-quality-data-in-india
First few rows:
         City        Date  PM2.5  PM10     NO    NO2    NOx  NH3     CO    SO2  \
0  Ahmedabad  2015-01-01    NaN   NaN   0.92  18.22  17.15  NaN   0.92  27.64   
1  Ahmedabad  2015-01-02    NaN   NaN   0.97  15.69  16.46  NaN   0.97  24.55   
2  Ahmedabad  2015-01-03    NaN   NaN  17.40  19.30  29.70  NaN  17.40  29.07   
3  Ahmedabad  2015-01-04    NaN   NaN   1.70  18.48  17.97  NaN   1.70  18.59   
4  Ahmedabad  2015-01-05    NaN   NaN  22.10  21.42  37.76  NaN  22.10  39.33   

       O3  Benzene  Toluene  Xylene  AQI AQI_Bucket  
0  133.36     0.00     0.02    0.00  NaN        NaN  
1   34.06     3.68     5.50    3.77  NaN        NaN  
2   30.70     6.80    16.40    2.25  NaN        NaN  
3   36.08     4.43    10.14    1.00  NaN        NaN  
4   39.31     7.01    18.89    2.78  NaN        NaN  

Missing Values:
 City              0
Date              0
PM2.5          4598
PM10          11140
NO         