<a href="https://colab.research.google.com/github/panzeh1r/air-quality-index/blob/main/air_quality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the air-quality measurements CSV from the Colab runtime into a DataFrame
csv_path = "/content/Air_Quality.csv"
df = pd.read_csv(csv_path)

In [41]:
# Function to convert NO2 levels to Air Quality Index (AQI)
def convert_to_hki(no2_level):
    """Map a numeric NO2 reading to a categorical air-quality label.

    The bands are contiguous and upper-inclusive, so fractional readings
    that fall between two integer edges (e.g. 100.5) are still classified:

        0    <= level <= 100   -> 'Good'
        100  <  level <= 200   -> 'Moderate'
        200  <  level <= 500   -> 'Unhealthy'
        500  <  level <= 1000  -> 'Hazardous'
        anything else          -> 'Unknown'

    Args:
        no2_level: The reading to classify (int or float).

    Returns:
        One of 'Good', 'Moderate', 'Unhealthy', 'Hazardous' or 'Unknown'.
        Negative values, values above 1000 and NaN yield 'Unknown'.

    NOTE(review): the thresholds are this notebook's own; they have not been
    checked against an official AQI breakpoint table -- confirm before reuse.
    """
    # NaN fails every comparison, so it is rejected here together with
    # negative and >1000 readings.
    if not 0 <= no2_level <= 1000:
        return 'Unknown'
    if no2_level <= 100:
        return 'Good'
    if no2_level <= 200:
        return 'Moderate'
    if no2_level <= 500:
        return 'Unhealthy'
    return 'Hazardous'

# Label every measurement with its air-quality category, derived element-wise
# from the numeric reading stored in 'Data Value'
df['AQI'] = df['Data Value'].map(convert_to_hki)

# Preview the first five rows of the labelled frame
print(df.head())


   Unique ID  Indicator ID                    Name Measure Measure Info  \
0     172653           375  Nitrogen dioxide (NO2)    Mean          ppb   
1     172585           375  Nitrogen dioxide (NO2)    Mean          ppb   
2     336637           375  Nitrogen dioxide (NO2)    Mean          ppb   
3     336622           375  Nitrogen dioxide (NO2)    Mean          ppb   
4     172582           375  Nitrogen dioxide (NO2)    Mean          ppb   

  Geo Type Name  Geo Join ID                      Geo Place Name  \
0         UHF34          203  Bedford Stuyvesant - Crown Heights   
1         UHF34          203  Bedford Stuyvesant - Crown Heights   
2         UHF34          204                       East New York   
3         UHF34          103                  Fordham - Bronx Pk   
4         UHF34          104                Pelham - Throgs Neck   

           Time Period  Start_Date  Data Value  Message   HKI   AQI  
0  Annual Average 2011  12/01/2010       25.30      NaN  Good  Good  


In [42]:
# Target: the categorical label; feature matrix: the numeric reading only
# (double brackets keep it a one-column DataFrame rather than a Series).
# NOTE(review): 'AQI' is computed directly from 'Data Value', the sole feature,
# so a near-perfect score here is expected rather than informative.
y = df['AQI']
X = df[['Data Value']]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and fit the random forest in one step (fit returns the estimator itself)
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out rows and report the fraction classified correctly
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test set accuracy:", accuracy)

Test set accuracy: 0.999691738594328


In [43]:
# Getting the region name from the user; surrounding whitespace is dropped so a
# stray space typed before/after the name does not cause a spurious miss
region_name = input("Please enter the region name: ").strip()

# Selecting the rows for that region, ignoring letter case and padding on both
# sides of the comparison. Rows with a missing place name never match.
# NOTE(review): assumes 'Geo Place Name' holds strings -- confirm for other data files.
region_mask = df['Geo Place Name'].str.strip().str.casefold() == region_name.casefold()

# Retrieving the time period and AQI label for the specified region, all dates
no2_levels_all_dates = df.loc[region_mask, ['Time Period', 'AQI']]

# Reporting either the matches or a not-found message
if no2_levels_all_dates.empty:
    print("NO2 levels not found for the specified region..")
else:
    print(f"AQI in the {region_name}:")
    print(no2_levels_all_dates)

Please enter the region name: Bedford Stuyvesant - Crown Heights
AQI in the Bedford Stuyvesant - Crown Heights:
               Time Period   AQI
0      Annual Average 2011  Good
1      Annual Average 2009  Good
27          Winter 2009-10  Good
116         Winter 2010-11  Good
117            Summer 2009  Good
...                    ...   ...
15263  Annual Average 2015  Good
15286  Annual Average 2020  Good
15289          Summer 2014  Good
15365       Winter 2013-14  Good
15371  Annual Average 2019  Good

[242 rows x 2 columns]
