In [None]:
import pandas as pd
# Load the air-pollution CSV into the working frame used by every later cell.
csv_path = '/content/air_pollution_data.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the loaded frame; the rendered output abbreviates the middle rows.
df

Unnamed: 0,city,date,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3
0,Ahmedabad,30-11-2020,5,520.71,2.38,16.28,130.18,47.68,65.96,72.13,8.36
1,Ahmedabad,01-12-2020,5,1682.28,7.71,54.84,0.73,21.70,120.95,154.53,27.36
2,Ahmedabad,02-12-2020,5,1815.80,16.54,49.35,0.17,23.84,133.47,172.63,28.12
3,Ahmedabad,03-12-2020,5,2296.45,41.57,40.10,0.00,35.76,150.37,202.15,36.48
4,Ahmedabad,04-12-2020,5,2189.64,23.92,58.95,0.02,28.13,160.79,205.80,40.53
...,...,...,...,...,...,...,...,...,...,...,...
23499,Visakhapatnam,21-05-2023,3,353.81,0.00,2.08,100.14,4.11,37.53,47.09,0.08
23500,Visakhapatnam,22-05-2023,3,380.52,0.00,3.77,82.97,5.07,32.17,43.44,1.74
23501,Visakhapatnam,23-05-2023,3,390.53,0.00,4.28,80.11,5.19,36.01,48.06,1.20
23502,Visakhapatnam,24-05-2023,3,300.41,0.00,1.36,95.84,2.21,30.17,48.89,0.00


In [None]:
# (row count, column count) of the loaded frame.
df.shape

(23504, 11)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

In [None]:



# Predictor columns (pollutant readings) and the regression target (AQI).
features = [
    'co', 'no', 'no2', 'o3',
    'so2', 'pm2_5', 'pm10', 'nh3',
]
X, y = df[features], df['aqi']

In [None]:
# Hold out 20% of the rows for testing. The unscaled folds keep their own names so
# this cell can be re-run safely: the scaler is always fitted on the raw training
# DataFrame, never on arrays that were already scaled by a previous run.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training fold only and reuse its statistics for the test
# fold, so no information from the test rows leaks into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Candidate regressors, keyed by the display name used when reporting scores.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Random Forest', RandomForestRegressor(random_state=42)),
    ('Gradient Boosting', GradientBoostingRegressor(random_state=42)),
    ('XGBoost', XGBRegressor(objective='reg:squarederror', random_state=42)),
])

# Filled by the evaluation loop: display name -> {'MSE': ..., 'R2': ...}.
results = dict()

In [None]:
# Fit every candidate on the training fold and score it on the held-out fold.
for model_name in models:
    model = models[model_name]
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    results[model_name] = dict(MSE=mse, R2=r2)
    print(f"{model_name} - MSE: {mse:.2f}, R2: {r2:.2f}")


# Hyper-parameter candidates for the random-forest grid search.
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

Linear Regression - MSE: 1.35, R2: 0.33
Random Forest - MSE: 0.05, R2: 0.97
Gradient Boosting - MSE: 0.06, R2: 0.97
XGBoost - MSE: 0.06, R2: 0.97


In [None]:
# Exhaustive 5-fold cross-validated search over every combination in param_grid_rf,
# ranked by negative mean squared error. The individual fits are independent, so
# n_jobs=-1 spreads them over all available cores; it does not change which
# candidates are evaluated or how they are scored.
grid_search_rf = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid_rf,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)

print("Best parameters for Random Forest: ", grid_search_rf.best_params_)
# best_estimator_ is the winning configuration refitted on the whole training fold.
best_rf_model = grid_search_rf.best_estimator_

# Evaluate the tuned model on the held-out test fold
y_pred_tuned = best_rf_model.predict(X_test)
mse_tuned = mean_squared_error(y_test, y_pred_tuned)
r2_tuned = r2_score(y_test, y_pred_tuned)
print(f"Tuned Random Forest - MSE: {mse_tuned:.2f}, R2: {r2_tuned:.2f}")

Best parameters for Random Forest:  {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}
Tuned Random Forest - MSE: 0.05, R2: 0.97


In [None]:
def predict_pollution_level(model, scaler):
    """Interactively read eight pollutant readings and predict the AQI.

    Prompts on stdin for CO, NO, NO2, O3, SO2, PM2.5, PM10 and NH3 (in that
    order), scales the resulting single-row sample with ``scaler``, passes it
    to ``model`` and prints the prediction with two decimals.

    Args:
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``. When it carries a
            ``feature_names_in_`` attribute naming exactly the eight columns
            prompted for, the sample is handed over as a labeled DataFrame in
            that column order; otherwise a plain nested list is used.

    Returns:
        The first element of ``model.predict(...)``.

    Raises:
        ValueError: If an entered value cannot be parsed by ``float``.
    """
    # (column name, prompt text) pairs; their order is the positional feature order.
    prompts = (
        ('co', "Enter CO level: "),
        ('no', "Enter NO level: "),
        ('no2', "Enter NO2 level: "),
        ('o3', "Enter O3 level: "),
        ('so2', "Enter SO2 level: "),
        ('pm2_5', "Enter PM2.5 level: "),
        ('pm10', "Enter PM10 level: "),
        ('nh3', "Enter NH3 level: "),
    )
    columns = [name for name, _ in prompts]
    input_features = [[float(input(text)) for _, text in prompts]]

    # A scikit-learn transformer fitted on a DataFrame remembers its column names
    # and warns when it is later given unlabeled data. If the remembered names are
    # exactly the columns collected here, pass a labeled frame arranged in the
    # fitted order so each value reaches the column it belongs to.
    fitted_names = getattr(scaler, 'feature_names_in_', None)
    if (
        fitted_names is not None
        and len(fitted_names) == len(columns)
        and set(fitted_names) == set(columns)
    ):
        labeled = pd.DataFrame(input_features, columns=columns)
        input_features = labeled[list(fitted_names)]

    input_features_scaled = scaler.transform(input_features)

    predicted_aqi = model.predict(input_features_scaled)
    print(f"Predicted AQI: {predicted_aqi[0]:.2f}")

    return predicted_aqi[0]


In [None]:
# Interactive prediction with the tuned random forest and the fitted scaler.
predict_pollution_level(best_rf_model, scaler)

Enter CO level: 5
Enter NO level: 699
Enter NO2 level: 65
Enter O3 level: 43
Enter SO2 level: 65
Enter PM2.5 level: 87
Enter PM10 level: 98
Enter NH3 level: 32
Predicted AQI: 5.00




5.0