In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import joblib

# Load data
data = pd.read_csv(r'C:\Aditya\Python\Khushi Project\AQI Files\AnandVihar_AQI_2018_2023.csv')

# Check for missing values and handle them
if data.isnull().any().any():
    # Impute missing values, here using the mean of each column
    imputer = SimpleImputer(strategy='mean')
    data[['PM2.5', 'PM10', 'CO', 'NO2', 'SO2', 'Ozone']] = imputer.fit_transform(data[['PM2.5', 'PM10', 'CO', 'NO2', 'SO2', 'Ozone']])

# Feature scaling
scaler = StandardScaler()
features = scaler.fit_transform(data[['PM2.5', 'PM10', 'CO', 'NO2', 'SO2', 'Ozone']])

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(features, data['Overall AQI'], test_size=0.5, random_state=42)

# Initialize models
rf_model = RandomForestRegressor(n_estimators=50, max_depth=10, random_state=40)
svm_model = SVR(kernel='rbf',C=1.0)
knn_model = KNeighborsRegressor(n_neighbors=5)

# Voting Regressor setup
voting_regressor = VotingRegressor(estimators=[('rf', rf_model), ('svm', svm_model), ('knn', knn_model)])

# Fit models
rf_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
knn_model.fit(X_train, y_train)
voting_regressor.fit(X_train, y_train)

# Evaluate models
def evaluate_model(model, X_test, y_test):
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    rmse = mse ** 0.5
    r2 = r2_score(y_test, predictions)
    return rmse, r2

results = {}
models = ['Random Forest', 'SVM', 'KNN', 'Voting Regressor']
for name, model in zip(models, [rf_model, svm_model, knn_model, voting_regressor]):
    rmse, r2 = evaluate_model(model, X_test, y_test)
    results[name] = {'RMSE': rmse, 'R²': r2}

# Display results
for model_name, metrics in results.items():
    print(f"{model_name} - RMSE: {metrics['RMSE']:.2f}, R²: {metrics['R²']:.2f}")

# Determine the best model based on RMSE
best_model, best_rmse = min([(name, metrics['RMSE']) for name, metrics in results.items()], key=lambda x: x[1])
print(f"Best Model: {best_model} with RMSE: {best_rmse}")




Random Forest - RMSE: 0.01, R²: 1.00
SVM - RMSE: 7.17, R²: 0.96
KNN - RMSE: 5.66, R²: 0.98
Voting Regressor - RMSE: 3.74, R²: 0.99
Best Model: Random Forest with RMSE: 0.005567007450193569


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import joblib

In [13]:
#call the ML model
import joblib
import numpy as np
model = joblib.load('C:\Aditya\Python\Khushi Project\Models\AnandVihar_Model\Random Forest_best_model.pkl')

In [18]:
#make prediction by takimg input from user
features = np.array([[754.0,999,7.77,127.13,19.75,24.91]])
scaled_features = scaler.transform(features)
prediction = model.predict(scaled_features)
print(f"Predicted AQI: {prediction[0]}")


Predicted AQI: 400.0




In [9]:
#make prediction by takimg input from user
features = np.array([[163.25,249.0,1.6275,53.8,4.375,17.6]])
scaled_features = scaler.transform(features)
prediction = model.predict(scaled_features)
print(f"Predicted AQI: {prediction[0]}")


Predicted AQI: 333.4253779069765




In [None]:
#m