In [1]:
#XGBOOST
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
# --- Data preparation ------------------------------------------------------
# Rows without a measured AQI cannot be used as regression targets, so they
# are dropped before anything else.
city_day_data = pd.read_csv("city_day.csv")
city_day_data = city_day_data.dropna(subset=["AQI"])

# XGBoost needs integer class ids; `le` is kept so that predictions can be
# mapped back to the original bucket labels with `inverse_transform`.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = ["PM2.5", "PM10", "NO", "NO2", "NOx", "NH3", "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene"]
X = city_day_data[features]
y_category = city_day_data["AQI_Bucket"]
y_value = city_day_data["AQI"]

# A single split shared by both targets, so the classifier and the regressor
# see exactly the same training rows. No imputation is applied in this cell.
X_train, X_test, y_category_train, y_category_test, y_value_train, y_value_test = train_test_split(
    X, y_category, y_value, test_size=0.2, random_state=42
)

# --- Model training --------------------------------------------------------
# Both models are fitted up front, before any interactive prompt, so that a
# mistyped or interrupted prompt never leaves the notebook half-trained.
category_model = xgb.XGBClassifier(objective="multi:softmax", num_class=len(le.classes_))
category_model.fit(X_train, y_category_train)

value_model = xgb.XGBRegressor()
value_model.fit(X_train, y_value_train)

# --- Hold-out evaluation of the classifier ---------------------------------
category_predictions_test = category_model.predict(X_test)
category_accuracy = accuracy_score(y_category_test, category_predictions_test)
print(f"Category Prediction Accuracy: {category_accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_category_test, category_predictions_test))


# --- Interactive single-sample prediction ----------------------------------
def _prompt_float(label):
    """Prompt for ``label`` until the reply parses as a float and return it.

    A reply that ``float()`` rejects is reported and the prompt is repeated,
    instead of raising ``ValueError`` and aborting the whole cell.
    """
    while True:
        reply = input(f"Enter {label}: ")
        try:
            return float(reply)
        except ValueError:
            print(f"'{reply}' is not a valid number, please try again.")


# `user_input` keeps its original (1, n_features) ndarray form.
user_input = np.array([_prompt_float(feature) for feature in features]).reshape(1, -1)

# Predict from a DataFrame carrying the training column names, so the values
# are tied to their features by name rather than by position alone.
user_input_frame = pd.DataFrame(user_input, columns=features)

category_prediction = category_model.predict(user_input_frame)[0]
predicted_category = le.inverse_transform([category_prediction])[0]
value_prediction = value_model.predict(user_input_frame)[0]

print(f"Predicted AQI Value: {value_prediction}")
print(f"Predicted AQI Category: {predicted_category}")
print(f"Category Prediction Accuracy: {category_accuracy * 100:.2f}%")

Category Prediction Accuracy: 80.97%
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.68      0.75       282
           1       0.82      0.85      0.84      1745
           2       0.68      0.65      0.67       555
           3       0.85      0.87      0.86      1655
           4       0.82      0.79      0.81       271
           5       0.75      0.72      0.74       462

    accuracy                           0.81      4970
   macro avg       0.79      0.76      0.77      4970
weighted avg       0.81      0.81      0.81      4970

Enter PM2.5: 2
Enter PM10: 2
Enter NO: 2
Enter NO2: 2
Enter NOx: 2
Enter NH3: 2
Enter CO: 2
Enter SO2: 2
Enter O3: 2
Enter Benzene: 2
Enter Toluene: 2
Enter Xylene: 22
Predicted AQI Value: 96.58273315429688
Predicted AQI Category: Moderate
Category Prediction Accuracy: 80.97%


In [3]:
#Random Forest 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
# Baseline: random forest on the pollutant features, predicting AQI_Bucket.

# Load the daily readings and discard rows that have no AQI value.
city_day_data = pd.read_csv("city_day.csv").dropna(subset=["AQI"])

# Replace the bucket labels with integer class ids (encoder kept in `le`).
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = [
    "PM2.5", "PM10", "NO", "NO2", "NOx", "NH3",
    "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene",
]
X, y_category = city_day_data[features], city_day_data["AQI_Bucket"]

# 80/20 split with a fixed seed.
X_train, X_test, y_category_train, y_category_test = train_test_split(
    X, y_category, test_size=0.2, random_state=42
)

# Column means are learned on the training split only and then reused for the
# test split.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

category_model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
category_model_rf.fit(X_train_imputed, y_category_train)
category_predictions_test_rf = category_model_rf.predict(X_test_imputed)

# Report hold-out metrics.
rf_accuracy = accuracy_score(y_category_test, category_predictions_test_rf)
rf_report = classification_report(y_category_test, category_predictions_test_rf)
print(f"Category Prediction Accuracy: {rf_accuracy * 100:.2f}%")
print("Classification Report:\n", rf_report)


Category Prediction Accuracy: 80.93%
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.65      0.74       282
           1       0.81      0.86      0.84      1745
           2       0.68      0.63      0.66       555
           3       0.85      0.86      0.86      1655
           4       0.84      0.78      0.81       271
           5       0.76      0.75      0.75       462

    accuracy                           0.81      4970
   macro avg       0.80      0.76      0.78      4970
weighted avg       0.81      0.81      0.81      4970



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
# Baseline: support-vector classifier (default SVC settings) on AQI_Bucket.

# Rows lacking an AQI value are removed straight after loading.
city_day_data = pd.read_csv("city_day.csv").dropna(subset=["AQI"])

# Integer-encode the target labels.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = [
    "PM2.5", "PM10", "NO", "NO2", "NOx", "NH3",
    "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene",
]
X = city_day_data[features]
y_category = city_day_data["AQI_Bucket"]

X_train, X_test, y_category_train, y_category_test = train_test_split(
    X,
    y_category,
    test_size=0.2,
    random_state=42,
)

# Mean imputation fitted on the training split; the same means fill the test
# split. No feature scaling is applied before the SVC.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

category_model_svm = SVC()
category_model_svm.fit(X_train_imputed, y_category_train)
category_predictions_test_svm = category_model_svm.predict(X_test_imputed)

svm_accuracy = accuracy_score(y_category_test, category_predictions_test_svm)
svm_report = classification_report(y_category_test, category_predictions_test_svm)
print(f"Category Prediction Accuracy (SVM): {svm_accuracy * 100:.2f}%")
print("Classification Report (SVM):\n", svm_report)


Category Prediction Accuracy (SVM): 73.84%
Classification Report (SVM):
               precision    recall  f1-score   support

           0       0.77      0.18      0.29       282
           1       0.75      0.78      0.77      1745
           2       0.65      0.53      0.59       555
           3       0.72      0.86      0.78      1655
           4       0.84      0.80      0.82       271
           5       0.77      0.71      0.74       462

    accuracy                           0.74      4970
   macro avg       0.75      0.64      0.66      4970
weighted avg       0.74      0.74      0.73      4970



In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
# Baseline: k-nearest-neighbours (default settings) on AQI_Bucket.

# Load the data and keep only rows with an AQI value.
city_day_data = pd.read_csv("city_day.csv").dropna(subset=["AQI"])

# Integer-encode the bucket labels.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = [
    "PM2.5", "PM10", "NO", "NO2", "NOx", "NH3",
    "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene",
]
X = city_day_data[features]
y_category = city_day_data["AQI_Bucket"]

X_train, X_test, y_category_train, y_category_test = train_test_split(
    X, y_category, test_size=0.2, random_state=42
)

# Fill missing feature values with the training-split column means.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

category_model_knn = KNeighborsClassifier()
category_model_knn.fit(X_train_imputed, y_category_train)
category_predictions_test_knn = category_model_knn.predict(X_test_imputed)

knn_accuracy = accuracy_score(y_category_test, category_predictions_test_knn)
knn_report = classification_report(y_category_test, category_predictions_test_knn)
print(f"Category Prediction Accuracy (KNN): {knn_accuracy * 100:.2f}%")
print("Classification Report (KNN):\n", knn_report)


Category Prediction Accuracy (KNN): 75.98%
Classification Report (KNN):
               precision    recall  f1-score   support

           0       0.69      0.68      0.69       282
           1       0.76      0.81      0.78      1745
           2       0.62      0.58      0.60       555
           3       0.81      0.80      0.80      1655
           4       0.82      0.80      0.81       271
           5       0.76      0.69      0.72       462

    accuracy                           0.76      4970
   macro avg       0.74      0.73      0.73      4970
weighted avg       0.76      0.76      0.76      4970



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
# Baseline: logistic regression on AQI_Bucket.
#
# This cell previously held a verbatim copy of the KNN cell, while its
# recorded output is labelled "Logistic Regression" and carries a
# logistic-regression convergence warning. The model and labels below are the
# ones that output corresponds to.

# Imported here because the cell's import block only provides the KNN class.
from sklearn.linear_model import LogisticRegression

# Load the data and keep only rows with an AQI value.
city_day_data = pd.read_csv("city_day.csv")
city_day_data = city_day_data.dropna(subset=["AQI"])

# Integer-encode the bucket labels.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = ["PM2.5", "PM10", "NO", "NO2", "NOx", "NH3", "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene"]
X = city_day_data[features]
y_category = city_day_data["AQI_Bucket"]
X_train, X_test, y_category_train, y_category_test = train_test_split(X, y_category, test_size=0.2, random_state=42)

# Mean imputation fitted on the training split and reused for the test split.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Default settings are kept so the cell matches its recorded output.
# NOTE(review): that output ends with an "iterations reached limit" warning,
# i.e. the solver did not converge. Raising `max_iter` and/or standardising
# the features is the remedy the warning itself suggests; doing so would
# change the reported metrics.
category_model_lr = LogisticRegression()
category_model_lr.fit(X_train_imputed, y_category_train)
category_predictions_test_lr = category_model_lr.predict(X_test_imputed)

print(f"Category Prediction Accuracy (Logistic Regression): {accuracy_score(y_category_test, category_predictions_test_lr) * 100:.2f}%")
print("Classification Report (Logistic Regression):\n", classification_report(y_category_test, category_predictions_test_lr))


Category Prediction Accuracy (Logistic Regression): 53.44%
Classification Report (Logistic Regression):
               precision    recall  f1-score   support

           0       0.17      0.01      0.01       282
           1       0.51      0.75      0.61      1745
           2       0.36      0.17      0.23       555
           3       0.62      0.51      0.56      1655
           4       0.61      0.61      0.61       271
           5       0.49      0.53      0.51       462

    accuracy                           0.53      4970
   macro avg       0.46      0.43      0.42      4970
weighted avg       0.51      0.53      0.51      4970



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report

# Baseline: single decision tree (seeded) on AQI_Bucket.

# Read the daily readings; rows with no AQI are dropped immediately.
city_day_data = pd.read_csv("city_day.csv").dropna(subset=["AQI"])

# Turn the bucket labels into integer class ids.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = [
    "PM2.5", "PM10", "NO", "NO2", "NOx", "NH3",
    "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene",
]
X, y_category = city_day_data[features], city_day_data["AQI_Bucket"]

X_train, X_test, y_category_train, y_category_test = train_test_split(
    X, y_category, test_size=0.2, random_state=42
)

# Missing feature values are filled with the training-split column means.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

category_model_dt = DecisionTreeClassifier(random_state=42)
category_model_dt.fit(X_train_imputed, y_category_train)
category_predictions_test_dt = category_model_dt.predict(X_test_imputed)

dt_accuracy = accuracy_score(y_category_test, category_predictions_test_dt)
print(f"Category Prediction Accuracy (Decision Tree): {dt_accuracy * 100:.2f}%")

dt_report = classification_report(y_category_test, category_predictions_test_dt)
print("Classification Report (Decision Tree):\n", dt_report)


Category Prediction Accuracy (Decision Tree): 72.92%
Classification Report (Decision Tree):
               precision    recall  f1-score   support

           0       0.64      0.63      0.63       282
           1       0.76      0.77      0.76      1745
           2       0.57      0.57      0.57       555
           3       0.78      0.77      0.78      1655
           4       0.76      0.71      0.74       271
           5       0.66      0.68      0.67       462

    accuracy                           0.73      4970
   macro avg       0.70      0.69      0.69      4970
weighted avg       0.73      0.73      0.73      4970



In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report

# Baseline: Gaussian naive Bayes on AQI_Bucket.

# Read the daily readings and discard rows that have no AQI value.
city_day_data = pd.read_csv("city_day.csv").dropna(subset=["AQI"])

# Integer-encode the bucket labels; the fitted encoder stays in `le`.
le = LabelEncoder()
city_day_data['AQI_Bucket'] = le.fit_transform(city_day_data['AQI_Bucket'])

features = [
    "PM2.5", "PM10", "NO", "NO2", "NOx", "NH3",
    "CO", "SO2", "O3", "Benzene", "Toluene", "Xylene",
]
X = city_day_data[features]
y_category = city_day_data["AQI_Bucket"]

X_train, X_test, y_category_train, y_category_test = train_test_split(
    X,
    y_category,
    test_size=0.2,
    random_state=42,
)

# Imputation statistics come from the training split only.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

category_model_nb = GaussianNB()
category_model_nb.fit(X_train_imputed, y_category_train)
category_predictions_test_nb = category_model_nb.predict(X_test_imputed)

nb_accuracy = accuracy_score(y_category_test, category_predictions_test_nb)
print(f"Category Prediction Accuracy (Naive Bayes): {nb_accuracy * 100:.2f}%")

nb_report = classification_report(y_category_test, category_predictions_test_nb)
print("Classification Report (Naive Bayes):\n", nb_report)


Category Prediction Accuracy (Naive Bayes): 66.26%
Classification Report (Naive Bayes):
               precision    recall  f1-score   support

           0       0.45      0.48      0.46       282
           1       0.72      0.68      0.70      1745
           2       0.47      0.48      0.47       555
           3       0.72      0.78      0.75      1655
           4       0.66      0.68      0.67       271
           5       0.59      0.51      0.54       462

    accuracy                           0.66      4970
   macro avg       0.60      0.60      0.60      4970
weighted avg       0.66      0.66      0.66      4970

