In [139]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import warnings

# Ignore all warnings from this point on (no category or module filter is
# given, so every warning raised by later cells is silenced as well).
# NOTE(review): consider narrowing this filter; a blanket 'ignore' can hide
# warnings emitted while unpickling the model or running predictions.
warnings.filterwarnings('ignore')

In [140]:
# Registry of serialized estimators: model name -> path of its pickle file
# (paths are relative to the notebook's working directory).
pickleModels = dict(
    XGBoost='serialized/XGBoostModel.pkl',
    decisionTree='serialized/decisionTreeModel.pkl',
    randomForest='serialized/randomForestModel.pkl',
)

# Deserialize the 'randomForest' entry; the resulting `model` object is what
# the prediction cell calls predict_proba on.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so only load artifacts that come from a trusted source.
with open(pickleModels['randomForest'], mode='rb') as model_file:
    model = pickle.load(model_file)

In [141]:
# New dataset for forecasting.
# The raw readings are kept in a plain dict (one list per column, one position
# per observation) so the inputs are easy to edit by hand.
raw_records = {
    'Location': ["Bengaluru", "ITO, Delhi", "New Ind Town, Faridabad"],
    'Year': [2023, 2023, 2022],
    'Month': [2, 2, 10],
    'Day': [23, 22, 2],
    'Hour': [8, 14, 10],
    'PM2.5': [43, 500, 185],
    'PM10': [78, 480, 199],
    'O3': [26, 91, 10],
    'CO': [258, 78, 52],
    'SO2': [10, 17, 12],
    'NO2': [17, 47, 26]
}

# `data` is the full frame, including 'Year' and 'Month', which are not part
# of the feature subset selected below.
data = pd.DataFrame(raw_records)

# `new_data` holds only the columns used downstream. It is an explicit copy so
# the in-place edits made by later cells never touch `data`.
new_data = data[['Location', 'Day', 'Hour', 'PM2.5', 'PM10', 'O3', 'CO', 'SO2', 'NO2']].copy()

In [142]:
# Label encoding
# NOTE(review): the encoder is fitted on the inference rows themselves, so the
# integer codes are determined only by the values present in this batch (in
# sorted order). Whether they match the codes the model saw during training
# cannot be told from this notebook — confirm, and if a fitted training
# encoder was persisted, load it and call .transform() instead.
le = LabelEncoder()
new_data['Location'] = le.fit_transform(new_data['Location'])
# NOTE(review): 'Hour' is already an integer column; re-fitting the encoder on
# it replaces every hour with its rank inside this batch (8, 14, 10 become
# 0, 2, 1), so the actual hour of day is lost. This call also re-fits `le`,
# which afterwards no longer holds the 'Location' classes.
new_data['Hour'] = le.fit_transform(new_data['Hour'])

# Data pre-processing
# Fill missing values with the per-column mean (every column is numeric at
# this point because 'Location' was encoded above).
new_data.fillna(new_data.mean(), inplace=True)

# Convert hours to numeric format
# Keeps the text before the first ':' and casts it to int. Because 'Hour'
# already contains the integer codes produced above, this leaves the values
# unchanged here.
new_data['Hour'] = new_data['Hour'].astype(str).str.split(":").str[0].astype(int)

# Feature selection
# Selects the same nine columns `new_data` already has; the column order here
# is the feature order passed to the scaler and the model.
X_new = new_data[['Location','Day', 'Hour', 'PM2.5', 'PM10', 'O3', 'CO', 'SO2', 'NO2']]

In [143]:
# Data scaling: min-max scale every feature column of X_new.
# NOTE(review): the intent is to reuse the scaler from training, but this code
# creates a brand-new MinMaxScaler and fits it on X_new itself. Each column is
# therefore rescaled by the min/max of this batch, so the scaled values depend
# on which rows are predicted together. If the training scaler was persisted,
# load it and call .transform(X_new) instead — TODO confirm.
scaler = MinMaxScaler()
X_new_scaled = scaler.fit_transform(X_new)

In [144]:
# Class-membership probabilities: one row per input sample, one column per
# class the model knows about.
predict_proba = model.predict_proba(X_new_scaled)

# Decoding of forecasts (classes)
# The columns of predict_proba are ordered like model.classes_, so the arg-max
# column index has to be translated back into the actual class label. Using
# the raw index is only correct when the labels happen to be exactly 0..k-1.
predicted_classes = np.asarray(model.classes_)[np.argmax(predict_proba, axis=1)]

# Displaying categories
# Lookup from class label (as a string key) to its air-quality category name.
category_mapping = {
    '0': 'Good',
    '1': 'Moderate',
    '2': 'USG', 
    '3': 'Unhealthy',
    '4': 'Very Unhealthy',
    '5': 'Severe'
}
# int() normalises numpy integer/float labels (e.g. 2 or 2.0) to the plain
# digit string used as key above.
predicted_categories = [category_mapping[str(int(cls))] for cls in predicted_classes]

print("Input data:")
print(X_new)
print("\nPredicted classes:")
print(predicted_classes)
print("\nPredicted categories:")
print(predicted_categories)

[2 5 3]
Input data:
   Location  Day  Hour  PM2.5  PM10  O3   CO  SO2  NO2
0         0   23     0     43    78  26  258   10   17
1         1   22     2    500   480  91   78   17   47
2         2    2     1    185   199  10   52   12   26

Predicted classes:
[2 5 3]

Predicted categories:
['USG', 'Severe', 'Unhealthy']
