In [135]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import warnings

warnings.filterwarnings('ignore')

In [136]:
# Deserialize the previously saved model from disk.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
model_path = 'regression/serialized/decisionTreeModel.pkl'
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

In [137]:
# Hand-built sample used to exercise the forecasting pipeline
feature_columns = ['Year', 'Month', 'Day', 'Hour', 'PM2.5', 'PM10', 'O3', 'CO', 'SO2', 'NO2']

data = pd.DataFrame({
    'Location': ["ITO, Delhi", "Mumbai"],
    'Year': [2023, 2023],
    'Month': [2, 3],
    'Day': [16, 23],
    'Hour': [14, 17],
    'AQI': [268, 47],
    'PM2.5': [401, 14],
    'PM10': [325, 41],
    'O3': [73, 35],
    'CO': [88, 6],
    'SO2': [16, 5],
    'NO2': [np.nan, 7],  # missing value added on purpose for testing
})

# Actual AQI values, kept aside for comparison with the predictions
y_true = data['AQI']
# Independent copy of the model inputs, so later edits never touch `data`
new_data = data.loc[:, feature_columns].copy()

In [138]:
# Preprocessing of the inference batch
feature_columns = ['Year', 'Month', 'Day', 'Hour', 'PM2.5', 'PM10', 'O3', 'CO', 'SO2', 'NO2']

# Parse the hour into a plain integer; "HH:MM"-style strings are accepted too.
# The LabelEncoder that used to be re-fitted here was removed: fitting it on the
# inference batch maps each hour to its rank within that batch (14, 17 -> 0, 1),
# so the same hour would be encoded differently from one batch to the next.
# NOTE(review): assumes the model was trained on the hour of day as 0-23 —
# confirm against the training notebook.
hour_of_day = new_data['Hour'].astype(str).str.split(":").str[0].astype(int)
assert hour_of_day.between(0, 23).all(), "Hour must be within 0-23"
new_data = new_data.assign(Hour=hour_of_day)

# Fill missing values with the column means (rebinding instead of inplace=True
# keeps the cell safe to re-run).
# NOTE(review): the means come from this batch only; imputing with statistics
# saved from training would be more consistent — confirm what training used.
new_data = new_data.fillna(new_data.mean())

# Feature selection
X_new = new_data[feature_columns]

# A column that is missing in every row has a NaN mean and would stay unfilled.
assert not X_new.isna().any().any(), "Unfilled missing values remain in the features"

In [139]:
# Min-max scale the selected features.
# NOTE(review): the scaler is fitted on X_new itself rather than restored from
# training, so the min/max ranges are those of this batch — confirm how the
# training notebook scaled its features and whether a fitted scaler was saved.
scaler = MinMaxScaler()
scaler.fit(X_new)
X_new_scaled = scaler.transform(X_new)

In [140]:
# Prediction
# NOTE(review): the model receives `new_data` (unscaled); `X_new_scaled` from the
# scaling cell is not used here — confirm which representation the model expects.
predictions = model.predict(new_data)

print(predictions)

[3.22641509 4.77464789]


In [141]:
# Score the predictions against the actual AQI values
mse = mean_squared_error(y_true, predictions)
mae = mean_absolute_error(y_true, predictions)
r2 = r2_score(y_true, predictions)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

# Report each metric rounded to two decimals
for metric_name, metric_value in (("MSE", mse), ("RMSE", rmse), ("MAE", mae), ("R2 Score", r2)):
    print(f"{metric_name}: {metric_value:.2f}")

MSE: 35944.02
RMSE: 189.59
MAE: 153.50
R2 Score: -1.94
