In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error, explained_variance_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np

In [4]:
data = pd.read_csv('/content/weather-automated-sensors-dataset.csv')
data = data.dropna(subset=["Air Temperature"])
data["Measurement Timestamp"] = pd.to_datetime(data["Measurement Timestamp"], errors='coerce')

  data["Measurement Timestamp"] = pd.to_datetime(data["Measurement Timestamp"], errors='coerce')


In [5]:
data["Month"] = data["Measurement Timestamp"].dt.month
data["Hour"] = data["Measurement Timestamp"].dt.hour
data["Month_sin"] = np.sin(2 * np.pi * data["Month"] / 12)
data["Month_cos"] = np.cos(2 * np.pi * data["Month"] / 12)
data["Hour_sin"] = np.sin(2 * np.pi * data["Hour"] / 24)
data["Hour_cos"] = np.cos(2 * np.pi * data["Hour"] / 24)

In [7]:
data = data.drop(columns=["Station Name", "Measurement Timestamp", "Measurement Timestamp Label", "Measurement ID", "Month", "Hour"])
imputer = SimpleImputer(strategy="mean")
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)
X = data_imputed.drop(columns=["Air Temperature"])
y = data_imputed["Air Temperature"]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
gbm = GradientBoostingRegressor(random_state=42)
gbr = GradientBoostingRegressor(random_state=42, n_estimators=150, learning_rate=0.1, max_depth=4)
rf = RandomForestRegressor(random_state=42, n_estimators=100)

stacking_regressor = StackingRegressor(
    estimators=[
        ('gbm', gbm),
        ('gbr', gbr),
        ('rf', rf)
    ],
    final_estimator=LinearRegression(),
    n_jobs=-1
)


In [10]:
stacking_regressor.fit(X_train, y_train)

In [11]:
y_pred = stacking_regressor.predict(X_test)

In [12]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
explained_var = explained_variance_score(y_test, y_pred)
tolerance = 2.0
accuracy = np.mean(np.abs(y_test - y_pred) <= tolerance) * 100

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R2) Score:", r2)
print("Mean Absolute Percentage Error (MAPE):", mape)
print("Explained Variance Score:", explained_var)
print("Accuracy within tolerance ±2.0 units:", accuracy, "%")

Mean Absolute Error (MAE): 0.7436470170445534
Root Mean Squared Error (RMSE): 1.6062053753451515
R-squared (R2) Score: 0.9735088597189322
Mean Absolute Percentage Error (MAPE): 11381093379130.238
Explained Variance Score: 0.9735202540927028
Accuracy within tolerance ±2.0 units: 87.02386998476383 %
