In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

# Read the CSV into a DataFrame (absolute path; the file must exist there).
df = pd.read_csv('/content/final_generated_data.csv')

# Names of the input columns, one per line. The order of this list is the
# column order of X below.
feature_columns = [
    'Temperature',
    'Humidity',
    'ALLSKY_SFC_UVB',
    'ALLSKY_SFC_UV_INDEX',
    'ALLSKY_SFC_UVA',
    'QV2M',
    'RH2M',
    'PRECTOTCORR',
    'WS10M',
    'year',
    'month',
    'day',
    'hour',
    'weekday',
    'Inflation',
]

# Name of the column the model is trained to predict.
target_column = 'Load'

# X: DataFrame restricted to the feature columns; y: the target column as a Series.
X, y = df[feature_columns], df[target_column]

# Final expression of the cell: display the target Series.
y

0        60160.152
1        58550.010
2        27063.855
3        51948.730
4        32287.680
           ...    
39995    43678.305
39996    41472.008
39997    56719.125
39998    51406.290
39999    61710.120
Name: Load, Length: 40000, dtype: float64

In [27]:

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so the test set does not influence
# the scaling statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Only the last expression of a cell is rendered, so a single array is shown
# here: the scaled training matrix.
X_train_scaled

array([[ 1.37782945,  0.48435447,  2.42928589, ..., -0.31136354,
         0.8027278 , -1.04508695],
       [-1.0815409 ,  0.86067767, -0.45603798, ...,  1.72793843,
         1.7078659 ,  0.97703842],
       [ 3.02503506,  1.09230164,  3.00028676, ..., -0.08653756,
         1.3468266 , -1.04508695],
       ...,
       [-0.40847741,  0.96776605, -0.42535777, ...,  1.32958118,
         1.6918199 , -1.04508695],
       [-0.81888263,  0.92645479, -0.41883037, ..., -1.46007391,
        -0.42098033, -1.04508695],
       [-0.19304393, -0.51262968,  0.09229959, ...,  0.83235129,
         0.68402578, -1.04508695]])

In [28]:

# Configure the gradient-boosted regressor, one hyperparameter per line.
xg_reg = xgb.XGBRegressor(
    objective='reg:squarederror',  # squared-error regression loss
    colsample_bytree=0.3,          # column subsampling ratio per tree
    learning_rate=0.1,
    max_depth=5,
    alpha=10,                      # forwarded to XGBoost as given (L1 regularization term per XGBoost docs)
    n_estimators=100,              # number of boosting rounds
)

# Fit on the scaled training features.
xg_reg.fit(X_train_scaled, y_train)

# Predict for the scaled test features and display the predictions as the
# cell's output.
y_pred = xg_reg.predict(X_test_scaled)
y_pred


array([36073.566, 50716.87 , 44517.297, ..., 31648.36 , 60424.88 ,
       43254.43 ], dtype=float32)

In [29]:
# Evaluate the predictions against the held-out test targets.
r2_xgb = r2_score(y_test, y_pred)

# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a
# TypeError. Taking the square root explicitly gives the same value on every
# scikit-learn version.
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred))

# Print the results
print(f'XGBoost - R²: {r2_xgb}, RMSE: {rmse_xgb}')

XGBoost - R²: 0.9289944224282263, RMSE: 3774.1708079804444
