In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, cross_validate, KFold

In [2]:
# Read the dataset CSV; the path is relative to the current working directory.
df = pd.read_csv("./dataset/solar_radiation_cleaned.csv")

# Feature/target selection and train/test split

In [3]:
# Select the predictor columns; 'ghi_pyr' is the regression target.
features = df[[
    'dni',
    'dhi',
    'air_temperature',
    'relative_humidity',
    'wind_speed',
    'wind_speed_of_gust',
    'wind_from_direction',
    'barometric_pressure',
]]
target = df['ghi_pyr']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Random Forest Regressor

In [4]:
# Pipeline: standardise the features, then a seeded 200-tree random forest.
# `fit` returns the fitted pipeline itself, so construction and training are chained.
model = make_pipeline(
    StandardScaler(),
    RandomForestRegressor(n_estimators=200, random_state=42),
).fit(X_train, y_train)

# Predictions on the held-out rows, used by the metrics cell.
y_pred = model.predict(X_test)

# Metrics

In [5]:
# Hold-out metrics for the fitted model on the test split.
mse = mean_squared_error(y_test, y_pred)
# Derive RMSE from the MSE above instead of calling
# `mean_squared_error(..., squared=False)`: the `squared` keyword is deprecated
# and later removed in newer scikit-learn releases. For a single-output target
# the square root of the MSE is the same value.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")

MSE: 2106.0084538504166
RMSE: 45.891267729824335
R2: 0.9718269402366054


# Cross-validation

In [None]:
# 5-fold cross-validation with shuffled, seeded folds so the run is reproducible.
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Evaluate both metrics in a single cross-validation pass: each fold's pipeline
# is fitted once and scored twice, instead of being refitted once per metric.
cv_results = cross_validate(
    model,
    features,
    target,
    cv=kf,
    scoring=['r2', 'neg_mean_squared_error'],
)
r2_scores = cv_results['test_r2']
# scikit-learn reports MSE negated (higher is better); flip the sign before the root.
mse_scores = cv_results['test_neg_mean_squared_error']
rmse_scores = np.sqrt(-mse_scores)

print(f"Cross-Validation R2 Scores: {r2_scores} \n")
print(f"Mean R2 Score: {np.mean(r2_scores)} \n")
print(f"\nCross-Validation RMSE Scores: {rmse_scores} \n")
print(f"Mean RMSE Score: {np.mean(rmse_scores)} \n")