In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from feature_engine.creation import CyclicalFeatures
import joblib
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.svm import SVR

# ---------------------------------------------------------------------------
# Train an SVR on the daily data and persist the fitted preprocessing
# objects plus the model so that a later cell can score new data.
# ---------------------------------------------------------------------------

# Column roles. The feature set and its ORDER are pinned here instead of being
# inherited from the CSV column order, so the model is always trained on
# [cyclical date features..., T2M, RH2M, PRECTOTCORR, PS, WS10M] -- the same
# layout the inference code assembles by hand.
TARGET_COL = "ALLSKY_SFC_SW_DWN"
DATE_COLS = ['YEAR', 'MO', 'DY']
NUMERIC_COLS = ["T2M", "RH2M", "PRECTOTCORR", "PS", "WS10M"]

# Load the data
path = 'kafrEl-SheikhDaily.csv'
data = pd.read_csv(path)

# Fail early, with a readable message, if the file does not have the expected schema
missing_cols = [c for c in DATE_COLS + NUMERIC_COLS + [TARGET_COL] if c not in data.columns]
if missing_cols:
    raise KeyError(f"{path} is missing expected columns: {missing_cols}")

# Drop rows where any column has a value of -999
# (presumably the dataset's missing-value sentinel -- confirm against the data source)
data = data[(data != -999).all(axis=1)]

# Separate features and target
df = data.drop(columns=TARGET_COL)
X = df[DATE_COLS + NUMERIC_COLS]
y = data[TARGET_COL]

# Split the data (fixed seed so the reported metrics are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.25, random_state=30)

# Apply cyclical (sin/cos) transformation to the date features.
# The transformer is fitted on the training split only and reused on the test split.
# NOTE(review): YEAR is encoded cyclically as well; a year is not periodic, so
# confirm this is intended before relying on it for years outside the training range.
cyclical = CyclicalFeatures(variables=DATE_COLS, drop_original=True)
new_X_train = cyclical.fit_transform(X_train[DATE_COLS])
new_X_test = cyclical.transform(X_test[DATE_COLS])

# Re-assemble the design matrices: cyclical date features first, then the
# numeric features in the pinned order.
X_train = pd.concat([new_X_train, X_train[NUMERIC_COLS]], axis=1)
X_test = pd.concat([new_X_test, X_test[NUMERIC_COLS]], axis=1)

# Normalize the numeric features to [0, 1]; fit on train only, apply to test
X_scaler = MinMaxScaler()
X_train[NUMERIC_COLS] = X_scaler.fit_transform(X_train[NUMERIC_COLS])
X_test[NUMERIC_COLS] = X_scaler.transform(X_test[NUMERIC_COLS])

# Save the scaler and cyclical transformer for reuse at inference time
joblib.dump(X_scaler, 'kafrEl-SheikhDaily_scaler_svr.pkl')
joblib.dump(cyclical, 'kafrEl-SheikhDaily_cyclical_svr.pkl')

# SVR configuration. The grid currently holds a single candidate, so
# GridSearchCV performs no search here: it only yields a 5-fold CV score for
# this configuration and refits it on the full training split. Add more values
# to the lists below to actually tune.
# NOTE(review): the scaler above was fitted on the whole training split before
# cross-validation, so the CV folds are not fully independent of each other.
svr = SVR()
param_grid = {
    'kernel': ['poly'],
    'C': [100],
    'epsilon': [1],
    'gamma': ['scale'],
    'degree': [2],  # Only relevant for the polynomial kernel
    'coef0': [0.0]  # Independent kernel term; used by 'poly' and 'sigmoid'
}

grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Best (here: the only) model, refitted on the full training split
best_svr = grid_search.best_estimator_

# Predict the held-out test split
y_pred = best_svr.predict(X_test)

# Calculate and print Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

# Calculate and print R² score
r2 = r2_score(y_test, y_pred)
print(f'R² Score: {r2}')

# Save the trained SVR model
joblib.dump(best_svr, 'kafrEl-SheikhDaily_model_svr.joblib')


Mean Absolute Error: 0.5477895267207017
R² Score: 0.8735187894659273


['kafrEl-SheikhDaily_model_svr.joblib']

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import joblib


# ---------------------------------------------------------------------------
# Score a separate CSV with the persisted SVR model and report R^2.
# ---------------------------------------------------------------------------

# Load the model
# (joblib files are pickles: only load artifacts produced by a trusted run)
svr_model = joblib.load('kafrEl-SheikhDaily_model_svr.joblib')
# Load the fitted scaler and cyclical transformer
scaler = joblib.load('kafrEl-SheikhDaily_scaler_svr.pkl')
cyclical = joblib.load('kafrEl-SheikhDaily_cyclical_svr.pkl')

# Load the data
path = 'kafrEl-SheikhDailyTest.csv'
new_data = pd.read_csv(path)

# Drop rows where any column has a value of -999
new_data = new_data[(new_data != -999).all(axis=1)]

# Extract actual values
actual_values = new_data["ALLSKY_SFC_SW_DWN"]

# Prepare the data for prediction
# Apply cyclical transformation to the date attributes
date_features = new_data[['YEAR', 'MO', 'DY']]
date_features_cyclical = cyclical.transform(date_features)

# Apply scaling to the remaining features. The result is wrapped back into a
# DataFrame (same index, same column names) so feature names are not lost.
numeric_cols = ["T2M", "RH2M", "PRECTOTCORR", "PS", "WS10M"]
scaled_features = pd.DataFrame(
    scaler.transform(new_data[numeric_cols]),
    columns=numeric_cols,
    index=new_data.index,
)

# Concatenate the transformed data as a named DataFrame rather than a bare
# ndarray: the model was fitted on a DataFrame, so predicting on an unnamed
# array triggers scikit-learn's feature-name warning and hides any mismatch
# between training and inference column order.
transformed_data = pd.concat([date_features_cyclical, scaled_features], axis=1)

# Align the columns to exactly the order the model saw during fit. If the model
# was fitted without feature names, keep the hand-assembled order above.
expected_cols = getattr(svr_model, "feature_names_in_", None)
if expected_cols is not None:
    # Raises KeyError if a training-time feature is absent from the new data
    transformed_data = transformed_data[list(expected_cols)]

# Predict using the model
predictions = svr_model.predict(transformed_data)

# Calculate the R^2 score
r2 = r2_score(actual_values, predictions)

print("R^2 score:", r2)


R^2 score: 0.8202864566808414


