In [98]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from vecstack import stacking
from sklearn.neighbors import KNeighborsRegressor

In [99]:
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
import math
def mean_absolute_percentage_error(true, pred):
    abs_error = (np.abs(true - pred)) / true
    sum_abs_error = np.sum(abs_error)
    mape_loss = (sum_abs_error / true.size) * 100
    return mape_loss
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


In [100]:

# Load your dataset (replace 'your_dataset.csv' with your data file)
df = pd.read_csv('test2021.csv')
df.dtypes

Area                       object
Time                       object
Energy delta[Wh]            int64
GHI                       float64
temp                      float64
pressure                    int64
humidity                    int64
wind_speed                float64
rain_1h                   float64
snow_1h                   float64
clouds_all                  int64
isSun                       int64
sunlightTime                int64
dayLength                   int64
SunlightTime/daylength    float64
weather_type                int64
hour                        int64
month                       int64
dtype: object

In [101]:
df

Unnamed: 0,Area,Time,Energy delta[Wh],GHI,temp,pressure,humidity,wind_speed,rain_1h,snow_1h,clouds_all,isSun,sunlightTime,dayLength,SunlightTime/daylength,weather_type,hour,month
0,Bihar,01-01-2021 00:15,0,0.0,-0.2,1007,94,2.6,0.0,0.0,97,0,0,450,0.0,4,0,1
1,Bihar,01-01-2021 00:30,0,0.0,-0.2,1007,94,2.6,0.0,0.0,97,0,0,450,0.0,4,0,1
2,Bihar,01-01-2021 00:45,0,0.0,-0.2,1007,94,2.6,0.0,0.0,97,0,0,450,0.0,4,0,1
3,Bihar,01-01-2021 01:00,0,0.0,-0.4,1007,94,2.7,0.0,0.0,97,0,0,450,0.0,4,1,1
4,Bihar,01-01-2021 01:15,0,0.0,-0.4,1007,94,2.7,0.0,0.0,97,0,0,450,0.0,4,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34362,Algeria,31-12-2021 22:45,0,0.0,10.5,1012,96,5.5,0.0,0.0,100,0,0,450,0.0,4,22,12
34363,Algeria,31-12-2021 23:00,0,0.0,9.6,1013,96,5.2,0.0,0.0,100,0,0,450,0.0,4,23,12
34364,Algeria,31-12-2021 23:15,0,0.0,9.6,1013,96,5.2,0.0,0.0,100,0,0,450,0.0,4,23,12
34365,Algeria,31-12-2021 23:30,0,0.0,9.6,1013,96,5.2,0.0,0.0,100,0,0,450,0.0,4,23,12


In [102]:
df = df.dropna()  # Remove rows with missing data
df= df[(df['Energy delta[Wh]'] >= 0)]

In [103]:
df['Time'] = pd.to_datetime(df['Time'], format='%d-%m-%Y %H:%M')

In [104]:
df.dtypes

Area                              object
Time                      datetime64[ns]
Energy delta[Wh]                   int64
GHI                              float64
temp                             float64
pressure                           int64
humidity                           int64
wind_speed                       float64
rain_1h                          float64
snow_1h                          float64
clouds_all                         int64
isSun                              int64
sunlightTime                       int64
dayLength                          int64
SunlightTime/daylength           float64
weather_type                       int64
hour                               int64
month                              int64
dtype: object

In [105]:
current_date = df['Time'].max()
current_date

Timestamp('2021-12-31 23:45:00')

In [106]:
one_months_ago = current_date - pd.DateOffset(months=1)
one_months_ago

Timestamp('2021-11-30 23:45:00')

In [107]:
last_one_months_data = df[df['Time'] >= one_months_ago]
len(last_one_months_data)

2497

In [108]:
two_months_ago = current_date - pd.DateOffset(months=2)
two_months_ago

Timestamp('2021-10-31 23:45:00')

In [109]:
last_two_months_data = df[df['Time'] >= two_months_ago]
len(last_two_months_data)

5281

In [110]:
# Feature engineering: Create lag features
df['Energy_delta_lag1'] = df['Energy delta[Wh]']
df['GHI_lag1'] = df['GHI']

In [111]:
X = df[['Energy_delta_lag1', 'GHI_lag1']].dropna()
y = df['pressure'].dropna()

In [112]:
len(X)


34367

In [113]:
len(y)

34367

In [114]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [115]:
m=y_train.size

In [116]:
train_data, test_data = df[:m], df[m:]

In [117]:
# Define the base models
model1 = LinearRegression()
model2 = RandomForestRegressor(n_estimators=10, random_state=0)
model3 = DecisionTreeRegressor(random_state=0)
model4 = KNeighborsRegressor(n_neighbors=3) 
model5= SVR(kernel='linear')

In [118]:
model1.fit(X_train, y_train)

In [119]:
model2.fit(X_train, y_train)

In [120]:
model3.fit(X_train, y_train)


In [121]:
model4.fit(X_train, y_train)


In [122]:
model5.fit(X_train, y_train)

In [123]:
y_pred1=model1.predict(X_test)
y_pred2=model2.predict(X_test)
y_pred3=model3.predict(X_test)
y_pred4=model4.predict(X_test)
y_pred5=model5.predict(X_test)


In [124]:
#for linear regression
MAPE1=mean_absolute_percentage_error(y_test,y_pred1)
mae1=mean_absolute_error(y_test,y_pred1)
mse1 = mean_squared_error(y_test, y_pred1)
r21 = r2_score(y_test, y_pred1)
rmse1=math.sqrt(mse1)

In [125]:
#for Random forest Regression
MAPE2=mean_absolute_percentage_error(y_test,y_pred2)
mae2 = mean_absolute_error(y_test,y_pred2)
mse2 = mean_squared_error(y_test, y_pred2)
r22= r2_score(y_test, y_pred2)
rmse2=math.sqrt(mse2)

In [126]:
#for Decision tree regression
MAPE3=mean_absolute_percentage_error(y_test,y_pred3)
mae3=mean_absolute_error(y_test,y_pred3)
mse3 = mean_squared_error(y_test, y_pred3)
r23= r2_score(y_test, y_pred3)
rmse3=math.sqrt(mse3)

In [127]:
#for KNeighbors regression
MAPE4=mean_absolute_percentage_error(y_test,y_pred4)
mae4=mean_absolute_error(y_test,y_pred4)
mse4 = mean_squared_error(y_test, y_pred4)
r24 = r2_score(y_test, y_pred4)
rmse4=math.sqrt(mse4)

In [128]:
#for SVM regression
MAPE5=mean_absolute_percentage_error(y_test,y_pred5)
mae5=mean_absolute_error(y_test,y_pred5)
mse5 = mean_squared_error(y_test, y_pred5)
r25 = r2_score(y_test, y_pred5)
rmse5=math.sqrt(mse5)

In [129]:
print("RMSE=",rmse1,rmse2,rmse3,rmse4,rmse5)
print("MAPE=",MAPE1,MAPE2,MAPE3,MAPE4,MAPE5)
print("MAE=",mae1,mae2,mae3,mae4,mae5)

RMSE= 8.979077480470004 9.250191334058869 9.27494473407893 9.5908395052766 9.021829845490032
MAPE= 0.7013907951178846 0.7232325980120865 0.725573992504634 0.7473458237455894 0.6969295179344985
MAE= 7.115627795402332 7.337234494677304 7.360926306670836 7.58128859147189 7.062843327097531


In [130]:
meta_model = LinearRegression()

In [131]:
models = [ model1, model2, model3,model4,model5]
S_train, S_test = stacking(models, X_train, y_train, X_test, regression=True, mode='oof_pred_bag', needs_proba=False, save_dir=None, metric=None, n_folds=4, stratified=True, shuffle=True, random_state=0, verbose=2)

task:         [regression]
metric:       [mean_absolute_error]
mode:         [oof_pred_bag]
n_models:     [5]

model  0:     [LinearRegression]
    fold  0:  [7.04796318]
    fold  1:  [7.03226881]
    fold  2:  [6.97478555]
    fold  3:  [7.03585216]
    ----
    MEAN:     [7.02271742] + [0.02827792]
    FULL:     [7.02271742]

model  1:     [RandomForestRegressor]




    fold  0:  [7.34935145]
    fold  1:  [7.29626420]
    fold  2:  [7.26007144]
    fold  3:  [7.35311140]
    ----
    MEAN:     [7.31469962] + [0.03873086]
    FULL:     [7.31469962]

model  2:     [DecisionTreeRegressor]
    fold  0:  [7.38801383]
    fold  1:  [7.35874779]
    fold  2:  [7.32363656]
    fold  3:  [7.41854000]
    ----
    MEAN:     [7.37223454] + [0.03513132]
    FULL:     [7.37223454]

model  3:     [KNeighborsRegressor]
    fold  0:  [7.67215386]
    fold  1:  [7.47633300]
    fold  2:  [7.58297306]
    fold  3:  [8.16056978]
    ----
    MEAN:     [7.72300743] + [0.26196603]
    FULL:     [7.72300743]

model  4:     [SVR]
    fold  0:  [7.00623391]
    fold  1:  [6.99962368]
    fold  2:  [6.94472479]
    fold  3:  [6.98398843]
    ----
    MEAN:     [6.98364270] + [0.02387717]
    FULL:     [6.98364270]



In [132]:
meta_model.fit(S_train, y_train)

In [133]:
# Make predictions on the test data
y_pred = meta_model.predict(S_test)

In [134]:
MAPE=mean_absolute_percentage_error(y_test,y_pred)
mae=mean_absolute_error(y_test,y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse=math.sqrt(mse)

In [135]:
print("RMSE=",rmse)
print("MAPE=",MAPE)
print("MAE=",mae)

RMSE= 8.976073002623824
MAPE= 0.7010851250764265
MAE= 7.112507836433281


In [136]:
future_data = df.tail(len(last_one_months_data))
future_data = future_data[['Energy_delta_lag1', 'GHI_lag1']]
future_data=future_data.dropna()
len(future_data)

2497

In [137]:
future_data

Unnamed: 0,Energy_delta_lag1,GHI_lag1
31870,0,0.0
31871,0,0.0
31872,0,0.0
31873,0,0.0
31874,0,0.0
...,...,...
34362,0,0.0
34363,0,0.0
34364,0,0.0
34365,0,0.0


In [138]:
prediction1=model1.predict(future_data)
prediction2=model2.predict(future_data)
prediction3=model3.predict(future_data)
prediction4=model4.predict(future_data)
prediction5=model5.predict(future_data)

In [139]:
prediction1,prediction2,prediction3,prediction4,prediction5

(array([1014.75504416, 1014.75504416, 1014.75504416, ..., 1014.75504416,
        1014.75504416, 1014.75504416]),
 array([1015.07622313, 1015.07622313, 1015.07622313, ..., 1015.07622313,
        1015.07622313, 1015.07622313]),
 array([1015.09143986, 1015.09143986, 1015.09143986, ..., 1015.09143986,
        1015.09143986, 1015.09143986]),
 array([1015.33333333, 1015.33333333, 1015.33333333, ..., 1015.33333333,
        1015.33333333, 1015.33333333]),
 array([1015.86725487, 1015.86725487, 1015.86725487, ..., 1015.86725487,
        1015.86725487, 1015.86725487]))

In [140]:
new_predictions = np.column_stack((prediction1,prediction2,prediction3,prediction4,prediction5))

In [141]:
new_predictions

array([[1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487],
       [1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487],
       [1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487],
       ...,
       [1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487],
       [1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487],
       [1014.75504416, 1015.07622313, 1015.09143986, 1015.33333333,
        1015.86725487]])

In [142]:
meta_model_prediction = meta_model.predict(new_predictions)

In [143]:
print(f"Predicted Inflow for Two Months Ahead (1-month lag): {meta_model_prediction[0]}")

Predicted Inflow for Two Months Ahead (1-month lag): 1014.6833206823774


In [144]:
#import joblib

# Save the trained model to a file in the specified directory
#model_filename = 'E:\Projects\Junior project\model\svm_model.joblib'
#joblib.dump(svm_model, model_filename)
