### Importing Package

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Importing Packages for pre-processing and evaluation

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

### Importing Packages for Model Training

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

In [7]:
input_path = r'C:/Users/sanja/Desktop/Personal Projects/Renewable Energy Forecast/training_data/'
output_path = r'C:/Users/sanja/Desktop/Personal Projects/Renewable Energy Forecast/trained_models/'

### Reading Data

In [9]:
d = pd.read_csv(input_path+'R_solar_data.csv')
d = d.astype('float')

In [10]:
d.head()

Unnamed: 0,GHI,Gen
0,0.0,0.0135
1,1.566579,0.111463
2,14.724596,1.429013
3,33.796818,6.155991
4,81.715012,14.120696


In [11]:
d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1742 entries, 0 to 1741
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   GHI     1742 non-null   float64
 1   Gen     1742 non-null   float64
dtypes: float64(2)
memory usage: 27.3 KB


In [13]:
x =  d.drop('Gen', axis=1)
y = d['Gen']

In [14]:
x.head()

Unnamed: 0,GHI
0,0.0
1,1.566579
2,14.724596
3,33.796818
4,81.715012


In [15]:
y.head()

0     0.013500
1     0.111463
2     1.429013
3     6.155991
4    14.120696
Name: Gen, dtype: float64

### Train_Test Split

In [16]:
train_input, test_input, train_output, test_output = train_test_split(x, y, test_size= 0.1, random_state=1)
print (train_input.shape, train_output.shape)
print (test_input.shape, test_output.shape)

(1567, 1) (1567,)
(175, 1) (175,)


### Model Training with Random Forest Regressor

In [17]:
random = RandomForestRegressor(n_estimators=100)
random.fit(train_input, train_output)
print ('random_forest_accuracy : {0}'.format(random.score(test_input, test_output)))
predicted = random.predict(test_input)

random_forest_accuracy : 0.9573765823941134


### Model Training with Support Vector Regressor

In [18]:
svm = SVR(gamma='auto')
svm.fit(train_input, train_output)
print ('svm_accuracy : {0}'.format(svm.score(test_input, test_output)))
predicted = svm.predict(test_input)

svm_accuracy : 0.24808409493033545


### Model Training with Decision Tree Regressor

In [19]:
dtree = DecisionTreeRegressor(random_state = 0)
dtree.fit(train_input, train_output)
print ('decision_tree_accuracy : {0}'.format(dtree.score(test_input, test_output)))
predicted = dtree.predict(train_input)

decision_tree_accuracy : 0.9388692290366767


### Model Training with Linear Regression

In [20]:
linear = LinearRegression()
linear.fit(train_input, train_output)
print ('linear_regression_accuracy : {0}'.format(linear.score(test_input, test_output)))
predicted = linear.predict(test_input)

linear_regression_accuracy : 0.9620626148893596


### Model Comparison

In [27]:
models = ['Random Forest', 'SVR', 'Decision Tree', 'Linear Regression']
accuracies = [random.score(test_input, test_output), svm.score(test_input, test_output), dtree.score(test_input, test_output), linear.score(test_input, test_output)]

accuracy_df = pd.DataFrame({'Model': models, 'Accuracy': accuracies})
accuracy_df = accuracy_df.sort_values(by='Accuracy', ascending=False).reset_index(drop=True)
accuracy_df

Unnamed: 0,Model,Accuracy
0,Linear Regression,0.962063
1,Random Forest,0.957377
2,Decision Tree,0.938869
3,SVR,0.248084


### Saving Best Performing Model in a Pickle file

In [29]:
joblib.dump(linear, output_path+'R_model.pkl')

['C:/Users/sanja/Desktop/Personal Projects/Renewable Energy Forecast/trained_models/R_model.pkl']