# **BACKGROUND**

Please see the full Feature Engineering in the notebook on my [github](https://github.com/thabied/Solar-Energy-Antwerp)

I've uploaded the edited dataframe from this notebook into Kaggle as my initial work was done on Colab.

Summary of edits:
* combined weather and solar datasets
* shifted every entry in the solar dataset a day back since readings were taken the morning after
* applied encoding on categorical features where necessary

In [None]:
!pip install pycaret

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pycaret
import pickle

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from xgboost import XGBRegressor
from xgboost import Booster
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, cross_val_predict, cross_validate, GridSearchCV, RandomizedSearchCV

# **LOAD EDITED DATAFRAME**

In [None]:
# Load the pre-engineered dataset and drop the 'Unnamed: 0' column
# (presumably the index written out when the frame was saved — verify).
df = pd.read_csv('/kaggle/input/df-edited/dfnew.csv').drop(columns=['Unnamed: 0'])

# **EXPLORATORY DATA ANALYSIS**

In [None]:
# Scatter each weather feature against cum_power on a 2x3 grid of axes.
weather_features = ['temp', 'weather', 'wind', 'humidity', 'barometer', 'visibility']

fig, ax = plt.subplots(2, 3, figsize=(18, 10))

# ax.flat walks the grid row by row, matching the order of the feature list.
for axis, feature in zip(ax.flat, weather_features):
    sns.scatterplot(x=feature, y='cum_power', data=df, ax=axis, color='g')

plt.show()

In [None]:
# One KDE joint plot of each weather feature against cum_power.
plots = [
    sns.jointplot(x=feature_name, y='cum_power', data=df, kind='kde', color='g')
    for feature_name in ['temp', 'weather', 'wind', 'humidity', 'barometer', 'visibility']
]

# Keep the individual grid handles available under their original names.
a, b, c, d, e, f = plots

# plt.show() displays every open figure, so one call is enough; the previous
# loop called it once per plot without ever using the loop variable.
plt.show()

In [None]:
# Annotated heatmap of the pairwise column correlations of df.
plt.figure(figsize=(18, 10))

corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1)

plt.show()

In [None]:
# 3D scatter: temp (x) vs humidity (y) vs cum_power (z), coloured by cum_power.
fig = plt.figure(figsize=(16, 9))
ax = plt.axes(projection="3d")

# Pass the on/off flag positionally: the `b` keyword was renamed to `visible`
# in Matplotlib 3.5, and the positional form is valid before and after that.
ax.grid(True, color='grey',
        linestyle='-.', linewidth=0.3,
        alpha=0.2)

# NOTE(review): this colormap is created but never passed to scatter3D, so the
# points use Matplotlib's default colormap. Add `cmap=my_cmap` below if 'hsv'
# is what was intended.
my_cmap = plt.get_cmap('hsv')

sctt = ax.scatter3D(df['temp'], df['humidity'], df['cum_power'],
                    alpha=0.8,
                    c=df['cum_power'])

# The title names the columns that are actually plotted (it previously said
# weather/visibility).
plt.title('3D plot of Temp vs Humidity vs Cum_Power with color legend of cum_power')
ax.set_xlabel('temp', fontweight='bold')
ax.set_ylabel('humidity', fontweight='bold')
ax.set_zlabel('cum_power', fontweight='bold')
fig.colorbar(sctt, ax=ax, shrink=0.5, aspect=5)

plt.show()

In [None]:
# 3D scatter: temp (x) vs month (y) vs humidity (z), coloured by cum_power.
fig = plt.figure(figsize=(16, 9))
ax = plt.axes(projection="3d")

# Pass the on/off flag positionally: the `b` keyword was renamed to `visible`
# in Matplotlib 3.5, and the positional form is valid before and after that.
ax.grid(True, color='grey',
        linestyle='-.', linewidth=0.3,
        alpha=0.2)

# NOTE(review): this colormap is created but never passed to scatter3D, so the
# points use Matplotlib's default colormap. Add `cmap=my_cmap` below if 'hsv'
# is what was intended.
my_cmap = plt.get_cmap('hsv')

sctt = ax.scatter3D(df['temp'], df['month'], df['humidity'],
                    alpha=0.8,
                    c=df['cum_power'])

# The title names the columns that are actually plotted (it previously said
# weather/visibility).
plt.title('3D plot of Temp vs Month vs Humidity with colour legend of Cum_Power')
ax.set_xlabel('temp', fontweight='bold')
ax.set_ylabel('month', fontweight='bold')
ax.set_zlabel('humidity', fontweight='bold')
fig.colorbar(sctt, ax=ax, shrink=0.5, aspect=5)

plt.show()

In [None]:
# 3D scatter: weather (x) vs visibility (y) vs cum_power (z), coloured by cum_power.
fig = plt.figure(figsize=(16, 9))
ax = plt.axes(projection="3d")

# Pass the on/off flag positionally: the `b` keyword was renamed to `visible`
# in Matplotlib 3.5, and the positional form is valid before and after that.
ax.grid(True, color='grey',
        linestyle='-.', linewidth=0.3,
        alpha=0.2)

# NOTE(review): this colormap is created but never passed to scatter3D, so the
# points use Matplotlib's default colormap. Add `cmap=my_cmap` below if 'hsv'
# is what was intended.
my_cmap = plt.get_cmap('hsv')

sctt = ax.scatter3D(df['weather'], df['visibility'], df['cum_power'],
                    alpha=0.8,
                    c=df['cum_power'])

plt.title('3D plot of Weather vs Visibility vs Cum_Power with color legend of cum_power')
# The x axis carries df['weather'] (it was previously labelled 'humidity').
ax.set_xlabel('weather', fontweight='bold')
ax.set_ylabel('visibility', fontweight='bold')
ax.set_zlabel('cum_power', fontweight='bold')
fig.colorbar(sctt, ax=ax, shrink=0.5, aspect=5)

plt.show()

In [None]:
# 3D scatter: humidity (x) vs visibility (y) vs cum_power (z), coloured by cum_power.
fig = plt.figure(figsize=(16, 9))
ax = plt.axes(projection="3d")

# Pass the on/off flag positionally: the `b` keyword was renamed to `visible`
# in Matplotlib 3.5, and the positional form is valid before and after that.
ax.grid(True, color='grey',
        linestyle='-.', linewidth=0.3,
        alpha=0.2)

# NOTE(review): this colormap is created but never passed to scatter3D, so the
# points use Matplotlib's default colormap. Add `cmap=my_cmap` below if 'hsv'
# is what was intended.
my_cmap = plt.get_cmap('hsv')

sctt = ax.scatter3D(df['humidity'], df['visibility'], df['cum_power'],
                    alpha=0.8,
                    c=df['cum_power'])

plt.title('3D plot of Humidity vs Visibility with colour legend of Cum_Power')
# Axis labels follow the columns passed to scatter3D above (they previously
# read weather / month / visibility, which did not match the data).
ax.set_xlabel('humidity', fontweight='bold')
ax.set_ylabel('visibility', fontweight='bold')
ax.set_zlabel('cum_power', fontweight='bold')
fig.colorbar(sctt, ax=ax, shrink=0.5, aspect=5)

plt.show()

# **MODEL DEVELOPMENT AND TRAINING**

In [None]:
# Target as a 1-D Series: several scikit-learn estimators emit a
# DataConversionWarning when y is passed as a single-column frame.
y = df['cum_power']
xt = df[['year', 'month', 'day', 'temp', 'weather', 'wind', 'humidity', 'barometer', 'visibility']]

# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only. Fitting the scaler on every row lets test-set statistics leak
# into the features the models are trained on.
xtrain_raw, xtest_raw, ytrain, ytest = train_test_split(
    xt, y, test_size=0.30, random_state=42
)

scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain_raw)
xtest = scaler.transform(xtest_raw)

# Whole feature matrix on the training scale, kept under its original name.
x = scaler.transform(xt)

In [None]:
def rmse_test(model, X=None, y=None):
    """Return the per-fold RMSE of *model* under 5-fold cross-validation.

    Args:
        model: Unfitted estimator accepted by ``cross_val_score``.
        X: Feature matrix to cross-validate on. When omitted, the notebook's
            current ``xtrain`` is used.
        y: Target values aligned with ``X``. When omitted, the notebook's
            current ``ytrain`` is used.

    Returns:
        Array of five RMSE values, one per fold.
    """
    # Resolve the defaults at call time so a re-run of the split cell is picked
    # up; a default bound in the signature would freeze the first xtrain.
    if X is None:
        X = xtrain
    if y is None:
        y = ytrain

    # The scorer returns negated MSE (higher is better), so flip the sign
    # before taking the square root. X is now actually used; previously the
    # argument was ignored in favour of the global xtrain.
    neg_mse = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5)
    return np.sqrt(-neg_mse)

## **LASSO CV**

In [None]:
# Lasso regression that selects its regularisation strength with 5-fold CV.
lasso = LassoCV(random_state=42, cv=5)

In [None]:
# Mean of the per-fold RMSE values returned by rmse_test for the Lasso model.
print("RMSE score for Lasso:")
print(rmse_test(lasso).mean())

In [None]:
# Fit on the training split. fit() returns the estimator itself, so
# lasso_model and lasso refer to the same fitted object.
lasso_model = lasso.fit(xtrain, ytrain)

In [None]:
# RMSE on the held-out test split; the bare expression on the last line is the
# value the notebook cell displays.
ypred = lasso_model.predict(xtest)
np.sqrt(mean_squared_error(ytest, ypred))

## **ELASTIC NET**

In [None]:
# Elastic-net regression that selects its regularisation strength with 5-fold CV.
elastic_net = ElasticNetCV(random_state=42, cv=5)

In [None]:
# Mean of the per-fold RMSE values returned by rmse_test for the elastic-net model.
print("RMSE score for Elastic Net:")
print(rmse_test(elastic_net).mean())

In [None]:
# Fit on the training split. fit() returns the estimator itself, so
# elastic_model and elastic_net refer to the same fitted object.
elastic_model = elastic_net.fit(xtrain, ytrain)

In [None]:
# RMSE on the held-out test split; the bare expression on the last line is the
# value the notebook cell displays.
ypred = elastic_model.predict(xtest)
np.sqrt(mean_squared_error(ytest, ypred))

## **RANDOM FOREST**

In [None]:
# Random forest with library-default hyperparameters and a fixed seed.
rf = RandomForestRegressor(random_state=42)

In [None]:
# Mean of the per-fold RMSE values returned by rmse_test for the random forest.
print("RMSE score for Random Forest:")
print(rmse_test(rf).mean())

In [None]:
# Fit on the training split. fit() returns the estimator itself, so rf_model
# and rf refer to the same fitted object.
rf_model = rf.fit(xtrain,ytrain)

In [None]:
# RMSE on the held-out test split; the bare expression on the last line is the
# value the notebook cell displays.
ypred = rf_model.predict(xtest)
np.sqrt(mean_squared_error(ytest, ypred))

## **XGBOOST**

In [None]:
# XGBoost regressor with library-default hyperparameters and a fixed seed.
xgboost = XGBRegressor(random_state=42)

In [None]:
# Mean of the per-fold RMSE values returned by rmse_test for the XGBoost model.
print("RMSE score for XGBoost:")
print(rmse_test(xgboost).mean())

In [None]:
# Fit on the training split. fit() returns the estimator itself, so xgb_model
# and xgboost refer to the same fitted object.
xgb_model = xgboost.fit(xtrain, ytrain)

In [None]:
# RMSE on the held-out test split; the bare expression on the last line is the
# value the notebook cell displays.
ypred = xgb_model.predict(xtest)
np.sqrt(mean_squared_error(ytest, ypred))

## **HYPERPARAMETER TUNING ON BEST PERFORMING MODEL**

In [None]:
# XGBoost with explicitly chosen learning_rate and n_estimators. This rebinds
# `xgboost`, replacing the default-parameter regressor created earlier.
xgboost = XGBRegressor(learning_rate=0.1,n_estimators=200,random_state=42)

In [None]:
# Mean of the per-fold RMSE values returned by rmse_test for the re-parameterised
# XGBoost model.
print("RMSE score for XGBoost:")
print(rmse_test(xgboost).mean())

In [None]:
# Fit on the training split; rebinds xgb_model, which the evaluation cells
# below read.
xgb_model = xgboost.fit(xtrain, ytrain)

In [None]:
# RMSE on the held-out test split; the bare expression on the last line is the
# value the notebook cell displays.
ypred = xgb_model.predict(xtest)
np.sqrt(mean_squared_error(ytest, ypred))

# **MODEL EVALUATION**

In [None]:
# Display the fitted model's per-feature importance scores, one per column of xt.
xgb_model.feature_importances_

In [None]:
# One-row frame of the model's feature importances, one column per input
# feature. Using -1 lets NumPy infer the column count, so the shape no longer
# has to be edited by hand when the feature list changes.
features = np.reshape(xgb_model.feature_importances_, (1, -1))
featuredf = pd.DataFrame(features, columns=xt.columns)

In [None]:
# Feature importances from the XGBoost model (one row, one column per feature).
featuredf.head()

In [None]:
# Bar chart of the importances held in featuredf. The values come from
# feature_importances_, not from model coefficients, so the title says
# "Importances".
featuredf.plot(kind='bar', title='Feature Importances', figsize=(15, 6))
plt.show()

# **MODEL DEVELOPMENT WITH PYCARET**

In [None]:
from pycaret.regression import *

In [None]:
import pycaret.regression

# Call setup through its module rather than by bare name. This cell rebinds the
# name `setup` to the call's return value, so a bare `setup(...)` would fail
# with "object is not callable" the second time the cell is executed.
setup = pycaret.regression.setup(
    df, target='cum_power', session_id=123, normalize=True,
    numeric_features=['year', 'month', 'day', 'temp', 'weather', 'wind',
                      'humidity', 'barometer', 'visibility'],
    polynomial_features=True, trigonometry_features=True, feature_interaction=True,
    bin_numeric_features=['weather'],
)

In [None]:
# Column names of the first element returned by setup() — presumably the
# transformed feature frame; this indexing assumes a tuple return value
# (TODO confirm against the installed pycaret version).
setup[0].columns

In [None]:
# Train and cross-validate the estimator pycaret registers under the ID 'gbr'
# (gradient boosting regressor, per the pycaret model list).
regressor = create_model('gbr')

In [None]:
# Draw pycaret's default diagnostic plot for the trained model (no plot type is
# passed; NOTE(review): the default is the residuals plot per pycaret docs — confirm).
plot_model(regressor)

In [None]:
# Train and rank pycaret's available regressors, keeping the three best.
top3 = compare_models(n_select = 3)