# __Solar Radiation Prediction with Linear Regression and SVR__

## *__Data Import and raw data exploration__*

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

In [None]:
# Load the raw solar measurements from the dataset CSV into a DataFrame.
csv_path = '../input/SolarEnergy/SolarPrediction.csv'
solar = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the raw table.
solar.head()

In [None]:
# Summarise the columns of the raw table (dtypes and non-null counts).
solar.info()

## *__Data Cleaning/Processing__*

__To make sense of this data and process it, we will compute the hourly mean values and use those for the predictions__

In [None]:
# Parse the 'Time' column into pandas datetimes; the hour is read from it later.
parsed_time = pd.to_datetime(solar['Time'])
solar['Time'] = parsed_time

In [None]:
# Parse the 'Data' (date) column into pandas datetimes; day and month are read from it later.
parsed_date = pd.to_datetime(solar['Data'])
solar['Data'] = parsed_date

In [None]:
# Re-check the column dtypes after the datetime conversions.
solar.info()

In [None]:
# Derive the calendar features used for the hourly aggregation.
# The vectorised .dt accessor replaces the per-row Python lambdas, and the
# unused module-level `date` variable (only ever shadowed by the lambda
# parameters) is dropped.
solar['Day'] = solar['Data'].dt.day      # day of month, from the date column
solar['Month'] = solar['Data'].dt.month  # month number, from the date column
solar['hour'] = solar['Time'].dt.hour    # hour of day, from the time column

In [None]:
# Preview the table with the new Day / Month / hour columns.
solar.head()

In [None]:
# Discard the timestamp-style columns that are not used as model inputs.
unused_columns = ['UNIXTime', 'Time', 'Data', 'TimeSunRise', 'TimeSunSet']
solar = solar.drop(columns=unused_columns)

In [None]:
# Preview the table after dropping the timestamp columns.
solar.head()

In [None]:
# Average every measurement within each (Month, Day, hour) bucket.
# aggfunc is passed as the string 'mean' rather than np.mean: recent pandas
# releases emit a FutureWarning when a NumPy callable is supplied here, and
# the string form keeps the current (groupby-mean) behaviour.
measurement_columns = [
    'Radiation',
    'Temperature',
    'Pressure',
    'Humidity',
    'WindDirection(Degrees)',
    'Speed',
]
solar_sorted = solar.pivot_table(
    index=['Month', 'Day', 'hour'],
    values=measurement_columns,
    aggfunc='mean',
)

In [None]:
# Display the hourly-mean table, indexed by (Month, Day, hour).
solar_sorted

In [None]:
# Pairwise plots of all hourly-mean variables to eyeball their relationships.
sb.pairplot(solar_sorted)

In [None]:
# Annotated correlation heatmap of the hourly-mean variables.
correlations = solar_sorted.corr()
plt.figure(figsize=(12, 8))
sb.heatmap(correlations, annot=True, cmap='coolwarm')
plt.title('Correlations')

## *Train test split of Data*

In [None]:
# List the available columns before choosing predictors and target.
solar_sorted.columns

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Predictors are the hourly-mean weather measurements; the target is radiation.
feature_columns = ['Humidity', 'Pressure', 'Speed', 'Temperature', 'WindDirection(Degrees)']
X = solar_sorted[feature_columns]
y = solar_sorted['Radiation']

# Hold out 33% of the rows for evaluation. random_state pins the shuffle so the
# split, and therefore every metric computed from it, is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

I am using two different regression algorithms to compare their performance. The correlation between temperature and radiation is obvious, so a simple linear regression should show favourable results. However, we can also use a more complex algorithm
(here, SVR) to see whether non-linear relationships can be established between the parameters.

## *Linear Regression* 

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares linear regression with default settings.
solar_linear = LinearRegression()

In [None]:
# Fit on the training split only, so the test split stays unseen and the
# metrics computed on X_test/y_test measure genuine out-of-sample error.
solar_linear.fit(X_train, y_train)

In [None]:
# Tabulate the fitted coefficient of each predictor, one row per feature.
cdf = pd.DataFrame(
    data=solar_linear.coef_,
    index=X.columns,
    columns=['Coeffecient'],
)
cdf

In [None]:
# Predict radiation for the held-out test rows with the linear model.
predictions = solar_linear.predict(X_test)

In [None]:
# Actual (x-axis) vs. predicted (y-axis) radiation for the linear model.
plt.scatter(x=y_test, y=predictions)

In [None]:
from sklearn import metrics

In [None]:
# Root-mean-squared error of the linear model on the test split.
mse_linear = metrics.mean_squared_error(y_test, predictions)
np.sqrt(mse_linear)

In [None]:
# Coefficient of determination (R^2) of the linear model on the test split.
metrics.r2_score(y_test,predictions)

## *Trying out SVR*

In [None]:
from sklearn.svm import SVR

In [None]:
# Baseline support-vector regressor using scikit-learn's default hyperparameters.
solar_svr = SVR()

In [None]:
# Fit the baseline SVR on the training split.
# NOTE(review): the features are passed unscaled; SVR is usually sensitive to
# feature scale, so consider standardising them first -- confirm.
solar_svr.fit(X_train,y_train)

In [None]:
# Predict radiation for the held-out test rows with the baseline SVR.
predictions_svr = solar_svr.predict(X_test)

In [None]:
# Actual (x-axis) vs. predicted (y-axis) radiation for the baseline SVR.
plt.scatter(x=y_test, y=predictions_svr)

In [None]:
# Root-mean-squared error of the baseline SVR on the test split.
mse_svr = metrics.mean_squared_error(y_test, predictions_svr)
np.sqrt(mse_svr)

In [None]:
# Coefficient of determination (R^2) of the baseline SVR on the test split.
metrics.r2_score(y_test,predictions_svr)

## *SVR Grid Search -Optimization*

In [None]:
# Hyperparameter search space for the RBF-kernel SVR.
# Note: these candidate values were arrived at through iterative trial and
# error while tuning the model; they are the ranges found to work best.
param_grid = {
    'C': [1000, 2500, 5000, 7500, 10000],
    'gamma': [0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Cross-validated grid search over param_grid for an SVR.
# refit=True retrains the best configuration once the search finishes;
# verbose=3 prints progress for each candidate fit.
grid = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    refit=True,
    verbose=3,
)

In [None]:
# Run the grid search using the training split only.
grid.fit(X_train,y_train)

In [None]:
# Hyperparameter combination selected by the grid search.
grid.best_params_

In [None]:
# The refitted estimator built from the selected hyperparameters.
grid.best_estimator_

In [None]:
# Build the tuned SVR from whatever the grid search actually selected rather
# than a hard-coded (C, gamma) pair, so this model stays in sync with the
# search whenever the grid or the train/test split changes.
solar_svr2 = SVR(**grid.best_params_)

In [None]:
# Fit the tuned SVR on the training split.
solar_svr2.fit(X_train,y_train)

In [None]:
# Predict radiation for the held-out test rows with the tuned SVR.
predictions_svr2 = solar_svr2.predict(X_test)

In [None]:
# Actual (x-axis) vs. predicted (y-axis) radiation for the tuned SVR.
plt.scatter(x=y_test, y=predictions_svr2)

In [None]:
# Root-mean-squared error of the tuned SVR on the test split.
mse_svr2 = metrics.mean_squared_error(y_test, predictions_svr2)
np.sqrt(mse_svr2)

In [None]:
# Coefficient of determination (R^2) of the tuned SVR on the test split.
metrics.r2_score(y_test,predictions_svr2)

## *Conclusion*

The SVR, when optimised, yields better predictions than the linear regression.

The results can be summarised as follows:
 
* Linear Regression - RMSE = 190.45 W/m² and R² = 0.6267
* SVR (Optimised) - RMSE = 152.72 W/m² and R² = 0.7491


(or values close to these, depending on the train/test split)
