In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import seaborn as sns

In [None]:
# Read the raw CSV into a DataFrame and echo it as the cell output.
csv_path = 'T1.csv'
dataset = pd.read_csv(csv_path)
dataset

In [None]:
# Per-column flag: True where the column holds at least one missing value.
dataset.isna().any()

In [None]:

# Plot the measured output and the theoretical curve against wind speed.
wind_speed = dataset['Wind Speed (m/s)']
exp = dataset['LV ActivePower (kW)']
the = dataset['Theoretical_Power_Curve (KWh)']

plt.figure(figsize=(25, 10))
plt.plot(wind_speed, exp, 'o', label='Real Power')
plt.plot(wind_speed, the, '.', label='theoretical_power_curve (kwh)')
plt.xlabel('wind speed (m/s)', size=15)
plt.ylabel('Power Production (kw)', size=15)
plt.title('Wind Turbine Power Production Prediction')
plt.legend(fontsize=15)
plt.show()

In [None]:
# Importing a visualization library
! pip install windrose

In [None]:
from windrose import WindroseAxes

# Wind-rose bar chart of wind speed by wind direction.
wind_direction = dataset['Wind Direction (°)']
wind_speed = dataset['Wind Speed (m/s)']

ax = WindroseAxes.from_ax()
ax.bar(wind_direction, wind_speed, normed=True, opening=0.8, edgecolor='white')
ax.set_legend()
plt.title("Wind Direction (°) VS Wind Speed (m/s)")
plt.show()

In [None]:
# Histogram of the measured active power using 90 bins.
active_power = dataset['LV ActivePower (kW)']
active_power.hist(bins=90)

In [None]:
# Box plots of the DataFrame's columns on a 10x7 inch figure.
box_figsize = (10, 7)
plt.figure(figsize=box_figsize)
dataset.boxplot()

In [None]:
# Wind direction plotted against wind speed, one point per row.
plt.scatter(x=dataset['Wind Speed (m/s)'], y=dataset['Wind Direction (°)'])

In [None]:
# Remove the timestamp column. Rebinding with errors='ignore' (instead of an
# in-place drop) keeps the cell re-runnable: a second execution no longer
# raises KeyError because the column is already gone.
dataset = dataset.drop(columns=['Date/Time'], errors='ignore')

In [None]:
# Distribution of wind direction with missing values removed.
# `sns.distplot` is deprecated; `histplot` with a KDE overlay on a density
# scale is the replacement recommended by seaborn.
sns.histplot(dataset['Wind Direction (°)'].dropna(), kde=True, stat='density')

In [None]:
# Remove the wind-direction column. Rebinding with errors='ignore' (instead of
# an in-place drop) keeps the cell re-runnable: a second execution no longer
# raises KeyError because the column is already gone.
dataset = dataset.drop(columns=['Wind Direction (°)'], errors='ignore')

In [None]:
# Summary statistics of the remaining columns, shown as the cell output.
summary_stats = dataset.describe()
summary_stats

In [None]:
# Shorten the column names in place, then draw pairwise scatter plots.
# Mapping keys that are not present among the columns are ignored by rename.
column_aliases = {
    'Date/Time': 'Time',
    'LV ActivePower (kW)': 'ActivePower(kW)',
    "Wind Speed (m/s)": "WindSpeed(m/s)",
    "Wind Direction (°)": "Wind_Direction",
}
dataset.rename(columns=column_aliases, inplace=True)
sns.pairplot(dataset)

#The heat map clearly tells us that there's no relation between wind direction and
the power generated, but wind speed, theoretical power and actual power generated
have a strong positive correlation

In [None]:
# Pairwise correlation of the columns, drawn as an annotated heat map.
corr = dataset.corr()

# The figure is created in the same cell as the heat map: with the inline
# backend a figure opened in an earlier cell is rendered (empty) and closed
# when that cell finishes, so its figsize would never reach the heat map.
plt.figure(figsize=(10, 8))
ax = sns.heatmap(corr, vmin=-1, vmax=1, annot=True)

# Pin the y-limits to the full grid explicitly. Shifting the current limits by
# +/-0.5 only compensates for the matplotlib release that cropped the first and
# last rows; on other releases it adds blank half-rows instead.
ax.set_ylim(len(corr), 0)
plt.show()
corr

In [None]:
#sns.distplot(dataset)

In [None]:
# Box plot of wind speed. The Series is passed as `x=` explicitly: newer
# seaborn releases treat a positional argument as `data`, which changes the
# orientation of the plot.
sns.boxplot(x=dataset['WindSpeed(m/s)'])

In [None]:
# Box plot of measured active power. The Series is passed as `x=` explicitly:
# newer seaborn releases treat a positional argument as `data`, which changes
# the orientation of the plot.
sns.boxplot(x=dataset['ActivePower(kW)'])

In [None]:
# Box plot of the theoretical power curve. The Series is passed as `x=`
# explicitly: newer seaborn releases treat a positional argument as `data`,
# which changes the orientation of the plot.
sns.boxplot(x=dataset['Theoretical_Power_Curve (KWh)'])

In [None]:
from scipy import stats

In [None]:
# Print a concise summary of the DataFrame before computing z-scores on it.
dataset.info()

In [None]:
# Absolute z-score of every value (zscore standardises along axis 0 by default).
z = np.absolute(stats.zscore(dataset))
z

In [None]:
# Positions of all values whose absolute z-score exceeds the cut-off.
threshold = 3
exceeds_threshold = z > threshold
np.where(exceeds_threshold)

In [None]:
# Keep only the rows in which every column's absolute z-score is within the
# cut-off. Uses `threshold` rather than a repeated literal so the filter stays
# in sync with the outlier inspection cell.
dataset_withoutlayers = dataset[(z <= threshold).all(axis=1)]
dataset_withoutlayers

In [None]:
# Feature matrix: the first two columns of the outlier-filtered frame.
# NOTE(review): selection is positional, so which columns these are depends on
# the CSV column order. If the first column is the measured active power, the
# model is given measured power as an input - confirm this is intended.
x = dataset_withoutlayers.iloc[:, :2].to_numpy()
x

In [None]:
# Target vector: the last column of the outlier-filtered frame.
# NOTE(review): selection is positional - presumably this is the theoretical
# power curve rather than the measured power; confirm against the CSV layout.
y = dataset_withoutlayers.iloc[:, -1].to_numpy()
y

In [None]:
# Target plotted against the second feature column.
plt.scatter(x=x[:, 1], y=y)

In [None]:
# Disabled feature scaling, kept inside a string literal so it does not run.
# As the only expression in the cell, the string itself is echoed as output.
'''
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x=sc.fit_transform(x)
x
'''

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree that splits on mean absolute error. The criterion is spelled
# "absolute_error"; the old alias "mae" was deprecated and later removed from
# scikit-learn, where it now raises an error.
dt = DecisionTreeRegressor(criterion="absolute_error", random_state=0)
dt.fit(x_train, y_train)

In [None]:
# Decision-tree predictions for the held-out rows, echoed as the cell output.
y_pred_decision = dt.predict(X=x_test)
y_pred_decision

In [None]:
from sklearn.metrics import r2_score

# R^2 of the decision tree on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred_decision)

In [None]:
# Overlay actual and predicted targets against the second feature column of
# the test set. Labels, a legend and a title make the two point clouds
# distinguishable and let the figure stand on its own.
plt.figure(figsize=(15, 10))
plt.scatter(x_test[:, 1], y_test, label='Actual')
plt.scatter(x_test[:, 1], y_pred_decision, label='Predicted (decision tree)')
plt.title('Decision tree: actual vs predicted (test set)')
plt.legend()
plt.show()

In [None]:
# Disabled graphviz rendering of the fitted tree, kept inside a string literal
# so it does not run. As the only expression in the cell, the string itself is
# echoed as output.
"""
from six import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
dot_data= StringIO()
export_graphviz(dt,out_file=dot_data,
               filled=True, rounded=True,
               special_characters=True)
graph=pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())
"""

In [None]:
#import joblib
#joblib.dump(dt,'modelrf.save')

# KNN

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# 5-fold grid search over n_neighbors = 1..19 for a k-nearest-neighbours regressor.
knn_param_grid = {'n_neighbors': np.arange(1, 20)}
knn_grid = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=knn_param_grid,
    cv=5,
)

In [None]:
# Run the KNN grid search on the training split.
knn_grid.fit(X=x_train, y=y_train)

In [None]:
# Show the parameter setting that scored best in the KNN grid search.
knn_grid.best_params_

In [None]:
# Build the final KNN regressor from the grid-search winner instead of a
# hard-coded n_neighbors, so the model follows the search result on every run.
# p=2 selects the Euclidean distance.
knn = KNeighborsRegressor(p=2, **knn_grid.best_params_)

In [None]:
# Fit the KNN regressor on the training split.
knn.fit(X=x_train, y=y_train)

In [None]:
# KNN predictions for the held-out rows.
y_pred_knn = knn.predict(X=x_test)

In [None]:
# R^2 of the KNN regressor on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred_knn)

In [None]:
# Overlay actual and predicted targets against the second feature column of
# the test set. Labels, a legend and a title make the two point clouds
# distinguishable and let the figure stand on its own.
plt.figure(figsize=(15, 10))
plt.scatter(x_test[:, 1], y_test, label='Actual')
plt.scatter(x_test[:, 1], y_pred_knn, label='Predicted (KNN)')
plt.title('KNN: actual vs predicted (test set)')
plt.legend()
plt.show()

# Linear regression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit on the training split only. Fitting on the full x, y would include the
# held-out rows, so the R^2 later computed on x_test would be measured on data
# the model has already seen.
regressor = LinearRegression()
regressor.fit(x_train, y_train)

### PyGam Modelling

In [None]:
!pip install pygam

In [None]:
from pygam import LinearGAM, s, f

# GAM with a single spline term, fitted on the second feature column only.
gam_terms = s(0)
gam = LinearGAM(gam_terms).fit(x_train[:, 1], y_train)


In [None]:
 pred_test = gam.predict(x_test[:,1])

In [None]:
# Linear-regression predictions for the held-out rows.
# NOTE(review): despite the `_dt` suffix, these come from `regressor`
# (the LinearRegression model), not from the decision tree.
y_pred_dt = regressor.predict(x_test)

In [None]:
# R^2 of the linear-regression predictions on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred_dt)

In [None]:
import joblib

# Persist the fitted linear-regression model to disk.
lr_model_path = 'modellr.save'
joblib.dump(regressor, lr_model_path)

In [None]:
# Overlay actual and predicted targets against the second feature column of
# the test set. Labels, a legend and a title make the two point clouds
# distinguishable and let the figure stand on its own.
plt.figure(figsize=(15, 10))
plt.scatter(x_test[:, 1], y_test, label='Actual')
plt.scatter(x_test[:, 1], y_pred_dt, label='Predicted (linear regression)')
plt.title('Linear regression: actual vs predicted (test set)')
plt.legend()
plt.show()

In [None]:
# Print the fitted GAM's summary report.
gam.summary()

In [None]:
# Overlay actual and predicted targets against the second feature column of
# the test set. Labels, a legend and a title make the two point clouds
# distinguishable and let the figure stand on its own.
plt.figure(figsize=(15, 10))
plt.scatter(x_test[:, 1], y_test, label='Actual')
plt.scatter(x_test[:, 1], pred_test, label='Predicted (GAM)')
plt.title('GAM: actual vs predicted (test set)')
plt.legend()
plt.show()

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# 5-fold grid search over forest size (1..19 trees) and depth (2..29).
# random_state pins the forests' randomness so the selected parameters are
# reproducible; n_jobs=-1 spreads the 19 * 28 * 5 fits over all CPU cores.
rf_param_grid = {
    'n_estimators': np.arange(1, 20),
    'max_depth': np.arange(2, 30),
}
rf_grid = GridSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    param_grid=rf_param_grid,
    cv=5,
    n_jobs=-1,
)

In [None]:
# Run the random-forest grid search on the training split.
rf_grid.fit(X=x_train, y=y_train)

In [None]:
# Show the parameter setting that scored best in the random-forest grid search.
rf_grid.best_params_

In [None]:
# Build the final forest from the grid-search winner instead of hard-coded
# values, and seed it so repeated runs produce the same model and score.
rf = RandomForestRegressor(random_state=0, **rf_grid.best_params_)
rf.fit(x_train, y_train)

In [None]:
# Random-forest predictions for the held-out rows, echoed as the cell output.
y_pred_rf = rf.predict(X=x_test)
y_pred_rf

In [None]:
# R^2 of the random forest on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred_rf)

In [None]:
#import joblib
#joblib.dump(regressor,'modelrf.save')

In [None]:
# Overlay actual and predicted targets against the second feature column of
# the test set. Labels, a legend and a title make the two point clouds
# distinguishable and let the figure stand on its own.
plt.figure(figsize=(15, 10))
plt.scatter(x_test[:, 1], y_test, label='Actual')
plt.scatter(x_test[:, 1], y_pred_rf, label='Predicted (random forest)')
plt.title('Random forest: actual vs predicted (test set)')
plt.legend()
plt.show()

In [None]:
# Echo the forest's test-set predictions again (same call that produced y_pred_rf).
rf.predict(x_test)

In [None]:
import joblib

# Persist the fitted random-forest model to disk.
rf_model_path = 'modelrf.save'
joblib.dump(rf, rf_model_path)