<a href="https://colab.research.google.com/github/shailavij/ML-Projects/blob/master/WindturbinePowerLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler


import keras 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping 
from keras.wrappers.scikit_learn import KerasRegressor


# Default appearance for every matplotlib figure in the notebook.
mpl.rcParams.update({'figure.figsize': (8, 6), 'axes.grid': False})

# Never truncate DataFrame output: show all columns and all rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
!pip install tensorflow

In [None]:
# Load the raw turbine data from the Colab runtime's /content directory
# and preview the first ten rows.
df= pd.read_csv('/content/Turbine_Data.csv')
df.head(10)

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Work on a copy of the first 40,000 rows. The 'Unnamed: 0' column is parsed
# as datetimes and renamed to 'date_column'.
df_updated = (
    df.iloc[:40000]
    .copy()
    .rename(columns={'Unnamed: 0': 'date_column'})
    .assign(date_column=lambda frame: pd.to_datetime(frame['date_column']))
)
df_updated.head(2)

In [None]:
# (rows, columns) of the 40,000-row working subset.
df_updated.shape

In [None]:
# Blade3PitchAngle is dropped only when it is an exact duplicate of
# Blade2PitchAngle; otherwise the frame is left untouched.
if df_updated['Blade2PitchAngle'].equals(df_updated['Blade3PitchAngle']):
    df_updated = df_updated.drop(columns='Blade3PitchAngle')
df_updated.info()

In [None]:
# Count missing values per column. Note this inspects the raw frame `df`,
# not the `df_updated` subset that is cleaned in the following cells.
df.isnull().sum()

In [None]:
# (rows, columns) of the raw frame, for reference next to the null counts.
df.shape

In [None]:
# Fill missing values by carrying the last observation forward, then
# back-filling whatever is still missing at the start of the series.
# .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
df_updated = df_updated.ffill().bfill()
# Remaining missing values per column after filling.
df_updated.isnull().sum()

In [None]:
# Correlation heatmap over the numeric columns only. The frame also holds the
# datetime 'date_column', which DataFrame.corr() no longer drops silently in
# pandas >= 2.0, so non-numeric columns are excluded explicitly.
sns.heatmap(df_updated.select_dtypes(include='number').corr())

In [None]:
# Keep the timestamp, the chosen predictor columns and the target
# ('ActivePower', deliberately placed last).
df1 = df_updated.loc[:, [
    'date_column',
    'WindSpeed',
    'GeneratorRPM',
    'ReactivePower',
    'RotorRPM',
    'AmbientTemperatue',
    'WindDirection',
    'Blade1PitchAngle',
    'Blade2PitchAngle',
    'HubTemperature',
    'MainBoxTemperature',
    'GearboxBearingTemperature',
    'GearboxOilTemperature',
    'NacellePosition',
    'ActivePower',
]].copy()

In [None]:
# Correlation heatmap of the selected features. 'date_column' is a datetime
# column, so only numeric columns are passed to corr() (required on
# pandas >= 2.0, where non-numeric columns are no longer dropped silently).
sns.heatmap(df1.select_dtypes(include='number').corr())

In [None]:
# Independent copy of the selected features; later cells plot from it and
# re-index it without touching df1.
df2=df1.copy()

In [None]:

# Interactive line plot of ActivePower over time, with a range slider and
# 1/2/3-year "look back" buttons plus an "all" button.
fig = px.line(df2, x='date_column', y='ActivePower', title='windpower with slider')

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            *(dict(count=years, label=f'{years}y', step='year', stepmode="backward")
              for years in (1, 2, 3)),
            dict(step='all'),
        ]
    ),
)
fig.show()

In [None]:
# Interactive line plot of GearboxOilTemperature over time, with a range
# slider and 1/2/3-year "look back" buttons plus an "all" button.
# The title previously read 'windpower with slider' (copied from the
# ActivePower plot) and did not match the plotted column.
fig = px.line(df2, x='date_column', y='GearboxOilTemperature',
              title='gearbox oil temperature with slider')

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            *(dict(count=years, label=f'{years}y', step='year', stepmode="backward")
              for years in (1, 2, 3)),
            dict(step='all'),
        ]
    ),
)
fig.show()

In [None]:
# Positional sub-range (rows 3000 up to, not including, 16000) used for the
# pairplot below.
df3 = df2.iloc[3000:16000]
df3.shape

In [None]:
# Same rows as df3 without the timestamp column: predictors plus the target.
df4 = df3.loc[:, [
    'WindSpeed',
    'GeneratorRPM',
    'ReactivePower',
    'RotorRPM',
    'AmbientTemperatue',
    'WindDirection',
    'Blade1PitchAngle',
    'Blade2PitchAngle',
    'HubTemperature',
    'MainBoxTemperature',
    'GearboxBearingTemperature',
    'GearboxOilTemperature',
    'NacellePosition',
    'ActivePower',
]]

In [None]:
# Preview the first two rows of the pairplot input.
df4.head(2)

In [None]:
# Pairwise scatter plots of every column in df4 against every other column.
# NOTE(review): seaborn and pyplot are already imported in the first cell;
# the re-imports here are redundant.
import seaborn as sns
sns.set(rc={'figure.figsize':(100,100)})

# NOTE(review): presumably this second sns.set call resets the rc settings
# passed in the call above — confirm whether the figsize still applies.
sns.set(style="ticks", color_codes=True)
g = sns.pairplot(df4)


import matplotlib.pyplot as plt
plt.show()

From the pairplot, features such as ReactivePower and WindSpeed are highly correlated with the output feature 'ActivePower'.

Increases in NacellePosition and WindDirection are also important features to consider for 'ActivePower'.

In [None]:
# Use the timestamp as the index so that only model columns remain as data.
# Guarded and non-mutating: re-running the cell is a no-op instead of raising
# KeyError because 'date_column' has already been moved into the index.
if 'date_column' in df2.columns:
    df2 = df2.set_index('date_column')
df2.head(2)

In [None]:
# (rows, columns) of the frame that is split and scaled below.
df2.shape

**Split the data**

We will use 70%, 20%, 10% split for the training, validation and test sets.

In [None]:
# Column name -> position in the modelling frame. This was previously built
# from the raw `df`, whose columns do not match the data that is split here.
column_indices = {name: i for i, name in enumerate(df2.columns)}

# Chronological (unshuffled) 70% / 20% / 10% split into train, validation
# and test sets.
n = len(df2)
print(n)
train_df = df2.iloc[0:int(n*0.7)]
val_df = df2.iloc[int(n*0.7):int(n*0.9)]
test_df = df2.iloc[int(n*0.9):]

# Number of columns (predictors plus target) in the modelling frame.
num_features = df2.shape[1]
print(num_features)

In [None]:
# Shapes of the three splits, in train / validation / test order.
print(train_df.shape)
print(val_df.shape)
print(test_df.shape)

In [None]:
# Rescale every column to the range [-1, 1] (chosen by the author because the
# LSTM's default activation is tanh). The scaler is fitted on the training
# split only and then applied unchanged to all three splits.
from sklearn.preprocessing import MinMaxScaler
SCALER = MinMaxScaler(feature_range=(-1,1))

# fit() returns the estimator itself, so `scaler` and `SCALER` are the same object.
scaler = SCALER.fit(train_df.to_numpy())

train_scaled, test_scaled, val_scaled = (
    scaler.transform(split.to_numpy()) for split in (train_df, test_df, val_df)
)

In [None]:
# Window length used to split the datasets into 15-day chunks:
# 15 days * 24 hours * 6 samples per hour (one sample every 10 minutes).
timestep = 15*24*6

def create_dataset(dataset, timestep=timestep, target_col=-1):
    """
    Slice a 2-D array into overlapping input windows and next-step targets.

    For every start row ``i`` the window is rows ``i .. i + timestep - 1`` of
    all columns except ``target_col``, and the target is the value of
    ``target_col`` in row ``i + timestep``.

    The target defaults to the LAST column. The pipeline's ``prior_inverse``
    appends the target after the feature columns and ``experiment`` reads it
    back from the last column after ``inverse_transform``, so the target has
    to occupy the last column of the scaled data. The earlier version read
    column 0 instead, which did not match.

    Parameters
    ----------
    dataset : array-like, shape (rows, columns)
    timestep : int
        Number of rows per input window.
    target_col : int
        Column holding the value to predict (negative indices allowed).

    Returns
    -------
    X : np.ndarray, shape (rows - timestep, timestep, columns - 1)
    y : np.ndarray, shape (rows - timestep,)
        Both are empty arrays when the dataset has no more than ``timestep``
        rows (previously a shorter dataset raised IndexError).
    """
    dataset = np.asarray(dataset)
    n_windows = len(dataset) - timestep
    if n_windows <= 0:
        return np.array([]), np.array([])

    # Every column except the target is a model input.
    features = np.delete(dataset, target_col, axis=1)
    X = np.array([features[start:start + timestep] for start in range(n_windows)])
    # The target for window `start` is the row right after that window.
    y = dataset[timestep:, target_col].copy()

    return X, y

In [None]:
# Window the scaled train and test splits into (X, y) pairs.
# NOTE(review): val_scaled is not windowed here; create_model carves its
# validation data out of the training windows via validation_split — confirm
# this is intended.
X_train, y_train = create_dataset(train_scaled)
X_test, y_test = create_dataset(test_scaled)

In [None]:
# Shapes of the windowed inputs and targets for train and test.
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

Model Creation

In [None]:
# Build and train a stacked (two-layer) LSTM regressor.
# Keras expects the input shaped [samples, timesteps, features].
def create_model(X_train, y_train, units=32, dropout=0.05, epochs=35,
                 batch_size=14, learning_rate=0.0005, patience=7,
                 validation_split=0.3):
    """
    Build, compile and fit a two-layer LSTM followed by a single Dense output.

    Parameters
    ----------
    X_train : array shaped (samples, timesteps, features)
    y_train : array with one target per sample
    units : int
        Hidden units in each LSTM layer.
    dropout : float
        Input dropout applied in each LSTM layer.
    epochs, batch_size : int
        Passed to ``model.fit``.
    learning_rate : float
        Learning rate of the Adam optimizer.
    patience : int
        Epochs without improvement before early stopping ends training.
    validation_split : float
        Fraction of the training data held out by ``model.fit`` for validation.
        With ``shuffle=False`` the data is fed in its original order.

    The defaults reproduce the values that used to be hard-coded in the body.

    Returns
    -------
    (model, history) : the fitted Keras model and the History object returned
    by ``model.fit``.
    """
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    # Stop on the held-out loss when a validation split exists; monitoring the
    # training loss (as before) does not react to overfitting. Fall back to
    # 'loss' when no validation data is requested, since 'val_loss' would not
    # be reported then.
    monitor = 'val_loss' if validation_split else 'loss'
    early_stopping = EarlyStopping(patience=patience, monitor=monitor)

    model = keras.Sequential()

    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(units=units, dropout=dropout, return_sequences=True,
                   input_shape=(X_train.shape[1], X_train.shape[2])))

    model.add(LSTM(units=units, dropout=dropout))

    # Single scalar output: the predicted (scaled) target value.
    model.add(Dense(units=1))

    model.compile(optimizer=optimizer, loss='mean_squared_error')
    history = model.fit(X_train, y_train, validation_split=validation_split, shuffle=False,
              epochs=epochs, batch_size=batch_size, verbose=1, callbacks=[early_stopping])

    return model, history

In [None]:
# function to predict a single value
def single_prediction(model, history, timestep=timestep):
    """
    Predict one value from the most recent ``timestep`` rows of ``history``.

    ``history`` is a sequence of 2-D windows (rows x features). Conceptually the
    windows are stacked end to end and the last ``timestep`` rows are fed to the
    model as a single sample of shape (1, rows, features).

    Only the trailing windows needed to supply ``timestep`` rows are
    concatenated. The previous version converted the entire history to one
    dense array on every call, which is very expensive for a long history and
    yields the same input.

    Returns whatever ``model.predict`` returns for that single sample.
    """
    if len(history) == 0:
        raise ValueError("history must contain at least one window")

    # Walk backwards until enough rows have been collected.
    tail, rows = [], 0
    for window in reversed(history):
        window = np.asarray(window)
        tail.append(window)
        rows += window.shape[0]
        if rows >= timestep:
            break

    # Restore chronological order before stacking, then keep the last rows.
    stacked = np.concatenate(tail[::-1], axis=0)
    input_value = stacked[-timestep:]
    input_value = input_value.reshape(1, input_value.shape[0], input_value.shape[1])

    yhat = model.predict(input_value, verbose=0)
    return yhat

In [None]:
# Train once, then step through the test windows in order: each window is
# added to the running history and a prediction is made from it.
def walk_forward_prediction(X_train, y_train, X_test, timestep):
    """
    Fit a model on the training windows and predict once per test window.

    Each test window is appended to the history BEFORE predicting, so
    prediction ``i`` is made from ``X_test[i]`` — the window whose target is
    ``y_test[i]``. Previously the window was appended after predicting, so
    prediction ``i`` was made from ``X_test[i - 1]`` (and the first one from
    the last training window), shifting every prediction by one step against
    the targets it is compared with.

    Returns
    -------
    predictions : list with one ``single_prediction`` result per test window
    history : the History object returned by ``create_model``
    MODEL : the fitted model
    """
    MODEL, history = create_model(X_train=X_train, y_train=y_train)
    hist_train = list(X_train)
    predictions = []

    for test in X_test:
        hist_train.append(test)
        yhat = single_prediction(model=MODEL, history=hist_train, timestep=timestep)
        predictions.append(yhat)

    return predictions, history, MODEL

In [None]:
def prior_inverse(features, targets):
    '''
    Build one flat row per window: the first row of that window followed by
    the matching target value as the final element.

    features is indexed as features[i][0] (window i, first row) and targets as
    targets[i]; the rows are returned stacked in a numpy array.
    '''
    rows = [
        np.append(features[idx][0], targets[idx])
        for idx in range(features.shape[0])
    ]
    return np.array(rows)

In [None]:
# Run the experiment and return the real and predicted values.
def experiment(X_train, y_train, X_test, timestep, y_true=None):
    """
    Train, predict over the test windows and map results back to original units.

    Parameters
    ----------
    X_train, y_train : training windows and targets
    X_test : test windows
    timestep : int, passed through to ``walk_forward_prediction``
    y_true : array of true (scaled) targets aligned with ``X_test``.
        Optional for backward compatibility: when omitted, the notebook-level
        ``y_test`` is used, which is what this function silently read before.

    Uses the notebook-level fitted ``scaler`` for the inverse transform.

    Returns
    -------
    power_real, power_pred : last column of the inverse-transformed real and
        predicted rows
    history : the training History object
    MODEL : the fitted model
    """
    if y_true is None:
        # Fallback to the global keeps existing four-argument calls working.
        y_true = y_test

    pred_seq, history, MODEL = walk_forward_prediction(X_train, y_train, X_test, timestep)

    # Each prediction comes back as a small array; flatten to one value per window.
    pred_seq = np.array(pred_seq).reshape(-1)

    # Rebuild full-width rows so the scaler can be inverted.
    pred = prior_inverse(X_test, pred_seq)
    real = prior_inverse(X_test, y_true)

    inv_pred = scaler.inverse_transform(pred)
    inv_real = scaler.inverse_transform(real)

    # The target was appended as the final column by prior_inverse.
    power_pred = inv_pred[:,-1]
    power_real = inv_real[:,-1]

    return power_real, power_pred, history, MODEL

In [None]:
# Train the model and collect real vs. predicted values.
power_real, power_pred, history, MODEL = experiment(X_train, y_train, X_test, timestep)

# Per-epoch training and validation loss recorded during fitting.
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
# Plot the training and validation loss per epoch and save the figure.
plt.figure(figsize=(10,5))
plt.plot(loss, label='train')
plt.plot(val_loss, label='validation')
plt.legend()
plt.xlabel('epochs')
plt.ylabel('MSE')
plt.title('LSTM Training Validation Loss')
plt.tight_layout()
# savefig does not create missing directories; make sure 'figures/' exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/train_val_plot.png')
plt.show()

In [None]:
# Stand-alone toy example of TimeseriesGenerator on the integers 0..49; it does
# not use the turbine data.
# NOTE(review): TimeseriesGenerator is presumably deprecated or unavailable in
# recent Keras releases — confirm against the installed version.
from keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np
# Data and targets are the same column vector of consecutive integers.
data = np.array([[i] for i in range(50)])
targets = np.array([[i] for i in range(50)])
data_gen = TimeseriesGenerator(data, targets,
                               length=10, sampling_rate=2,
                               batch_size=2)

# The generator is expected to yield 20 batches.
assert len(data_gen) == 20
batch_0 = data_gen[0]
x, y = batch_0
# First batch: two windows of every second value (five steps each) ...
assert np.array_equal(x,
                      np.array([[[0], [2], [4], [6], [8]],
                                [[1], [3], [5], [7], [9]]]))
# ... whose targets are the values at positions 10 and 11.
assert np.array_equal(y,
                      np.array([[10], [11]]))

In [None]:
# Shape of the first batch's inputs.
x.shape

In [None]:
# Shape of the first batch's targets.
y.shape

In [None]:
# The first (inputs, targets) batch produced by the generator.
data_gen[0]

In [None]:
# Number of batches the generator yields.
len(data_gen)