<a href="https://colab.research.google.com/github/soumyaiitkgp/Prediction-and-modelling-of-wave-height/blob/master/LSTM_4_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import sklearn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from keras import optimizers
from keras.callbacks import CSVLogger
import os
import logging
import time
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
logging.getLogger("tensorflow").setLevel(logging.ERROR)
os.environ['TZ'] = 'Asia/Mumbai'  # to set timezone; needed when running on cloud
time.tzset() 

In [0]:
from google.colab import files
uploaded = files.upload()

In [0]:
data = pd.read_csv('mehamn_data.csv', engine = 'python')
print(data.shape)
data.head()

In [0]:
params = {
    "batch_size": 1440, #processing the information for 30 days at once
    "epochs": 100,
    "lr": 0.0001,
    "time_steps": 120 #prediction for 15 days
}

iter_changes = "dropout_layers_0.4_0.4"

TIME_STEPS = params["time_steps"]
BATCH_SIZE = params["batch_size"]

In [0]:
for col in data.columns:
    print(col)

In [0]:
d = data

In [0]:
d_train,d_test = train_test_split(d,test_size=0.2, shuffle = False)
print(d_train.shape, d_test.shape)
d_train.head()

In [0]:
print(d_test.shape)
d_test.head()

In [0]:
x_train = d_train
x_test = d_test
print(x_train.shape,x_test.shape)

In [0]:
plt.figure()
plt.plot(x_train["total_sea_TP"])
plt.plot(x_train["total_sea_HS"])
plt.show()

In [0]:
#train_cols = ["Month","Day","Hour", "wind_WSP","wind_DIR","total_sea_TP","total_sea_DIRP","wind_sea_TP","wind_sea_DIRP","swell_TP","swell_DIRP"]
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(x_train)
X_test = min_max_scaler.fit_transform(x_test)

In [0]:
type(X_train)
print(X_train.shape)
plt.figure()
plt.plot(X_train[0:,8])
plt.plot(X_train[0:,7])
plt.show()

In [0]:
def build_timeseries(mat, y_col_index):
    # y_col_index is the index of column that would act as output column, here it will be 9
    # total number of time-series samples would be len(mat) - TIME_STEPS
    #TIME_STEPS = 8 for one day
    #LSTMs consume input in format [ batch_size, time_steps, Features ]; a 3- dimensional array.
    #So till now we have a matrix of shape (3, 5), 3 being the time step and 5 being the number of features
    dim_0 = mat.shape[0] - TIME_STEPS
    dim_1 = mat.shape[1]
    x = np.zeros((dim_0, TIME_STEPS, dim_1))
    y = np.zeros((dim_0,))
    print(dim_0,mat.shape[0])
    for i in tqdm_notebook(range(dim_0)):
        #print(i)
        x[i] = mat[i:TIME_STEPS+i]
        #print(mat[TIME_STEPS+i, y_col_index])
        y[i] = mat[TIME_STEPS+i, y_col_index]
    print("length of time-series i/o",x.shape,y.shape)
    return x, y

In [0]:
def trim_dataset(mat, batch_size):
    """
    trims dataset to a size that's divisible by BATCH_SIZE
    """
    no_of_rows_drop = mat.shape[0]%batch_size
    if(no_of_rows_drop > 0):
        return mat[:-no_of_rows_drop]
    else:
        return mat

In [0]:
x_t, y_t = build_timeseries(X_train, 9)
x_t = trim_dataset(x_t, BATCH_SIZE)
y_t = trim_dataset(y_t, BATCH_SIZE)

In [0]:
x_temp, y_temp = build_timeseries(X_test, 9)
x_tt = trim_dataset(x_temp, BATCH_SIZE)
y_tt = trim_dataset(y_temp, BATCH_SIZE)
print(y_tt.shape)
print(x_tt.shape)
x_val, x_test_t = np.split(x_tt,2)
y_val, y_test_t = np.split(y_tt,2)
print(x_val.shape, x_test_t.shape)
print(y_val.shape, y_test_t.shape)
#x_val, x_test_t = np.split(trim_dataset(x_temp, BATCH_SIZE),2)
#y_val, y_test_t = np.split(trim_dataset(y_temp, BATCH_SIZE),2)

In [0]:
lstm_model = Sequential()
lstm_model.add(LSTM(64, batch_input_shape=(BATCH_SIZE, TIME_STEPS, x_t.shape[2]), dropout=0.0, recurrent_dropout=0.0, stateful=True, return_state=True, kernel_initializer='random_uniform'))
lstm_model.add(Dropout(0.2)) 
print(0)
lstm_model.add(LSTM(128, return_state=True))
lstm_model.add(Dropout(0.2)) 
print(1)
lstm_model.add(LSTM(256, return_state=True))
lstm_model.add(Dropout(0.2)) 
print(2)
lstm_model.add(LSTM(128, return_state=True))
lstm_model.add(Dropout(0.2)) 
print(3)
lstm_model.add(LSTM(64))
print(4)
lstm_model.add(Dense(20,activation='relu'))
lstm_model.add(Dense(1,activation='sigmoid'))
optimizer = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
lstm_model.compile(loss='mean_squared_error', optimizer=optimizer)

In [0]:
OUTPUT_PATH = "D:\Acads\Saud sir"
csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'your_log_name' + '.log'), append=True)

history = lstm_model.fit(x_t, y_t, epochs=25, verbose=2, batch_size=BATCH_SIZE,
                    shuffle=False, validation_data=(trim_dataset(x_val, BATCH_SIZE),
                    trim_dataset(y_val, BATCH_SIZE)))#, callbacks=[csv_logger])

In [0]:
# model.evaluate(x_test_t, y_test_t, batch_size=BATCH_SIZE
from sklearn.metrics import mean_squared_error
y_pred = lstm_model.predict(trim_dataset(x_test_t, BATCH_SIZE), batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = trim_dataset(y_test_t, BATCH_SIZE)
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])

In [0]:
# convert the predicted value to range of real data
y_pred_org = (y_pred * min_max_scaler.data_range_[9]) + min_max_scaler.data_min_[9]
# min_max_scaler.inverse_transform(y_pred)
y_test_t_org = (y_test_t * min_max_scaler.data_range_[9]) + min_max_scaler.data_min_[9]
# min_max_scaler.inverse_transform(y_test_t)
print(y_pred_org[0:15])
print(y_test_t_org[0:15])

In [0]:
# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()

In [0]:
y_pred = lstm_model.predict(trim_dataset(x_test_t, BATCH_SIZE), batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
#y_test_t = trim_dataset(y_test_t, BATCH_SIZE)
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = (y_pred * min_max_scaler.data_range_[9]) + min_max_scaler.data_min_[9] # min_max_scaler.inverse_transform(y_pred)
y_test_t_org = (y_test_t * min_max_scaler.data_range_[9]) + min_max_scaler.data_min_[9] # min_max_scaler.inverse_transform(y_test_t)
print(y_pred_org[100:200])
print(y_test_t_org[100:200])

# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
#plt.plot(y_test_t_org)
#plt.plot(y_pred_org)

plt.plot(y_pred_org[0:100])
plt.plot(y_test_t_org[0:100])
plt.plot()
plt.title('Prediction vs Real Stock Price')
plt.ylabel('Price')
plt.xlabel('Days')
plt.legend(['Prediction', 'Real'], loc='upper left')
plt.show()
print("batch_size = ",BATCH_SIZE ,"timesteps =",TIME_STEPS , "lr = 0.001","optimizer = Adam", "No of LSTM layers =2","Dropout=0.4","stateful=True ")

In [0]:
# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
#plt.plot(y_test_t_org)
#plt.plot(y_pred_org)

plt.plot(y_pred_org[0:100])
plt.plot(y_test_t_org[0:100])
plt.plot()
plt.title('Prediction vs Real Stock Price')
plt.ylabel('Price')
plt.xlabel('Days')
plt.legend(['Prediction', 'Real'], loc='upper left')
plt.show()

In [0]:
# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
#plt.plot(y_test_t_org)
#plt.plot(y_pred_org)

plt.plot(y_pred_org[0:100])
plt.plot(y_test_t_org[0:100])
plt.plot()
plt.title('Prediction vs Real Stock Price')
plt.ylabel('Price')
plt.xlabel('Days')
plt.legend(['Prediction', 'Real'], loc='upper left')
plt.show()

In [0]:
for i in range(175):
    plt.figure()
    plt.plot()
    plt.ylim(top = 12)
    plt.plot(y_pred_org[i*100:i*100 + 100])
    plt.plot(y_test_t_org[i*100:i*100 + 100])
    plt.plot()
    plt.title('Prediction vs wave height')
    plt.ylabel('Wave height')
    plt.xlabel('Number')
    plt.legend(['Prediction', 'Real'], loc='upper left')
    #from google.colab import files
    from google.colab import files
    plt.savefig('Plot'+ str(i)+'0'+'(LSTM_4.1)'+'.png')
    files.download('Plot'+ str(i)+'0'+'(LSTM_4.1)'+'.png')
    #plt.savefig('Plot'+ str(i)+'0'+'(LSTM_4.1)'+'.png')
    #files.download('Plot'+ str(i)+'0'+'(LSTM_4.1)'+'.png')
    plt.show()
    
    plt.figure()
    plt.plot()
    plt.ylim(top = 12)
    plt.plot(y_pred_org[50+i*100:i*100 + 150])
    plt.plot(y_test_t_org[50+i*100:i*100 + 150])
    plt.plot()
    plt.title('Prediction vs wave height')
    plt.ylabel('Wave height')
    plt.xlabel('Number')
    plt.legend(['Prediction', 'Real'], loc='upper left')
    #from google.colab import files
    from google.colab import files
    plt.savefig('Plot'+ str(i)+'1'+'(LSTM_4.1)'+'.png')
    files.download('Plot'+ str(i)+'1'+'(LSTM_4.1)'+'.png')
    #plt.savefig('Plot'+ str(i)+'1'+'(LSTM_4.1)'+'.png')
    #files.download('Plot'+ str(i)+'0'+'(LSTM_4.1)'+'.png')
    plt.show()

In [0]:
plt.figure()
plt.plot(y_test_t - y_pred)

In [0]:
print(max(y_test_t - y_pred))
print(min(y_test_t - y_pred))
print(type(y_test_t - y_pred))
print(np.mean(y_test_t - y_pred))
print(np.std(y_test_t - y_pred))

Max error: 0.4540718008490169
Min error: -0.2863886167021359
Average: -0.0006845318656293139
Standard deviation: 0.05870196666310056
Error is 0.003446389473990832