# Long Short-Term Memory (LSTM) Model for Daily Precipitation Prediction 


---


Last Updated : 27th February, 2020

Importing required libraries

In [0]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tqdm import tqdm

Data Preparation

In [0]:
#Prepare the data
df = pd.read_csv(r"gdrive/My Drive/tp_1980_1990.csv")
outfl = open(r"gdrive/My Drive/LSTM_result.csv", 'w')

outfl.write("CombinationID, Timestep, Dropout, Batch Size, Layers, Units, Accuracy\n")
outfl.close()

ppts = df['total_precipitation'].values

def preparedata(x, ts=60):
    x_train = []
    y_train = []
    

    x = np.array(x)

    for v in tqdm(range(ts, len(x))):
        xt = x[v-ts:v]
        yt = x[v]

        x_train.append(xt)
        y_train.append(yt)
    
    return np.array(x_train), np.array(y_train)



vals = np.array(ppts)
rfval = vals.reshape(-1, 1)

#Data normalisation
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(rfval)

In [0]:

#Model
timesteps = [90,180,365,730,1095,1460,1825]
dropouts = [0.1,0.2,0.5,0.7,0.9]
batchsize = [100,200,500,1000]
layers = [1,2,3,4,5,6,7,8,9,10]
units = [25,50,100,150,200]

instance = 0
for ts in timesteps:
    for do in dropouts:
        for batch_size in batchsize:
            for n_layer in layers:
                for unit in units:

                    print("Current Configuration\n---------------------------------------------")
                    print("Combination : " + str(instance) + " out of 7000")
                    print("Timestep : " + str(ts))
                    print("Droup out rate : " + str(do))
                    print("Batch size : " + str(batch_size))
                    print("Number of deep layers : "  + str(n_layer))
                    print("Neurons in each layer : " + str(unit))
                    print("---------------------------------------------")


                    #Prepare datset according to timestep
                    x,y = preparedata(training_set_scaled, ts=ts)
                    x_train = x[:10000]
                    y_train = y[:10000]


                    #Prepare Validation Set
                    x_test,y_test = preparedata(ppts[10000:], ts=ts)


                    x_train = x_train.reshape((len(x_train), ts, 1))
                    x_test = x_test.reshape((len(x_test), ts, 1))

                    #Prepare Model


                    # Initialising the RNN
                    regressor = Sequential()

                    # Adding the first LSTM layer and some Dropout regularisation
                    regressor.add(LSTM(units = unit, return_sequences = True, input_shape = (x_train.shape[1], 1)))
                    regressor.add(Dropout(do))

                    for _ in range(n_layer):

                        # Adding a  LSTM layer and some Dropout regularisation
                        regressor.add(LSTM(units = unit, return_sequences = True))
                        regressor.add(Dropout(do))

                        


                    regressor.add(LSTM(units = unit))
                    regressor.add(Dropout(do))

                    # Adding the output layer
                    regressor.add(Dense(units = 1))

                    # Compiling the RNN
                    regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

                    regressor.fit(x_train, y_train, epochs = 5, batch_size = batch_size, validation_split=0.2)

                    y_pred = regressor.predict(x_test, batch_size = batch_size)

                    import matplotlib.pyplot as plt

                    plt.scatter(y_test, y_pred)
                    plt.plot(y_test, label="Test")
                    plt.plot(y_pred, label="Prediction")
                    plt.legend()




                    #Write output in text file...
                    #outfl = open(r"gdrive/My Drive/LSTM_result.csv", 'a')

                    #outfl.write("{0}, {1}, {2}, {3}, {4}, {5}, {6}\n".format(instance,ts,do,batch_size,n_layer,unit,acc))
                    #outfl.close()










100%|██████████| 14308/14308 [00:00<00:00, 794294.17it/s]
100%|██████████| 4308/4308 [00:00<00:00, 845059.47it/s]

Current Configuration
---------------------------------------------
Combination : 0 out of 7000
Timestep : 180
Droup out rate : 0.2
Batch size : 100
Number of deep layers : 10
Neurons in each layer : 100
---------------------------------------------





Train on 8000 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
1800/8000 [=====>........................] - ETA: 3:19 - loss: 0.0027

KeyboardInterrupt: ignored