In [0]:
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout,LSTM
# Base URL that the relative dataset paths ("data/...") used further down are appended to.
colab_path = "https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/"

# Univariate Forecasting

In [0]:
# Load the rainfall CSV from the repository URL; its 'Value' column is the series modelled below.
rainfall_data = pd.read_csv(colab_path+"data/All_India_Area_Weighted_Monthly_Rainfall.csv")

In [0]:

def makeXy(ts, nb_timesteps):
    """
    Build a supervised-learning dataset from a univariate time series.

    Every sample uses ``nb_timesteps`` consecutive observations as regressors
    and the observation that immediately follows them as the target.

    Input:
           ts: original time series (pandas Series, NumPy array or any 1-D
               sequence). Only the order of the values is used, so the index
               labels of a Series are irrelevant.
           nb_timesteps: number of time steps in the regressors
    Output:
           X: 2-D array of regressors, shape (len(ts) - nb_timesteps, nb_timesteps)
           y: 1-D array of target, shape (len(ts) - nb_timesteps,)
           Both are empty (X still being 2-D) when the series is not longer
           than nb_timesteps.
    """
    # Slice the raw values positionally. Label-based ``.loc`` lookups only
    # coincide with positions for a default RangeIndex and break for
    # date-indexed, filtered or otherwise re-indexed series.
    values = np.asarray(ts)
    windows = [values[end - nb_timesteps:end]
               for end in range(nb_timesteps, values.shape[0])]
    # The explicit reshape keeps X two-dimensional even when no window fits.
    X = np.array(windows).reshape(len(windows), nb_timesteps)
    # np.array copies, so y never aliases the caller's data.
    y = np.array(values[nb_timesteps:])
    return X, y

In [0]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the rainfall values into [0, 1] and write them back into the same column.
# NOTE(review): the scaler is fitted on the complete series, i.e. before the
# train/test split made in a later cell, so test-period values influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
rainfall_data['Value'] = scaler.fit_transform(
    rainfall_data[['Value']].values  # 2-D (n_rows, 1) input, as the scaler expects
)

In [0]:
# Turn the scaled series into windows of 10 past values, each paired with the next value.
X, y = makeXy(rainfall_data['Value'], 10)

# Chronological split: the first 1100 windows are used for fitting, the rest for evaluation.
X_train, X_test = X[:1100], X[1100:]
y_train, y_test = y[:1100], y[1100:]

In [0]:
# The LSTM below is declared with input_shape=(timesteps, 1), so append a
# trailing feature axis of length 1 to the 2-D window arrays.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [0]:
# Two stacked 50-unit LSTM layers, each followed by 10% dropout, and a single-unit output.
lstm_model = Sequential()
# return_sequences=True passes the whole output sequence on to the second LSTM layer.
lstm_model.add(LSTM(50, input_shape = (X_train.shape[1],1), return_sequences=True, activation = 'relu'))
lstm_model.add(Dropout(0.1))
lstm_model.add(LSTM(50,activation='relu'))
lstm_model.add(Dropout(0.1))
lstm_model.add(Dense(1))
lstm_model.compile(loss = 'mean_squared_error',
              optimizer = 'adam',
              metrics = ['mse'])
# `epochs` is the Keras 2 name of the former `nb_epoch` argument, which
# later Keras releases reject as an unknown keyword.
lstm_model.fit(X_train, y_train,
    epochs=50, batch_size=50,validation_split=0.2)

In [0]:
# Forecast the held-out windows with the fitted network.
preds = lstm_model.predict(X_test)

In [0]:
def plot_predictions(preds,y_test):
    """
    Draw predicted and actual values as two lines on a new figure.

    Input:
           preds: predicted values (blue line)
           y_test: actual values (red line)
    """
    plt.figure(figsize=(10, 5.5))
    plt.plot(preds,linestyle='-',color='b')
    plt.plot(y_test,linestyle='-',color='r')
    # Legend entries follow the order of the two plot calls above.
    plt.legend(['Predicted','Actual'], loc=2)
    plt.title('Actual vs Predicted')

In [0]:
# R^2 of the forecasts on the held-out windows (both arrays are on the scaled 0-1 range).
r2_score(y_test,preds)

# Multivariate Forecasting

In [0]:
# Load the pollution CSV; the first column becomes the index and is parsed as dates.
multi_data = pd.read_csv(colab_path+"data/pollution.csv",parse_dates=True,index_col=0)

In [0]:
# Preview the first rows of the loaded frame.
multi_data.head()

In [0]:
def create_lags(df, lag=1):
    """
    Return a copy of ``df`` extended with a lagged version of every column.

    For each column ``c`` a new column ``c_<lag>`` holds the value ``c`` had
    ``lag`` rows earlier. The input frame itself is left untouched, so calling
    the function again on the same frame gives the same result.

    Input:
           df: DataFrame whose rows are assumed to be in time order
           lag: number of rows to shift by (default 1, giving the "_1" suffix)
    Output:
           New DataFrame with the original columns followed by the lagged
           ones; rows containing any missing value (including the leading
           rows that have no lagged value) are dropped.
    """
    suffix = "_" + str(lag)
    # Work on a copy: adding columns to the caller's frame would make a second
    # call produce lags of lags ("x_1_1").
    lagged = df.copy()
    for col in df.columns:
        # str() lets non-string column labels receive a suffix as well.
        lagged[str(col) + suffix] = df[col].shift(lag)
    return lagged.dropna()

In [0]:
# Add the "_1" lag columns and drop the rows left incomplete by the shift.
reframed_multi_data = create_lags(multi_data)

In [0]:
# Replace the 'date' index with a plain positional index; the dates themselves are
# discarded. Dropping the index in one step keeps this cell re-runnable: moving it
# into a column and then deleting that column fails with a KeyError on a second run.
reframed_multi_data = reframed_multi_data.reset_index(drop=True)

In [0]:
# List the available columns (the originals plus their "_1" lags).
reframed_multi_data.columns

In [0]:
# Target: the current-row 'pollution' value.
y = reframed_multi_data['pollution']

In [0]:
# Regressors: current-row dew, temp, press and wnd_spd, plus the previous-row ("_1")
# values of pollution and of those same four columns.
x = reframed_multi_data[['dew', 'temp', 'press', 'wnd_spd', 'pollution_1', 'dew_1',
       'temp_1', 'press_1', 'wnd_spd_1']]

In [0]:
# Split the regressors by row position: first 35799 rows for fitting, the remainder for evaluation.
x_train, x_test = x.iloc[:35799], x.iloc[35799:]

In [0]:
# Split the target at the same row position as the regressors.
y_train, y_test = y.iloc[:35799], y.iloc[35799:]

In [0]:
from sklearn.preprocessing import MinMaxScaler

# Regressors and target each get their own scaler, fitted on the training rows only.
# Re-fitting one shared instance on the target would throw away the fitted
# feature scaling and leave it unusable for transforming regressors afterwards.
feature_scaler = MinMaxScaler(feature_range=(0,1))
x_train = feature_scaler.fit_transform(x_train)
x_test = feature_scaler.transform(x_test)

target_scaler = MinMaxScaler(feature_range=(0,1))
y_train = target_scaler.fit_transform(y_train.values.reshape(-1,1))
y_test = target_scaler.transform(y_test.values.reshape(-1,1))
# Keep the generic name bound to the target scaler for any code that still refers to it.
scaler = target_scaler

# Insert a timesteps axis of length 1: (samples, features) -> (samples, 1, features).
x_train = x_train.reshape(x_train.shape[0],1,x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])

In [0]:
# Axes of the LSTM input array: (samples, timesteps, features)
x_train.shape

In [0]:
# One 100-unit LSTM layer feeding a single-unit Dense output, trained on mean squared error.
multi_lstm_model = Sequential([
    # Per-sample input shape is (timesteps, features), i.e. everything after the sample axis.
    LSTM(100, input_shape=x_train.shape[1:]),
    Dense(1),
])
multi_lstm_model.compile(optimizer="adam", loss="mse")

In [0]:
# Train for 25 epochs in batches of 500 rows (no validation data is passed here).
multi_lstm_model.fit(x_train,y_train,epochs=25,batch_size=500)

In [0]:
# Predict the (scaled) target for the held-out rows.
y_pred = multi_lstm_model.predict(x_test)

In [0]:
def plot_predictions(preds,y_test):
    """
    Draw predicted and actual values as two lines on a new, titled figure.

    Input:
           preds: predicted values (blue line)
           y_test: actual values (red line)
    """
    _, axes = plt.subplots(figsize=(10, 5.5))
    # Plot order matters: the legend labels below are matched to it.
    for series, colour in ((preds, 'b'), (y_test, 'r')):
        axes.plot(series, linestyle='-', color=colour)
    axes.legend(['Predicted','Actual'], loc=2)
    axes.set_title('Actual vs Predicted')

In [0]:
# Compare the first 100 held-out predictions with the corresponding actual values.
plot_predictions(y_pred[:100],y_test[:100])

In [0]:
# R^2 of the multivariate forecasts on the held-out rows (both arrays are on the scaled range).
r2_score(y_test,y_pred)