In [0]:
import keras
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import SGD
# Base URL of the repository that hosts the data files; relative file paths
# are appended to it when loading data.
colab_path = (
    "https://raw.githubusercontent.com/"
    "poornagurram/TimeSeriesAnalysis_ODSC_2019/master/"
)

In [0]:
# Read the monthly rainfall CSV from the repository into a DataFrame.
rainfall_csv_url = colab_path + "data/All_India_Area_Weighted_Monthly_Rainfall.csv"
rainfall_data_monthly = pd.read_csv(rainfall_csv_url)

In [0]:
# Preview the first five rows to check the columns that were loaded.
rainfall_data_monthly.head(n=5)

In [0]:
# Quick look at the raw 'Value' series, drawn against the frame's current index.
raw_values = rainfall_data_monthly['Value']
raw_values.plot()

In [0]:
# Convert the 'Time' column to datetime values so it can be used for
# time-based indexing and resampling.
parsed_time = pd.to_datetime(rainfall_data_monthly['Time'])
rainfall_data_monthly['Time'] = parsed_time

In [0]:
# Use 'Time' as the index; time-based operations such as resample need a
# datetime-like index.
rainfall_data_monthly = rainfall_data_monthly.set_index(keys='Time')

In [0]:
# Average 'Value' over 6-month bins and plot the smoothed series.
half_yearly_mean = rainfall_data_monthly['Value'].resample('6M').mean()
half_yearly_mean.plot()

In [0]:
# Turn the index back into a regular column and restore the default integer index.
rainfall_data_monthly = rainfall_data_monthly.reset_index(drop=False)

In [0]:
# Derive lag features from the target column:
#   Value_s_1 / Value_s_2 -> the value one / two rows earlier
#   Value_d_1             -> change relative to the previous row
# The earliest rows have no predecessor, so these columns start with NaN.
rainfall_values = rainfall_data_monthly['Value']
rainfall_data_monthly['Value_s_1'] = rainfall_values.shift(periods=1)
rainfall_data_monthly['Value_s_2'] = rainfall_values.shift(periods=2)
rainfall_data_monthly['Value_d_1'] = rainfall_values.diff(periods=1)

In [0]:
# Show the row labelled 3 to sanity-check the lag features against 'Value'.
lag_check_columns = ['Value', 'Value_s_1', 'Value_s_2', 'Value_d_1']
rainfall_data_monthly[lag_check_columns].loc[3]

In [0]:
# Split the data chronologically into train and test sets.
# TimeSeriesSplit(n_splits=5) yields five expanding-window folds in time order.
# Only the final fold is used: its test indices are the most recent block of
# rows and its train indices are all rows that precede that block.
tscv = TimeSeriesSplit(n_splits=5)
feature_columns = ['Value_s_1', 'Value_s_2', 'Value_d_1']

# Take the last fold directly instead of slicing the frame for every fold and
# overwriting the result on each iteration.
train_index, test_index = list(tscv.split(rainfall_data_monthly.Time))[-1]

# Label-based selection; the labels coincide with positions because the frame
# carries the default integer index at this point.
y_train = rainfall_data_monthly['Value'].loc[train_index]
y_test = rainfall_data_monthly['Value'].loc[test_index]
x_train = rainfall_data_monthly[feature_columns].loc[train_index]
x_test = rainfall_data_monthly[feature_columns].loc[test_index]

In [0]:
# Discard the first three training rows: the earliest rows carry NaN lag
# features produced by shift/diff. The same positional slice is applied to
# features and target so they stay aligned.
# NOTE: re-running this cell drops three more rows each time.
x_train, y_train = x_train.iloc[3:], y_train.iloc[3:]

In [0]:
#x_train = x_train.values.reshape(-1,1)
#x_test = x_test.values.reshape(-1,1)

In [0]:
#y_train

In [0]:
# Scaling X (the input features) to the range [0, 1].
# The scaler is fitted on the training features only and then re-used to
# transform the test features, so both sets share a single mapping and no
# test-set statistics leak into preprocessing.
scaler = MinMaxScaler(feature_range=(0, 1))

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [0]:
# Scale the target to [0, 1].
# MinMaxScaler expects 2-D input, hence the reshape to a single column.
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

# A dedicated scaler is fitted on the training target only; the test target is
# mapped with the same parameters so predictions and y_test share one scale.
# Using a separate object also leaves the feature scaler's fit intact.
# Test values outside the training range will fall outside [0, 1].
y_scaler = MinMaxScaler(feature_range=(0, 1))
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)


In [0]:
# Feed-forward regressor: two 100-unit sigmoid hidden layers, dropout, and a
# single sigmoid output unit (its (0, 1) range suits a min-max-scaled target).
mlp_model = Sequential()
mlp_model.add(Dense(100, activation='sigmoid', input_dim=x_train.shape[1]))
mlp_model.add(Dense(100, activation='sigmoid'))
mlp_model.add(Dropout(0.2))  # drop 20% of the hidden activations during training
mlp_model.add(Dense(1, activation='sigmoid'))

# Mean squared error minimised with Adam. No separate optimizer object is
# built: an SGD instance that was never handed to compile() had no effect.
mlp_model.compile(loss='mean_squared_error', optimizer='adam')

In [0]:
# Train for 50 epochs in mini-batches of 50 samples; Keras holds out the last
# 20% of the training samples as a validation set.
# `epochs` is the supported keyword; the legacy `nb_epoch` spelling is
# rejected by current Keras releases.
mlp_model.fit(x_train, y_train, epochs=50, batch_size=50, validation_split=0.2)

In [0]:
# Run the trained network on the held-out test features.
preds = mlp_model.predict(x=x_test)

In [0]:
# Compare predictions with the actual test target, point by point.
# Both series are min-max-scaled values, not millimetres, so the y-axis is
# labelled accordingly.
fig, ax = plt.subplots(figsize=(10, 5.5))
ax.plot(preds, linestyle='-', marker='*', color='b', label='Predicted')
ax.plot(y_test, linestyle='-', marker='.', color='r', label='Actual')
ax.legend(loc=2)  # loc=2 is the upper-left corner
ax.set_title('Actual vs Predicted Rainfall')
ax.set_ylabel('Rainfall (min-max scaled)')
ax.set_xlabel('Index')
# Written as PNG; the file name itself carries no extension.
fig.savefig('rain_fall_mlp', format='png', dpi=300)

In [0]:
# Coefficient of determination (R^2) of the predictions on the test target.
from sklearn.metrics import r2_score

r2_score(y_true=y_test, y_pred=preds)