<a href="https://colab.research.google.com/github/saugatbh/RainfallDataAnalysis/blob/main/MonthlyCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
#!pip install keras-tuner
from glob import glob
from math import sqrt

import numpy as np
import pandas as pd
from keras.layers import Dense, LSTM
from keras.layers import Flatten, Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from keras.wrappers.scikit_learn import KerasRegressor
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [2]:
def compare(ytest, yhat):
  """Overlay observed and predicted values on the current matplotlib axes.

  `ytest` is drawn in blue with the legend label 'True' and `yhat` in red
  with the label 'Predicted'. A legend is added; nothing is returned.
  """
  curves = (
    (ytest, 'blue', 'True'),
    (yhat, 'red', 'Predicted'),
  )
  for values, colour, caption in curves:
    plt.plot(values, color=colour, label=caption)
  plt.legend()

In [3]:
def split_sequence(sequence, n_steps):
    """Cut a sequence into sliding windows paired with their next value.

    The window starting at index ``i`` is ``sequence[i:i + n_steps]`` and its
    target is the element right after it, ``sequence[i + n_steps]``.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` stacks the windows and ``y`` the matching targets. Both are
        empty when the sequence is too short to hold one window plus target.
    """
    total = len(sequence)
    # A start index is usable only while start + n_steps is still a valid
    # position in the sequence, and never beyond the sequence itself.
    n_windows = max(0, min(total, total - n_steps))
    windows = [sequence[start:start + n_steps] for start in range(n_windows)]
    targets = [sequence[start + n_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [4]:
def train_test_split(data, n_test, type='feat'):
    """Split ``data`` in order, holding out its last ``n_test`` samples.

    No shuffling is done, so for a time series the test part is always the
    most recent stretch.

    Parameters
    ----------
    data : array-like
        Anything that supports ``len()`` and slicing along its first axis
        (1-D label vectors and N-D feature arrays both work).
    n_test : int
        Number of trailing samples placed in the test part. ``0`` keeps
        every sample in the training part.
    type : {'feat', 'label'}
        Kept for backward compatibility; both kinds are split the same way.

    Returns
    -------
    (train, test) : tuple
        The leading and trailing slices of ``data``.

    Raises
    ------
    ValueError
        If ``type`` is not 'feat' or 'label', or if ``n_test`` is negative.
    """
    if type not in ('feat', 'label'):
        # Previously this fell through and returned None, which only surfaced
        # later as an unpacking error at the call site.
        raise ValueError("type must be 'feat' or 'label', got %r" % (type,))
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))
    # Use an explicit split index instead of data[:-n_test]: with n_test == 0
    # the negative-index form gives an empty training set.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

**MLP prediction**

In [26]:
def mlp_predict(fixed_grid_data, n_steps=70, n_test=16, epochs=500, batch_size=128):
  """Fit a one-hidden-layer MLP on a single series and return its test RMSE.

  The series is cut into sliding windows of `n_steps` values, each paired with
  the value that follows it. The last `n_test` windows are held out, the MLP
  is trained on the rest, and the root-mean-squared error on the held-out
  windows is returned.

  Parameters
  ----------
  fixed_grid_data : array
      The series to model. Callers in this notebook pass a column vector of
      shape (n_samples, 1); the flattening below relies on there being a
      single value per time step.
  n_steps : int
      Window length, i.e. the number of inputs to the network.
  n_test : int
      Number of trailing windows held out for evaluation.
  epochs, batch_size : int
      Passed straight to `model.fit`.

  Returns
  -------
  float
      RMSE between the held-out targets and the model's predictions.
  """
  X, y = split_sequence(fixed_grid_data, n_steps)
  # Flatten each window to a plain vector: Dense layers take
  # [samples, timesteps], not [samples, timesteps, features].
  X = X.reshape((X.shape[0], X.shape[1]))
  Xtrain, Xtest = train_test_split(X, n_test, type='feat')
  ytrain, ytest = train_test_split(y, n_test, type='label')
  # define model
  model = Sequential()
  model.add(Dense(100, activation='relu', input_dim=n_steps))
  model.add(Dense(1))
  model.compile(optimizer='adam', loss='mse')
  # fit model
  model.fit(Xtrain, ytrain, epochs=epochs, batch_size=batch_size, verbose=0)
  # evaluate on the held-out windows
  yhat = model.predict(Xtest, verbose=0)
  return sqrt(mean_squared_error(ytest, yhat))
#compare(ytest,yhat)

In [37]:
def create_cnn():
  """Build and compile the 1-D CNN used as the grid-search estimator.

  Layer stack: two same-padded Conv1D layers (150 then 175 filters, kernel
  size 5, ReLU), max pooling by 2, 50% dropout, flatten, a 120-unit ReLU
  dense layer and a single linear output unit.

  The input shape is (n_steps, n_features); both names are read from module
  scope when the function is called, so they must be defined beforehand.

  Returns the compiled model (Adam optimiser, MSE loss, 'accuracy' metric).
  """
  layers = [
    Conv1D(150, 5, activation='relu', padding='same', strides=1,
           input_shape=(n_steps, n_features)),
    Conv1D(175, 5, activation='relu', padding='same', strides=1),
    MaxPooling1D(pool_size=2),
    Dropout(0.5),
    Flatten(),
    Dense(120, activation='relu'),
    Dense(1),
  ]
  network = Sequential(layers)
  network.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
  return network

**Grid Search for CNN**

In [38]:
# Grid search over batch size and epoch count for the CNN, using the series of
# one grid cell (month index 0, cell [3, 3]).
# NOTE: `normal_data` is built in the "Extracting monthly data for Odisha"
# cell further down the notebook; that cell must have been run first.
one_monthly_grid = normal_data[:,0,3,3].reshape(-1,1)
# split into samples
n_steps=70
X, y = split_sequence(one_monthly_grid, n_steps)
print(X.shape, y.shape)
# X already comes out as [samples, timesteps, 1]; the reshape just makes the
# [samples, timesteps, features] layout expected by Conv1D explicit.
n_features = 1
X = X.reshape((X.shape[0], X.shape[1],n_features))
# hold out the last 16 windows; the grid search only sees the training part
Xtrain, Xtest = train_test_split(X, 16, type='feat')
ytrain, ytest = train_test_split(y, 16, type='label')
seed = 7
np.random.seed(seed)
# The targets are continuous values, so this is a regression problem.
# KerasClassifier would treat every distinct target as a class label and rank
# configurations by accuracy; KerasRegressor fits the raw targets and scores
# by negative loss, so the "best" configuration is the one with lowest MSE.
model = KerasRegressor(build_fn=create_cnn, verbose=0)
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100,128]
epochs = [10, 50, 100,500,1000]
param_grid = dict(batch_size=batch_size, epochs=epochs)
# Run serially: fitting Keras models in parallel worker processes
# (n_jobs=-1) is commonly reported to stall, and the saved run of this cell
# ended in a manual interrupt.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(Xtrain, ytrain)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

(45, 70, 1) (45, 1)


KeyboardInterrupt: ignored

In [7]:
# Year and month labels; their lengths (115 and 12) are used further down as
# the loop bounds over the first two axes of the rainfall array.
years = np.arange(1901, 2015 + 1)  # 1901 through 2015 inclusive
months = 'Jan Feb March April May June July Aug Sept Oct Nov Dec'.split()

In [8]:
# Load the raw rainfall array from a saved .npy file.
# The path points into /content/drive/MyDrive, so this presumably needs Google
# Drive mounted in a Colab session -- adjust the path when running elsewhere.
# Later cells index the array with four axes, using `years` and `months` as
# the loop bounds for the first two.
data = np.load('/content/drive/MyDrive/rainfall_data.npy')
# Axis files, currently not loaded:
#xaxis=np.loadtxt('xaxis.txt')
#yaxis=np.loadtxt('yaxis.txt')

**Extracting monthly data for Odisha**

In [9]:
# Crop the full array to the study window (rows 44-64, columns 60-84) and
# replace negative values with 0 (presumably missing-data markers -- confirm
# against the data source).
reqdata = data[:,:,44:65,60:85]
reqdata = np.where(reqdata<0,0,reqdata)
print(reqdata.shape)

# Allocate the output from the cropped array's own shape rather than a
# hard-coded (115, 12, 21, 25), so the cell keeps working if the crop changes.
normal_data = np.zeros(reqdata.shape)
for i in range(reqdata.shape[0]):
  for j in range(reqdata.shape[1]):
    # preprocessing.normalize defaults to norm='l2', axis=1: every row of the
    # 2-D map for this (year, month) is scaled to unit L2 norm on its own.
    # NOTE(review): this normalises across columns within one map, not along
    # time for a single cell -- confirm this is the intended scaling.
    normal_data[i,j,:,:] = preprocessing.normalize(reqdata[i,j,:,:])

(115, 12, 21, 25)


**Monthly prediction for each grid cell**

In [28]:
# Fit one MLP per grid cell and record its held-out RMSE in `score`.
jan = normal_data[:,0,:,:]
print(jan.shape)
# one score per grid cell; sized from the data instead of a hard-coded (21, 25)
score = np.zeros(normal_data.shape[2:])
# Only month index 0 is processed. If this range is widened, `score` will be
# overwritten by each later month unless it gains a month axis.
for m in range(1):
  monthly_data = normal_data[:,m,:,:]
  for i in range(score.shape[0]):
    for j in range(score.shape[1]):
      grid=monthly_data[:,i,j].reshape(-1,1)
      if not grid.any():
        # An all-zero series gives the network zero inputs and zero targets:
        # with Keras' default zero biases it predicts exactly 0 and the RMSE
        # is 0. Skip the training run and keep the initial 0.
        continue
      rmse=mlp_predict(grid)
      #rmse=cnn_predict(grid)
      score[i,j]=rmse
print(score)

[[0.29612238 0.31817845 0.28873418 0.30196713 0.29476253 0.49913375
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.19512165 0.26656224 0.35160862 0.2942181  0.24497185 0.38023265
  0.32999219 0.4703956  0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.23409347 0.23566087 0.22735604 0.25419752 0.27246304 0.22656853
  0.35563292 0.34174385 0.37215157 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.27872005 0.22213327 0.23804991 0.29267053 0.25502142 0.18489374
  0.34898438 0.35337724 0.29127598 0.4048212  0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.