In [11]:
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import matplotlib.axes._axes as axes
from statistics import stdev, mean
# Switch matplotlib over to seaborn's default styling for the figures drawn later.
sns.set()

In [12]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Activation, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import RMSprop, SGD, Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.callbacks import LearningRateScheduler

In [13]:
# Experiment configuration shared by the cells below.
total_iter = 100      # how many independent split/train/evaluate runs to perform
epoch_no = 300        # epochs per run; also the horizon of the decay schedule
initial_rate = 1e-3   # learning rate at epoch 0


def poly_decay(epoch):
    """Return the learning rate to use at ``epoch``.

    The rate starts at ``initial_rate`` and shrinks polynomially (with an
    exponent of 1.0, i.e. linearly) so that it reaches zero at ``epoch_no``.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The decayed learning rate as a float.
    """
    power = 1.0
    remaining_fraction = 1 - (epoch / float(epoch_no))
    return initial_rate * remaining_fraction ** power

In [14]:
# Read the raw table from the CSV file next to the notebook and show its
# dimensions plus the first few rows as a quick sanity check.
data = pd.read_csv('bangl40kmdir.csv')

print(f"Shape of data: {data.shape}")
print(f"Head of data: .....")
print(data.head(5))

Shape of data: (307, 18)
Head of data: .....
    AREA  PERIMETER  CELL_ID     Lon     Lat  cdirjan  cdirfeb  cdirmar  \
0  0.127      1.427   287108  88.394  27.461     4954     3491     3231   
1  0.126      1.426   288108  88.024  27.392     5138     3858     3814   
2  0.126      1.423   285107  89.207  27.262     5676     4556     4011   
3  0.126      1.421   286107  88.838  27.198     5518     4099     3790   
4  0.125      1.420   287107  88.469  27.133     5128     4063     3735   

   cdirapr  cdirmay  cdirjun  cdirjul  cdiraug  cdirsep  cdiroct  cdirnov  \
0     3306     2959     2053     1521     1750     1890     5114     4981   
1     4181     3732     2263     1478     1702     2319     5631     5298   
2     4070     3472     2124     1540     1786     2195     6022     5906   
3     3727     2930     2110     1310     1858     1944     5508     5646   
4     4206     3610     2553     1586     2197     2640     6258     5451   

   cdirdec  cdirann  
0     5233     3373

In [15]:
# Drop the cell identifier and the annual column so that only the four
# descriptors and the twelve monthly columns remain.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it no longer raises KeyError once the columns
# are already gone.
data = data.drop(columns=['CELL_ID', 'cdirann'], errors='ignore')
print(f"Dataset after column dropped:.....")
print(data.head())

Dataset after column dropped:.....
    AREA  PERIMETER     Lon     Lat  cdirjan  cdirfeb  cdirmar  cdirapr  \
0  0.127      1.427  88.394  27.461     4954     3491     3231     3306   
1  0.126      1.426  88.024  27.392     5138     3858     3814     4181   
2  0.126      1.423  89.207  27.262     5676     4556     4011     4070   
3  0.126      1.421  88.838  27.198     5518     4099     3790     3727   
4  0.125      1.420  88.469  27.133     5128     4063     3735     4206   

   cdirmay  cdirjun  cdirjul  cdiraug  cdirsep  cdiroct  cdirnov  cdirdec  
0     2959     2053     1521     1750     1890     5114     4981     5233  
1     3732     2263     1478     1702     2319     5631     5298     5473  
2     3472     2124     1540     1786     2195     6022     5906     6048  
3     2930     2110     1310     1858     1944     5508     5646     5725  
4     3610     2553     1586     2197     2640     6258     5451     5740  


In [16]:
# Standardise every remaining column (features and targets alike).
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# that get_data() performs later, so held-out rows influence the scaling
# statistics — confirm this is intended.
scaler = StandardScaler().fit(data)
transformed_data = scaler.transform(data)

In [17]:
# Wrap the scaled values in a DataFrame again, reusing the column names of
# the unscaled frame.
transformed_data = pd.DataFrame(transformed_data, columns=data.columns)

print(f"Shape of Transformed Data: {transformed_data.shape}")
transformed_data.head(5)

Shape of Transformed Data: (307, 16)


Unnamed: 0,AREA,PERIMETER,Lon,Lat,cdirjan,cdirfeb,cdirmar,cdirapr,cdirmay,cdirjun,cdirjul,cdiraug,cdirsep,cdiroct,cdirnov,cdirdec
0,2.220518,2.139975,-1.181275,2.092918,-0.440599,-1.090284,-1.469974,-0.706887,-0.724947,1.301844,2.439707,0.568503,0.081891,0.378378,0.131528,-0.595965
1,2.061056,2.113542,-1.397072,2.055,-0.229423,-0.668575,-0.527803,0.950101,0.712523,1.903646,2.220726,0.400109,1.605891,1.696027,0.860217,-0.2836
2,2.061056,2.034242,-0.707106,1.983559,0.388037,0.133476,-0.209437,0.739901,0.229028,1.50531,2.536466,0.694799,1.165388,2.692546,2.257829,0.464774
3,2.061056,1.981376,-0.922319,1.948389,0.206701,-0.391649,-0.566589,0.090361,-0.778875,1.46519,1.365174,0.947391,0.273724,1.382544,1.660166,0.044383
4,1.901595,1.954942,-1.137533,1.912669,-0.2409,-0.433016,-0.655473,0.997444,0.485652,2.734708,2.770724,2.136678,2.746227,3.294026,1.211919,0.063906


In [18]:
# Model inputs are the four descriptor columns; every other column of the
# scaled frame is treated as a regression target.
x_labels = ['AREA', 'PERIMETER', 'Lon', 'Lat']
x = transformed_data.loc[:, x_labels]
y = transformed_data.drop(columns=x_labels)

In [19]:
def get_data(testSize, random_state=None):
    """Randomly split the module-level ``x`` / ``y`` frames into train and test arrays.

    Args:
        testSize: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Optional seed forwarded to ``train_test_split`` so a
            split can be reproduced. The default ``None`` keeps the previous
            unseeded behaviour.

    Returns:
        A tuple ``(trainx, testx, trainy, testy)`` of NumPy arrays.
    """
    parts = train_test_split(x, y, test_size=testSize,
                             random_state=random_state)
    # train_test_split returns the pieces in the order
    # (x_train, x_test, y_train, y_test); convert each one to an ndarray.
    trainx, testx, trainy, testy = (np.array(part) for part in parts)

    return trainx, testx, trainy, testy

In [20]:
def coeff_determination(y_true, y_pred):
    """Keras-backend R^2 (coefficient of determination) metric.

    Computes ``1 - SS_res / SS_tot`` over whatever tensors are passed in.
    ``K.mean`` is called without an axis, so a single mean over all elements
    of ``y_true`` is used for the total sum of squares.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        Scalar tensor holding the R^2 value.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    # K.epsilon() keeps the division finite when the targets are constant.
    return 1 - residual_ss / (total_ss + K.epsilon())

In [21]:
def prediction_model(trainxShape, trainyShape):
    """Build and compile the fully connected regression network.

    The network is a stack of ReLU ``Dense`` layers narrowing from 512 to 16
    units, followed by a ``Dense`` output layer without an activation.

    Args:
        trainxShape: Number of input features (passed as ``input_dim``).
        trainyShape: Number of units in the output layer.

    Returns:
        A compiled ``Sequential`` model using MSE loss, an Adam optimizer
        starting at ``initial_rate``, and ``RootMeanSquaredError`` plus
        ``coeff_determination`` as metrics.
    """
    hidden_units = (512, 256, 128, 64, 32, 16)

    model = Sequential()

    # Only the first layer needs to be told the input width.
    model.add(Dense(hidden_units[0], input_dim=trainxShape, activation='relu'))
    for units in hidden_units[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(trainyShape))

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and is rejected by newer Keras releases.
    opt = Adam(learning_rate=initial_rate)
    model.compile(loss='mse',
                  optimizer=opt,
                  metrics=[RootMeanSquaredError(), coeff_determination])

    return model

In [22]:
# Validation R2 / RMSE of every run, one entry per iteration.
# NOTE: despite the "best_" prefix these lists hold the values reached at the
# LAST epoch of each run, not the per-run optimum.
best_r2_history = []
best_rmse_history = []


for iteration in range(1, total_iter + 1):

    print(f"Model Fitting Started for Iteration: {iteration}")

    # A fresh model is built on every pass; clear the Keras global state left
    # behind by the previous one so memory does not grow across iterations.
    K.clear_session()

    # New random 70/30 split and a newly initialised network for this run.
    trainx, testx, trainy, testy = get_data(testSize=0.3)
    model = prediction_model(trainx.shape[1], trainy.shape[1])

    H = model.fit(trainx, trainy,
                  validation_data=(testx, testy),
                  epochs=epoch_no,
                  verbose=0,
                  callbacks=[LearningRateScheduler(poly_decay)])

    print(f"Plotting and storing performance curves.....")
    epochs = range(1, epoch_no + 1)
    train_loss = H.history['loss']
    train_rmse = H.history['root_mean_squared_error']

    val_loss = H.history['val_loss']
    val_rmse = H.history['val_root_mean_squared_error']
    val_R2 = H.history['val_coeff_determination']

    # Record the final-epoch validation scores of this run.
    best_r2_history.append(float(val_R2[-1]))
    best_rmse_history.append(float(val_rmse[-1]))

    plot_df = pd.DataFrame({
        'epochs': epochs,
        'train_loss': train_loss,
        'train_rmse': train_rmse,
        'val_loss': val_loss,
        'val_rmse': val_rmse,
        'val_R2': val_R2,
    })

    sns.set(font_scale=1)
    fig, ax = plt.subplots(1, 1, figsize=(18, 10))
    # (column to plot, legend label)
    curves = (('train_rmse', 'train_rmse'),
              ('val_rmse', 'val_rmse'),
              ('val_R2', 'R2_score'))
    for column, label in curves:
        sns.lineplot(data=plot_df, x='epochs', y=column,
                     ax=ax, label=label, linewidth=3)

    ax.set_ylabel('RMSE/R2')
    ax.set_xlabel('Epochs')
    ax.set_title(f"Performance Curves of Iteration: {iteration}")
    plt.setp(ax.get_legend().get_texts(), fontsize='18')  # legend text size
    fig.savefig(f"output/curve_iter_{iteration}")
    # Close the figure explicitly; otherwise one figure per iteration stays open.
    plt.close(fig)

    print("=" * 50)

print(f"Serialising output as JSON.....")
out_dict = {
    'r2': best_r2_history,
    'rmse': best_rmse_history
}

# The context manager closes the file even if serialisation raises.
with open('output/out_dict.json', 'w') as out_file:
    json.dump(out_dict, out_file)

Model Fitting Started for Iteration: 1
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 2
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 3
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 4
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 5
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 6
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 7
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 8
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 9
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 10
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 11
Plotting and storing performance curves.....
Model Fitting Started for Iteration: 12
Plotting and storing performance c

In [24]:
# Report mean and sample standard deviation of the per-run validation scores.
r2_scores = best_r2_history
rmse_scores = best_rmse_history

print(f"R2 Score statistics.....")
print("Mean R2 Score: ", mean(r2_scores))
print(f"STD_Dev of R2 Score: ", stdev(r2_scores))

print(f"RMSE statistics.....")
print("Mean RMSE: ", mean(rmse_scores))
print(f"STD_Dev of RMSE", stdev(rmse_scores))


R2 Score statistics.....
Mean R2 Score:  0.8219685661792755
STD_Dev of R2 Score:  0.029001416553739348
RMSE statistics.....
Mean RMSE:  0.41508217453956603
STD_Dev of RMSE 0.03207919231549063


In [199]:
# Scaled AREA, PERIMETER, Lon, Lat of the first data row: 2.220518	2.139975	-1.181275	2.092918

In [200]:
# One already-scaled input row in the order AREA, PERIMETER, Lon, Lat,
# shaped (1, 4) so it can be fed to the model as a single-sample batch.
sample_features = [2.220518, 2.139975, -1.181275, 2.092918]
sample_input = np.array([sample_features])

In [None]:
# Predict the scaled targets for the sample row.
# NOTE(review): `model` is whatever the kernel currently holds — presumably
# the network from the final training iteration; confirm before relying on it.
sample_op = model.predict(x=sample_input)
sample_op

In [None]:
# Put inputs and predictions side by side so the row has the same column
# layout the scaler was fitted on.
sample_data = np.concatenate([sample_input, sample_op], axis=1)
sample_data

In [205]:
# Map the combined (inputs + predictions) row back to the original units.
scaler.inverse_transform(X=sample_data)

array([[1.26999998e-01, 1.42700000e+00, 8.83940006e+01, 2.74610000e+01,
        4.98678980e+03, 3.92071968e+03, 3.59750531e+03, 4.02984843e+03,
        3.32738933e+03, 2.30557796e+03, 1.44067773e+03, 1.81906735e+03,
        2.31034458e+03, 5.77743285e+03, 5.17155548e+03, 5.44689920e+03]])

In [None]:
# Alternative, smaller network (128-128-64-32 hidden units) compiled with RMSprop.
# NOTE(review): running this cell rebinds `model`, replacing the network
# produced by the training loop.
model = Sequential()
model.add(Dense(128, input_dim=trainx.shape[1], activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(trainy.shape[1]))

# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
opt = RMSprop(learning_rate=0.001)
model.compile(loss='mse',
              optimizer=opt,
              metrics=['mae', 'mse', coeff_determination])

In [None]:
# Another architecture variant (128-64-32-16 hidden units); it is only built
# here, not compiled. Two extra hidden layers (256 and 128 units) after the
# first one were tried and are currently disabled.
# NOTE(review): running this cell rebinds `model`.
model = Sequential([
    Dense(128, input_dim=trainx.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(trainy.shape[1]),
])