# Classification Model

## importing libraries

In [1]:
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf

## importing dataset

In [16]:
# Read the raw CSV and use its 'index' column as the row index
csv_path = '../data/csv/2007to2009.csv'
df = pd.read_csv(csv_path).set_index('index')

In [17]:
# df

In [18]:
# with open("../data/pkl/2010to2017.pkl", 'rb') as f:
#     data = pickle.load(f)

In [19]:
%run ../script/cleanup.py

In [20]:
# cleanup the data and add day and month columns
data = cleanup(df, onehot=False)

In [21]:
data

Unnamed: 0,minTemp,maxTemp,Eto_values,DayPrecip,WindSpd,DaySolRad,rain,stage,Month,Day
0,38.1,73.3,0.15,0.00,3.5,519.0,1.31,0.0,4,3
1,41.0,73.9,0.13,0.00,3.2,436.0,1.31,0.0,4,4
2,43.6,73.7,0.14,0.00,3.5,477.0,1.31,0.0,4,5
3,46.5,67.8,0.11,0.00,5.8,447.0,1.31,0.0,4,6
4,47.8,66.4,0.07,0.02,4.5,270.0,1.31,0.0,4,7
5,45.6,69.7,0.15,0.00,5.7,521.0,1.31,0.0,4,8
6,47.5,67.0,0.15,0.00,5.6,545.0,1.31,0.0,4,9
7,36.3,71.5,0.16,0.00,4.0,542.0,1.31,0.0,4,10
8,46.0,61.9,0.10,0.18,5.3,390.0,1.31,0.0,4,11
9,40.3,65.6,0.16,0.01,4.9,565.0,1.31,0.0,4,12


## selecting the right features

In [22]:
# Four input features plus the 'stage' target column
selected_columns = ['maxTemp', 'Eto_values', 'rain', 'DaySolRad', 'stage']
dataset = data[selected_columns]

## Working with Sklearn

In [23]:
from sklearn.model_selection import train_test_split

# Separate the target from the features WITHOUT mutating `dataset`.
# The earlier `dataset.pop('stage')` removed the column in place, so running
# this cell a second time raised KeyError; selecting and dropping keeps the
# cell safe to re-run.
labels = dataset['stage']
feat_data = dataset.drop(columns='stage')


In [24]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split the same between runs
X_train, X_test, y_train, y_test = train_test_split(feat_data, labels, test_size = 0.3, random_state = 101)

In [25]:
# Min-max normalization: the scaler is fitted on the training split only,
# then the same fitted scaler is applied to both splits
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(X_train)
scaled_x_train = scaler.transform(X_train)
scaled_x_test = scaler.transform(X_test)

In [26]:
X_test

Unnamed: 0,maxTemp,Eto_values,rain,DaySolRad
12,74.6,0.17,1.31,565.0
19,62.0,0.15,1.31,585.0
133,84.7,0.20,0.00,642.0
211,68.8,0.18,0.43,627.0
317,91.5,0.21,1.31,577.0
206,79.5,0.24,0.43,667.0
315,87.7,0.20,1.31,572.0
383,78.3,0.17,0.03,584.0
91,81.3,0.22,0.02,701.0
430,95.2,0.20,0.00,549.0


## constructing the model using Keras in TensorFlow

In [None]:
# from tensorflow.contrib.keras import models
# # add layers to the model
# from tensorflow.contrib.keras import layers

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
#creating models
# dnn_keras_model = models.Sequential()

In [None]:
# dnn_keras_model.add(layers.Dense(units = 13, activation='relu'))
# dnn_keras_model.add(layers.Dense(units= 13, activation = 'relu'))
# # the last layer's units must equal the number of classes being predicted (4 units here)
# dnn_keras_model.add(layers.Dense(units=4, activation='softmax'))

In [None]:
def build_model():
    """Build and compile the dense softmax classifier used in this notebook.

    The network takes 4 input features, passes them through three hidden
    ReLU layers (64, 128 and 64 units) and ends in a 4-unit softmax layer.
    It is compiled with the Adam optimizer and sparse categorical
    cross-entropy loss.

    Two metrics are tracked on purpose:
      * 'accuracy' comes first, so ``model.evaluate(...)[1]`` is the value
        the evaluation cells print as "Accuracy".
      * 'sparse_categorical_crossentropy' provides the history columns that
        ``plot_history`` draws.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        # 4 input features: maxTemp, Eto_values, rain, DaySolRad
        layers.Dense(units=64, activation=tf.nn.relu, input_shape=[4]),
        layers.Dense(units=128, activation=tf.nn.relu),
        layers.Dense(units=64, activation=tf.nn.relu),
        layers.Dense(units=4, activation=tf.nn.softmax),
    ])
    # NOTE(review): newer Keras releases name this argument `learning_rate`;
    # `lr` is kept to match the TensorFlow version this notebook was written
    # against - confirm before upgrading.
    optimizer = tf.keras.optimizers.Adam(lr=0.01)
    # The original had two separate `metrics=` lines, the second one after
    # the call was already closed (a syntax error); they are merged here.
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy', 'sparse_categorical_crossentropy'],
    )
    return model

In [None]:
model = build_model()

In [None]:
model.summary()

In [None]:
# compile the model
# from tensorflow.keras import losses, optimizers, metrics, activations

In [None]:
# dnn_keras_model.compile(optimizer='adam',
#                        loss = 'sparse_categorical_crossentropy',
#                        metrics=['accuracy'])

### Training

In [None]:
class PrintDot(keras.callbacks.Callback):
    """Progress indicator: prints one dot per finished epoch, 100 dots per row."""

    def on_epoch_end(self, epoch, logs):
        # Every 100th epoch (0, 100, 200, ...) starts a new row before its dot.
        prefix = '\n' if epoch % 100 == 0 else ''
        print(prefix + '.', end='')

In [None]:
# dnn_keras_model.fit(scaled_x_train, y_train, epochs=50)
# Upper bound on training epochs; 20% of the training data is held out for validation
EPOCHS = 10000
progress_callbacks = [PrintDot()]
history = model.fit(
    scaled_x_train,
    y_train,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=progress_callbacks,
)

In [None]:
hist = pd.DataFrame(history.history)

In [None]:
hist['epoch'] = history.epoch

In [None]:
head = hist.tail()

In [None]:
head

## Writing plot function

In [None]:
import matplotlib.pyplot as plt

def plot_history(history):
    """Plot training vs. validation cross-entropy per epoch and save the figure.

    Args:
        history: the object returned by ``model.fit``. Its ``history`` dict
            must contain the 'sparse_categorical_crossentropy' and
            'val_sparse_categorical_crossentropy' series, and its ``epoch``
            attribute supplies the x-axis values.

    The figure is written to '../image/modelerror.png'.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Classification Error')
    plt.plot(hist['epoch'], hist['sparse_categorical_crossentropy'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_sparse_categorical_crossentropy'],
             label='Val Error')
    plt.legend()
    # Apply the y-range BEFORE saving: previously savefig ran first, so the
    # file on disk did not have the [0, 5] range of the displayed figure.
    plt.ylim([0, 5])
    plt.savefig('../image/modelerror.png')
    
#     plt.figure()
#     plt.xlabel('Epoch')
#     plt.ylabel('Mean Square Error [Harverst^2]')
#     plt.plot(hist['epoch'], hist['mean_squared_error'],
#             label='Train Error')
#     plt.plot(hist['epoch'], hist['val_mean_squared_error'],
#             label='Val Error')
#     plt.legend()
#     plt.ylim([0,0.0625])

In [None]:
plot_history(history)


In [None]:
# scores[0] is the loss; scores[1] is the first metric given to compile(), reported here as accuracy
scores = model.evaluate(scaled_x_test, y_test)
accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_pct)


In [None]:
# Start again from a freshly initialised model
model = build_model()

# Stop training once 'val_loss' has gone `patience` epochs without improving
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
fit_callbacks = [early_stop, PrintDot()]

history = model.fit(
    scaled_x_train,
    y_train,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=fit_callbacks,
)
plot_history(history)

In [None]:
# scores[0] is the loss; scores[1] is the first metric given to compile(), reported here as accuracy
scores = model.evaluate(scaled_x_test, y_test)
accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_pct)

## Writing a predictive script

In [None]:
def get_prediction(inp):
    """Return the predicted class index for already-scaled feature values.

    The model is trained on MinMax-scaled features, so `inp` must be scaled
    the same way before it is passed in.

    Args:
        inp: a single sample as a flat list/tuple/1-D array of feature
            values, or a 2-D array-like with one sample per row.

    Returns:
        For a single sample, the index of the class with the highest
        predicted probability. For several rows, an array holding one such
        index per row.
    """
    features = np.array(inp)
    if features.ndim == 1:
        # A flat sample (list, tuple or 1-D array) needs a leading batch
        # axis; the old `type(inp) == list` check only handled lists.
        features = features[np.newaxis]
    # Relies on the notebook-level `history` object returned by model.fit.
    probabilities = history.model.predict(features)
    # Take the argmax per row so a multi-row batch is not flattened together.
    predicted = np.argmax(probabilities, axis=-1)
    return predicted[0] if len(predicted) == 1 else predicted
    

In [None]:
get_prediction( [0.80827887, 0.80769231, 0.        , 0.5748731 ])

In [None]:
scaled_x_test

In [None]:
# `inp` only exists as a parameter inside get_prediction, so it was undefined
# here; define the same scaled sample that is passed to get_prediction above
# and check the shape it has after adding the batch axis.
inp = [0.80827887, 0.80769231, 0.0, 0.5748731]
test = np.array(inp)[np.newaxis]
test.shape

In [None]:
scaled_x_test.shape

In [None]:
X_test.head()

In [None]:
data.tail(10)