In [108]:
#Importing essentials libraries

import csv
import numpy
import matplotlib.pyplot as plt
import pandas
import math

import keras
from keras.models import Sequential
from keras.layers import Dense,Activation, Dropout
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, RobustScaler
from sklearn.metrics import mean_squared_error

import collections
import datetime
import scipy
from scipy import stats

import graphviz
import pydot
import plotly
import plotly.plotly as py
from plotly.graph_objs import Scatter, Heatmap, Layout
plotly.offline.init_notebook_mode(connected=True)

In [109]:
#Function for building the model

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
  dataX, dataY = [], []
  for i in range(len(dataset)-look_back-1):
    a = dataset[i:(i+look_back), 0]
    dataX.append(a)
    dataY.append(dataset[i + look_back, 0])
  return numpy.array(dataX), numpy.array(dataY)


def remove_extreme_vals(keys,values):
    quartiles = numpy.percentile(values, [5,95])
    minVal = 40 #quartiles[0] #5% of the values are inferior or equal to his values.
    maxVal = 200 #quartiles[1] #95% of the values are inferior or equal to this value
    jours_x = []
    jours_y = []
    
    print(minVal)
    print(maxVal)
    
    for i in range(0, len(values) ):
        if minVal < values[i] < maxVal:
            jours_x.append(keys[i])
            jours_y.append(values[i])
            
    return jours_x,jours_y

def dividers(x):
    liste_dividers = []
    
    for i in range(1,x):
        if ( x%i == 0 ):
            liste_dividers.append(i)
            
    return liste_dividers

In [110]:
#Read dataset file
file = pandas.read_csv("../data/improved_spvm_2015-2019.csv")

In [111]:
#Display Dataset to see what we are working on
jours = file["JOUR"]
jours = list(jours)

#Sort the list to be able to see something.
jours = sorted(jours)

jours = collections.Counter(jours)

jours_x = list(jours.keys())
jours_y = list(jours.values())

#jours_x,jours_y = remove_extreme_vals( list(jours.keys()), list(jours.values()) )

plotly.offline.iplot({
    "data": [Scatter(x= jours_x, y= jours_y)],
    "layout": Layout(title="Crimes per day")
})


In [112]:
#Preparing the dataset : 70% trainset ; 30% test set.

#x = int(0.70*len(jours_x))
y = int(0.70*len(jours_y))

#scaler = RobustScaler()
scaler = MinMaxScaler(feature_range=(0,1))

x_train = jours_y[0:y]
x_test = jours_y[y+1:-1]

X = len(x_train)
Y = len(x_test)

x_train = numpy.asarray(x_train)
x_test = numpy.asarray(x_test)

x_train = x_train.reshape(X,1)
x_test = x_test.reshape(Y,1)

#Nu-Vac
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

print("Preparing dataset finished !")

Preparing dataset finished !



Data with input dtype int64 was converted to float64 by MinMaxScaler.



In [113]:
#Building the training models

divs =  dividers( len(x_train) )
look_back = divs[ int(len(divs)/2) ] #Numbers of days to look back and feed the model

trainX, trainY = create_dataset(x_train, look_back=look_back)
testX, testY = create_dataset(x_test, look_back=look_back)

trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))


tbCallBack = keras.callbacks.TensorBoard(log_dir='Graph', histogram_freq=0,  
          write_graph=True, write_images=True)

model = Sequential()

model.add(LSTM(16, input_shape=(1, look_back)))
model.add(Dense(1))
model.add(Activation("sigmoid"))

model.summary()

model.compile(loss='mean_squared_error', optimizer='adam',metrics=['mae'])
history = model.fit(trainX, trainY, epochs=100, batch_size=look_back, verbose=0,shuffle=True,callbacks=[tbCallBack])

trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_29 (LSTM)               (None, 16)                1472      
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 17        
_________________________________________________________________
activation_3 (Activation)    (None, 1)                 0         
Total params: 1,489
Trainable params: 1,489
Non-trainable params: 0
_________________________________________________________________


In [114]:
#Training Metrics

# Create traces
trace0 = Scatter(
    y = history.history['loss'],
    mode = 'markers+lines',
    name = 'Train'
)
#trace1 = go.Scatter(
#    y = history.history['val_loss'],
#    mode = 'markers+lines',
#    name = 'Test'
#)

data = [trace0]

plotly.offline.iplot(data, filename='Loss+ValLoss')

trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 11.96 RMSE
Test Score: 14.02 RMSE


In [115]:
#Showing Predictions



predictions = (scaler.inverse_transform(testPredict).flatten() / 100).round()
normal_vals = (scaler.inverse_transform(x_test)).flatten()
normal_vals = normal_vals[look_back:-1]

random_x = jours_x[y+1:-1]


# Create traces
trace_pred = Scatter(
    x = random_x,
    y = predictions.tolist(),
    mode = 'markers',
    name = 'Predictions'
)
trace_normal = Scatter(
    x = random_x,
    y = normal_vals.tolist(),
    mode = 'markers',
    name = 'Actual Values'
)

data = [trace_pred, trace_normal]

plotly.offline.iplot(data, filename='Res')

In [116]:
equals = numpy.intersect1d(predictions, normal_vals)
print(len(equals))
print(len(predictions))
print(len(normal_vals))
print(max(predictions))
print(max(normal_vals))

22
452
452
78.0
120.0
