In [1]:
import numpy as np
import pickle

from numpy.random import seed
seed(1)
import tensorflow as tf

def acoplarMatricesenDia(matricesparaAcoplar, dias):
    numeroMatrices = len(matricesparaAcoplar)
    rows_columns = len(matricesparaAcoplar[0])
    matricesAcopladas = []
    matrizTemporal = np.zeros(shape=(rows_columns, rows_columns))
    for i in range(numeroMatrices-dias):
        for j in range(dias):
            matrizTemporal += matricesparaAcoplar[i+j]
        matricesAcopladas.append(matrizTemporal)
        matrizTemporal = np.zeros(shape=(rows_columns, rows_columns))
    return matricesAcopladas

def acoplarDias(diasparaAcoplar, dias):
    numeroDias = len(diasparaAcoplar)
    diasAcoplados = []
    acopladorTemporal = ""
    for i in range(numeroDias-dias):
        for j in range(dias):
            acopladorTemporal += " "+ diasparaAcoplar[i+j]
        diasAcoplados.append(acopladorTemporal)
        acopladorTemporal = ""
    return diasAcoplados

def createSetBFF_toX(Matrices, Days, twdaysbefore, twdaysafter):
    X = []
    Y = []
    XD = []
    YD = []
    n_matrix = len(Matrices)
    for i in range(n_matrix):
        if (i<n_matrix-twdaysbefore-twdaysafter):
            XD.append([])
            YD.append([])
            for j in range(twdaysbefore):
                X.append(Matrices[i + j])
                XD[i].append(Days[i + j])
            for j in range(twdaysafter):
                Y.append(Matrices[i + twdaysbefore + j])
                YD[i].append(Days[i + twdaysbefore + j])
    return (X, XD, Y, YD)

def createTrainingTest(Matrices, Days, percentage, daysBefore, daysAfter):
    rows_columns = Matrices[0].shape[0]
    division = round(len(Matrices) * percentage)
    training = Matrices[:division]
    test = Matrices[division:]
    dtr = Days[:division]
    dte = Days[division:]
    X_training, XD_training, Y_training, YD_training = createSetBFF_toX(training, dtr, daysBefore, daysAfter)
    X_test, XD_test, Y_test, YD_test = createSetBFF_toX(test, dte, daysBefore, daysAfter)
    X_training = np.array(X_training).reshape(-1,daysBefore,rows_columns,rows_columns, 1)
    Y_training = np.array(Y_training).reshape(-1,rows_columns * rows_columns)
    X_test = np.array(X_test).reshape(-1,daysBefore,rows_columns,rows_columns, 1)
    Y_test = np.array(Y_test).reshape(-1,rows_columns * rows_columns)
    return (X_training, XD_training, Y_training, YD_training, X_test, XD_test, Y_test,  YD_test)

import logging
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam

def createCLSTM(features, labels, n_convolutions = 16, kernel_conv = 2, kernel_pool = 2, lstm_size = 2, output = 'sigmoid'):
    tf.random.set_seed(4)
    input_nn = Input(features.shape[1:])
    conv1 = TimeDistributed(Conv2D(n_convolutions, (kernel_conv,kernel_conv), padding = 'same', activation='linear'))(input_nn)
    maxp1 = TimeDistributed(MaxPooling2D((kernel_pool, kernel_pool), padding='same'))(conv1)
    flatt1 = TimeDistributed(Flatten())(maxp1)
    lstm1 = LSTM(lstm_size, activation='linear', return_sequences=False)(flatt1)
    output_nn = Dense(labels.shape[1], activation=output)(lstm1)    
    CLSTM = Model(inputs=input_nn, outputs=output_nn)
    #CLSTM.summary()
    return CLSTM

def createRNNCNN(features, labels, n_convolutions = 16, kernel_conv = 2, kernel_pool = 2, lstm_size = 2, output = 'sigmoid'):
    tf.random.set_seed(4)
    input_nn = Input(features.shape[1:])
    conv1 = TimeDistributed(Conv2D(n_convolutions, (kernel_conv,kernel_conv), padding = 'same', activation='linear'))(input_nn)
    maxp1 = TimeDistributed(MaxPooling2D((kernel_pool, kernel_pool), padding='same'))(conv1)
    flatt1 = TimeDistributed(Flatten())(maxp1)
    lstm1 = SimpleRNN(lstm_size, activation='linear', return_sequences=False)(flatt1)
    output_nn = Dense(labels.shape[1], activation=output)(lstm1)    
    CLSTM = Model(inputs=input_nn, outputs=output_nn)
    #CLSTM.summary()
    return CLSTM

def createLSTM(X_training,Y_training, lstm_size = 2, output = 'sigmoid'):
    tf.random.set_seed(4)
    input_nn = Input(X_training.shape[1:])
    flatt1 = TimeDistributed(Flatten())(input_nn)
    lstm1 = LSTM(lstm_size, activation='linear', return_sequences=False)(flatt1)
    output_nn = Dense(Y_training.shape[1], activation=output)(lstm1)    
    only_LSTM = Model(inputs=input_nn, outputs=output_nn)
    #only_LSTM.summary()
    return only_LSTM

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import accuracy_score, roc_curve, precision_recall_curve, auc

def makeBinaryMetric(expected, predicted, setName=' ', debug = True, savefigure = False, filename = ''):
    f_pred = predicted.flatten()
    f_real = expected.flatten()
    i = 0.5
    best_acc = 0
    p = np.copy(f_pred)
    r = np.copy(f_real)
    p[p>=round(i,1)] = 1
    p[p<round(i,1)] = 0
    acc = accuracy_score(r, p)
    if(acc > best_acc):
        best_acc = acc
        best_p = np.copy(p)
        best_r = np.copy(r)
        best_i = round(i,1)
    fpr, tpr, threshold = roc_curve(best_r, best_p)
    precision, recall, thresholds = precision_recall_curve(best_r, best_p)
    roc_auc = auc(fpr, tpr)
    pr_auc = auc(recall, precision)
    if debug:
        if savefigure:
            plot_ROC(fpr, tpr, roc_auc, setName, savefigure, filename)
            plot_PR(recall, precision, pr_auc, setName, savefigure, filename)
        else:
            plot_ROC(fpr, tpr, roc_auc, setName)
            plot_PR(recall, precision, pr_auc, setName)
    return (best_acc, roc_auc, pr_auc, best_p, best_r, best_i)

def plot_ROC(fpr, tpr, roc_auc, setName, savefigure = False, filename = ''):
    f = plt.figure()
    plt.title('Receiver Operating Characteristic {}'.format(setName))
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.plot([0, 1], [0, 1],'r--', label = 'Low performance')
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.plot(fpr, tpr, 'tab:red', label = 'AUC = %0.4f' % roc_auc)
    plt.legend(loc = 'lower right')
    plt.tight_layout()
    if savefigure:
        f.savefig('../reports/figures/roc_auc_{}.pdf'.format(filename), bbox_inches='tight')
    plt.show()

def plot_PR(recall, precision, pr_auc, setName, savefigure = False, filename = ''):
    f = plt.figure()
    plt.title('Precision-Recall {}'.format(setName))
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.plot([0, 1], [0.1, 0.1],'r--', label = 'Low performance')
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.plot(recall, precision, 'tab:red', label = 'AUC = %0.4f' % pr_auc)
    plt.legend(loc = 'lower left')
    plt.tight_layout()
    if savefigure:
        f.savefig('../reports/figures/pr_auc_{}.pdf'.format(filename), bbox_inches='tight')
    plt.show()

def train_test_a_Model(Model_creator, Model_Name, inputRed, inputDias):
    for matricesAcopladaspor in range(len(inputRed)):
        x_tr, xd_tr, y_tr, yd_tr, x_te, xd_te, y_te, yd_te = createTrainingTest(inputRed[matricesAcopladaspor], inputDias[matricesAcopladaspor], 0.7, 5, 1)
        y_tr[y_tr >= 1] = 1
        y_te[y_te >= 1] = 1

        print(f'Modelo {Model_Name}: matrices acopladas por {matricesAcopladaspor + 1} dias')
        model = Model_creator(x_tr,y_tr)
        model.compile(loss='binary_crossentropy',optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False),metrics=['BinaryAccuracy'])
        epochs = 100 
        batch_size = 32
        history = model.fit(x_tr, y_tr, batch_size=batch_size, epochs=epochs, verbose=0,validation_data=(x_te, y_te))
        dg_tr = model.predict(x_tr)
        dg_te = model.predict(x_te)

        best_acc, roc_auc, pr_auc, best_ptr, best_rtr, best_i = makeBinaryMetric(y_tr, dg_tr, '', False)
        print('Training -> Accuracy = {}, Roc_auc = {}, Pr_auc = {}, b_i = {}'.format(best_acc, roc_auc, pr_auc,best_i))
        best_acc, roc_auc, pr_auc, best_pte, best_rte, best_i = makeBinaryMetric(y_te, dg_te, '', False)
        print('Test -> Accuracy = {}, Roc_auc = {}, Pr_auc = {}, b_i = {} \n'.format(best_acc, roc_auc, pr_auc,best_i))

In [3]:
#Se filtra desde 2016 hasta 2018
with open('../../data/ALLDAYS.pickle', 'rb') as f:
    ALLDAYS = pickle.load(f)
with open('../../data/MB16_ROBBERYSTREET.pickle', 'rb') as f:
    MB16_ROBBERYSTREET = pickle.load(f)
with open('../../data/MB16_LARCENY.pickle', 'rb') as f:
    MB16_LARCENY = pickle.load(f)    

MB16_ROBBERYSTREET = MB16_ROBBERYSTREET[365*2:((365*5)+1)]
MB16_LARCENY = MB16_LARCENY[365*2:((365*5)+1)]
ALLDAYS = ALLDAYS[365*2:((365*5)+1)]

MB16_ROBBERYSTREET = np.array(MB16_ROBBERYSTREET)
MB16_LARCENY = np.array(MB16_LARCENY)

In [4]:
inputsRedes_Larceny = []
inputsRedes_RobberyStreet = []
inputsRedes_Dias =[]
for dia in range(1,9):
    inputsRedes_Larceny.append(acoplarMatricesenDia(MB16_LARCENY, dia))
    inputsRedes_RobberyStreet.append(acoplarMatricesenDia(MB16_ROBBERYSTREET, dia))
    inputsRedes_Dias.append(acoplarDias(ALLDAYS,dia))

In [5]:
print('Larceny')
train_test_a_Model(createLSTM, 'LSTM', inputsRedes_Larceny, inputsRedes_Dias)
train_test_a_Model(createRNNCNN, 'RNN+CNN', inputsRedes_Larceny, inputsRedes_Dias)
train_test_a_Model(createCLSTM, 'CLSTM', inputsRedes_Larceny, inputsRedes_Dias)

Larceny
Modelo LSTM: matrices acopladas por 1 dias
Training -> Accuracy = 0.9207956414473685, Roc_auc = 0.5146747333482284, Pr_auc = 0.36838522661135537, b_i = 0.5
Test -> Accuracy = 0.9176422213622291, Roc_auc = 0.5133635291830467, Pr_auc = 0.34891176501712495, b_i = 0.5 

Modelo LSTM: matrices acopladas por 2 dias
Training -> Accuracy = 0.8596114309210526, Roc_auc = 0.5434057824949151, Pr_auc = 0.42989600735455225, b_i = 0.5
Test -> Accuracy = 0.8553595690993789, Roc_auc = 0.5423194710622425, Pr_auc = 0.42869147221825793, b_i = 0.5 

Modelo LSTM: matrices acopladas por 3 dias
Training -> Accuracy = 0.8181354990118577, Roc_auc = 0.6039446373491064, Pr_auc = 0.5142311426729369, b_i = 0.5
Test -> Accuracy = 0.8111655667701864, Roc_auc = 0.5996291376544781, Pr_auc = 0.5047408268627512, b_i = 0.5 

Modelo LSTM: matrices acopladas por 4 dias
Training -> Accuracy = 0.7906961164248021, Roc_auc = 0.6421302324943824, Pr_auc = 0.5837828519179977, b_i = 0.5
Test -> Accuracy = 0.7801217973602484,

In [6]:
print('Robbery Street')
train_test_a_Model(createLSTM, 'LSTM', inputsRedes_RobberyStreet, inputsRedes_Dias)
train_test_a_Model(createRNNCNN, 'RNN+CNN', inputsRedes_RobberyStreet, inputsRedes_Dias)
train_test_a_Model(createCLSTM, 'CLSTM', inputsRedes_RobberyStreet, inputsRedes_Dias)

Robbery Street
Modelo LSTM: matrices acopladas por 1 dias
Training -> Accuracy = 0.9659333881578948, Roc_auc = 0.5, Pr_auc = 0.5170333059210527, b_i = 0.5
Test -> Accuracy = 0.968483939628483, Roc_auc = 0.5, Pr_auc = 0.5157580301857585, b_i = 0.5 

Modelo LSTM: matrices acopladas por 2 dias
Training -> Accuracy = 0.9377878289473685, Roc_auc = 0.5311305226277886, Pr_auc = 0.35200105877708393, b_i = 0.5
Test -> Accuracy = 0.9411272321428571, Roc_auc = 0.5197443916366774, Pr_auc = 0.28382206060099535, b_i = 0.5 

Modelo LSTM: matrices acopladas por 3 dias
Training -> Accuracy = 0.9164453639657444, Roc_auc = 0.5756440823179336, Pr_auc = 0.42841341148857737, b_i = 0.5
Test -> Accuracy = 0.9195822010869565, Roc_auc = 0.5602045090649322, Pr_auc = 0.37847223789271006, b_i = 0.5 

Modelo LSTM: matrices acopladas por 4 dias
Training -> Accuracy = 0.9003597048153035, Roc_auc = 0.6257200960111502, Pr_auc = 0.49068618884921256, b_i = 0.5
Test -> Accuracy = 0.9029503105590062, Roc_auc = 0.6104161075