In [22]:
import numpy as np
import pickle

# Fix NumPy's global random seed; this runs before TensorFlow is imported on the next line.
from numpy.random import seed
seed(1)
import tensorflow as tf

def acoplarMatricesenDia(matricesparaAcoplar, dias):
    """Sum every run of ``dias`` consecutive matrices into one coupled matrix.

    Output ``i`` is the element-wise sum of ``matricesparaAcoplar[i]`` through
    ``matricesparaAcoplar[i + dias - 1]``.

    Args:
        matricesparaAcoplar: Sequence of equally-shaped numeric arrays. The
            arrays no longer have to be square.
        dias: Number of consecutive matrices added together per output.

    Returns:
        A list of ``len(matricesparaAcoplar) - dias`` freshly allocated float
        arrays shaped like the first input matrix. The list is empty when the
        input is empty or has no more than ``dias`` matrices.
    """
    numeroMatrices = len(matricesparaAcoplar)
    if numeroMatrices == 0:
        # Nothing to couple; avoids indexing element 0 of an empty sequence.
        return []

    # Use the real shape of the data instead of assuming a square matrix.
    forma = np.shape(matricesparaAcoplar[0])
    matricesAcopladas = []
    # NOTE(review): the loop stops at len - dias, so the last full window is
    # not produced. Kept as-is because acoplarDias uses the same bound and the
    # two outputs must stay the same length.
    for i in range(numeroMatrices - dias):
        matrizTemporal = np.zeros(shape=forma)
        for j in range(dias):
            matrizTemporal += matricesparaAcoplar[i + j]
        matricesAcopladas.append(matrizTemporal)
    return matricesAcopladas

def acoplarDias(diasparaAcoplar, dias):
    """Build one text label per run of ``dias`` consecutive day labels.

    Each label concatenates the entries of its run, every entry preceded by a
    single space (so a non-empty label starts with a space).

    Returns:
        A list of ``len(diasparaAcoplar) - dias`` strings (empty when there
        are not enough entries).
    """
    total = len(diasparaAcoplar)
    return [
        "".join(" " + diasparaAcoplar[inicio + k] for k in range(dias))
        for inicio in range(total - dias)
    ]

def createSetBFF_toX(Matrices, Days, twdaysbefore, twdaysafter):
    """Cut a sequence into sliding (input window, target window) samples.

    For every start index ``s`` the ``twdaysbefore`` items beginning at ``s``
    are the inputs and the ``twdaysafter`` items that follow are the targets.

    Returns:
        ``(X, XD, Y, YD)`` where ``X`` and ``Y`` are flat lists of the selected
        matrices (all samples concatenated in order) and ``XD`` / ``YD`` hold
        one list of day labels per sample.
    """
    X, Y, XD, YD = [], [], [], []
    n_matrix = len(Matrices)
    # NOTE(review): start indices stop at n - before - after (exclusive), so
    # the window whose targets end on the last element is never emitted.
    # Confirm whether dropping that sample is intended.
    n_samples = min(n_matrix, n_matrix - twdaysbefore - twdaysafter)
    for start in range(n_samples):
        target = start + twdaysbefore
        X.extend(Matrices[start + k] for k in range(twdaysbefore))
        XD.append([Days[start + k] for k in range(twdaysbefore)])
        Y.extend(Matrices[target + k] for k in range(twdaysafter))
        YD.append([Days[target + k] for k in range(twdaysafter)])
    return (X, XD, Y, YD)

def createTrainingTest(Matrices, Days, percentage, daysBefore, daysAfter):
    """Split the series chronologically and build windowed train/test arrays.

    The first ``round(len(Matrices) * percentage)`` items form the training
    part and the rest the test part; each part is windowed independently with
    ``createSetBFF_toX``.

    Returns:
        ``(X_training, XD_training, Y_training, YD_training,
        X_test, XD_test, Y_test, YD_test)``. The ``X_*`` arrays are reshaped to
        ``(-1, daysBefore, side, side, 1)`` and the ``Y_*`` arrays to
        ``(-1, side * side)``, where ``side`` is the first dimension of
        ``Matrices[0]``. The ``*D_*`` entries are the day-label lists returned
        by ``createSetBFF_toX``.
    """
    side = Matrices[0].shape[0]
    cut = round(len(Matrices) * percentage)

    def as_inputs(samples):
        # One 5-D sample per window: (days, rows, cols, channel).
        return np.array(samples).reshape(-1, daysBefore, side, side, 1)

    def as_targets(samples):
        # One flattened matrix per row.
        return np.array(samples).reshape(-1, side * side)

    tr_x, tr_xd, tr_y, tr_yd = createSetBFF_toX(Matrices[:cut], Days[:cut], daysBefore, daysAfter)
    te_x, te_xd, te_y, te_yd = createSetBFF_toX(Matrices[cut:], Days[cut:], daysBefore, daysAfter)

    tr_x = as_inputs(tr_x)
    tr_y = as_targets(tr_y)
    te_x = as_inputs(te_x)
    te_y = as_targets(te_y)
    return (tr_x, tr_xd, tr_y, tr_yd, te_x, te_xd, te_y, te_yd)

import logging
# Only let messages of level ERROR or higher through TensorFlow's logger.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam

def createCLSTM(features, labels, n_convolutions = 16, kernel_conv = 2, kernel_pool = 2, lstm_size = 2, output = 'sigmoid'):
    """Assemble an (uncompiled) convolutional-LSTM Keras model.

    The network applies, per time step, a Conv2D and a MaxPooling2D layer and
    flattens the result; the resulting sequence feeds an LSTM followed by a
    Dense output layer.

    Args:
        features: Array whose ``shape[1:]`` defines the model input shape.
        labels: Array whose ``shape[1]`` defines the number of output units.
        n_convolutions: Number of Conv2D filters.
        kernel_conv: Side of the square convolution kernel.
        kernel_pool: Side of the square pooling window.
        lstm_size: Number of LSTM units.
        output: Activation of the final Dense layer.

    Returns:
        The ``Model`` connecting the input to the Dense output.
    """
    # Reset TensorFlow's global seed on every call, before any layer is built.
    tf.random.set_seed(4)

    entrada = Input(features.shape[1:])
    hidden = TimeDistributed(
        Conv2D(n_convolutions, (kernel_conv, kernel_conv), padding='same', activation='linear')
    )(entrada)
    hidden = TimeDistributed(
        MaxPooling2D((kernel_pool, kernel_pool), padding='same')
    )(hidden)
    hidden = TimeDistributed(Flatten())(hidden)
    # return_sequences=False: only the last LSTM output goes to the Dense layer.
    hidden = LSTM(lstm_size, activation='linear', return_sequences=False)(hidden)
    salida = Dense(labels.shape[1], activation=output)(hidden)
    return Model(inputs=entrada, outputs=salida)

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import accuracy_score, roc_curve, precision_recall_curve, auc

def makeBinaryMetric(expected, predicted, setName=' ', debug = True, savefigure = False, filename = ''):
    """Threshold predictions at 0.5 and compute accuracy, ROC AUC and PR AUC.

    Args:
        expected: Array of ground-truth binary labels (any shape; flattened).
        predicted: Array of predicted scores (any shape; flattened).
        setName: Text appended to the plot titles.
        debug: When true, draw the ROC and precision-recall plots.
        savefigure: When true (and ``debug`` is true), the plots are also saved.
        filename: Suffix used in the saved figure file names.

    Returns:
        ``(accuracy, roc_auc, pr_auc, thresholded_predictions, labels, 0.5)``
        where the two arrays are flattened copies of the inputs.
    """
    best_i = 0.5
    # flatten() already copies; the extra copy mirrors the original behaviour
    # and guarantees the caller's arrays are never modified.
    best_r = np.copy(expected.flatten())
    best_p = np.copy(predicted.flatten())
    best_p[best_p >= best_i] = 1
    best_p[best_p < best_i] = 0

    # The results are bound unconditionally: previously they were only set
    # when accuracy was > 0, so a zero-accuracy input raised UnboundLocalError.
    best_acc = accuracy_score(best_r, best_p)

    # NOTE(review): both curves are computed from the thresholded (0/1)
    # predictions, not from the raw scores, so each curve has a single
    # operating point. Kept to preserve the reported numbers; confirm whether
    # the raw scores should be passed instead.
    fpr, tpr, _ = roc_curve(best_r, best_p)
    precision, recall, _ = precision_recall_curve(best_r, best_p)
    roc_auc = auc(fpr, tpr)
    pr_auc = auc(recall, precision)

    if debug:
        # The plot helpers ignore `filename` unless `savefigure` is true, so a
        # single call covers both the saving and non-saving cases.
        plot_ROC(fpr, tpr, roc_auc, setName, savefigure, filename)
        plot_PR(recall, precision, pr_auc, setName, savefigure, filename)
    return (best_acc, roc_auc, pr_auc, best_p, best_r, best_i)

def plot_ROC(fpr, tpr, roc_auc, setName, savefigure = False, filename = ''):
    """Draw a ROC curve with a diagonal reference line and show it.

    Args:
        fpr: False-positive rates (x values).
        tpr: True-positive rates (y values).
        roc_auc: Area under the curve, shown in the legend.
        setName: Text appended to the plot title.
        savefigure: When true, also save the figure as a PDF.
        filename: Suffix of the saved PDF's file name.
    """
    fig = plt.figure()
    ax = fig.gca()
    ax.set_title('Receiver Operating Characteristic {}'.format(setName))
    ax.set_ylabel('True Positive Rate')
    ax.set_xlabel('False Positive Rate')
    # Diagonal reference line.
    ax.plot([0, 1], [0, 1], 'r--', label='Low performance')
    ax.set_xlim([-0.01, 1.01])
    ax.set_ylim([-0.01, 1.01])
    ax.plot(fpr, tpr, 'tab:red', label='AUC = %0.4f' % roc_auc)
    ax.legend(loc='lower right')
    fig.tight_layout()
    if savefigure:
        fig.savefig('../reports/figures/roc_auc_{}.pdf'.format(filename), bbox_inches='tight')
    plt.show()

def plot_PR(recall, precision, pr_auc, setName, savefigure = False, filename = ''):
    """Draw a precision-recall curve with a horizontal reference line and show it.

    Args:
        recall: Recall values (x values).
        precision: Precision values (y values).
        pr_auc: Area under the curve, shown in the legend.
        setName: Text appended to the plot title.
        savefigure: When true, also save the figure as a PDF.
        filename: Suffix of the saved PDF's file name.
    """
    fig = plt.figure()
    ax = fig.gca()
    ax.set_title('Precision-Recall {}'.format(setName))
    ax.set_ylabel('Precision')
    ax.set_xlabel('Recall')
    # Horizontal reference line fixed at precision 0.1.
    ax.plot([0, 1], [0.1, 0.1], 'r--', label='Low performance')
    ax.set_xlim([-0.01, 1.01])
    ax.set_ylim([-0.01, 1.01])
    ax.plot(recall, precision, 'tab:red', label='AUC = %0.4f' % pr_auc)
    ax.legend(loc='lower left')
    fig.tight_layout()
    if savefigure:
        fig.savefig('../reports/figures/pr_auc_{}.pdf'.format(filename), bbox_inches='tight')
    plt.show()
    
def train_test_a_Model(Model_creator, Model_Name, inputRed, inputDias, epochs=100, batch_size=32, train_fraction=0.7):
    """Train and evaluate a model for input windows of 1 to 7 matrices.

    For each window length the data is split chronologically, the targets are
    binarised (any value >= 1 becomes 1), a fresh model is built, compiled
    with binary cross-entropy and Adam, trained, and scored on both the
    training and test parts. Results are printed, including one LaTeX table
    row per window length.

    Args:
        Model_creator: Callable ``(features, labels) -> model`` returning an
            uncompiled Keras model.
        Model_Name: Name shown in the printed header.
        inputRed: Sequence of matrices to learn from.
        inputDias: Day labels aligned with ``inputRed``.
        epochs: Training epochs per model (default 100, as before).
        batch_size: Training batch size (default 32, as before).
        train_fraction: Share of the series used for training (default 0.7,
            as before).
    """
    for i in range(1,8):
        x_tr, _, y_tr, _, x_te, _, y_te, _ = createTrainingTest(inputRed, inputDias, train_fraction, i, 1)
        # Turn counts into binary occurrence labels.
        y_tr[y_tr >= 1] = 1
        y_te[y_te >= 1] = 1

        print(f'Modelo {Model_Name}: inputs matrices= {i}')
        model = Model_creator(x_tr,y_tr)
        # `learning_rate` replaces the deprecated `lr` alias, which newer
        # Keras releases no longer accept.
        optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['BinaryAccuracy'])
        model.fit(x_tr, y_tr, batch_size=batch_size, epochs=epochs, verbose=0, validation_data=(x_te, y_te))
        dg_tr = model.predict(x_tr)
        dg_te = model.predict(x_te)

        best_acc, roc_auc, pr_auc, _, _, best_i = makeBinaryMetric(y_tr, dg_tr, '', False)
        print('Training -> Accuracy = {}, Roc_auc = {}, Pr_auc = {}, b_i = {}'.format(best_acc, roc_auc, pr_auc,best_i))
        best_acc, roc_auc, pr_auc, _, _, best_i = makeBinaryMetric(y_te, dg_te, '', False)
        print('Test -> Accuracy = {}, Roc_auc = {}, Pr_auc = {}, b_i = {} \n'.format(best_acc, roc_auc, pr_auc,best_i))
        # LaTeX table row for the test metrics; the backslash before "hline"
        # is now escaped explicitly (same output, no invalid-escape warning).
        print(f'{i} & {best_acc:.4f} & {roc_auc:.4f} & {pr_auc:.4f} \\\\ \\hline')
        print('')

In [2]:
# Keep only the 2016-2018 slice of every series.
# NOTE(review): the bounds 365*2 and 365*5+1 presumably assume the stored
# series start on 1 January 2014 (the +1 covering the 2016 leap day) -
# confirm against the data.
with open('../../data/ALLDAYS.pickle', 'rb') as f:
    ALLDAYS = pickle.load(f)[365*2:((365*5)+1)]

# The two crime series are sliced the same way and converted to NumPy arrays.
with open('../../data/MB8_ROBBERYSTREET.pickle', 'rb') as f:
    MB8_ROBBERYSTREET = np.array(pickle.load(f)[365*2:((365*5)+1)])
with open('../../data/MB8_LARCENY.pickle', 'rb') as f:
    MB8_LARCENY = np.array(pickle.load(f)[365*2:((365*5)+1)])

In [3]:
# Coupled inputs for 1 to 5 days; list index k holds the (k + 1)-day coupling.
inputsRedes_Larceny = [acoplarMatricesenDia(MB8_LARCENY, dia) for dia in range(1, 6)]
inputsRedes_RobberyStreet = [acoplarMatricesenDia(MB8_ROBBERYSTREET, dia) for dia in range(1, 6)]
inputsRedes_Dias = [acoplarDias(ALLDAYS, dia) for dia in range(1, 6)]

In [12]:
# Evaluate the CLSTM on the 1-day (index 0) larceny inputs.
print('Larceny')
train_test_a_Model(
    Model_creator=createCLSTM,
    Model_Name='CLSTM',
    inputRed=inputsRedes_Larceny[0],
    inputDias=inputsRedes_Dias[0],
)

Larceny
Modelo CLSTM: inputs matrices= 1
Training -> Accuracy = 0.771842277486911, Roc_auc = 0.6108805811251234, Pr_auc = 0.54325115344932, b_i = 0.5
Test -> Accuracy = 0.7634747706422018, Roc_auc = 0.6074610679205859, Pr_auc = 0.5479646112400696, b_i = 0.5 

1 & 0.7635 & 0.6075 & 0.5480 \\ \hline

Modelo CLSTM: inputs matrices= 2
Training -> Accuracy = 0.7735910878112713, Roc_auc = 0.6219705139800243, Pr_auc = 0.5514424330398013, b_i = 0.5
Test -> Accuracy = 0.761407208588957, Roc_auc = 0.6145763368395717, Pr_auc = 0.5461305730640985, b_i = 0.5 

2 & 0.7614 & 0.6146 & 0.5461 \\ \hline

Modelo CLSTM: inputs matrices= 3
Training -> Accuracy = 0.7740116469816273, Roc_auc = 0.6217599407191495, Pr_auc = 0.5524455888424269, b_i = 0.5
Test -> Accuracy = 0.7588942307692308, Roc_auc = 0.6118474490690659, Pr_auc = 0.5402256454626042, b_i = 0.5 

3 & 0.7589 & 0.6118 & 0.5402 \\ \hline

Modelo CLSTM: inputs matrices= 4
Training -> Accuracy = 0.7750698094612353, Roc_auc = 0.6229210529189906, Pr_au

In [13]:
# Evaluate the CLSTM on the 1-day (index 0) street-robbery inputs.
print('Robbery Street')
train_test_a_Model(
    Model_creator=createCLSTM,
    Model_Name='CLSTM',
    inputRed=inputsRedes_RobberyStreet[0],
    inputDias=inputsRedes_Dias[0],
)

Robbery Street
Modelo CLSTM: inputs matrices= 1
Training -> Accuracy = 0.8916066753926701, Roc_auc = 0.6038763829122894, Pr_auc = 0.44056547838861215, b_i = 0.5
Test -> Accuracy = 0.8911028287461774, Roc_auc = 0.5924126080831698, Pr_auc = 0.3914324287347706, b_i = 0.5 

1 & 0.8911 & 0.5924 & 0.3914 \\ \hline

Modelo CLSTM: inputs matrices= 2
Training -> Accuracy = 0.8924475753604194, Roc_auc = 0.6043238172555447, Pr_auc = 0.445718715147417, b_i = 0.5
Test -> Accuracy = 0.8917753067484663, Roc_auc = 0.5916335852235088, Pr_auc = 0.3921883903369212, b_i = 0.5 

2 & 0.8918 & 0.5916 & 0.3922 \\ \hline

Modelo CLSTM: inputs matrices= 3
Training -> Accuracy = 0.8925114829396326, Roc_auc = 0.6154976242984573, Pr_auc = 0.45344064484742685, b_i = 0.5
Test -> Accuracy = 0.8908653846153847, Roc_auc = 0.6018033272090538, Pr_auc = 0.3996513095975943, b_i = 0.5 

3 & 0.8909 & 0.6018 & 0.3997 \\ \hline

Modelo CLSTM: inputs matrices= 4
Training -> Accuracy = 0.8928219448094612, Roc_auc = 0.61892072440