In [299]:
# Montar/treinar o modelo e verificar a precisão

In [300]:
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from category_encoders import OneHotEncoder
from datetime import datetime


In [326]:
df = pd.read_csv("./data-fict/Encoded_Data_Ocorr.csv")
df.head()

Unnamed: 0,data_ocorrencia,hora_ocorrencia,regiao_administrativa,tipo_crime,sexo_vitima,idade_vitima,risco
0,2023-01-14,21:00,0,0,2,19,5.241323e-08
1,2023-01-20,02:00,0,0,2,17,5.241323e-08
2,2023-01-24,20:15,0,0,2,30,5.241323e-08
3,2023-01-03,09:30,0,2,1,41,5.241323e-08
4,2023-01-18,20:00,0,2,1,54,5.241323e-08


In [327]:
## Separação da variável dependente

X = df.drop(columns=["risco", "tipo_crime"])
y = df["risco"]

In [328]:
y.head()

0    5.241323e-08
1    5.241323e-08
2    5.241323e-08
3    5.241323e-08
4    5.241323e-08
Name: risco, dtype: float64

In [329]:
# Imputação dos dados
## Isso aqui transforma os valores entre 0 e 1 

scaler = MinMaxScaler()
Y = scaler.fit_transform(y.values.reshape(-1, 1))
Y = scaler.transform(y.values.reshape(-1, 1))

Y = pd.DataFrame(Y, columns=["risco"])
Y.to_csv("./a.csv", index=True)

In [334]:
print(Y)

[[0.00132747]
 [0.00132747]
 [0.00132747]
 ...
 [0.0111592 ]
 [0.0111592 ]
 [0.0111592 ]]


In [335]:
## Isso aqui transforma os valores entre 0 e 1 
scaler = MinMaxScaler()
Y = scaler.fit_transform(y.values.reshape(-1, 1))
Y = scaler.transform(y.values.reshape(-1, 1))

def classifica_valor(x):
    if x > 0.8: 
        return 5
    elif x > 0.6:
        return 4
    elif x > 0.4:
        return 3
    elif x > 0.2:
        return 2
    elif x >= 0.0:
        return 1

Y = pd.DataFrame(Y, columns=["risco"])
Y['risco'] = Y['risco'].apply(classifica_valor)

In [336]:
# Conversão da hora string para minutos

def tempo_para_minutos(tempo):
    # Divide a string 'hh:mm' em horas e minutos
    horas, minutos = map(int, tempo.split(':'))
    
    # Converte tudo para minutos
    total_minutos = horas * 60 + minutos
    return total_minutos

X["hora_ocorrencia"] = X["hora_ocorrencia"].apply(tempo_para_minutos)

X.head()

Unnamed: 0,data_ocorrencia,hora_ocorrencia,regiao_administrativa,sexo_vitima,idade_vitima
0,2023-01-14,1260,0,2,19
1,2023-01-20,120,0,2,17
2,2023-01-24,1215,0,2,30
3,2023-01-03,570,0,1,41
4,2023-01-18,1200,0,1,54


In [337]:
def data_para_dias(data_str):
    data = datetime.strptime(data_str, r'%Y-%m-%d')
    referencia = datetime(1970, 1, 1)
    return (data - referencia).days

X["data_ocorrencia"] = X["data_ocorrencia"].apply(data_para_dias)

X.head()

Unnamed: 0,data_ocorrencia,hora_ocorrencia,regiao_administrativa,sexo_vitima,idade_vitima
0,19371,1260,0,2,19
1,19377,120,0,2,17
2,19381,1215,0,2,30
3,19360,570,0,1,41
4,19375,1200,0,1,54


In [338]:
# Isso aqui divide o treino do teste com uma proporção de 0.5 
## Se esse valor ficar muito grande, a eficiência pode ser reduzida porque o modelo começa a decorar os dados

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.5, random_state=42)

In [339]:
# Imputação dos dados
## Isso aqui transforma os valores entre 0 e 1 

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [340]:
print(X_train_scaled)

[[0.9        0.35789474 0.28125    0.         0.43103448]
 [0.3        0.77894737 0.90625    1.         0.65517241]
 [0.         0.37894737 0.53125    1.         0.56896552]
 ...
 [0.6        0.6        0.5        1.         0.10344828]
 [0.56666667 0.18947368 0.03125    0.         0.37931034]
 [1.         0.16842105 0.625      0.         0.5       ]]


In [341]:
# tf.random.set_seed(2)
# Entrada = Bairro, Trimestre (Média), Tipo de crime
#

# Construção das camadas
inp = keras.layers.Input((X_train_scaled.shape[1], ))

hid1 = keras.layers.Dense(50, activation="relu")(inp)
drop1 = keras.layers.Dropout(0.4)(hid1)
hid2 = keras.layers.Dense(50, activation="relu")(drop1)
drop2 = keras.layers.Dropout(0.4)(hid2)
hid3 = keras.layers.Dense(50, activation="relu")(drop2)

out_soft = keras.layers.Dense(len(y_train), activation="softmax")(hid3)
# out_sig = keras.layers.Dense(units=1, activation="sigmoid")(drop)


# model_sig = keras.Model(inp, out_sig)
model_soft = keras.Model(inp, out_soft)

# model_sig.compile(optimizer='adam',
#               loss='msle',
#               metrics=['accuracy'])
model_soft.compile(optimizer='adam',
                   loss="sparse_categorical_crossentropy",
                   metrics=["accuracy"])

# model_sig.summary()
model_soft.summary()

In [342]:
es = keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=0, patience=10, mode="min", restore_best_weights=True,
)

In [343]:
model_soft.fit(x=X_train_scaled, y=y_train, validation_data=(X_test_scaled, y_test), shuffle=True, batch_size=32, callbacks=[es], verbose=1, epochs=30)

Epoch 1/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6114 - loss: 5.1593 - val_accuracy: 0.7788 - val_loss: 0.7086
Epoch 2/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7800 - loss: 0.7059 - val_accuracy: 0.7788 - val_loss: 0.6552
Epoch 3/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7788 - loss: 0.6529 - val_accuracy: 0.7764 - val_loss: 0.6154
Epoch 4/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7665 - loss: 0.6095 - val_accuracy: 0.7033 - val_loss: 0.5793
Epoch 5/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7613 - loss: 0.5741 - val_accuracy: 0.7202 - val_loss: 0.5503
Epoch 6/30
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7635 - loss: 0.5414 - val_accuracy: 0.7788 - val_loss: 0.5331
Epoch 7/30
[1m164/164[0m 

<keras.src.callbacks.history.History at 0x7fdc63658090>

In [344]:
# Testar modelo

loss, accuracy = model_soft.evaluate(X_test_scaled, y_test, batch_size=32)
print(f'Loss no conjunto de teste: {loss:.4f}')
print(f'Accuracy no conjunto de teste: {accuracy:.4f}')

[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9819 - loss: 0.1029
Loss no conjunto de teste: 0.1081
Accuracy no conjunto de teste: 0.9801
