<a href="https://colab.research.google.com/github/orifelszer/CrimeData/blob/main/DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Required Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

Loading Supervised Datasets from GitHub Repository

In [12]:
import zipfile
import requests
from io import BytesIO

# Root of the repository's raw-file endpoint, and the archive that holds each data split
base_url = "https://github.com/orifelszer/CrimeData/raw/main/"
files = dict(
    X_train="X_train_supervised.zip",
    X_test="X_test_supervised.zip",
    y_train="y_train_supervised.zip",
    y_test="y_test_supervised.zip",
)

# Function to download and extract ZIP files from the GitHub repository
def load_data_from_repo(file_name, timeout=60):
    """Download a zipped CSV from the repository and parse it into a DataFrame.

    Parameters
    ----------
    file_name : str
        Name of the ZIP archive; it is appended to ``base_url`` to form the URL.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout,
        ``requests.get`` can block forever on a stalled connection.

    Returns
    -------
    pandas.DataFrame
        The first file stored in the archive, read with ``pd.read_csv``.

    Raises
    ------
    Exception
        If the server answers with a status other than 200, or the archive
        contains no files. Network-level failures (including timeouts) surface
        as the exceptions raised by ``requests`` itself.
    """
    url = base_url + file_name
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(
            f"Failed to download {file_name} (HTTP {response.status_code})")

    with zipfile.ZipFile(BytesIO(response.content)) as z:
        # Skip directory entries so the first *file* in the archive is read.
        members = [name for name in z.namelist() if not name.endswith("/")]
        if not members:
            raise Exception(f"Archive {file_name} contains no files")
        with z.open(members[0]) as f:
            return pd.read_csv(f)

# Load datasets from the repository.
# load_data_from_repo returns the result of pd.read_csv, which is already a
# DataFrame, so no additional pd.DataFrame(...) wrapper is needed.
X_train, X_test, y_train, y_test = (
    load_data_from_repo(files[split])
    for split in ("X_train", "X_test", "y_train", "y_test")
)

# Features and labels must be row-aligned for training and evaluation.
assert len(X_train) == len(y_train), "X_train and y_train row counts differ"
assert len(X_test) == len(y_test), "X_test and y_test row counts differ"

Building and Training a Neural Network with Early Stopping and Learning Rate Reduction

In [14]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling repeat across "Restart & Run All" (some GPU kernels may still
# be non-deterministic).
keras.utils.set_random_seed(42)

# Define early stopping and learning rate reduction callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Sparse categorical cross-entropy takes integer class ids, so the softmax needs
# one unit for every id in [0, max]. nunique() would under-count if an id were
# missing from y_train. Assumes 'target' holds non-negative integer ids.
num_classes = int(y_train['target'].max()) + 1

# Build the neural network model
model = Sequential()

# Declare the input explicitly; passing input_shape= to the first Dense layer
# triggers a warning in Keras 3.
model.add(keras.Input(shape=(X_train.shape[1],)))

# Three hidden blocks of decreasing width, each:
# Dense (L2-regularised) -> LeakyReLU -> BatchNormalization -> Dropout
for units in (512, 256, 128):
    model.add(Dense(units, kernel_regularizer=l2(0.01)))
    # Keras 3 names this argument negative_slope; alpha is deprecated there.
    model.add(LeakyReLU(negative_slope=0.01))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

# Output layer with softmax activation for multi-class classification
model.add(Dense(num_classes, activation='softmax'))

# Compile the model with Adam optimizer and sparse categorical cross-entropy
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model with early stopping and learning rate reduction.
# validation_split holds out the last 20% of the rows as given (it is taken
# before shuffling).
# NOTE(review): the recorded run predicts only two of the classes;
# compute_class_weight is imported but unused -- consider passing class_weight
# to fit() to counter the class imbalance.
history = model.fit(X_train, y_train['target'],
                    epochs=50, batch_size=512,
                    validation_split=0.2,
                    callbacks=[early_stopping, reduce_lr])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 35ms/step - accuracy: 0.3124 - loss: 2.9450 - val_accuracy: 0.3162 - val_loss: 1.6558 - learning_rate: 0.0010
Epoch 2/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 39ms/step - accuracy: 0.3741 - loss: 1.6101 - val_accuracy: 0.3935 - val_loss: 1.8261 - learning_rate: 0.0010
Epoch 3/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 45ms/step - accuracy: 0.3753 - loss: 1.5970 - val_accuracy: 0.3935 - val_loss: 14.1234 - learning_rate: 0.0010
Epoch 4/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 41ms/step - accuracy: 0.3756 - loss: 1.5903 - val_accuracy: 0.0211 - val_loss: 11.2020 - learning_rate: 0.0010
Epoch 5/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 41ms/step - accuracy: 0.3798 - loss: 1.5697 - val_accuracy: 0.3935 - val_loss: 2.0086 - learning_rate: 5.0000e-04
Epoch 6/50
[1m1643/1643[0m [32m━━━━━━━━━━

Evaluating Model Performance on the Test Set

In [15]:
# Score the trained network on the held-out split. The model was compiled with a
# single metric, so evaluate() yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test['target'])
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m9788/9788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.3882 - loss: 1.5440
Test Accuracy: 38.93%


Generating Predictions Using the Trained Model

In [None]:
# Run inference on the test features: every output row holds one softmax
# probability per class, and the position of the largest one is the predicted class id.
y_pred_dnn = model.predict(x=X_test)
y_pred_dnn_classes = y_pred_dnn.argmax(axis=1)

[1m9788/9788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step


Evaluation Function for Classification Models

In [None]:
# Define a function to calculate and return multiple evaluation metrics
def evaluate_model(y_true, y_pred, average='weighted'):
    """Summarise classification quality as a dictionary of metrics.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    average : str, optional
        Averaging strategy forwarded to the precision, recall and F1 scorers.

    Returns
    -------
    dict
        Keys, in order: "Accuracy", "Precision", "Recall", "F1 Score" and
        "Confusion Matrix".
    """
    results = {"Accuracy": accuracy_score(y_true, y_pred)}

    # These three scorers share one signature; zero_division=0 makes an
    # undefined score count as 0.
    averaged_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    for label, scorer in averaged_scorers:
        results[label] = scorer(y_true, y_pred, average=average, zero_division=0)

    results["Confusion Matrix"] = confusion_matrix(y_true, y_pred)
    return results

Evaluating the DNN Model on the Test Set

In [None]:
# Compare the predicted class ids with the true test labels
dnn_metrics = evaluate_model(y_true=y_test['target'], y_pred=y_pred_dnn_classes)

# Report each metric on its own line, in the order the dictionary was built
for metric in dnn_metrics:
    value = dnn_metrics[metric]
    print(f"DNN {metric}: {value}")

DNN Accuracy: 0.35660006258102006
DNN Precision: 0.23936560766514514
DNN Recall: 0.35660006258102006
DNN F1 Score: 0.24693119354683185
DNN Confusion Matrix: [[    0     0     0     0   298     0     0     0     0     0    34     0
      0     0]
 [    0     0     0     0  3796     0     0     0     0     0  1670     0
      0     0]
 [    0     0     0     0  2463     0     0     0     0     0   332     0
      0     0]
 [    0     0     0     0 16913     0     0     0     0     0  2269     0
      0     0]
 [    0     0     0     0 96245     0     0     0     0     0 13379     0
      0     0]
 [    0     0     0     0  4708     0     0     0     0     0  1265     0
      0     0]
 [    0     0     0     0    78     0     0     0     0     0     3     0
      0     0]
 [    0     0     0     0 12846     0     0     0     0     0  2034     0
      0     0]
 [    0     0     0     0   385     0     0     0     0     0    69     0
      0     0]
 [    0     0     0     0 41101     0     