<a href="https://colab.research.google.com/github/orifelszer/CrimeData/blob/oriana-branch/DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
import zipfile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# ✅ שלב 1: שכפול המאגר מה-GitHub
!git clone https://github.com/orifelszer/CrimeData.git

# ✅ שלב 2: פונקציה לפתיחת קובצי ZIP מהתיקייה המקומית
def load_zipped_csv_local(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        csv_name = zip_ref.namelist()[0]
        with zip_ref.open(csv_name) as file:
            return pd.read_csv(file)

# ✅ שלב 3: קריאת הנתונים לאחר השכפול (נתיב מקומי)
X_train = load_zipped_csv_local('CrimeData/X_train_supervised.zip')
X_test = load_zipped_csv_local('CrimeData/X_test_supervised.zip')
y_train = load_zipped_csv_local('CrimeData/y_train_supervised.zip')
y_test = load_zipped_csv_local('CrimeData/y_test_supervised.zip')

# ✅ בדיקה שהנתונים נטענו כראוי
print(f"X_train Shape: {X_train.shape}")
print(f"X_test Shape: {X_test.shape}")
print(f"y_train Shape: {y_train.shape}")
print(f"y_train Shape: {y_train.shape}")

Cloning into 'CrimeData'...
remote: Enumerating objects: 825, done.[K
remote: Counting objects: 100% (270/270), done.[K
remote: Compressing objects: 100% (120/120), done.[K
remote: Total 825 (delta 225), reused 150 (delta 150), pack-reused 555 (from 1)[K
Receiving objects: 100% (825/825), 298.02 MiB | 38.58 MiB/s, done.
Resolving deltas: 100% (435/435), done.
X_train Shape: (1051336, 15)
X_test Shape: (313194, 15)
y_train Shape: (1051336, 1)
y_train Shape: (1051336, 1)


In [3]:
from tensorflow.keras.models import Model

# הוספת EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# בניית המודל מחדש
model = Sequential()
model.add(Dense(512, kernel_regularizer=l2(0.01), input_shape=(X_train.shape[1],)))
model.add(LeakyReLU(alpha=0.01))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(256, kernel_regularizer=l2(0.01)))
model.add(LeakyReLU(alpha=0.01))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(128, kernel_regularizer=l2(0.01)))
model.add(LeakyReLU(alpha=0.01))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(y_train['target'].nunique(), activation='softmax'))

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# אימון המודל כולל EarlyStopping
history = model.fit(X_train, y_train['target'],
                    epochs=50, batch_size=512,
                    validation_split=0.2,
                    callbacks=[early_stopping, reduce_lr])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 33ms/step - accuracy: 0.3117 - loss: 2.9672 - val_accuracy: 0.3893 - val_loss: 1.5989 - learning_rate: 0.0010
Epoch 2/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 33ms/step - accuracy: 0.3693 - loss: 1.6179 - val_accuracy: 0.2875 - val_loss: 1.8073 - learning_rate: 0.0010
Epoch 3/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 34ms/step - accuracy: 0.3769 - loss: 1.5970 - val_accuracy: 0.3596 - val_loss: 1.7484 - learning_rate: 0.0010
Epoch 4/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 33ms/step - accuracy: 0.3768 - loss: 1.5896 - val_accuracy: 0.2974 - val_loss: 1.8381 - learning_rate: 0.0010
Epoch 5/50
[1m1643/1643[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 34ms/step - accuracy: 0.3796 - loss: 1.5720 - val_accuracy: 0.3935 - val_loss: 1.8094 - learning_rate: 5.0000e-04
Epoch 6/50
[1m1643/1643[0m [32m━━━━━━━━━━━━

In [4]:
# הערכת ביצועים על סט הבדיקה
test_loss, test_accuracy = model.evaluate(X_test, y_test['target'])
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m9788/9788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.3551 - loss: 1.6425
Test Accuracy: 35.66%


In [9]:
# Predictions
y_pred_dnn = model.predict(X_test)
y_pred_dnn_classes = np.argmax(y_pred_dnn, axis=1)  # Convert probabilities to class labels

[1m9788/9788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step


In [5]:
# Evaluation function
def evaluate_model(y_true, y_pred, average='weighted'):
    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average=average, zero_division=0),
        "Recall": recall_score(y_true, y_pred, average=average, zero_division=0),
        "F1 Score": f1_score(y_true, y_pred, average=average, zero_division=0),
        "Confusion Matrix": confusion_matrix(y_true, y_pred)
    }
    return metrics

In [10]:
# Evaluate DNN model
dnn_metrics = evaluate_model(y_test['target'], y_pred_dnn_classes)
for metric, value in dnn_metrics.items():
    print(f"DNN {metric}: {value}")

DNN Accuracy: 0.35660006258102006
DNN Precision: 0.23936560766514514
DNN Recall: 0.35660006258102006
DNN F1 Score: 0.24693119354683185
DNN Confusion Matrix: [[    0     0     0     0   298     0     0     0     0     0    34     0
      0     0]
 [    0     0     0     0  3796     0     0     0     0     0  1670     0
      0     0]
 [    0     0     0     0  2463     0     0     0     0     0   332     0
      0     0]
 [    0     0     0     0 16913     0     0     0     0     0  2269     0
      0     0]
 [    0     0     0     0 96245     0     0     0     0     0 13379     0
      0     0]
 [    0     0     0     0  4708     0     0     0     0     0  1265     0
      0     0]
 [    0     0     0     0    78     0     0     0     0     0     3     0
      0     0]
 [    0     0     0     0 12846     0     0     0     0     0  2034     0
      0     0]
 [    0     0     0     0   385     0     0     0     0     0    69     0
      0     0]
 [    0     0     0     0 41101     0     