In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from skorch import NeuralNetClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import folium
from tqdm import tqdm

In [6]:
data = pd.read_csv("San_Francisco.csv")

In [7]:
# Load and preprocess data


data["Time"] = pd.to_datetime(data["Time"]).astype(int) / 10**9

encoder = LabelEncoder()
data["Category"] = encoder.fit_transform(data["Category"])
data["Part_of_Day"] = encoder.fit_transform(data["Part_of_Day"])
data["Day_of_Week"] = encoder.fit_transform(data["Day_of_Week"])

#Drop date
data.drop(["Date"], axis=1, inplace=True)

scaler = MinMaxScaler()
data[["Time", "Day_of_Week", "Part_of_Day", "Latitude", "Longitude"]] = scaler.fit_transform(
    data[["Time", "Day_of_Week", "Part_of_Day","Latitude", "Longitude"]]
)

In [8]:
# Prepare dataset and dataloader
class CrimeDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if idx >= len(self.data):
            raise IndexError
        features = torch.tensor(
            self.data.loc[idx, ['Time','Day_of_Week','Part_of_Day','Latitude','Longitude']].values, dtype=torch.float
        )
        label = torch.tensor(self.data.loc[idx, 'Category'], dtype=torch.long)
        return features, label


dataset = CrimeDataset(data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Split dataset into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

train_dataset = CrimeDataset(train_data)
test_dataset = CrimeDataset(test_data)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# Define Feedforward Neural Network
class CrimeNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(CrimeNet, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        out = self.relu(out)
        out = self.layer3(out)
        return out


# Model parameters
input_size = 5
hidden_size = 64
num_classes = len(data["Category"].unique())

# Initialize model, loss function, and optimizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CrimeNet(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [10]:

# Train the model
num_epochs = 2

for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(dataloader):
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(dataloader)}], Loss: {loss.item():.4f}"
            )

Epoch [1/2], Step [100/24597], Loss: 1.7730
Epoch [1/2], Step [200/24597], Loss: 1.5614
Epoch [1/2], Step [300/24597], Loss: 1.6186
Epoch [1/2], Step [400/24597], Loss: 1.6372
Epoch [1/2], Step [500/24597], Loss: 1.8768
Epoch [1/2], Step [600/24597], Loss: 1.6567
Epoch [1/2], Step [700/24597], Loss: 1.9221
Epoch [1/2], Step [800/24597], Loss: 1.6564
Epoch [1/2], Step [900/24597], Loss: 1.5791
Epoch [1/2], Step [1000/24597], Loss: 1.7263
Epoch [1/2], Step [1100/24597], Loss: 1.7002
Epoch [1/2], Step [1200/24597], Loss: 1.7832
Epoch [1/2], Step [1300/24597], Loss: 1.7567
Epoch [1/2], Step [1400/24597], Loss: 1.7832
Epoch [1/2], Step [1500/24597], Loss: 1.6087
Epoch [1/2], Step [1600/24597], Loss: 1.7449
Epoch [1/2], Step [1700/24597], Loss: 1.4472
Epoch [1/2], Step [1800/24597], Loss: 1.7862
Epoch [1/2], Step [1900/24597], Loss: 1.6197
Epoch [1/2], Step [2000/24597], Loss: 1.7601
Epoch [1/2], Step [2100/24597], Loss: 1.5872
Epoch [1/2], Step [2200/24597], Loss: 1.6879
Epoch [1/2], Step [

In [None]:
# Test the model
model.eval()
all_labels = []
all_predictions = []

with torch.no_grad():
    for features, labels in test_dataloader:
        features = features.unsqueeze(1)
        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)
        all_labels.extend(labels.numpy())
        all_predictions.extend(predicted.numpy())

In [None]:
# Save the trained model
torch.save(model.state_dict(), "model.ckpt")

In [None]:
# Calculate accuracy, confusion matrix, and classification report
all_labels_np = np.array(all_labels)
all_predictions_np = np.array(all_predictions)
accuracy = accuracy_score(all_labels_np, all_predictions_np)

conf_matrix = confusion_matrix(all_labels, all_predictions)
class_report = classification_report(all_labels, all_predictions)

print("Accuracy: {:.2f}".format(accuracy))
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)