# **Import Dataset and Libraries**

In [30]:
import os
import time

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import shuffle
from torch.utils.data import DataLoader, TensorDataset

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

# Location of the dataset CSV. Set the CRIME_DATA_CSV environment variable to
# point the notebook at a copy stored elsewhere; when it is unset the original
# hard-coded local path is used, so existing behaviour is unchanged.
DATA_CSV_PATH = os.environ.get(
    "CRIME_DATA_CSV",
    "C:/Users/aidan_000/Desktop/UNCC/ML Project/Datasets/df6.csv",
)

# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference within a column.
df = pd.read_csv(DATA_CSV_PATH, low_memory=False)
df.head()

Unnamed: 0,ZIP,LATITUDE_PUBLIC,LONGITUDE_PUBLIC,Year,Month,Day,DayOfWeek,CMPD_PATROL_DIVISION,LOCATION_TYPE_DESCRIPTION,PLACE_TYPE_DESCRIPTION,...,PctHome_Ownership,HighSchool,NonWhite,F65_,Poverty,Home_Ownership,Score,VulnerableBinary,Violent-Crime,People
0,28215.0,35.258195,-80.726229,2023,12,4,0,1,1,1,...,0.75,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0,63548.0
1,28213.0,35.283274,-80.762293,2023,12,4,0,2,1,2,...,0.76,1.0,1.0,0.0,1.0,0.0,3.0,0.0,0,44553.0
2,28210.0,35.153683,-80.839845,2023,12,4,0,5,3,4,...,0.53,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0,48214.0
3,28227.0,35.221991,-80.814908,2023,12,4,0,6,1,4,...,0.77,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,58524.0
4,28216.0,35.362708,-80.854438,2023,12,4,0,7,0,4,...,0.4,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1,51477.0


# **Pre-processing**

In [31]:
# Column holding the label the model is trained to predict.
TARGET_COLUMN = 'HIGHEST_NIBRS_DESCRIPTION'

# Encode the target as integer class ids. The fitted encoder is kept so the ids
# can later be mapped back with `label_encoder.inverse_transform(...)`.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df[TARGET_COLUMN])

# Build the feature matrix from a copy that excludes the target. `df` itself is
# left untouched (it still contains the un-encoded target column), so this cell
# can be re-run without a KeyError from popping an already-removed column.
feature_matrix = df.drop(columns=[TARGET_COLUMN]).values

# Split BEFORE scaling. The same random_state on the same rows yields the same
# train/test membership as splitting the scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    feature_matrix, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits, so
# that no test-set statistics (mean / variance) leak into the training features.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full standardised feature matrix, kept under its original name; it is now
# scaled with the training-set statistics rather than whole-dataset statistics.
x = scaler.transform(feature_matrix)

# Features as float32 and labels as int64 (the target dtype CrossEntropyLoss
# expects), moved to the selected device.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# **Model Declaration, Training, and Evaluation**

In [None]:
# Serve the training tensors as shuffled mini-batches.
BATCH_SIZE = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # new sample order every epoch
)

# Define the model
class SpatiotemporalModel(nn.Module):
    """Conv1d -> LSTM -> Linear classifier over flat feature vectors.

    Args:
        input_size: Number of features per sample; used as the Conv1d
            ``in_channels``.
        output_size: Number of logits produced per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.conv1d = nn.Conv1d(
            in_channels=input_size, out_channels=32, kernel_size=3, padding=1
        )
        self.lstm = nn.LSTM(input_size=32, hidden_size=50, batch_first=True)
        self.fc = nn.Linear(in_features=50, out_features=output_size)

    def forward(self, x):
        """Return raw class logits for a batch of feature vectors.

        Args:
            x: Tensor expected to be shaped (batch, input_size).

        Returns:
            Tensor of shape (batch, output_size) holding unnormalised logits.
        """
        # (batch, features) -> (batch, features, 1): the features act as the
        # Conv1d channels and the appended axis is a length-1 sequence.
        as_sequence = x.unsqueeze(2)
        conv_out = F.relu(self.conv1d(as_sequence))

        # The batch_first LSTM wants (batch, seq_len, channels), so swap the
        # last two axes of the (batch, 32, 1) convolution output.
        lstm_out, _ = self.lstm(conv_out.transpose(1, 2))

        # Keep only the hidden output of the final time step.
        final_step = lstm_out[:, -1, :]
        return self.fc(final_step)

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# start from the same state on every run. Some GPU kernels may still be
# non-deterministic, so results can differ slightly between runs.
torch.manual_seed(42)

# One Conv1d input channel per feature column.
input_size = X_train.shape[1]

# Class ids are integer codes starting at 0, so the output layer needs
# (largest id + 1) logits. Counting the unique ids in y_train alone would
# under-size the layer whenever a class is absent from the training split,
# which makes CrossEntropyLoss fail on an out-of-range target.
output_size = int(max(np.max(y_train), np.max(y_test))) + 1
model = SpatiotemporalModel(input_size, output_size).to(device)

# CrossEntropyLoss takes raw logits plus integer class targets.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs, scoring the held-out split after each one.
epochs = 20
for epoch in range(epochs):
    epoch_started = time.time()

    # ---- optimisation pass over the shuffled mini-batches ----
    model.train()
    for inputs, labels in train_loader:
        # No-op when the tensors already live on `device`.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # ---- evaluation pass: the whole held-out split in one forward call ----
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor.to(device))
        # Index of the largest logit per row is the predicted class id.
        # `predicted_labels` stays a tensor; the reporting cell reads it.
        predicted_labels = y_pred.argmax(dim=1)
        val_accuracy = accuracy_score(y_test, predicted_labels.cpu().numpy())

    epoch_time = time.time() - epoch_started

    # Per-epoch progress line.
    print(f"Epoch {epoch + 1}/{epochs}, Time: {epoch_time:.2f}s, Validation Accuracy: {val_accuracy:.4f}")


Epoch 1/20, Time: 13.19s, Validation Accuracy: 0.4384
Epoch 2/20, Time: 12.59s, Validation Accuracy: 0.4435
Epoch 3/20, Time: 13.32s, Validation Accuracy: 0.4483
Epoch 4/20, Time: 13.28s, Validation Accuracy: 0.4495
Epoch 5/20, Time: 13.52s, Validation Accuracy: 0.4477
Epoch 6/20, Time: 13.28s, Validation Accuracy: 0.4505
Epoch 7/20, Time: 13.10s, Validation Accuracy: 0.4530
Epoch 8/20, Time: 13.85s, Validation Accuracy: 0.4521
Epoch 9/20, Time: 14.03s, Validation Accuracy: 0.4565
Epoch 10/20, Time: 12.97s, Validation Accuracy: 0.4541
Epoch 11/20, Time: 13.03s, Validation Accuracy: 0.4541
Epoch 12/20, Time: 13.47s, Validation Accuracy: 0.4541
Epoch 13/20, Time: 13.74s, Validation Accuracy: 0.4556
Epoch 14/20, Time: 13.47s, Validation Accuracy: 0.4560
Epoch 15/20, Time: 13.86s, Validation Accuracy: 0.4570
Epoch 16/20, Time: 13.33s, Validation Accuracy: 0.4564


In [None]:
# The training loop leaves `predicted_labels` as a tensor. Convert it only when
# it still is one, so re-running this cell does not fail with
# "'numpy.ndarray' object has no attribute 'cpu'".
if torch.is_tensor(predicted_labels):
    predicted_labels = predicted_labels.cpu().numpy()

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, predicted_labels)

# zero_division=0 scores a class with no predictions (or no test samples) as 0
# for the undefined metric. Using 1 would report a perfect score for classes
# the model never predicts and inflate the macro averages.
report = classification_report(y_test, predicted_labels, zero_division=0)
print(report)

# Plot the confusion matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Prediction')
ax.set_ylabel('Ground Truths')
ax.set_title('Confusion Matrix')
plt.show()

# **Data Visualization**

Mapping

In [None]:
# NOTE(review): this entire cell is commented out, so nothing below runs and no
# map is rendered. If re-enabled, it would place a folium marker for each of the
# first 1000 rows and centre the map on the midpoint of the value ranges of
# columns 1 and 2. Those columns are selected by position and are presumably
# LATITUDE_PUBLIC / LONGITUDE_PUBLIC — TODO confirm against df.columns before
# re-enabling.
# coordinates = df.iloc[:1000, 1:3].values

# minLatitude = df.iloc[:, 1].min()
# maxLatitude = df.iloc[:, 1].max()

# minLongitude = df.iloc[:, 2].min()
# maxLongitude = df.iloc[:, 2].max()

# centerLatitude = (minLatitude + maxLatitude) / 2
# centerLongitude = (minLongitude + maxLongitude) / 2

# charlotte_map = folium.Map(location=[centerLatitude, centerLongitude], zoom_start=10)

# for coord in coordinates:
#     folium.Marker(location=coord, popup=str(coord)).add_to(charlotte_map)
# charlotte_map