# **Import Dataset and Libraries**

In [230]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from folium.plugins import HeatMap
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
import seaborn as sns
import pandas as pd
import numpy as np
import folium
import torch
import time

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
DATA_PATH = "C:/Users/aidan_000/Desktop/UNCC/ML Project/Datasets/df4.csv"
df = pd.read_csv(DATA_PATH, low_memory=False).drop(columns=['HIGHEST_NIBRS_DESCRIPTION'])

# Keep a separate copy of the location columns together with the label; it is built from a
# plain array of the selected columns, so it does not share the index or dtypes of `df`.
COORDINATE_COLUMNS = ['ZIP', 'LATITUDE_PUBLIC', 'LONGITUDE_PUBLIC', 'Violent-Crime']
coordinates = pd.DataFrame(df[COORDINATE_COLUMNS].to_numpy(), columns=COORDINATE_COLUMNS)
df.head()

Unnamed: 0,ZIP,LATITUDE_PUBLIC,LONGITUDE_PUBLIC,Year,Month,Day,DayOfWeek,CMPD_PATROL_DIVISION,LOCATION_TYPE_DESCRIPTION,PLACE_TYPE_DESCRIPTION,...,PctHome_Ownership,HighSchool,NonWhite,F65_,Poverty,Home_Ownership,Score,VulnerableBinary,Violent-Crime,People
0,28215.0,35.258195,-80.726229,2023,12,4,0,1,1,1,...,0.75,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0,63548.0
1,28213.0,35.283274,-80.762293,2023,12,4,0,2,1,2,...,0.76,1.0,1.0,0.0,1.0,0.0,3.0,0.0,0,44553.0
2,28210.0,35.153683,-80.839845,2023,12,4,0,5,3,4,...,0.53,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0,48214.0
3,28215.0,35.248899,-80.667217,2023,10,23,0,1,2,4,...,0.74,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,63548.0
4,28227.0,35.221991,-80.814908,2023,12,4,0,6,1,4,...,0.77,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,58524.0


# **Pre-processing**

In [231]:
# Split the label off the feature table.
# NOTE: `pop` removes 'Violent-Crime' from `df` in place, so re-running this cell without
# reloading `df` raises KeyError; later cells read `df.columns` / `df.iloc` and rely on `df`
# holding only the feature columns after this point.
y = df.pop('Violent-Crime').values
x = df.values

# 80/20 split with a fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=42)

# Learn the scaling statistics from the training split only and apply the SAME transform to
# the test split. Fitting a second scaler on the test data would scale the two splits
# differently and leak test-set statistics into the evaluation.
scaler = StandardScaler().fit(x_train)
X_train = scaler.transform(x_train)
X_test = scaler.transform(x_test)

# Features become float32 and labels int64 tensors, placed on the selected device.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# **Model Declaration and Instantiation**

In [149]:
# Wrap the training tensors in a shuffled mini-batch loader. The labels get a trailing
# dimension (N -> N x 1) before being paired with the features.
train_targets = y_train_tensor[:, None]
train_dataset = TensorDataset(X_train_tensor, train_targets)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Define the model
class SpatiotemporalModel(nn.Module):
    """Conv1d -> LSTM -> Linear stack applied to a flat feature vector.

    Every sample's features are handed to the convolution as the channels of a
    length-1 sequence, so the LSTM that follows processes a single time step.

    Args:
        input_size: number of features per sample (used as Conv1d input channels).
        output_size: width of the final linear layer's output.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.conv1d = nn.Conv1d(in_channels=input_size, out_channels=32, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(input_size=32, hidden_size=50, batch_first=True)
        self.fc = nn.Linear(in_features=50, out_features=output_size)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, output_size) raw scores."""
        as_channels = x.unsqueeze(2)                        # (batch, input_size, 1)
        conv_out = F.relu(self.conv1d(as_channels))         # (batch, 32, 1)
        # batch_first LSTM wants (batch, seq_len, features): swap the last two axes.
        lstm_out, _ = self.lstm(conv_out.transpose(1, 2))   # (batch, 1, 50)
        final_step = lstm_out[:, -1, :]                     # output at the last time step
        return self.fc(final_step)

# Instantiate the model as a binary classifier: one raw score (logit) per sample.
input_size = X_train.shape[-1]  # number of feature columns
output_size = 1  # a single logit, thresholded after a sigmoid at evaluation time
model = SpatiotemporalModel(input_size=input_size, output_size=output_size)
model = model.to(device)

# BCEWithLogitsLoss takes raw logits and float targets (sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop: one optimisation pass over train_loader per epoch, followed by an accuracy
# check on the test split (reported in the log line as "Validation Accuracy").
epochs = 10
for epoch in range(epochs):
    start_time = time.time()

    model.train()
    for batch_features, batch_labels in train_loader:
        # The loss needs float targets; the label tensor was created as int64.
        targets = batch_labels.to(device, dtype=torch.float32)
        optimizer.zero_grad()
        logits = model(batch_features.to(device))
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

    # Score the whole test split in a single forward pass with gradients disabled.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor.to(device))
        probabilities = torch.sigmoid(y_pred)
        predicted_labels = (probabilities > 0.5).float().cpu().numpy()
    val_accuracy = accuracy_score(y_test, predicted_labels)

    # Elapsed time covers both the training pass and the evaluation above.
    epoch_time = time.time() - start_time

    # Print epoch information
    print(f"Epoch {epoch + 1}/{epochs}, Time: {epoch_time:.2f}s, Validation Accuracy: {val_accuracy:.4f}")

Epoch 1/10, Time: 15.45s, Validation Accuracy: 0.7325
Epoch 2/10, Time: 15.57s, Validation Accuracy: 0.7331
Epoch 3/10, Time: 15.66s, Validation Accuracy: 0.7384
Epoch 4/10, Time: 16.10s, Validation Accuracy: 0.7366
Epoch 5/10, Time: 16.46s, Validation Accuracy: 0.7379
Epoch 6/10, Time: 15.89s, Validation Accuracy: 0.7387
Epoch 7/10, Time: 16.00s, Validation Accuracy: 0.7385
Epoch 8/10, Time: 16.15s, Validation Accuracy: 0.7388
Epoch 9/10, Time: 16.99s, Validation Accuracy: 0.7396
Epoch 10/10, Time: 15.45s, Validation Accuracy: 0.7389


# **Data Visualization**

Most Important Features

In [228]:
# Pair each test row's location (the first three columns of the unscaled test split) with
# the label the model predicted for it.
location_columns = ['ZIP', 'LATITUDE_PUBLIC', 'LONGITUDE_PUBLIC']
prediction_columns = {name: x_test[:, position] for position, name in enumerate(location_columns)}
prediction_columns['Violent-Crime'] = predicted_labels.flatten()
prediction_df = pd.DataFrame(prediction_columns)

In [None]:
# Get the column names of the input data (assumes the label column has already been removed
# from `df`, so the remaining columns are the model inputs in order).
feature_names = df.columns

# The only layer indexed by input feature is the first one: `conv1d.weight` has shape
# (out_channels, input_size, kernel_size). The final `fc` layer is indexed by LSTM hidden
# unit, so its weights cannot be labelled with feature names.
conv_weights = model.conv1d.weight.detach().cpu().numpy()

# Each sample reaches Conv1d as a length-1 sequence with one zero of padding on either side,
# so only the centre kernel tap ever multiplies real data; the outer taps see padding only.
center_tap = conv_weights.shape[2] // 2

# Importance proxy: mean absolute centre-tap weight across the output channels.
# This is a rough magnitude heuristic on standardised inputs, not a causal attribution.
feature_weights = np.abs(conv_weights[:, :, center_tap]).mean(axis=0)

if len(feature_names) != len(feature_weights):
    raise ValueError(
        f"Expected {len(feature_weights)} feature names but df has {len(feature_names)} columns"
    )

# Create a DataFrame for easier sorting
weights_df = pd.DataFrame({'Feature': feature_names, 'Weight': feature_weights})

# Sort the DataFrame by weight values in descending order
weights_df = weights_df.sort_values(by='Weight', ascending=False)

# Plotting feature importance with labels
plt.figure(figsize=(12, 6))
plt.bar(weights_df['Feature'], weights_df['Weight'])
plt.xlabel('Feature Name')
plt.ylabel('Mean Absolute Weight')
plt.title('First-Layer (Conv1d) Weight Magnitude per Feature (Sorted)')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better visibility
plt.show()

In [None]:
# Evaluate the predictions left over from the last training epoch against the test labels:
# per-class metrics as text, then the confusion matrix as a heat map.
conf_matrix = confusion_matrix(y_test, predicted_labels)

report = classification_report(y_test, predicted_labels, zero_division=1)
print(report)

# Plot the confusion matrix using seaborn
class_names = ['Nonviolent', 'Violent']
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Prediction')
ax.set_ylabel('Ground Truths')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Prediction-Based Heat Map

In [221]:
# Centre the map on the midpoint of the data's bounding box. Columns 1 and 2 of `df` are read
# by position -- presumably LATITUDE_PUBLIC and LONGITUDE_PUBLIC; verify if the schema changes.
latitudes, longitudes = df.iloc[:, 1], df.iloc[:, 2]

minLatitude, maxLatitude = latitudes.min(), latitudes.max()
minLongitude, maxLongitude = longitudes.min(), longitudes.max()

centerLatitude = (minLatitude + maxLatitude) / 2
centerLongitude = (minLongitude + maxLongitude) / 2

# Create a base map
PredictionMap = folium.Map(location=[centerLatitude, centerLongitude], zoom_start=10)

# Coordinates of the first 250 rows the model labelled as violent crime
is_predicted_violent = prediction_df['Violent-Crime'] == 1
PredictedPoints = (
    prediction_df.loc[is_predicted_violent, ['LATITUDE_PUBLIC', 'LONGITUDE_PUBLIC']]
    .head(250)
    .values
    .tolist()
)

# Add Predicted points to the map as a HeatMap layer
prediction_layer = HeatMap(
    PredictedPoints,
    min_opacity=0.2,
    radius=15,
    blur=25,
    gradient={0.4: 'purple', 0.65: 'orange', 1: 'yellow'},
)
prediction_layer.add_to(PredictionMap)

PredictionMap.save('PredictionHeatmap_BinaryModel.html')

Ground Truths Heat Map

In [223]:
# Ground-truth map, centred on the same centerLatitude / centerLongitude as the prediction map.
ActualMap = folium.Map(location=[centerLatitude, centerLongitude], zoom_start=10)

# NOTE(review): `coordinates` covers every row of the dataset, whereas `prediction_df` holds
# only the test split, so the two maps are not drawn from the same set of incidents.
is_violent = coordinates['Violent-Crime'] == 1
ActualPoints = (
    coordinates.loc[is_violent, ['LATITUDE_PUBLIC', 'LONGITUDE_PUBLIC']]
    .head(250)
    .values
    .tolist()
)

# Add Actual points to the map as a HeatMap layer
actual_layer = HeatMap(
    ActualPoints,
    min_opacity=0.2,
    radius=15,
    blur=25,
    gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'},
)
actual_layer.add_to(ActualMap)

ActualMap.save('ActualHeatmap_BinaryModel.html')