In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Importing the dataset
data = pd.read_csv('crime_head.csv')
data.head()

Unnamed: 0,Latitude,Longitude,CrimeGroup_Name
0,14.598719,76.711283,CASES OF HURT
1,12.962117,77.571638,MISSING PERSON
2,15.429984,75.432539,ATTEMPT TO MURDER
3,12.265922,75.825952,CASES OF HURT
4,13.004756,77.694535,THEFT


In [3]:
# Encoding the crime type labels using LabelEncoder
label_encoder = LabelEncoder()
data['encoded_crime_type'] = label_encoder.fit_transform(data['CrimeGroup_Name'])

In [4]:
# Perform one-hot encoding on the encoded crime type labels
one_hot_encoder = OneHotEncoder()
crime_type_encoded = one_hot_encoder.fit_transform(data[['encoded_crime_type']]).toarray()

In [5]:
# Concatenate the one-hot encoded crime type labels with the longitude and latitude
features = np.hstack((data[['Longitude', 'Latitude']].values.astype(np.float32), crime_type_encoded))

In [6]:
# Splitting the data into features (X) and target variable (y)
X = features[:, :2]  # Features (longitude and latitude)
y = features[:, 2:]  # Target variable (one-hot encoded crime type labels)

In [7]:
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Data Augmentation
# Applying Data augmentation techniques to increase the diversity of our training data
# Adding noise to latitude and longitude features
X_train_augmented = X_train + np.random.normal(0, 0.01, size=X_train.shape)

# Combining original and augmented data
X_train_combined = np.vstack((X_train, X_train_augmented))
y_train_combined = np.vstack((y_train, y_train))

# Shuffling the combined data
combined_data = list(zip(X_train_combined, y_train_combined))
np.random.shuffle(combined_data)
X_train_combined, y_train_combined = zip(*combined_data)
X_train_combined = np.array(X_train_combined)
y_train_combined = np.array(y_train_combined)

In [9]:
# Defining our neural network model
class CrimePredictionModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(CrimePredictionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        # Forward pass
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.softmax(x, dim=1)  # Apply softmax to get probabilities

In [10]:
# Hyperparameters
input_size = 2  # Longitude and latitude
output_size = len(label_encoder.classes_)  # Number of crime types

In [11]:
# Initializing the model
model = CrimePredictionModel(input_size, output_size)

In [12]:
# Defining loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Training the model
num_epochs = 50
batch_size = 64
for epoch in range(num_epochs):
    # Mini-batch training
    for i in range(0, len(X_train_combined), batch_size):
        inputs = torch.tensor(X_train_combined[i:i+batch_size], dtype=torch.float32)
        targets = torch.tensor(y_train_combined[i:i+batch_size], dtype=torch.float32)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/50], Loss: 0.0447
Epoch [20/50], Loss: 0.0444
Epoch [30/50], Loss: 0.0448
Epoch [40/50], Loss: 0.0445
Epoch [50/50], Loss: 0.0442


In [14]:
# Model evaluation
model.eval()
with torch.no_grad():
    inputs = torch.tensor(X_test, dtype=torch.float32)
    outputs = model(inputs)
    predicted_labels = np.argmax(outputs.numpy(), axis=1)

In [15]:
# Converting predicted labels back to original crime types
predicted_crime_types = label_encoder.inverse_transform(predicted_labels)

In [16]:
# Input longitude and latitude coordinates
input_coordinates = np.array([[76.526989, 16.534887]])  # [longitude, latitude]

# Converting input coordinates to tensor
input_tensor = torch.tensor(input_coordinates, dtype=torch.float32)

# Making predictions with the model
with torch.no_grad():
    output_probabilities = model(input_tensor)
    predicted_labels = np.argmax(output_probabilities.numpy(), axis=1)

# Decoding predicted labels to crime types
predicted_crime_types = label_encoder.inverse_transform(predicted_labels)

# Displaying the input coordinates along with the predicted crime types
for i in range(len(input_coordinates)):
    print(f"Input Coordinates: {input_coordinates[i]}, Predicted Crime Type: {predicted_crime_types[i]}")

Input Coordinates: [76.526989 16.534887], Predicted Crime Type: MOTOR VEHICLE ACCIDENTS NON-FATAL
