In [10]:
import pickle

def _load_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# SECURITY: unpickling runs code embedded in the file, so these artifacts
# must come from a trusted source.
# NOTE(review): despite the `_ward` suffix, the prediction code further down
# uses this encoder to turn predicted class ids back into names -- confirm
# what the encoder was actually fitted on.
label_encoder_ward = _load_pickle("label_encoder.pkl")

# Fitted classifier (described as a Random Forest by this notebook's messages)
classifier = _load_pickle("model.pkl")

print("LabelEncoder and Random Forest model successfully loaded!")


LabelEncoder and Random Forest model successfully loaded!


In [14]:
import pandas as pd
import numpy as np

# One incident to score, written as a flat record: ward, timestamp string,
# and coordinates.
sample_record = {
    'WARD': 4,
    'DATE OF OCCURRENCE': '1/16/2025 1:00',
    'LATITUDE': 41.79329893,
    'LONGITUDE': -87.66456619,
}

# Wrap each value in a one-element list so every key becomes a column that
# holds exactly one row.
sample_input_data = {column: [value] for column, value in sample_record.items()}

# One-row DataFrame built from the column-oriented mapping
input_df = pd.DataFrame(sample_input_data)

# Preprocess the input data (without encoding 'WARD')
def preprocess_input(input_data):
    """Derive the time-based feature columns from raw incident rows.

    The caller's DataFrame is not modified; all new columns are added to a
    copy, so calling this repeatedly on the same frame gives the same result.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Must contain a 'DATE OF OCCURRENCE' column that ``pd.to_datetime``
        can parse. All other columns (e.g. 'WARD') pass through unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy of ``input_data`` with these columns appended, in order:
        'DATE', 'HOUR', 'TIME_SIN', 'TIME_COS', 'MONTH_SIN', 'MONTH_COS',
        'DAY_OF_WEEK', 'CRIME_COUNT_LAG1', 'CRIME_COUNT_LAG24',
        'ROLLING_7DAY', 'DISTANCE_TO_POLICE'.

    Raises
    ------
    KeyError
        If 'DATE OF OCCURRENCE' is missing from ``input_data``.
    """
    # Copy first: writing columns straight onto the argument would silently
    # alter the frame the caller still holds.
    features = input_data.copy()

    # Parse the timestamp, then truncate it to the start of its hour
    features['DATE'] = pd.to_datetime(features['DATE OF OCCURRENCE'])
    features['HOUR'] = features['DATE'].dt.floor('h')

    # Cyclical encodings: hour-of-day over a 24-step circle and month over a
    # 12-step circle, so that adjacent values across the wrap stay close.
    hour_angle = 2 * np.pi * features['HOUR'].dt.hour / 24
    month_angle = 2 * np.pi * features['HOUR'].dt.month / 12
    features['TIME_SIN'] = np.sin(hour_angle)
    features['TIME_COS'] = np.cos(hour_angle)
    features['MONTH_SIN'] = np.sin(month_angle)
    features['MONTH_COS'] = np.cos(month_angle)

    # 'WARD' is deliberately left as provided; no label encoding is applied.

    # Day of week as an integer (pandas convention: Monday=0 ... Sunday=6)
    features['DAY_OF_WEEK'] = features['HOUR'].dt.dayofweek

    # Placeholder features fixed at 0 -- replace with real calculations
    # when the lag/rolling/distance data is available.
    for placeholder in ('CRIME_COUNT_LAG1', 'CRIME_COUNT_LAG24',
                        'ROLLING_7DAY', 'DISTANCE_TO_POLICE'):
        features[placeholder] = 0

    return features

# Preprocess the data
preprocessed_input = preprocess_input(input_df)

# Select the feature columns handed to the classifier (including raw 'WARD').
# NOTE(review): presumably this order must match the order used when the
# model was trained -- confirm against the training notebook.
X_input = preprocessed_input[['TIME_SIN', 'TIME_COS', 'CRIME_COUNT_LAG1', 'CRIME_COUNT_LAG24', 'ROLLING_7DAY',
                              'DISTANCE_TO_POLICE', 'WARD', 'DAY_OF_WEEK', 'MONTH_SIN', 'MONTH_COS']]

# Class probabilities: one row per input row, one column per model class
probas = classifier.predict_proba(X_input)

# Rank the classes for the first input row, most probable first.
# The [-5:] slice yields fewer than five indices when the model has fewer
# than five classes, so everything below is driven by the arrays' actual
# length rather than a fixed count of 5.
top_5_idx = np.argsort(probas[0])[-5:][::-1]
top_5_classes = classifier.classes_[top_5_idx]
top_5_probabilities = probas[0][top_5_idx]

# Raw (still encoded) class labels mapped to their probabilities
result = {
    'Top 5 Crimes': dict(zip(top_5_classes, top_5_probabilities))
}

# Decode all predicted labels back to offense names in a single call.
# NOTE(review): the encoder is named `label_encoder_ward`, yet it is used
# here to decode predicted crime classes -- confirm it is the right encoder.
top_5_crimes = dict(
    zip(label_encoder_ward.inverse_transform(top_5_classes), top_5_probabilities)
)

print("Top 5 Predicted Crimes:")
print(top_5_crimes)

Top 5 Predicted Crimes:
{'BATTERY': 0.25625, 'CRIMINAL DAMAGE': 0.21958333333333332, 'ASSAULT': 0.18125, 'OTHER OFFENSE': 0.10124999999999998, 'MOTOR VEHICLE THEFT': 0.09583333333333334}
