<a href="https://colab.research.google.com/github/sakshamo5/red-zones/blob/main/redzones.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the crime records CSV into a DataFrame.
# `file_path` is kept as its own variable so the source location is easy to change.
file_path = '/content/Crime_Data.csv'
crime_data = pd.read_csv(filepath_or_buffer=file_path)


In [None]:

# Data preprocessing
# Encode each categorical column as integer codes and keep the fitted encoder
# for every column so the same label -> code mapping can be saved and reused.
categorical_cols = ["Crime_Type", "Day_of_Week", "Severity", "Nearby_Landmark"]

# The loop below overwrites these columns of `crime_data` in place. If this cell
# is executed again, the columns already hold integer codes: fitting new
# encoders on them would throw away the original string labels, and the later
# lookup of the 'Severe' label would raise. Reload the raw CSV in that case so
# the cell can be re-run without first re-running the loading cell.
# (If a column is numeric in the CSV itself this only costs a redundant reload.)
if any(pd.api.types.is_numeric_dtype(crime_data[col]) for col in categorical_cols):
    crime_data = pd.read_csv(file_path)

label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    crime_data[col] = le.fit_transform(crime_data[col])
    label_encoders[col] = le

# Hour of day, parsed from the "HH:MM" strings in "Time_of_Happening"
happening_time = pd.to_datetime(crime_data['Time_of_Happening'], format='%H:%M')
crime_data['Hour_of_Happening'] = happening_time.dt.hour

# Target variable: Red Zone (1) or Not (0).
# A row is a Red Zone when its severity is 'Severe' OR its frequency in the
# last 30 days is greater than 10.
# NOTE(review): both inputs to this rule (Severity, Frequency_in_Last_30_Days)
# are also in the model's feature list, so the label is fully determined by the
# features; the perfect scores in the classification report likely reflect that
# rather than real predictive power.
severe_code = label_encoders['Severity'].transform(['Severe'])[0]  # integer code assigned to 'Severe'
is_severe = crime_data['Severity'] == severe_code
is_frequent = crime_data['Frequency_in_Last_30_Days'] > 10
crime_data['Red_Zone'] = (is_severe | is_frequent).astype(int)

# Columns fed to the model, in the order the classifier will expect them
feature_cols = [
    "Crime_Type",
    "Day_of_Week",
    "Hour_of_Happening",
    "Severity",
    "Frequency_in_Last_30_Days",
]

# Feature matrix and target vector
X, y = crime_data[feature_cols], crime_data['Red_Zone']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded random forest on the training split (fit returns the estimator itself)
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out rows and summarise per-class precision / recall / f1
y_pred = rf_model.predict(X_test)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

# Show the header and the report on separate lines
print("Classification Report:", classification_rep, sep="\n")

# Persist the fitted model and one encoder file per categorical column so that
# predictions can be made later without retraining.
# `joblib` is imported with the other imports at the top of the notebook, so a
# missing dependency is reported before any training work is done.
joblib.dump(rf_model, 'red_zone_rf_model.pkl')
for col, le in label_encoders.items():
    joblib.dump(le, f'label_encoder_{col}.pkl')


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       286
           1       1.00      1.00      1.00      1714

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [None]:
import numpy as np
import joblib

# Load the trained model saved by the training cell
rf_model = joblib.load('red_zone_rf_model.pkl')

# Feature names, in the exact order used when the model was fitted
model_feature_cols = ["Crime_Type", "Day_of_Week", "Hour_of_Happening",
                      "Severity", "Frequency_in_Last_30_Days"]

# Define new data (manually created feature vectors), one row per example, with
# values in the order of `model_feature_cols`.
# NOTE(review): the categorical entries are the integer codes produced by the
# saved LabelEncoders; which label each code stands for is not visible here --
# confirm against the label_encoder_*.pkl files before relying on these rows.
new_data = np.array([
    [2, 3, 23, 1, 15],  # Example 1: hour 23 (night) with a frequency of 15
    [5, 1, 12, 0, 5],   # Example 2: hour 12 (midday) with a frequency of 5
])

# The model was fitted on a DataFrame, so scikit-learn recorded the column
# names. Predicting on a bare array triggers a "does not have valid feature
# names" warning and prevents it from checking that the columns line up, so
# wrap the array in a DataFrame carrying the training column names.
new_data_df = pd.DataFrame(new_data, columns=model_feature_cols)

# Make predictions
predictions = rf_model.predict(new_data_df)

# Output the predictions
for i, prediction in enumerate(predictions):
    zone_status = "Red Zone" if prediction == 1 else "Not Red Zone"
    print(f"Example {i + 1}: {zone_status}")


Example 1: Red Zone
Example 2: Not Red Zone


