In [190]:
import folium
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
from datetime import timedelta
# Mapping
import geopandas
import geopy
from geopy.geocoders import Nominatim
import folium
from geopy.extra.rate_limiter import RateLimiter
from folium import plugins
from folium.plugins import MarkerCluster
# Statistical OLS Regression Analysis
%matplotlib inline
import statsmodels.api as sm
from statsmodels.compat import lzip
from statsmodels.formula.api import ols
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [191]:
crime_data = pd.read_csv('/Users/rupaliwadhawan/Downloads/AI_locations.csv')

In [192]:
crime_data.head()

Unnamed: 0,X,Y,OBJECTID,EVENT_UNIQUE_ID,OCC_DATE,OCC_YEAR,OCC_MONTH,OCC_DOW,OCC_DOY,OCC_DAY,...,OCC_TIME_RANGE,DIVISION,DEATH,INJURIES,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,LONG_WGS84,LAT_WGS84
0,-79.234955,43.781528,1,GO-2004397105,2004/05/15 04:00:00+00,2004,May,Saturday,136,15,...,Night,D41,0,0,142,Woburn North,137,Woburn (137),-79.234955,43.781528
1,-79.618218,43.733547,2,GO-2004600109,2004/07/26 04:00:00+00,2004,July,Monday,208,26,...,Evening,D23,0,0,1,West Humber-Clairville,1,West Humber-Clairville (1),-79.618218,43.733547
2,-79.518759,43.769157,3,GO-2004311851,2004/05/14 04:00:00+00,2004,May,Friday,135,14,...,Afternoon,D31,1,1,24,Black Creek,24,Black Creek (24),-79.518759,43.769157
3,-79.37887,43.660665,4,GO-2004736004,2004/12/19 05:00:00+00,2004,December,Sunday,354,19,...,Night,D51,1,0,168,Downtown Yonge East,75,Church-Yonge Corridor (75),-79.37887,43.660665
4,-79.212435,43.812075,5,GO-2004303455,2004/04/12 04:00:00+00,2004,April,Monday,103,12,...,Evening,D42,0,0,146,Malvern East,132,Malvern (132),-79.212435,43.812075


In [193]:
# Convert time objects
crime_data['OCC_DATE'] = pd.to_datetime(crime_data['OCC_DATE'])
crime_data['date'] = [d.date() for d in crime_data['OCC_DATE']]
crime_data['time'] = [d.time() for d in crime_data['OCC_DATE']]
crime_data['day'] = crime_data['OCC_DATE'].dt.day_name().to_string()
# Find Fractions of Day
crime_data['timeint'] = (crime_data['OCC_DATE']-crime_data['OCC_DATE'].dt.normalize()).dt.total_seconds()/timedelta(days=1).total_seconds()

In [194]:
crime_data.head()

Unnamed: 0,X,Y,OBJECTID,EVENT_UNIQUE_ID,OCC_DATE,OCC_YEAR,OCC_MONTH,OCC_DOW,OCC_DOY,OCC_DAY,...,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,LONG_WGS84,LAT_WGS84,date,time,day,timeint
0,-79.234955,43.781528,1,GO-2004397105,2004-05-15 04:00:00+00:00,2004,May,Saturday,136,15,...,142,Woburn North,137,Woburn (137),-79.234955,43.781528,2004-05-15,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
1,-79.618218,43.733547,2,GO-2004600109,2004-07-26 04:00:00+00:00,2004,July,Monday,208,26,...,1,West Humber-Clairville,1,West Humber-Clairville (1),-79.618218,43.733547,2004-07-26,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
2,-79.518759,43.769157,3,GO-2004311851,2004-05-14 04:00:00+00:00,2004,May,Friday,135,14,...,24,Black Creek,24,Black Creek (24),-79.518759,43.769157,2004-05-14,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
3,-79.37887,43.660665,4,GO-2004736004,2004-12-19 05:00:00+00:00,2004,December,Sunday,354,19,...,168,Downtown Yonge East,75,Church-Yonge Corridor (75),-79.37887,43.660665,2004-12-19,05:00:00,0 Saturday\n1 Monday\n2 ...,0.208333
4,-79.212435,43.812075,5,GO-2004303455,2004-04-12 04:00:00+00:00,2004,April,Monday,103,12,...,146,Malvern East,132,Malvern (132),-79.212435,43.812075,2004-04-12,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667


In [195]:
missing_values = crime_data.isnull().any()

# Display columns with missing values (if any)
print("Columns with Missing Values:")
print(missing_values[missing_values].index.tolist())

Columns with Missing Values:
[]


In [196]:
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5707 entries, 0 to 5706
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   X                  5707 non-null   float64            
 1   Y                  5707 non-null   float64            
 2   OBJECTID           5707 non-null   int64              
 3   EVENT_UNIQUE_ID    5707 non-null   object             
 4   OCC_DATE           5707 non-null   datetime64[ns, UTC]
 5   OCC_YEAR           5707 non-null   int64              
 6   OCC_MONTH          5707 non-null   object             
 7   OCC_DOW            5707 non-null   object             
 8   OCC_DOY            5707 non-null   int64              
 9   OCC_DAY            5707 non-null   int64              
 10  OCC_HOUR           5707 non-null   int64              
 11  OCC_TIME_RANGE     5707 non-null   object             
 12  DIVISION           5707 non-null   object       

In [197]:
crime_data['OCC_TIME_RANGE']=crime_data['OCC_TIME_RANGE'].astype(str)

In [198]:
crime_data['OCC_TIME_RANGE'].unique()

array(['Night', 'Evening', 'Afternoon', 'Morning'], dtype=object)

In [199]:
# separate features and target
categorical_features = ['OCC_TIME_RANGE']
numerical_features = ['LONG_WGS84']
target = 'LONG_WGS84'

In [200]:
X_train_long, X_test_long, y_train_long, y_test_long = train_test_split(crime_data[categorical_features + numerical_features]
                                                    , crime_data[target],
                                                    test_size = 0.3, random_state=1234)

In [201]:
cat_transformer = Pipeline(steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))])
num_transformer = Pipeline(steps = [('scaler', StandardScaler())])

In [202]:
preprocessor = ColumnTransformer(transformers = [('cat',cat_transformer, categorical_features),
                                                 ('num', num_transformer, numerical_features)])

In [203]:
G_B_Reg = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor())
])

In [204]:
G_B_Reg.fit(X_train_long, y_train_long)



In [205]:
pred_long = G_B_Reg.predict(X_test_long)

In [206]:
pred_long

array([-79.39733889, -79.44024849, -79.55377158, ..., -79.44024849,
       -79.47766698, -79.39958252])

In [207]:
np.sqrt(mean_squared_error(pred_long,y_test_long))

0.0015471714004854514

In [208]:
r2 = r2_score(pred_long, y_test_long)
r2

0.9999932582820825

In [209]:
categorical_feat = ['OCC_TIME_RANGE']
numerical_feat = ['LAT_WGS84']
target_ = 'LAT_WGS84'

X_train_lat, X_test_lat, y_train_lat, y_test_lat = train_test_split(crime_data[categorical_feat + numerical_feat]
                                                    , crime_data[target_],
                                                    test_size = 0.3, random_state=123)

cat_transform = Pipeline(steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))])
num_transform = Pipeline(steps = [('scaler', StandardScaler())])

preprocess = ColumnTransformer(transformers = [('cat',cat_transform, categorical_feat),
                                                 ('num', num_transform, numerical_feat)])

G_B_Reg_lat = Pipeline(steps = [
    ('preprocess', preprocess),
    ('regress', GradientBoostingRegressor())
])

G_B_Reg_lat.fit(X_train_lat, y_train_lat)

pred_lat = G_B_Reg_lat.predict(X_test_lat)

In [216]:
pred_lat

array([43.67453553, 43.75125719, 43.70108873, ..., 43.65246369,
       43.6877517 , 43.69867075])

In [217]:
pred_long

array([-79.39733889, -79.44024849, -79.55377158, ..., -79.44024849,
       -79.47766698, -79.39958252])

In [210]:
np.sqrt(mean_squared_error(pred_lat,y_test_lat))


0.0007663703720548479

In [211]:
r2_score(pred_lat,y_test_lat)

0.9999999667947782

In [212]:
from folium.plugins import HeatMap

In [213]:
predicted_points = list(zip(pred_lat, pred_long))

# Create a folium map centered on Toronto
toronto_map = folium.Map(location=[43.653225, -79.383186], zoom_start=12)

# Create a heatmap layer with the predicted points
heat_map_layer = HeatMap(predicted_points, radius=15)

# Add the heatmap layer to the map
toronto_map.add_child(heat_map_layer)

# Display the map
toronto_map.save('predicted_locations_heatmap.html')


In [214]:
danger_variable = ['Low', 'High', 'Medium']

# Combine the latitude, longitude, and danger variable into a list of points
predicted_points = list(zip(pred_lat, pred_long, danger_variable))

# Create a folium map centered on Toronto
toronto_map = folium.Map(location=[43.653225, -79.383186], zoom_start=12)

# Create FeatureGroups based on the danger variable
for danger_level in set(danger_variable):
    feature_group = folium.FeatureGroup(name=danger_level)

    # Filter points based on the danger variable and create a HeatMap for each group
    points_group = [(lat, lon) for lat, lon, danger in predicted_points if danger == danger_level]
    heat_map_layer = HeatMap(points_group, radius=15)
    feature_group.add_child(heat_map_layer)

    # Add the FeatureGroup to the map
    toronto_map.add_child(feature_group)

# Add layer control to toggle the display of different danger levels
folium.LayerControl().add_to(toronto_map)

# Save the map as an HTML file
toronto_map.save('dynamic_heatmap.html')

In [224]:
# Create a folium map centered on the first neighborhood (you can choose any neighborhood)
neighborhood_map = folium.Map(location=[crime_data['LAT_WGS84'].mean(), crime_data['LONG_WGS84'].mean()], zoom_start=12)

# Add a blue marker for the police car parking location for each neighborhood
centroid_data = crime_data.groupby('NEIGHBOURHOOD_158')[['LAT_WGS84', 'LONG_WGS84']].mean()
for neighborhood, centroid in centroid_data.iterrows():
    folium.Marker(
        location=[centroid['LAT_WGS84'], centroid['LONG_WGS84']],
        popup=f'{neighborhood} - Police Car Parking',
        icon=folium.Icon(color='darkblue', icon='car')
    ).add_to(neighborhood_map)

# Create a list to store the heatmap data
heatmap_data = []

# Add the crime location coordinates to the heatmap data list
for index, row in crime_data.iterrows():
    heatmap_data.append([row['LAT_WGS84'], row['LONG_WGS84']])

# Create the heatmap layer and add it to the map
HeatMap(heatmap_data).add_to(neighborhood_map)

# Display the map
neighborhood_map.save('crime_heatmap_with_police_car_park.html')

In [238]:
centroid_data

Unnamed: 0_level_0,LAT_WGS84,LONG_WGS84
NEIGHBOURHOOD_158,Unnamed: 1_level_1,Unnamed: 2_level_1
Agincourt North,43.804764,-79.266462
Agincourt South-Malvern West,43.787643,-79.262417
Alderwood,43.602379,-79.541462
Annex,43.670285,-79.401407
Avondale,43.757384,-79.406288
...,...,...
Yonge-Doris,43.773561,-79.413987
Yonge-Eglinton,43.701449,-79.406424
Yonge-St.Clair,43.687890,-79.395057
York University Heights,43.762383,-79.490218


In [236]:
pdd = {
    'latitude': pred_lat,
    'longitude': pred_long,
}

predicted_data_ = pd.DataFrame(pdd)
predicted_data_

Unnamed: 0,latitude,longitude
0,43.674536,-79.397339
1,43.751257,-79.440248
2,43.701089,-79.553772
3,43.751257,-79.509094
4,43.697111,-79.509094
...,...,...
1708,43.730875,-79.178968
1709,43.715369,-79.399501
1710,43.652464,-79.440248
1711,43.687752,-79.477667


In [237]:
from geopy.distance import geodesic


In [243]:
centroid_data = crime_data.groupby('NEIGHBOURHOOD_158')[['LAT_WGS84', 'LONG_WGS84']].mean().reset_index()

In [245]:
# Function to calculate distance between two points using Haversine formula
def calculate_distance(lat1, lon1, lat2, lon2):
    return geodesic((lat1, lon1), (lat2, lon2)).meters

# Function to find the closest neighborhood for a given location point
def find_closest_neighborhood(latitude, longitude, neighborhood_data):
    distances = []
    for _, row in neighborhood_data.iterrows():
        distance = calculate_distance(latitude, longitude, row['LAT_WGS84'], row['LONG_WGS84'])
        distances.append(distance)
    closest_index = distances.index(min(distances))
    return neighborhood_data.loc[closest_index, 'NEIGHBOURHOOD_158']


# Add a new column 'neighborhood' to the predicted_data DataFrame
predicted_data_['neighborhood'] = predicted_data_.apply(lambda row: find_closest_neighborhood(row['latitude'], row['longitude'], centroid_data), axis=1)



In [246]:

predicted_data_

Unnamed: 0,latitude,longitude,neighborhood
0,43.674536,-79.397339,Annex
1,43.751257,-79.440248,Clanton Park
2,43.701089,-79.553772,Kingsview Village-The Westway
3,43.751257,-79.509094,Glenfield-Jane Heights
4,43.697111,-79.509094,Weston
...,...,...,...
1708,43.730875,-79.178968,Guildwood
1709,43.715369,-79.399501,North Toronto
1710,43.652464,-79.440248,Dufferin Grove
1711,43.687752,-79.477667,Beechborough-Greenbrook


In [248]:
# Calculate centroids for each neighborhood in the predicted_data
centroid_data_predicted = predicted_data_.groupby('neighborhood')[['latitude', 'longitude']].mean().reset_index()

# Create a folium map centered on the first predicted neighborhood (you can choose any neighborhood)
predicted_map = folium.Map(location=[predicted_data_.iloc[0]['latitude'], predicted_data_.iloc[0]['longitude']], zoom_start=12)

# Convert predicted crime data to list of points for the HeatMap
predicted_crime_points = predicted_data_[['latitude', 'longitude']].values.tolist()

# Create HeatMap for predicted crime locations
HeatMap(predicted_crime_points, radius=15).add_to(predicted_map)

# Add blue markers for the centroids (police car parking locations) of each predicted neighborhood
for _, row in centroid_data_predicted.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f'{row["neighborhood"]} - Police Car Parking',
        icon=folium.Icon(color='blue', icon='car')
    ).add_to(predicted_map)

# Display the map
predicted_map.save('predicted_crime_map_heatmap_1.html')


In [250]:
# Convert predicted crime data to list of points for the HeatMap
predicted_crime_points = predicted_data_[['latitude', 'longitude']].values.tolist()

# Initialize danger variable to 'high' (change it to 'low' once police car is parked)
danger = 'high'

# Function to change HeatMap color based on danger level
def get_heatmap_color(danger):
    if danger == 'high':
        return 'red'
    elif danger == 'low':
        return 'green'
    else:
        return 'red'  # Default to red for unknown danger level

# Create HeatMap for predicted crime locations with initial color based on danger variable
HeatMap(predicted_crime_points, radius=15, gradient={0.2: 'blue', 0.5: get_heatmap_color(danger)}).add_to(predicted_map)

# Add blue markers for the centroids (police car parking locations) of each predicted neighborhood
for _, row in centroid_data_predicted.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f'{row["neighborhood"]} - Police Car Parking',
        icon=folium.Icon(color='blue', icon='car')
    ).add_to(predicted_map)

# Display the map
predicted_map.save('dynamic_predicted_crime_map_heatmap.html')


In [257]:
predicted_data_.neighborhood.value_counts().median()


7.0

In [251]:
# Create HeatMap for predicted crime locations with initial color based on danger variable
heat_map = HeatMap(predicted_crime_points, radius=15, gradient={0.2: 'blue', 0.5: get_heatmap_color(danger)})
heat_map.add_to(predicted_map)

# Add blue markers for the centroids (police car parking locations) of each predicted neighborhood
for _, row in centroid_data_predicted.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f'{row["neighborhood"]} - Police Car Parking',
        icon=folium.Icon(color='blue', icon='car')
    ).add_to(predicted_map)

# Display the map
predicted_map.save('dynamic_predicted_crime_map_heatmap.html')

# Assuming you've chosen 'Neighborhood A' to park the police car, update the danger level
parked_neighborhood = 'North Toronto'
predicted_data_.loc[predicted_data_['neighborhood'] == parked_neighborhood, 'danger'] = 'low'


In [258]:
crime_counts = predicted_data_['neighborhood'].value_counts()

# Function to assign danger level based on the number of crimes
def assign_danger_level(crime_count):
    if crime_count > 15:
        return 'high'
    elif crime_count > 7:
        return 'medium'
    else:
        return 'low'

# Create a dictionary to map neighborhood to danger level
neighborhood_danger = {neighborhood: assign_danger_level(crime_count) for neighborhood, crime_count in crime_counts.items()}

# Create the 'danger' column in the predicted_data DataFrame
predicted_data_['danger'] = predicted_data_['neighborhood'].map(neighborhood_danger)

predicted_data_


Unnamed: 0,latitude,longitude,neighborhood,danger
0,43.674536,-79.397339,Annex,low
1,43.751257,-79.440248,Clanton Park,high
2,43.701089,-79.553772,Kingsview Village-The Westway,high
3,43.751257,-79.509094,Glenfield-Jane Heights,high
4,43.697111,-79.509094,Weston,high
...,...,...,...,...
1708,43.730875,-79.178968,Guildwood,medium
1709,43.715369,-79.399501,North Toronto,low
1710,43.652464,-79.440248,Dufferin Grove,low
1711,43.687752,-79.477667,Beechborough-Greenbrook,medium


In [265]:
predicted_data_['neighborhood'].value_counts()

neighborhood
Black Creek                78
Birchcliffe-Cliffside      73
Humber Summit              57
Glenfield-Jane Heights     54
The Beaches                51
                           ..
Yonge-Doris                 1
North Riverdale             1
Palmerston-Little Italy     1
Downtown Yonge East         1
Mimico-Queensway            1
Name: count, Length: 154, dtype: int64

In [266]:
crime_counts

neighborhood
Black Creek                78
Birchcliffe-Cliffside      73
Humber Summit              57
Glenfield-Jane Heights     54
The Beaches                51
                           ..
Yonge-Doris                 1
North Riverdale             1
Palmerston-Little Italy     1
Downtown Yonge East         1
Mimico-Queensway            1
Name: count, Length: 154, dtype: int64

In [268]:
centroid_danger = {neighborhood: assign_danger_level(crime_count) for neighborhood, crime_count in crime_counts.items()}

# Create the 'danger' column in the centroid DataFrame
centroid_data_predicted['danger'] = predicted_data_['neighborhood'].map(neighborhood_danger)

In [292]:
# Create HeatMap for predicted crime locations with initial color based on danger variable
heat_map = HeatMap(predicted_crime_points, radius=15, gradient={0.2: 'blue', 0.5: get_heatmap_color(danger)})
heat_map.add_to(predicted_map)

for _, row in centroid_data_predicted.iterrows():
    if row['danger'] == 'low':
        # Park the police car in 'Neighborhood A' and change the icon color to green
        marker_color = 'green'
    else:
        marker_color = 'lightred'
    
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f'{row["neighborhood"]} - Police Car Parking',
        icon=folium.Icon(color=marker_color, icon='car')
    ).add_to(predicted_map)

# Function to update 'danger' column to 'low' when marker is clicked
js_code = """
function updateDanger(e) {
    // Get the clicked marker's lat and lon
    var lat = e.target._latlng.lat;
    var lon = e.target._latlng.lng;

    // Find the corresponding row in the DataFrame and update 'danger' to 'low'
    for (var i = 0; i < predicted_data_.length; i++) {
        if (predicted_data_[i].latitude === lat && predicted_data_[i].longitude === lon) {
            predicted_data_[i].danger = 'low';
            break;
        }
    }
}

// Get all markers on the map
var markers = document.querySelectorAll('.leaflet-marker-icon');

// Add a click event listener to each marker
for (var i = 0; i < markers.length; i++) {
    markers[i].addEventListener('click', updateDanger);
}
"""


# Embed the JavaScript code in the map
predicted_map.get_root().html.add_child(folium.Element('<script>' + js_code + '</script>'))

# Display the map
predicted_map.save('dynamic_predicted_crime_map_heatmap.html')



In [285]:
predicted_data_.loc[predicted_data_['neighborhood'] == 'Annex', 'danger'] = 'high'

In [286]:
predicted_map.save('dynamic_predicted_crime_map_heatmap.html')