In [262]:
import folium
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
from datetime import timedelta
# Mapping
import geopandas
import geopy
from geopy.geocoders import Nominatim
import folium
from geopy.extra.rate_limiter import RateLimiter
from folium import plugins
from folium.plugins import MarkerCluster
# Statistical OLS Regression Analysis
%matplotlib inline
import statsmodels.api as sm
from statsmodels.compat import lzip
from statsmodels.formula.api import ols
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [263]:
crime_data = pd.read_csv('/Users/rupaliwadhawan/Downloads/AI_locations.csv')

In [264]:
crime_data.head()

Unnamed: 0,X,Y,OBJECTID,EVENT_UNIQUE_ID,OCC_DATE,OCC_YEAR,OCC_MONTH,OCC_DOW,OCC_DOY,OCC_DAY,...,OCC_TIME_RANGE,DIVISION,DEATH,INJURIES,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,LONG_WGS84,LAT_WGS84
0,-79.234955,43.781528,1,GO-2004397105,2004/05/15 04:00:00+00,2004,May,Saturday,136,15,...,Night,D41,0,0,142,Woburn North,137,Woburn (137),-79.234955,43.781528
1,-79.618218,43.733547,2,GO-2004600109,2004/07/26 04:00:00+00,2004,July,Monday,208,26,...,Evening,D23,0,0,1,West Humber-Clairville,1,West Humber-Clairville (1),-79.618218,43.733547
2,-79.518759,43.769157,3,GO-2004311851,2004/05/14 04:00:00+00,2004,May,Friday,135,14,...,Afternoon,D31,1,1,24,Black Creek,24,Black Creek (24),-79.518759,43.769157
3,-79.37887,43.660665,4,GO-2004736004,2004/12/19 05:00:00+00,2004,December,Sunday,354,19,...,Night,D51,1,0,168,Downtown Yonge East,75,Church-Yonge Corridor (75),-79.37887,43.660665
4,-79.212435,43.812075,5,GO-2004303455,2004/04/12 04:00:00+00,2004,April,Monday,103,12,...,Evening,D42,0,0,146,Malvern East,132,Malvern (132),-79.212435,43.812075


In [265]:
# Convert time objects
crime_data['OCC_DATE'] = pd.to_datetime(crime_data['OCC_DATE'])
crime_data['date'] = [d.date() for d in crime_data['OCC_DATE']]
crime_data['time'] = [d.time() for d in crime_data['OCC_DATE']]
crime_data['day'] = crime_data['OCC_DATE'].dt.day_name().to_string()
# Find Fractions of Day
crime_data['timeint'] = (crime_data['OCC_DATE']-crime_data['OCC_DATE'].dt.normalize()).dt.total_seconds()/timedelta(days=1).total_seconds()

In [266]:
crime_data.head()

Unnamed: 0,X,Y,OBJECTID,EVENT_UNIQUE_ID,OCC_DATE,OCC_YEAR,OCC_MONTH,OCC_DOW,OCC_DOY,OCC_DAY,...,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,LONG_WGS84,LAT_WGS84,date,time,day,timeint
0,-79.234955,43.781528,1,GO-2004397105,2004-05-15 04:00:00+00:00,2004,May,Saturday,136,15,...,142,Woburn North,137,Woburn (137),-79.234955,43.781528,2004-05-15,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
1,-79.618218,43.733547,2,GO-2004600109,2004-07-26 04:00:00+00:00,2004,July,Monday,208,26,...,1,West Humber-Clairville,1,West Humber-Clairville (1),-79.618218,43.733547,2004-07-26,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
2,-79.518759,43.769157,3,GO-2004311851,2004-05-14 04:00:00+00:00,2004,May,Friday,135,14,...,24,Black Creek,24,Black Creek (24),-79.518759,43.769157,2004-05-14,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667
3,-79.37887,43.660665,4,GO-2004736004,2004-12-19 05:00:00+00:00,2004,December,Sunday,354,19,...,168,Downtown Yonge East,75,Church-Yonge Corridor (75),-79.37887,43.660665,2004-12-19,05:00:00,0 Saturday\n1 Monday\n2 ...,0.208333
4,-79.212435,43.812075,5,GO-2004303455,2004-04-12 04:00:00+00:00,2004,April,Monday,103,12,...,146,Malvern East,132,Malvern (132),-79.212435,43.812075,2004-04-12,04:00:00,0 Saturday\n1 Monday\n2 ...,0.166667


In [267]:
missing_values = crime_data.isnull().any()

# Display columns with missing values (if any)
print("Columns with Missing Values:")
print(missing_values[missing_values].index.tolist())

Columns with Missing Values:
[]


In [268]:
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5707 entries, 0 to 5706
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   X                  5707 non-null   float64            
 1   Y                  5707 non-null   float64            
 2   OBJECTID           5707 non-null   int64              
 3   EVENT_UNIQUE_ID    5707 non-null   object             
 4   OCC_DATE           5707 non-null   datetime64[ns, UTC]
 5   OCC_YEAR           5707 non-null   int64              
 6   OCC_MONTH          5707 non-null   object             
 7   OCC_DOW            5707 non-null   object             
 8   OCC_DOY            5707 non-null   int64              
 9   OCC_DAY            5707 non-null   int64              
 10  OCC_HOUR           5707 non-null   int64              
 11  OCC_TIME_RANGE     5707 non-null   object             
 12  DIVISION           5707 non-null   object       

In [269]:
crime_data['OCC_TIME_RANGE']=crime_data['OCC_TIME_RANGE'].astype(str)

In [270]:
crime_data['OCC_TIME_RANGE'].unique()

array(['Night', 'Evening', 'Afternoon', 'Morning'], dtype=object)

In [271]:
# separate features and target
categorical_features = ['OCC_TIME_RANGE']
numerical_features = ['LONG_WGS84','timeint']
target = 'LONG_WGS84'

In [272]:
X_train_long, X_test_long, y_train_long, y_test_long = train_test_split(crime_data[categorical_features + numerical_features]
                                                    , crime_data[target],
                                                    test_size = 0.3, random_state=1234)

In [273]:
cat_transformer = Pipeline(steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))])
num_transformer = Pipeline(steps = [('scaler', StandardScaler())])

In [274]:
preprocessor = ColumnTransformer(transformers = [('cat',cat_transformer, categorical_features),
                                                 ('num', num_transformer, numerical_features)])

In [275]:
G_B_Reg = Pipeline(steps = [
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor())
])

In [276]:
G_B_Reg.fit(X_train_long, y_train_long)



In [277]:
pred_long = G_B_Reg.predict(X_test_long)

In [278]:
pred_long

array([-79.39733889, -79.44024849, -79.55377158, ..., -79.44024849,
       -79.47766698, -79.39958252])

In [279]:
np.sqrt(mean_squared_error(pred_long,y_test_long))

0.0015471714004854514

In [280]:
r2 = r2_score(pred_long, y_test_long)
r2

0.9999932582820825

In [281]:
categorical_feat = ['OCC_TIME_RANGE']
numerical_feat = ['LAT_WGS84','timeint']
target_ = 'LAT_WGS84'

X_train_lat, X_test_lat, y_train_lat, y_test_lat = train_test_split(crime_data[categorical_feat + numerical_feat]
                                                    , crime_data[target_],
                                                    test_size = 0.3, random_state=123)

cat_transform = Pipeline(steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))])
num_transform = Pipeline(steps = [('scaler', StandardScaler())])

preprocess = ColumnTransformer(transformers = [('cat',cat_transform, categorical_feat),
                                                 ('num', num_transform, numerical_feat)])

G_B_Reg_lat = Pipeline(steps = [
    ('preprocess', preprocess),
    ('regress', GradientBoostingRegressor())
])

G_B_Reg_lat.fit(X_train_lat, y_train_lat)

pred_lat = G_B_Reg_lat.predict(X_test_lat)

In [282]:
pred_lat

array([43.67453553, 43.75125719, 43.70108873, ..., 43.65246369,
       43.6877517 , 43.69867075])

In [283]:
pred_long

array([-79.39733889, -79.44024849, -79.55377158, ..., -79.44024849,
       -79.47766698, -79.39958252])

In [284]:
np.sqrt(mean_squared_error(pred_lat,y_test_lat))


0.0007663720372540109

In [285]:
r2_score(pred_lat,y_test_lat)

0.999999966794634

In [286]:
from folium.plugins import HeatMap

In [287]:
predicted_points = list(zip(pred_lat, pred_long))

# Create a folium map centered on Toronto
toronto_map = folium.Map(location=[43.653225, -79.383186], zoom_start=12)

# Create a heatmap layer with the predicted points
heat_map_layer = HeatMap(predicted_points, radius=15)

# Add the heatmap layer to the map
toronto_map.add_child(heat_map_layer)

# Display the map
toronto_map.save('predictions_final_1.html')


In [288]:
# Add a blue marker for the police car parking location for each neighborhood
centroid_data = crime_data.groupby('NEIGHBOURHOOD_158')[['LAT_WGS84', 'LONG_WGS84']].mean()

# Create a list to store the heatmap data
heatmap_data = []

# Add the crime location coordinates to the heatmap data list
for index, row in crime_data.iterrows():
    heatmap_data.append([row['LAT_WGS84'], row['LONG_WGS84']])

# Create the heatmap layer and add it to the map
HeatMap(heatmap_data).add_to(toronto_map)

# Display the map
toronto_map.save('predictions_final_1.html')

In [289]:
centroid_data

Unnamed: 0_level_0,LAT_WGS84,LONG_WGS84
NEIGHBOURHOOD_158,Unnamed: 1_level_1,Unnamed: 2_level_1
Agincourt North,43.804764,-79.266462
Agincourt South-Malvern West,43.787643,-79.262417
Alderwood,43.602379,-79.541462
Annex,43.670285,-79.401407
Avondale,43.757384,-79.406288
...,...,...
Yonge-Doris,43.773561,-79.413987
Yonge-Eglinton,43.701449,-79.406424
Yonge-St.Clair,43.687890,-79.395057
York University Heights,43.762383,-79.490218


In [290]:
pdd = {
    'latitude': pred_lat,
    'longitude': pred_long,
}

predicted_data_ = pd.DataFrame(pdd)
predicted_data_

Unnamed: 0,latitude,longitude
0,43.674536,-79.397339
1,43.751257,-79.440248
2,43.701089,-79.553772
3,43.751257,-79.509094
4,43.697111,-79.509094
...,...,...
1708,43.730875,-79.178968
1709,43.715369,-79.399501
1710,43.652464,-79.440248
1711,43.687752,-79.477667


In [291]:
from geopy.distance import geodesic


In [292]:
centroid_data = crime_data.groupby('NEIGHBOURHOOD_158')[['LAT_WGS84', 'LONG_WGS84']].mean().reset_index()

In [293]:
# Function to calculate distance between two points using Haversine formula
def calculate_distance(lat1, lon1, lat2, lon2):
    return geodesic((lat1, lon1), (lat2, lon2)).meters

# Function to find the closest neighborhood for a given location point
def find_closest_neighborhood(latitude, longitude, neighborhood_data):
    distances = []
    for _, row in neighborhood_data.iterrows():
        distance = calculate_distance(latitude, longitude, row['LAT_WGS84'], row['LONG_WGS84'])
        distances.append(distance)
    closest_index = distances.index(min(distances))
    return neighborhood_data.loc[closest_index, 'NEIGHBOURHOOD_158']

# Add a new column 'neighborhood' to the predicted_data DataFrame
predicted_data_['neighborhood'] = predicted_data_.apply(lambda row: find_closest_neighborhood(row['latitude'], row['longitude'], centroid_data), axis=1)

In [294]:
predicted_data_

Unnamed: 0,latitude,longitude,neighborhood
0,43.674536,-79.397339,Annex
1,43.751257,-79.440248,Clanton Park
2,43.701089,-79.553772,Kingsview Village-The Westway
3,43.751257,-79.509094,Glenfield-Jane Heights
4,43.697111,-79.509094,Weston
...,...,...,...
1708,43.730875,-79.178968,Guildwood
1709,43.715369,-79.399501,North Toronto
1710,43.652464,-79.440248,Dufferin Grove
1711,43.687752,-79.477667,Beechborough-Greenbrook


In [295]:
predicted_data_

Unnamed: 0,latitude,longitude,neighborhood
0,43.674536,-79.397339,Annex
1,43.751257,-79.440248,Clanton Park
2,43.701089,-79.553772,Kingsview Village-The Westway
3,43.751257,-79.509094,Glenfield-Jane Heights
4,43.697111,-79.509094,Weston
...,...,...,...
1708,43.730875,-79.178968,Guildwood
1709,43.715369,-79.399501,North Toronto
1710,43.652464,-79.440248,Dufferin Grove
1711,43.687752,-79.477667,Beechborough-Greenbrook


In [296]:
# Calculate centroids for each neighborhood in the predicted_data
centroid_data_predicted = predicted_data_.groupby('neighborhood')[['latitude', 'longitude']].mean().reset_index()

# Convert predicted crime data to list of points for the HeatMap
predicted_crime_points = predicted_data_[['latitude', 'longitude']].values.tolist()

# Create HeatMap for predicted crime locations
HeatMap(predicted_crime_points, radius=15).add_to(toronto_map)

# Display the map
toronto_map.save('predictions_final_1.html')


In [297]:
predicted_crime_points

[[43.67453552666002, -79.39733888521287],
 [43.75125719005796, -79.44024848534681],
 [43.70108873316162, -79.55377157609438],
 [43.75125719005796, -79.50909441406617],
 [43.69711139971001, -79.50909441406617],
 [43.69103939457374, -79.32668572393476],
 [43.811446492048916, -79.24842168774384],
 [43.63416936725679, -79.21226455147382],
 [43.69308173993112, -79.3024627990297],
 [43.70367706584088, -79.55801092475012],
 [43.75478419746817, -79.33170959247681],
 [43.70934022407598, -79.61042213015598],
 [43.67042500879449, -79.36839885108968],
 [43.69366852645224, -79.3024627990297],
 [43.648767485375764, -79.38800468758464],
 [43.758939400854615, -79.22709406233564],
 [43.73745436006534, -79.39958251914867],
 [43.71997277170111, -79.50368233528353],
 [43.65833850538834, -79.44033783394447],
 [43.71997277170111, -79.50945853400805],
 [43.763908919224626, -79.3024627990297],
 [43.69103939457374, -79.51410648497391],
 [43.75125719005796, -79.5188585510681],
 [43.78881412637156, -79.447535093

In [298]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude
0,Agincourt North,43.803298,-79.263350
1,Agincourt South-Malvern West,43.785955,-79.258910
2,Alderwood,43.598280,-79.582069
3,Annex,43.670023,-79.398264
4,Avondale,43.755939,-79.402948
...,...,...,...
149,Yonge-Doris,43.769810,-79.419960
150,Yonge-Eglinton,43.698671,-79.405105
151,Yonge-St.Clair,43.691201,-79.396334
152,York University Heights,43.761609,-79.489876


In [299]:
# Assuming you've chosen 'Neighborhood A' to park the police car, update the danger level
parked_neighborhood = 'North Toronto'
predicted_data_.loc[predicted_data_['neighborhood'] == parked_neighborhood, 'danger'] = 'low'


In [300]:
crime_counts = predicted_data_['neighborhood'].value_counts()

# Function to assign danger level based on the number of crimes
def assign_danger_level(crime_count):
    if crime_count > 15:
        return 'high'
    elif crime_count > 7:
        return 'medium'
    else:
        return 'low'

# Create a dictionary to map neighborhood to danger level
neighborhood_danger = {neighborhood: assign_danger_level(crime_count) for neighborhood, crime_count in crime_counts.items()}

# Create the 'danger' column in the predicted_data DataFrame
predicted_data_['danger'] = predicted_data_['neighborhood'].map(neighborhood_danger)

predicted_data_


Unnamed: 0,latitude,longitude,neighborhood,danger
0,43.674536,-79.397339,Annex,low
1,43.751257,-79.440248,Clanton Park,high
2,43.701089,-79.553772,Kingsview Village-The Westway,high
3,43.751257,-79.509094,Glenfield-Jane Heights,high
4,43.697111,-79.509094,Weston,high
...,...,...,...,...
1708,43.730875,-79.178968,Guildwood,medium
1709,43.715369,-79.399501,North Toronto,low
1710,43.652464,-79.440248,Dufferin Grove,low
1711,43.687752,-79.477667,Beechborough-Greenbrook,medium


In [301]:
predicted_data_['neighborhood'].value_counts()

neighborhood
Black Creek                78
Birchcliffe-Cliffside      73
Humber Summit              57
Glenfield-Jane Heights     54
The Beaches                51
                           ..
Yonge-Doris                 1
North Riverdale             1
Palmerston-Little Italy     1
Downtown Yonge East         1
Mimico-Queensway            1
Name: count, Length: 154, dtype: int64

In [302]:
crime_counts

neighborhood
Black Creek                78
Birchcliffe-Cliffside      73
Humber Summit              57
Glenfield-Jane Heights     54
The Beaches                51
                           ..
Yonge-Doris                 1
North Riverdale             1
Palmerston-Little Italy     1
Downtown Yonge East         1
Mimico-Queensway            1
Name: count, Length: 154, dtype: int64

In [303]:
centroid_danger = {neighborhood: assign_danger_level(crime_count) for neighborhood, crime_count in crime_counts.items()}

# Create the 'danger' column in the centroid DataFrame
centroid_data_predicted['danger'] = predicted_data_['neighborhood'].map(neighborhood_danger)

In [304]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger
0,Agincourt North,43.803298,-79.263350,low
1,Agincourt South-Malvern West,43.785955,-79.258910,high
2,Alderwood,43.598280,-79.582069,high
3,Annex,43.670023,-79.398264,high
4,Avondale,43.755939,-79.402948,high
...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium
150,Yonge-Eglinton,43.698671,-79.405105,high
151,Yonge-St.Clair,43.691201,-79.396334,high
152,York University Heights,43.761609,-79.489876,low


In [305]:
danger=['high','medium','low']

In [306]:
# Convert predicted crime data to list of points for the HeatMap
predicted_crime_points = predicted_data_[['latitude', 'longitude']].values.tolist()

# Function to change HeatMap color based on danger level
def get_heatmap_color(danger):
    if danger == 'high':
        return 'red'
    elif danger == 'low':
        return 'green'
    else:
        return 'blue' # Default to red for unknown danger level

# Create HeatMap for predicted crime locations with initial color based on danger variable
HeatMap(predicted_crime_points, radius=15, gradient={0.2: 'blue', 0.5: get_heatmap_color(danger)}).add_to(toronto_map)

# Add blue markers for the centroids (police car parking locations) of each predicted neighborhood
for _, row in centroid_data_predicted.iterrows():
    if row['danger'] == 'high':
            color = 'red'
    elif row['danger'] == 'medium':
            color = 'orange'
    else:  # low danger
        color = 'green'
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f'{row["neighborhood"]} - Police Car Parking',
        icon=folium.Icon(color=color, icon='car')
    ).add_to(toronto_map)

# Display the map
toronto_map.save('predictions_final_1.html')


In [307]:
police_stations = pd.read_csv('/Users/rupaliwadhawan/Downloads/TPS_Facilities.csv')

In [308]:
police_stations.head()

Unnamed: 0,FACILITY,ORGANIZATION,LATITUDE,LONGITUDE
0,11 Division,Toronto Police Services,43.671071,-79.460825
1,12 Division,Toronto Police Services,43.694579,-79.486876
2,13 Division,Toronto Police Services,43.698327,-79.436683
3,14 Division,Toronto Police Services,43.651297,-79.425978
4,22 Division,Toronto Police Services,43.643107,-79.529185


In [309]:
for _, row in police_stations.iterrows():
    folium.Marker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        popup=f'{row["FACILITY"]} - Police Station',
        icon=folium.Icon(color='black', icon_color='white', icon='home')
    ).add_to(toronto_map)
    toronto_map.save('predictions_final_1.html')

In [310]:
police_stations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   FACILITY      17 non-null     object 
 1   ORGANIZATION  17 non-null     object 
 2   LATITUDE      17 non-null     float64
 3   LONGITUDE     17 non-null     float64
dtypes: float64(2), object(2)
memory usage: 676.0+ bytes


In [311]:
def update_heatmap(m, lat, lon, danger_level):
    # Remove previous heatmap layer if it exists
    if hasattr(m, '_children') and len(m._children) > 0:
        for key in list(m._children.keys()):
            if 'heatmap' in key:
                m._children.pop(key)

  
    toronto_map.add_child(heat_map)


In [312]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger
0,Agincourt North,43.803298,-79.263350,low
1,Agincourt South-Malvern West,43.785955,-79.258910,high
2,Alderwood,43.598280,-79.582069,high
3,Annex,43.670023,-79.398264,high
4,Avondale,43.755939,-79.402948,high
...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium
150,Yonge-Eglinton,43.698671,-79.405105,high
151,Yonge-St.Clair,43.691201,-79.396334,high
152,York University Heights,43.761609,-79.489876,low


In [313]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger
0,Agincourt North,43.803298,-79.263350,low
1,Agincourt South-Malvern West,43.785955,-79.258910,high
2,Alderwood,43.598280,-79.582069,high
3,Annex,43.670023,-79.398264,high
4,Avondale,43.755939,-79.402948,high
...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium
150,Yonge-Eglinton,43.698671,-79.405105,high
151,Yonge-St.Clair,43.691201,-79.396334,high
152,York University Heights,43.761609,-79.489876,low


In [314]:
predicted_crime_points = pd.DataFrame(predicted_crime_points)

In [315]:
predicted_crime_points

Unnamed: 0,0,1
0,43.674536,-79.397339
1,43.751257,-79.440248
2,43.701089,-79.553772
3,43.751257,-79.509094
4,43.697111,-79.509094
...,...,...
1708,43.730875,-79.178968
1709,43.715369,-79.399501
1710,43.652464,-79.440248
1711,43.687752,-79.477667


In [316]:
new_column_names = {
    0: 'latitude',
    1: 'longitude'
}
predicted_crime_points.rename(columns=new_column_names, inplace=True)

In [317]:
predicted_crime_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1713 entries, 0 to 1712
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   1713 non-null   float64
 1   longitude  1713 non-null   float64
dtypes: float64(2)
memory usage: 26.9 KB


In [318]:
police_stations

Unnamed: 0,FACILITY,ORGANIZATION,LATITUDE,LONGITUDE
0,11 Division,Toronto Police Services,43.671071,-79.460825
1,12 Division,Toronto Police Services,43.694579,-79.486876
2,13 Division,Toronto Police Services,43.698327,-79.436683
3,14 Division,Toronto Police Services,43.651297,-79.425978
4,22 Division,Toronto Police Services,43.643107,-79.529185
5,23 Division,Toronto Police Services,43.743866,-79.583522
6,31 Division,Toronto Police Services,43.75675,-79.527474
7,32 Division,Toronto Police Services,43.77172,-79.415084
8,33 Division,Toronto Police Services,43.751082,-79.350069
9,41 Division,Toronto Police Services,43.730806,-79.27711


In [319]:
def find_nearest_station(row):
    min_distance = float('inf')
    nearest_station = None

    for index, station in police_stations.iterrows():
        crime_point = (row['latitude'], row['longitude'])
        station_point = (station['LATITUDE'], station['LONGITUDE'])
        distance = geodesic(crime_point, station_point).kilometers

        if distance < min_distance:
            min_distance = distance
            nearest_station = station
    
    return nearest_station['FACILITY'], min_distance, nearest_station['LATITUDE'], nearest_station['LONGITUDE']

predicted_crime_points[['nearest_station', 'distance', 'PS_latitude', 'PS_longitude']] = predicted_crime_points.apply(
    lambda row: pd.Series(find_nearest_station(row)), axis=1
)


In [320]:
predicted_crime_points['response time'] = predicted_crime_points['distance']/100

In [321]:
predicted_crime_points['response time'] = predicted_crime_points['response time']*60

In [322]:
predicted_crime_points

Unnamed: 0,latitude,longitude,nearest_station,distance,PS_latitude,PS_longitude,response time
0,43.674536,-79.397339,52 Division,2.340555,43.654209,-79.389718,1.404333
1,43.751257,-79.440248,32 Division,3.045540,43.771720,-79.415084,1.827324
2,43.701089,-79.553772,23 Division,5.323191,43.743866,-79.583522,3.193914
3,43.751257,-79.509094,31 Division,1.601149,43.756750,-79.527474,0.960690
4,43.697111,-79.509094,12 Division,1.813110,43.694579,-79.486876,1.087866
...,...,...,...,...,...,...,...
1708,43.730875,-79.178968,43 Division,4.456543,43.770827,-79.174053,2.673926
1709,43.715369,-79.399501,53 Division,1.038501,43.706060,-79.400662,0.623101
1710,43.652464,-79.440248,14 Division,1.158532,43.651297,-79.425978,0.695119
1711,43.687752,-79.477667,12 Division,1.061426,43.694579,-79.486876,0.636856


In [323]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger
0,Agincourt North,43.803298,-79.263350,low
1,Agincourt South-Malvern West,43.785955,-79.258910,high
2,Alderwood,43.598280,-79.582069,high
3,Annex,43.670023,-79.398264,high
4,Avondale,43.755939,-79.402948,high
...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium
150,Yonge-Eglinton,43.698671,-79.405105,high
151,Yonge-St.Clair,43.691201,-79.396334,high
152,York University Heights,43.761609,-79.489876,low


In [324]:

centroid_data_predicted[['nearest_station', 'distance', 'PS_latitude', 'PS_longitude']] = centroid_data_predicted.apply(
    lambda row: pd.Series(find_nearest_station(row)), axis=1
)


In [325]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger,nearest_station,distance,PS_latitude,PS_longitude
0,Agincourt North,43.803298,-79.263350,low,42 Division,2.433326,43.789374,-79.240012
1,Agincourt South-Malvern West,43.785955,-79.258910,high,42 Division,1.567864,43.789374,-79.240012
2,Alderwood,43.598280,-79.582069,high,22 Division,6.559446,43.643107,-79.529185
3,Annex,43.670023,-79.398264,high,52 Division,1.887467,43.654209,-79.389718
4,Avondale,43.755939,-79.402948,high,32 Division,2.007235,43.771720,-79.415084
...,...,...,...,...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium,32 Division,0.446252,43.771720,-79.415084
150,Yonge-Eglinton,43.698671,-79.405105,high,53 Division,0.895702,43.706060,-79.400662
151,Yonge-St.Clair,43.691201,-79.396334,high,53 Division,1.687425,43.706060,-79.400662
152,York University Heights,43.761609,-79.489876,low,31 Division,3.075548,43.756750,-79.527474


In [326]:
predicted_crime_points.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1713 entries, 0 to 1712
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   latitude         1713 non-null   float64
 1   longitude        1713 non-null   float64
 2   nearest_station  1713 non-null   object 
 3   distance         1713 non-null   float64
 4   PS_latitude      1713 non-null   float64
 5   PS_longitude     1713 non-null   float64
 6   response time    1713 non-null   float64
dtypes: float64(6), object(1)
memory usage: 93.8+ KB


In [327]:
predicted_crime_points

Unnamed: 0,latitude,longitude,nearest_station,distance,PS_latitude,PS_longitude,response time
0,43.674536,-79.397339,52 Division,2.340555,43.654209,-79.389718,1.404333
1,43.751257,-79.440248,32 Division,3.045540,43.771720,-79.415084,1.827324
2,43.701089,-79.553772,23 Division,5.323191,43.743866,-79.583522,3.193914
3,43.751257,-79.509094,31 Division,1.601149,43.756750,-79.527474,0.960690
4,43.697111,-79.509094,12 Division,1.813110,43.694579,-79.486876,1.087866
...,...,...,...,...,...,...,...
1708,43.730875,-79.178968,43 Division,4.456543,43.770827,-79.174053,2.673926
1709,43.715369,-79.399501,53 Division,1.038501,43.706060,-79.400662,0.623101
1710,43.652464,-79.440248,14 Division,1.158532,43.651297,-79.425978,0.695119
1711,43.687752,-79.477667,12 Division,1.061426,43.694579,-79.486876,0.636856


In [328]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger,nearest_station,distance,PS_latitude,PS_longitude
0,Agincourt North,43.803298,-79.263350,low,42 Division,2.433326,43.789374,-79.240012
1,Agincourt South-Malvern West,43.785955,-79.258910,high,42 Division,1.567864,43.789374,-79.240012
2,Alderwood,43.598280,-79.582069,high,22 Division,6.559446,43.643107,-79.529185
3,Annex,43.670023,-79.398264,high,52 Division,1.887467,43.654209,-79.389718
4,Avondale,43.755939,-79.402948,high,32 Division,2.007235,43.771720,-79.415084
...,...,...,...,...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium,32 Division,0.446252,43.771720,-79.415084
150,Yonge-Eglinton,43.698671,-79.405105,high,53 Division,0.895702,43.706060,-79.400662
151,Yonge-St.Clair,43.691201,-79.396334,high,53 Division,1.687425,43.706060,-79.400662
152,York University Heights,43.761609,-79.489876,low,31 Division,3.075548,43.756750,-79.527474


In [329]:
predicted_crime_points

Unnamed: 0,latitude,longitude,nearest_station,distance,PS_latitude,PS_longitude,response time
0,43.674536,-79.397339,52 Division,2.340555,43.654209,-79.389718,1.404333
1,43.751257,-79.440248,32 Division,3.045540,43.771720,-79.415084,1.827324
2,43.701089,-79.553772,23 Division,5.323191,43.743866,-79.583522,3.193914
3,43.751257,-79.509094,31 Division,1.601149,43.756750,-79.527474,0.960690
4,43.697111,-79.509094,12 Division,1.813110,43.694579,-79.486876,1.087866
...,...,...,...,...,...,...,...
1708,43.730875,-79.178968,43 Division,4.456543,43.770827,-79.174053,2.673926
1709,43.715369,-79.399501,53 Division,1.038501,43.706060,-79.400662,0.623101
1710,43.652464,-79.440248,14 Division,1.158532,43.651297,-79.425978,0.695119
1711,43.687752,-79.477667,12 Division,1.061426,43.694579,-79.486876,0.636856


In [330]:
import requests
import json

def get_driving_distance(row):
    lat1, lon1 = row['PS_latitude'], row['PS_longitude']
    lat2, lon2 = row['latitude'], row['longitude']

    origin = f"{lat1},{lon1}"
    destination = f"{lat2},{lon2}"

    # Your API key goes here (replace 'YOUR_API_KEY' with your actual key)
    # You can obtain a key from the Google Cloud Console (https://cloud.google.com/console)
    api_key = 'AIzaSyC_EKpVyr5EIWjGfHz7hp17-OLsq8DfKR4'
    
    base_url = "https://maps.googleapis.com/maps/api/distancematrix/json?units=metric"

    request_url = f"{base_url}&origins={origin}&destinations={destination}&key={api_key}"

    response = requests.get(request_url)
    data = json.loads(response.text)

    if not data['rows'][0]['elements'][0]['status'] == 'OK':
        return None
    else:
        distance = data['rows'][0]['elements'][0]['distance']['value']  # The numeric distance in meters
        return distance / 1000  # Convert meters to kilometers

# Apply the function to each row in the dataframe
centroid_data_predicted['distance'] = centroid_data_predicted.apply(get_driving_distance, axis=1)


In [331]:
columns_to_subset = ['latitude', 'longitude']
result = centroid_data_predicted[columns_to_subset]

In [332]:
cdp = centroid_data_predicted.groupby('nearest_station')[columns_to_subset].mean().reset_index()

In [333]:
cdp

Unnamed: 0,nearest_station,latitude,longitude
0,11 Division,43.661718,-79.463606
1,12 Division,43.69238,-79.504675
2,13 Division,43.700962,-79.44025
3,14 Division,43.646283,-79.425515
4,22 Division,40.725617,-79.534312
5,23 Division,43.736576,-79.946942
6,31 Division,43.746279,-79.513077
7,32 Division,43.77769,-79.421278
8,33 Division,43.7676,-79.344694
9,41 Division,43.736641,-79.258455


In [334]:
centroid_data_predicted

Unnamed: 0,neighborhood,latitude,longitude,danger,nearest_station,distance,PS_latitude,PS_longitude
0,Agincourt North,43.803298,-79.263350,low,42 Division,3.983,43.789374,-79.240012
1,Agincourt South-Malvern West,43.785955,-79.258910,high,42 Division,2.293,43.789374,-79.240012
2,Alderwood,43.598280,-79.582069,high,22 Division,8.240,43.643107,-79.529185
3,Annex,43.670023,-79.398264,high,52 Division,2.430,43.654209,-79.389718
4,Avondale,43.755939,-79.402948,high,32 Division,2.985,43.771720,-79.415084
...,...,...,...,...,...,...,...,...
149,Yonge-Doris,43.769810,-79.419960,medium,32 Division,0.961,43.771720,-79.415084
150,Yonge-Eglinton,43.698671,-79.405105,high,53 Division,1.262,43.706060,-79.400662
151,Yonge-St.Clair,43.691201,-79.396334,high,53 Division,2.157,43.706060,-79.400662
152,York University Heights,43.761609,-79.489876,low,31 Division,3.419,43.756750,-79.527474


In [335]:
cdp = pd.DataFrame()

In [336]:
cdp['nearest_station']=centroid_data_predicted['nearest_station']

In [337]:
cdp['danger'] = centroid_data_predicted['danger']

In [341]:
def assign_danger_level_division(crime_count):
    if crime_count > 100:
        return 'high'
    elif crime_count > 50:
        return 'medium'
    else:
        return 'low'


In [342]:
crime_counts = predicted_crime_points['nearest_station'].value_counts()
cdp = {nearest_station: assign_danger_level_division(latitude) for nearest_station, latitude in crime_counts.items()}

In [343]:
cdp

{'31 Division': 'high',
 '12 Division ': 'high',
 '22 Division': 'high',
 '23 Division': 'high',
 '32 Division': 'high',
 '41 Division': 'high',
 '33 Division': 'high',
 '55 Division': 'high',
 '53 Division': 'medium',
 '42 Division': 'medium',
 '13 Division': 'medium',
 '54 Division': 'medium',
 '43 Division': 'low',
 '14 Division': 'low',
 '51 Division': 'low',
 '52 Division': 'low',
 '11 Division': 'low'}

In [344]:
cdp_division = predicted_crime_points.groupby('nearest_station')[columns_to_subset].mean().reset_index()

In [345]:
cdp_division

Unnamed: 0,nearest_station,latitude,longitude
0,11 Division,43.66259,-79.468442
1,12 Division,43.699495,-79.50676
2,13 Division,43.710249,-79.443218
3,14 Division,43.641864,-79.43765
4,22 Division,39.286694,-79.53082
5,23 Division,43.731646,-80.207582
6,31 Division,43.763556,-79.516319
7,32 Division,43.774743,-79.424715
8,33 Division,43.754792,-79.346517
9,41 Division,43.725143,-79.249041


In [346]:
cdp_division['danger'] = cdp_division['nearest_station'].map(cdp)

In [347]:
cdp_division

Unnamed: 0,nearest_station,latitude,longitude,danger
0,11 Division,43.66259,-79.468442,low
1,12 Division,43.699495,-79.50676,high
2,13 Division,43.710249,-79.443218,medium
3,14 Division,43.641864,-79.43765,low
4,22 Division,39.286694,-79.53082,high
5,23 Division,43.731646,-80.207582,high
6,31 Division,43.763556,-79.516319,high
7,32 Division,43.774743,-79.424715,high
8,33 Division,43.754792,-79.346517,high
9,41 Division,43.725143,-79.249041,high


In [348]:

cdp_division[['nearest_station', 'distance', 'PS_latitude', 'PS_longitude']] = cdp_division.apply(
    lambda row: pd.Series(find_nearest_station(row)), axis=1
)

In [349]:
cdp_division

Unnamed: 0,nearest_station,latitude,longitude,danger,distance,PS_latitude,PS_longitude
0,11 Division,43.66259,-79.468442,low,1.124815,43.671071,-79.460825
1,12 Division,43.699495,-79.50676,high,1.693455,43.694579,-79.486876
2,13 Division,43.710249,-79.443218,medium,1.425498,43.698327,-79.436683
3,14 Division,43.641864,-79.43765,low,1.409001,43.651297,-79.425978
4,22 Division,39.286694,-79.53082,high,483.8362,43.643107,-79.529185
5,23 Division,43.731646,-80.207582,high,50.291688,43.743866,-79.583522
6,31 Division,43.763556,-79.516319,high,1.174178,43.75675,-79.527474
7,32 Division,43.774743,-79.424715,high,0.845091,43.77172,-79.415084
8,33 Division,43.754792,-79.346517,high,0.501733,43.751082,-79.350069
9,41 Division,43.725143,-79.249041,high,2.347513,43.730806,-79.27711


In [350]:
cdp_division['distance'] = cdp_division.apply(get_driving_distance, axis=1)

In [351]:
cdp_division

Unnamed: 0,nearest_station,latitude,longitude,danger,distance,PS_latitude,PS_longitude
0,11 Division,43.66259,-79.468442,low,1.637,43.671071,-79.460825
1,12 Division,43.699495,-79.50676,high,2.061,43.694579,-79.486876
2,13 Division,43.710249,-79.443218,medium,3.232,43.698327,-79.436683
3,14 Division,43.641864,-79.43765,low,2.735,43.651297,-79.425978
4,22 Division,39.286694,-79.53082,high,700.446,43.643107,-79.529185
5,23 Division,43.731646,-80.207582,high,75.601,43.743866,-79.583522
6,31 Division,43.763556,-79.516319,high,1.809,43.75675,-79.527474
7,32 Division,43.774743,-79.424715,high,1.452,43.77172,-79.415084
8,33 Division,43.754792,-79.346517,high,0.719,43.751082,-79.350069
9,41 Division,43.725143,-79.249041,high,4.06,43.730806,-79.27711


In [352]:
cdp_division['response time'] = cdp_division['distance']/70

In [353]:
cdp_division['response time'] = cdp_division['response time'] + 5

In [354]:
cdp_division

Unnamed: 0,nearest_station,latitude,longitude,danger,distance,PS_latitude,PS_longitude,response time
0,11 Division,43.66259,-79.468442,low,1.637,43.671071,-79.460825,5.023386
1,12 Division,43.699495,-79.50676,high,2.061,43.694579,-79.486876,5.029443
2,13 Division,43.710249,-79.443218,medium,3.232,43.698327,-79.436683,5.046171
3,14 Division,43.641864,-79.43765,low,2.735,43.651297,-79.425978,5.039071
4,22 Division,39.286694,-79.53082,high,700.446,43.643107,-79.529185,15.006371
5,23 Division,43.731646,-80.207582,high,75.601,43.743866,-79.583522,6.080014
6,31 Division,43.763556,-79.516319,high,1.809,43.75675,-79.527474,5.025843
7,32 Division,43.774743,-79.424715,high,1.452,43.77172,-79.415084,5.020743
8,33 Division,43.754792,-79.346517,high,0.719,43.751082,-79.350069,5.010271
9,41 Division,43.725143,-79.249041,high,4.06,43.730806,-79.27711,5.058


In [355]:
cdp_division.rename(columns={'nearest_station': 'division'}, inplace=True)

In [356]:
# If you want to display specific columns, you can select those
df_subset = cdp_division[['division', 'danger', 'response time']]

# Convert the DataFrame to HTML
html = df_subset.to_html()

with open('output.html', 'w') as f:
    f.write(html)

In [None]:
cdp_division.to_csv('new_response_times_per_divsion.csv')

In [None]:
centroid_danger = {neighborhood: assign_danger_level(crime_count) for neighborhood, crime_count in crime_counts.items()}
