# Create Prediction View for the best Location

This section attempts to predict where a suitable place might be to open a restaurant.<br>The existing crimes, restaurants and hot spot records are reused.

In [None]:
import pandas as pd
import numpy as np

# All the SciKit Learn Libraries Required
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score

!pip install geopy  # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import datetime
from random import randint
!pip install radar
import radar

# Use Folium to display the Maps for Visualisation
import folium
from folium import plugins
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from folium.plugins import HeatMap

from sklearn.neighbors import DistanceMetric

In [None]:
def distanceBetween2Point(row, radius_km=6367):
    """Return the great-circle (haversine) distance in kilometres for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'LATITUDE' / 'LONGITUDE' for the first point and
        'LOC_LATITUDE' / 'LOC_LONGITUDE' for the second point, all in
        decimal degrees. Array-like values are handled element-wise.
    radius_km : float, optional
        Sphere radius that scales the central angle. Defaults to 6367,
        the value this notebook has always used.

    Returns
    -------
    float
        Distance between the two points, in the unit of ``radius_km``.
    """
    lat1, lon1, lat2, lon2 = map(
        np.radians,
        [row['LATITUDE'], row['LONGITUDE'], row['LOC_LATITUDE'], row['LOC_LONGITUDE']],
    )

    # Evaluate the haversine formula directly instead of constructing a
    # metric object and a 2x2 pairwise matrix on every call.
    half_chord_sq = (
        np.sin((lat2 - lat1) / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2
    )
    # Floating-point rounding can push the term marginally outside [0, 1],
    # which would make sqrt/arcsin return NaN.
    half_chord_sq = np.clip(half_chord_sq, 0.0, 1.0)

    return radius_km * 2.0 * np.arcsin(np.sqrt(half_chord_sq))

## Import the DataFrames

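Reuse the saved dataframes.<br>
The analysis is intended to use only crime data from the year 2018; note that the corresponding filter in the cell below is currently commented out, so all years in the file are loaded.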

In [None]:
# Load the MCI crime export, parsing 'reportedda' as a datetime, and give the
# columns used later in this notebook more descriptive names.
# NOTE: the 2018-only filter below is disabled, so every year in the file is kept.
#dfMCI = dfMCI[dfMCI['reportedye'] == 2018]
dfMCI = (
    pd.read_csv('./data/Toronto_MCI.csv', parse_dates=['reportedda'])
    .rename(
        columns={
            'Lat': 'LATITUDE',
            'Long': 'LONGITUDE',
            'reportedmo': 'month',
            'reportedho': 'hour',
            'reported_3': 'week_day',
        }
    )
)

In [None]:
# Preview the first rows of the loaded crime frame (after the column rename).
dfMCI.head()

In [None]:
# Work on an independent copy of the crime data and expose the neighbourhood
# column under the name NAME. (A restriction to Division 54 is left disabled.)
#dfDinaLoc = dfMCI[dfMCI['Division']==54]
dfDinaLoc = dfMCI.copy()
dfDinaLoc = dfDinaLoc.rename(columns={'Neighbourh': 'NAME'})

# Candidate locations to predict for: one row per distinct
# (Division, NAME, LATITUDE, LONGITUDE) combination.
# The default keep='first' retains one representative of each duplicated
# location; keep=False would instead discard every location that occurs more
# than once, leaving only places with exactly one recorded crime.
df_predict = dfDinaLoc[['Division', 'NAME', 'LATITUDE', 'LONGITUDE']].copy()
df_predict.drop_duplicates(inplace=True)

In [None]:
# Compare the (rows, columns) sizes of the working copy, the de-duplicated
# prediction frame and the original crime frame.
dfDinaLoc.shape, df_predict.shape, dfMCI.shape

## Create One-hot encoding DataFrames for MCI

Create a one-hot DataFrame containing the division, latitude and longitude plus indicator columns for hour, weekday and month.

In [None]:
# Start from division and coordinates, then append (index-aligned) indicator
# columns for the hour (prefixed 'h'), the weekday and the month, in that order.
df_onhotCrime_dmy = (
    dfMCI[['Division', 'LATITUDE', 'LONGITUDE']]
    .join(pd.get_dummies(dfMCI['hour'], prefix='h'))
    .join(pd.get_dummies(dfMCI['week_day']))
    .join(pd.get_dummies(dfMCI['month']))
)

# Independent snapshot of the feature columns, used further down as the
# training matrix (taken before any label column is added).
df_Train = df_onhotCrime_dmy.copy()

In [None]:
# Preview the one-hot encoded crime features.
df_onhotCrime_dmy.head()

## Generating crime data to calculate a prediction

In this section we generate artificial crime data. Each record is randomly labelled as either a crime (`0`) or no crime (`1`).<br>
The newly created artificial records will be assigned to the restaurants and hot spots. The next step is to assign a random date to each of them.

At the end a prediction should be made whether a crime will be committed in the desired place or not.

In [None]:
# Attach a synthetic binary label to every crime record, drawn at random from {0, 1}.
df_onhotCrime_dmy['art_crime'] = np.random.randint(low=0, high=2, size=len(df_onhotCrime_dmy))

In [None]:
# Preview the one-hot frame again, now including the 'art_crime' label column.
df_onhotCrime_dmy.head()

### Use the copied one-hot dataset as the training dataset

In [None]:
# Target vector: the artificially generated crime label, as floats.
y_Train = df_onhotCrime_dmy['art_crime'].values.astype(float)

# Standardise the feature snapshot. `prep` is the scaler, `fit` the fitted
# scaler returned by fit(); df_Train is replaced by the transformed matrix.
prep = preprocessing.StandardScaler()
fit = prep.fit(df_Train.astype(float))
df_Train = fit.transform(df_Train.astype(float))

### Fit a Random Forest Model

In [None]:
# Random forest with 22 trees, each split considering sqrt(n_features) candidates,
# trained on the standardised features and the artificial labels.
FM = RandomForestClassifier(
    n_estimators=22,
    max_features='sqrt',
)
ForestModel = FM.fit(X=df_Train, y=y_Train)

### Add to the Restaurant DataFrame a random visit (Year 2018)

In [None]:
# Draw one random timestamp within 2018 for every row of df_predict.
lstOfRndlstOfRndDates = [
    radar.random_datetime(start='2018-01-01T00:00:00', stop='2018-12-31T23:59:59')
    for _ in range(df_predict.shape[0])
]

# The list holds the values returned by radar, not formatted strings, so no
# strptime format is passed: the previous '%m-%d-%Y %H:%M:%S' pattern did not
# describe these values and could make the conversion fail.
df_predict['next_crime_date'] = pd.to_datetime(lstOfRndlstOfRndDates)

In [None]:
# Preview the prediction frame with the new 'next_crime_date' column.
df_predict.head()

In [None]:
# Derive the calendar fields from 'next_crime_date' so that df_predict carries
# the same hour / week_day / month columns as the renamed MCI frame.
df_predict = df_predict.assign(
    hour=lambda frame: frame['next_crime_date'].dt.hour,
    week_day=lambda frame: frame['next_crime_date'].dt.day_name(),
    # dayofweek is 0-based (Monday = 0); shift it to start at 1.
    reported_1=lambda frame: frame['next_crime_date'].dt.dayofweek + 1,
    month=lambda frame: frame['next_crime_date'].dt.month_name(),
    reportedye=lambda frame: frame['next_crime_date'].dt.year,
)

In [None]:
# Renumber the rows 0..n-1 and discard the old index rather than keeping it
# as a column, so later index-based joins line up.
df_predict.reset_index(drop=True, inplace=True)
df_predict.head()

## Create One-hot encoding DataFrames for Restaurants

Create a one-hot DataFrame containing the division, latitude and longitude plus indicator columns for hour, weekday and month.

In [None]:
# Build the prediction features the same way as the crime features: division
# and coordinates followed by indicator columns for hour (prefixed 'h'),
# weekday and month, joined on the row index.
df_onhotDinas_dmy = (
    df_predict[['Division', 'LATITUDE', 'LONGITUDE']]
    .join(pd.get_dummies(df_predict['hour'], prefix='h'))
    .join(pd.get_dummies(df_predict['week_day']))
    .join(pd.get_dummies(df_predict['month']))
)

In [None]:
# Preview the one-hot encoded prediction features.
df_onhotDinas_dmy.head()

# Calculate the prediction for a crime to a location

A prediction value of `0` means a crime is possible at this location; `1` means the location is considered safe.

In [None]:
# The forest was trained on features standardised by the fitted scaler `fit`,
# so the prediction features must go through the same transformation first.
# Feeding the raw division numbers and coordinates would place every value
# outside the ranges the trees were split on.
# Columns are matched to the training matrix by position.
yhat = ForestModel.predict(fit.transform(df_onhotDinas_dmy.astype(float).values))

In [None]:
# Show the predicted labels. Uncomment the two lines below to lift numpy's
# print threshold and display the full array instead of a truncated one.
#import sys
#np.set_printoptions(threshold=sys.maxsize)
yhat

## Predictions Analysis

In total, 42 locations were predicted to be potentially safe to visit (19%) and 180 were classified as potentially susceptible to crime (81%).

In [None]:
# Add the predictions back to the dataframe, one value per row of df_predict.
df_predict['prediction'] = yhat.tolist()

In [None]:
# (rows, columns) of the prediction frame, now including the 'prediction' column.
df_predict.shape

In [None]:
# Count the non-null entries of every column per predicted class.
df_predict.groupby('prediction').count()

## Visualisation of Predictions

2963 places were identified as potentially dangerous locations for opening a restaurant.
To present the predictions, we visualize the data again.

We will look at the following 4 places:
1. 1000 Pape Ave, Toronto, East York, Ontario, Kanada
1. 100 hilton Avenue, Toronto, Ontario, Kanada
1. 75 Spencer Avenue, Toronto, Ontario, Kanada
1. 34 coulter Avenue, Toronto, Ontario, Kanada

The Distance Dataframe is recreated again but this time all crimes are included.

In [None]:
# Geocoder used to turn the candidate addresses into coordinates.
geolocator = Nominatim(user_agent="Jupyter")

# The four candidate addresses to evaluate.
new_Dina_places = pd.DataFrame(
    {
        'Name': [
            '1000 Pape Ave, Toronto, East York, Ontario, Kanada',
            '100 hilton Avenue, Toronto, Ontario, Kanada',
            '75 Spencer Avenue, Toronto, Ontario, Kanada',
            '34 coulter Avenue, Toronto, Ontario, Kanada',
        ],
    }
)

# Look up every address, then keep the resolved point as a plain tuple.
# A falsy lookup result is stored as None in 'Point'.
new_Dina_places['Location'] = new_Dina_places['Name'].map(geolocator.geocode)
new_Dina_places['Point'] = new_Dina_places['Location'].map(
    lambda place: None if not place else tuple(place.point)
)

new_Dina_places.head()

In [None]:
# One column per candidate address, holding the distance (km) from every
# crime record in dfMCI to that address.
df_dist = pd.DataFrame()

for name, point in zip(new_Dina_places.Name, new_Dina_places.Point):
    # 'Point' is None when the address could not be geocoded; indexing it
    # would raise a TypeError, so such addresses are skipped.
    if point is None:
        print('Skip (no coordinates found): ', name)
        continue

    print('Evaluate: ', name)
    df_temp = dfMCI.copy()
    # Broadcast the candidate's latitude / longitude onto every crime row so
    # the row-wise distance helper can read both points from the same row.
    df_temp['LOC_LATITUDE'] = point[0]
    df_temp['LOC_LONGITUDE'] = point[1]
    df_dist[name] = df_temp.apply(distanceBetween2Point, axis=1)

In [None]:
# Preview the per-address distance columns.
df_dist.head()

### 1000 Pape Ave, Toronto, East York

In [None]:
# Name and the first two point components (used below as latitude, longitude)
# of the first candidate address.
new_Dina_places.Name[0], new_Dina_places.Point[0][0], new_Dina_places.Point[0][1]

In [None]:
# NOTE(review): dfMCI_venue is not created anywhere in the visible part of this
# notebook — presumably it comes from an earlier notebook or kernel session;
# confirm, otherwise this cell raises NameError on a fresh run.
dfMCI_venue.head()

In [None]:
# Preview the 'dist' column of dfMCI_venue.
# NOTE(review): dfMCI_venue and its 'dist' column are not defined in the
# visible part of this notebook — verify where they come from.
dfMCI_venue['dist'].head()

In [None]:
# Details of the first candidate address: its name and the first two
# components of its geocoded point (latitude, longitude).
new_place_name = new_Dina_places.loc[0, 'Name']
new_place_latitude, new_place_longitude = new_Dina_places.loc[0, 'Point'][:2]

# Base map centred on the candidate address.
heatmap = folium.Map(
    location=[new_place_latitude, new_place_longitude],
    zoom_start=16,
)

# One [latitude, longitude] pair per crime record.
# NOTE(review): dfMCI_venue is not defined in the visible part of this notebook — confirm its origin.
heat_data = [
    [latitude, longitude]
    for latitude, longitude in zip(dfMCI_venue['LATITUDE'], dfMCI_venue['LONGITUDE'])
]

# Overlay the crime density on the map.
crime_layer = HeatMap(
    heat_data,
    min_opacity=0.5,
    max_zoom=18,
    max_val=1.0,
    radius=20,
    blur=30,
    gradient=None,
    overlay=True,
)
crime_layer.add_to(heatmap)

# Mark the candidate address itself; the popup shows its name.
venue_marker = folium.Marker(
    location=[new_place_latitude, new_place_longitude],
    popup=new_place_name,
    icon=folium.Icon(color='blue', icon='info-sign'),
)
venue_marker.add_to(heatmap)

# Persist the rendered map next to the data files.
heatmap.save('./data/heatNewPlace1_map.html')

# Display the map
heatmap