In [3]:
import json
import requests
import pickle
import datetime
import pandas as pd
import numpy as np
import gmaps
import os

# Weather API Key
# api_key uniquely belongs to Sean
api_key_loc = './../api/darksky_api.txt'

f = open(api_key_loc, "r")
key = f.readline()
f.close()

# api_key uniquely belongs to Sean
map_api_key_loc = './../api/googlemap_api.txt'

f = open(map_api_key_loc, "r")
map_key = f.readline()
f.close()

In [4]:
def get_weather_info(lat, lng, year, month, day, hr):
    dt = datetime.date(year, month, day)
    str_hr = ""
    if(hr < 10):
        str_hr = "0"+str(hr)
    else:
        str_hr = str(hr)
    tm = dt.strftime("%Y-%m-%d") + "T" + str_hr + ":00:01"  # get the exact hour's forecast

    url = "https://api.darksky.net/forecast/" + key + \
    "/" + str(lat) + "," + str(lng) + "," + tm + \
    "?exclude=minutely,flags,daily,alerts,hourly"
    
    response = requests.get(url)
    data = response.json()
    return data

In [5]:
def calculate_distance(src_lat, src_lng, cluster):
    cluster['Distance'] = cluster.apply(lambda row: \
                                        np.sqrt(np.square(row.Latitude - src_lat) + np.square(row.Longitude - src_lng)),\
                                        axis=1)
    
    return cluster

In [8]:
classifier_path = './../models/'
filename = 'rf_classifier.sav'
rf_model = pickle.load(open(os.path.join(classifier_path, filename), 'rb'))

In [9]:
rf_model

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=6,
                       oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [10]:
# time retrieval
curr_time = datetime.datetime.now()
curr_time

datetime.datetime(2019, 11, 17, 17, 9, 22, 267259)

In [11]:
curr_time.weekday()

6

In [12]:
# get weather data
# this location is a current building location
curr_lat = 37.336431
curr_lng = -121.883980

curr_weather = get_weather_info(curr_lat, curr_lng, curr_time.year, curr_time.month, curr_time.day, curr_time.hour)

In [13]:
curr_weather

{'latitude': 37.336431,
 'longitude': -121.88398,
 'timezone': 'America/Los_Angeles',
 'currently': {'time': 1574038801,
  'summary': 'Clear',
  'icon': 'clear-night',
  'precipIntensity': 0,
  'precipProbability': 0,
  'temperature': 67.17,
  'apparentTemperature': 67.17,
  'dewPoint': 43.97,
  'humidity': 0.43,
  'pressure': 1016.2,
  'windSpeed': 4.21,
  'windGust': 8.47,
  'windBearing': 338,
  'cloudCover': 0,
  'uvIndex': 0,
  'visibility': 9.973,
  'ozone': 245.2},
 'offset': -8}

In [14]:
features = ['Year', 'Month', 'Day', 'Day_of_Year', 'Weekday', 'Hour',\
            'Humidity(%)', 'Temperature(F)', 'Visibility(mi)', 'Wind_Speed(mph)']

_year = curr_time.year
_month = curr_time.month
_day = curr_time.day
_day_of_year = curr_time.timetuple().tm_yday
_weekday = curr_time.weekday()
_hour = curr_time.hour

_humidity = curr_weather['currently']['humidity']
_temperature = curr_weather['currently']['temperature']
_visibility = curr_weather['currently']['visibility']
_windspeed = curr_weather['currently']['windSpeed']

X = pd.DataFrame([[_year, _month, _day, _day_of_year, _weekday, _hour,\
                  _humidity, _temperature, _visibility, _windspeed]],\
                  columns = features)

In [15]:
gmaps.configure(api_key=map_key)

locations = pd.DataFrame([[curr_lat, curr_lng]], columns = ['Start_Lat', 'Start_Lng'])
locations = locations[['Start_Lat','Start_Lng']]

fig = gmaps.figure(map_type='SATELLITE', center=(curr_lat, curr_lng), zoom_level=18)

symbols = gmaps.symbol_layer(locations, fill_color='red', stroke_color='red', info_box_content=str(curr_weather))
fig.add_layer(symbols)
fig

Figure(layout=FigureLayout(height='420px'))

In [16]:
result = (rf_model.predict(X) == 1.0)
print('Will there be an accident? ', result[0])

Will there be an accident?  False


## Finding Nearest Accident Hotspot

In [19]:
hotspots = pd.read_csv('./../data/cluster_info.csv')

In [20]:
hotspots = calculate_distance(curr_lat, curr_lng, hotspots)
hotspots = hotspots.sort_values(by=['Distance'])
hotspots

Unnamed: 0,Cluster,Longitude,Latitude,Distance
102,102,-121.880839,37.327054,0.009889
10,10,-121.878099,37.327630,0.010585
137,137,-121.875633,37.327972,0.011884
116,116,-121.896916,37.332725,0.013456
32,32,-121.889776,37.324095,0.013629
...,...,...,...,...
122,122,-121.772612,37.243817,0.144846
64,64,-121.769187,37.241972,0.148661
61,61,-122.027229,37.295801,0.148900
41,41,-122.032101,37.300768,0.152354


In [21]:
gmaps.configure(api_key=map_key)

hotspot = hotspots.iloc[0:1][['Latitude', 'Longitude']]
curr_location = (curr_lat, curr_lng)
hotspot = (hotspot['Latitude'].values[0], hotspot['Longitude'].values[0])
print("closest hotspot = ",hotspot)

fig = gmaps.figure()
src_to_dest = gmaps.directions_layer(curr_location, hotspot)
fig.add_layer(src_to_dest)
fig

closest hotspot =  (37.327054132075475, -121.88083898113207)


Figure(layout=FigureLayout(height='420px'))

# Different Approach

In [23]:
hotspots = pd.read_csv('./../data/cluster_info.csv')
hotspots = calculate_distance(curr_lat, curr_lng, hotspots)
hotspots

Unnamed: 0,Cluster,Longitude,Latitude,Distance
0,0,-121.911475,37.250607,0.090120
1,1,-121.870144,37.328977,0.015716
2,2,-121.904376,37.382332,0.050228
3,3,-121.908851,37.401425,0.069590
4,4,-121.852372,37.339455,0.031752
...,...,...,...,...
133,133,-121.858940,37.262721,0.077847
134,134,-121.938527,37.294895,0.068561
135,135,-121.930717,37.374337,0.060177
136,136,-121.814369,37.301270,0.077987


In [24]:
activated_hotspots = pd.DataFrame(columns=hotspots.columns)
activated_hotspots

Unnamed: 0,Cluster,Longitude,Latitude,Distance


In [26]:
for i in range(0, hotspots.shape[0]):
    curr_hotspot = hotspots.iloc[i]
    
    filepath = './../models/classifier_by_cluster/'
    filename = 'rf_classf_cluster' + str(curr_hotspot['Cluster'].astype(int)) + '.sav'
    path = os.path.join(filepath, filename)
    rf_model = pickle.load(open(path, 'rb'))
    
    result = rf_model.predict(X)[0]
    if(result == True):
        act = pd.DataFrame([[curr_hotspot.Cluster.astype(int), curr_hotspot.Longitude,\
                            curr_hotspot.Latitude, curr_hotspot.Distance]],\
                            columns = hotspots.columns)
        activated_hotspots = activated_hotspots.append(act, ignore_index = True)

In [27]:
activated_hotspots

Unnamed: 0,Cluster,Longitude,Latitude,Distance


In [28]:
gmaps.configure(api_key=map_key)

activated_hotspots = activated_hotspots[['Latitude', 'Longitude']]
curr_location = (curr_lat, curr_lng)
print("Total Number of Activated Clusters = ", activated_hotspots.shape[0])

fig = gmaps.figure(map_type='SATELLITE', center=(curr_lat, curr_lng), zoom_level=18)
symbols = gmaps.symbol_layer(activated_hotspots, fill_color='red', stroke_color='red')
fig.add_layer(symbols)
fig

Total Number of Activated Clusters =  0


Figure(layout=FigureLayout(height='420px'))

In [29]:
activated_hotspots = calculate_distance(curr_lat, curr_lng, activated_hotspots)
activated_hotspots = activated_hotspots.sort_values(by=['Distance'])

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [44]:
gmaps.configure(api_key=map_key)

activated_hotspots = activated_hotspots.iloc[0:1][['Latitude', 'Longitude']]
curr_location = (curr_lat, curr_lng)
activated_hotspot = (activated_hotspots['Latitude'].values[0], activated_hotspots['Longitude'].values[0])

print("Closest Hotspot = ", activated_hotspot)

fig = gmaps.figure()
src_to_dest = gmaps.directions_layer(curr_location, activated_hotspot)
fig.add_layer(src_to_dest)
fig

Closest Hotspot =  (37.337220153846154, -121.8977756923077)


Figure(layout=FigureLayout(height='420px'))