In [18]:
import datetime as datetime
import os
import warnings

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import requests

import ds100_utils

%matplotlib inline
warnings.filterwarnings('ignore')

### Getting the Data
We first pulled all the crime data from the City of Berkeley public dataset.


In [19]:
# Download (or reuse the cached copy of) the Berkeley PD calls-for-service CSV.
calls_url = 'https://data.cityofberkeley.info/api/views/k2nh-s5h5/rows.csv?accessType=DOWNLOAD'
calls_file = ds100_utils.fetch_and_cache(calls_url, 'calls.csv')
# `warn_bad_lines=True` is intentionally not passed: it only had an effect together
# with `error_bad_lines=False`, it was the default anyway, and the keyword was
# removed in pandas 2.0 (passing it there raises a TypeError).
calls = pd.read_csv(calls_file)
calls.head()

Using cached version that was downloaded (UTC): Sun Oct 27 00:04:32 2019


Unnamed: 0,CASENO,OFFENSE,EVENTDT,EVENTTM,CVLEGEND,CVDOW,InDbDate,Block_Location,BLKADDR,City,State
0,19045459,DOMESTIC VIOLENCE,08/16/2019 12:00:00 AM,14:40,FAMILY OFFENSE,5,10/26/2019 07:01:07 AM,"FRONTAGE &UNIVERSITY AVE\nBerkeley, CA",FRONTAGE &UNIVERSITY AVE,Berkeley,CA
1,19091565,BURGLARY AUTO,08/03/2019 12:00:00 AM,21:45,BURGLARY - VEHICLE,6,10/26/2019 07:01:07 AM,"CENTER ST\nBerkeley, CA",CENTER ST,Berkeley,CA
2,19059951,DISTURBANCE,10/22/2019 12:00:00 AM,12:06,DISORDERLY CONDUCT,2,10/26/2019 07:01:08 AM,"2950 DURANT ST\nBerkeley, CA",2950 DURANT ST,Berkeley,CA
3,19091397,IDENTITY THEFT,06/20/2019 12:00:00 AM,12:45,FRAUD,4,10/26/2019 07:01:06 AM,"PO BOX 5884\nBerkeley, CA",PO BOX 5884,Berkeley,CA
4,19026262,ROBBERY,05/13/2019 12:00:00 AM,18:44,ROBBERY,1,10/26/2019 07:01:09 AM,"CALIFORNIA STREET & 62ND ST\nBerkeley, CA",CALIFORNIA STREET & 62ND ST,Berkeley,CA


How many records did we get?

In [20]:
# Number of rows (one per reported call) in the loaded `calls` frame.
len(calls)

4979

### Preliminary observations on the data?

1. `EVENTDT` -- Contains the incorrect time
1. `EVENTTM` -- Contains the time in 24 hour format (What timezone?)
1. `CVDOW` -- Encodes the day of the week (see data documentation).
1. `InDbDate` -- Appears to be correctly formatted and appears pretty consistent in time.
1. `Block_Location` -- a multi-line string that contains coordinates.
1. `BLKADDR` -- Appears to be the address in `Block_Location`.
1. `City` and `State` seem redundant given this is supposed to be the city of Berkeley dataset.

### Making the polygon

To create a constraint with the rideOS API, we need to define a region. For each crime event, we create a small box around the location where the crime occurred. The resulting dataframe stores all four corners of that box; the first corner is repeated (as `lat++2`/`lon++2`) so that the polygon is closed.

In [21]:
# Half-width, in degrees, of the square drawn around each incident.
val = 0.0001

# Pull the "(lat, lon)" pair out of the free-text Block_Location field.
# Raw string: the pattern is unchanged, but "\(" and "\d" are no longer
# (invalid) Python string escapes.
calls_lat_lon = calls['Block_Location'].str.extract(r"\((\d+\.\d+)\, (-\d+\.\d+)\)")
calls_lat_lon.columns = ['Lat', 'Lon']
calls['Lat'] = calls_lat_lon['Lat']
calls['Lon'] = calls_lat_lon['Lon']

# Keep only rows with no missing values (this also removes rows whose location
# had no coordinates). `.copy()` makes the result an independent frame so the
# column assignments below cannot hit pandas' SettingWithCopy behaviour.
calls_dropped = calls.dropna().copy()

# Parse and round the centre point once instead of once per corner column.
lat_center = calls_dropped['Lat'].astype(float).round(4)
lon_center = calls_dropped['Lon'].astype(float).round(4)

# (column suffix, latitude sign, longitude sign) for each corner of the box.
# The '++2' entry repeats the first corner so the ring of corners is closed.
corner_offsets = [
    ('++', 1, 1),
    ('+-', 1, -1),
    ('--', -1, -1),
    ('-+', -1, 1),
    ('++2', 1, 1),
]
for suffix, lat_sign, lon_sign in corner_offsets:
    calls_dropped['lat' + suffix] = lat_center + lat_sign * val
    calls_dropped['lon' + suffix] = lon_center + lon_sign * val

# Drop everything that is not needed to describe the box.
calls_final = calls_dropped.drop(columns=['CASENO', 'EVENTTM', 'CVLEGEND', 'CVDOW'])
calls_square = (
    calls_final
    .drop(columns=['InDbDate', 'Block_Location', 'BLKADDR', 'City', 'State', 'Lat', 'Lon'])
    .reset_index(drop=True)
)

In [22]:
# Preview the first rows of the per-incident corner coordinates.
calls_square.head()

Unnamed: 0,OFFENSE,EVENTDT,lat++,lon++,lat+-,lon+-,lat--,lon--,lat-+,lon-+,lat++2,lon++2
0,ROBBERY,05/30/2019 12:00:00 AM,37.871,-122.2659,37.871,-122.2661,37.8708,-122.2661,37.8708,-122.2659,37.871,-122.2659
1,ASSAULT/BATTERY MISD.,09/22/2019 12:00:00 AM,37.883,-122.2793,37.883,-122.2795,37.8828,-122.2795,37.8828,-122.2793,37.883,-122.2793
2,VEHICLE STOLEN,09/08/2019 12:00:00 AM,37.8597,-122.2972,37.8597,-122.2974,37.8595,-122.2974,37.8595,-122.2972,37.8597,-122.2972
3,THEFT FELONY (OVER $950),08/13/2019 12:00:00 AM,37.8914,-122.2799,37.8914,-122.2801,37.8912,-122.2801,37.8912,-122.2799,37.8914,-122.2799
4,DISTURBANCE,08/01/2019 12:00:00 AM,37.8738,-122.2685,37.8738,-122.2687,37.8736,-122.2687,37.8736,-122.2685,37.8738,-122.2685


### Filtering the data

In order to not overpopulate the map with crime events, we only took the data from the first three days of July. There were 61 crimes in the span of these three days.

In [37]:
# Keep only the rows of calls_square whose event date falls inside the demo window.
window_start = pd.Timestamp(year=2019, month=7, day=1)
window_end = pd.Timestamp(year=2019, month=7, day=3)

# Parse EVENTDT into Timestamps (stored back on calls_square).
calls_square['EVENTDT'] = calls_square['EVENTDT'].apply(pd.Timestamp)
event_dates = calls_square['EVENTDT']

# NOTE(review): both bounds are exclusive, so events stamped exactly at
# window_start or at/after window_end are left out -- confirm this matches the
# intended "first three days of July".
in_window = (event_dates > window_start) & (event_dates < window_end)
calls_square_date_filtered = (
    calls_square[in_window]
    .reset_index()
    .drop("index", axis=1)
)
calls_square_date_filtered.head(5)

Unnamed: 0,OFFENSE,EVENTDT,lat++,lon++,lat+-,lon+-,lat--,lon--,lat-+,lon-+,lat++2,lon++2
0,MISSING JUVENILE,2019-07-01 12:00:00,37.8644,-122.2917,37.8644,-122.2919,37.8642,-122.2919,37.8642,-122.2917,37.8644,-122.2917
1,THEFT FELONY (OVER $950),2019-07-01 12:00:00,37.87,-122.3005,37.87,-122.3007,37.8698,-122.3007,37.8698,-122.3005,37.87,-122.3005
2,BURGLARY AUTO,2019-07-02 12:00:00,37.8716,-122.2726,37.8716,-122.2728,37.8714,-122.2728,37.8714,-122.2726,37.8716,-122.2726
3,BURGLARY AUTO,2019-07-02 12:00:00,37.8586,-122.2933,37.8586,-122.2935,37.8584,-122.2935,37.8584,-122.2933,37.8586,-122.2933
4,BURGLARY AUTO,2019-07-01 12:00:00,37.8802,-122.2974,37.8802,-122.2976,37.88,-122.2976,37.88,-122.2974,37.8802,-122.2974


In [265]:
# BUILDS THE AVOID-AREA (BOUNDING BOX) PAYLOAD FOR THE CONSTRAINTS API REQUEST
def format_request_data(id_str, plusplus, plusminus, minusminus, minusplus):
    """Return the request body describing one avoid-area constraint.

    Each corner argument is indexable as ``corner[0]`` (longitude) and
    ``corner[1]`` (latitude).  The corners are emitted in the order
    plusplus, plusminus, minusminus, minusplus, and the first corner is
    repeated at the end so the list of positions is closed.

    Parameters
    ----------
    id_str : identifier stored under the constraint's ``"id"`` key.
    plusplus, plusminus, minusminus, minusplus : the four corners of the box.

    Returns
    -------
    dict
        ``{"constraints": [{"id": ..., "avoidArea": {"positions": [...]}}]}``
    """
    ring = (plusplus, plusminus, minusminus, minusplus, plusplus)
    positions = [
        {"longitude": corner[0], "latitude": corner[1]}
        for corner in ring
    ]
    constraint = {"id": id_str, "avoidArea": {"positions": positions}}
    return {"constraints": [constraint]}

In [264]:
# AUTHORIZATION HEADERS USED IN APPLICATION
# The API key is read from the environment so that it is never stored in the
# notebook (or its version history).
# SECURITY: a key that has ever been committed in plain text should be treated
# as leaked and rotated.
RIDEOS_API_KEY_ENV_VAR = "RIDEOS_API_KEY"
rideos_api_key = os.environ.get(RIDEOS_API_KEY_ENV_VAR, "")
if not rideos_api_key:
    # print() rather than warnings.warn(): this notebook silences all warnings.
    print(
        "WARNING: environment variable " + RIDEOS_API_KEY_ENV_VAR
        + " is not set; API requests will be sent without a valid key."
    )

headers = {
    "Accept": "application/json",
    "X-Api-Key": rideos_api_key,
    "Content-Type": "application/json"
}

In [261]:
# USE CONSTRAINT DATA API TO ASSOCIATE WITH OPERATOR LEVEL
def add_constraint(auth, data, timeout=30):
    """POST a constraint payload to the rideOS AddOrReplaceConstraints endpoint.

    Parameters
    ----------
    auth : dict
        HTTP headers sent with the request.
    data : dict
        Request body, sent as JSON.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.post``.
        Without a timeout a stalled connection would block the kernel
        indefinitely.

    Returns
    -------
    requests.Response
        The raw response; the status code is not checked here.
    """
    return requests.post(
        'https://api.rideos.ai/constraints/v1/AddOrReplaceConstraints',
        headers=auth,
        json=data,
        timeout=timeout,
    )

# USE RIDE HAIL OPERATIONS TO SET FLEET ROUTING CONSTRAINTS
def set_fleet(auth, fleet_id, id_start, id_end, id_prefix="demo", timeout=30):
    """Blacklist a contiguous range of constraint ids for one fleet.

    The ids sent are ``id_prefix + str(i)`` for every ``i`` from ``id_start``
    to ``id_end`` inclusive; an ``id_end`` below ``id_start`` sends an empty
    list.

    Parameters
    ----------
    auth : dict
        HTTP headers sent with the request.
    fleet_id : str
        Value sent as ``"fleetId"``.
    id_start, id_end : int
        First and last (inclusive) numeric suffix of the constraint ids.
    id_prefix : str, optional
        Prefix of the constraint ids; it has to match the prefix the
        constraints were created with.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.post``
        so a stalled connection cannot block the kernel indefinitely.

    Returns
    -------
    requests.Response
        The raw response; the status code is not checked here.
    """
    ids = [id_prefix + str(i) for i in range(id_start, id_end + 1)]
    data = {
      "fleetId": fleet_id,
      "constraints": {
        "blacklistConstraintIds": ids
      }
    }
    return requests.post(
        'https://api.rideos.ai/ride-hail-operations/v1/SetFleetRoutingConstraints',
        headers=auth,
        json=data,
        timeout=timeout,
    )

In [263]:
# SENDS ONE ADD-CONSTRAINT REQUEST FOR EACH ROW IN THE CRIME DATASET
def curl_commands(crime_df, auth=None):
    """Upload one avoid-area constraint per row of ``crime_df``.

    For every row, the four box corners are read from the ``lon*``/``lat*``
    columns (suffixes ``++``, ``+-``, ``--``, ``-+``), rounded to 4 decimals,
    and posted via ``add_constraint`` under the id ``"demo" + str(index)``.
    Each response is printed.

    Parameters
    ----------
    crime_df : pandas.DataFrame
        Frame with the eight corner columns; its index labels become the
        numeric part of the constraint ids.
    auth : dict, optional
        HTTP headers for the requests.  Defaults to the notebook-level
        ``headers``.

    Returns
    -------
    The index label of the last row processed, or ``-1`` when ``crime_df``
    has no rows (previously this case raised ``UnboundLocalError``).
    """
    last_index = -1
    for index, row in crime_df.iterrows():
        corners = [
            (round(row["lon" + suffix], 4), round(row["lat" + suffix], 4))
            for suffix in ("++", "+-", "--", "-+")
        ]
        data = format_request_data("demo" + str(index), *corners)
        # Resolve the default lazily so the global is only needed when a
        # request is actually sent.
        response = add_constraint(headers if auth is None else auth, data)
        print(response)
        last_index = index
    return last_index

In [248]:
# Send one avoid-area constraint per filtered crime row; `index` receives the
# value returned by curl_commands (the label of the last row it iterated over).
index = curl_commands(calls_square_date_filtered)

{'constraints': [{'id': 'demo0', 'avoidArea': {'positions': [{'longitude': -122.2917, 'latitude': 37.8644}, {'longitude': -122.2919, 'latitude': 37.8644}, {'longitude': -122.2919, 'latitude': 37.8642}, {'longitude': -122.2917, 'latitude': 37.8642}, {'longitude': -122.2917, 'latitude': 37.8644}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo1', 'avoidArea': {'positions': [{'longitude': -122.3005, 'latitude': 37.87}, {'longitude': -122.3007, 'latitude': 37.87}, {'longitude': -122.3007, 'latitude': 37.8698}, {'longitude': -122.3005, 'latitude': 37.8698}, {'longitude': -122.3005, 'latitude': 37.87}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo2', 'avoidArea': {'positions': [{'longitude': -122.2726, 'latitude': 37.8716}, {'longitude': -122.2728, 'latitude': 37.8716}, {'longitude': -122.2728, 'latitude': 37.8714}, {'longitude': -122.2726, 'latitude': 37.8714}, {'longitude': -122.2726, 'latitude': 37.8716}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo3', 'avoidArea': {'positio

<Response [200]>
{'constraints': [{'id': 'demo26', 'avoidArea': {'positions': [{'longitude': -122.2606, 'latitude': 37.8649}, {'longitude': -122.2608, 'latitude': 37.8649}, {'longitude': -122.2608, 'latitude': 37.8647}, {'longitude': -122.2606, 'latitude': 37.8647}, {'longitude': -122.2606, 'latitude': 37.8649}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo27', 'avoidArea': {'positions': [{'longitude': -122.2711, 'latitude': 37.8489}, {'longitude': -122.2713, 'latitude': 37.8489}, {'longitude': -122.2713, 'latitude': 37.8487}, {'longitude': -122.2711, 'latitude': 37.8487}, {'longitude': -122.2711, 'latitude': 37.8489}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo28', 'avoidArea': {'positions': [{'longitude': -122.2693, 'latitude': 37.8803}, {'longitude': -122.2695, 'latitude': 37.8803}, {'longitude': -122.2695, 'latitude': 37.8801}, {'longitude': -122.2693, 'latitude': 37.8801}, {'longitude': -122.2693, 'latitude': 37.8803}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo

<Response [200]>
{'constraints': [{'id': 'demo52', 'avoidArea': {'positions': [{'longitude': -122.2781, 'latitude': 37.8945}, {'longitude': -122.2783, 'latitude': 37.8945}, {'longitude': -122.2783, 'latitude': 37.8943}, {'longitude': -122.2781, 'latitude': 37.8943}, {'longitude': -122.2781, 'latitude': 37.8945}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo53', 'avoidArea': {'positions': [{'longitude': -122.2842, 'latitude': 37.8702}, {'longitude': -122.2844, 'latitude': 37.8702}, {'longitude': -122.2844, 'latitude': 37.87}, {'longitude': -122.2842, 'latitude': 37.87}, {'longitude': -122.2842, 'latitude': 37.8702}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo54', 'avoidArea': {'positions': [{'longitude': -122.2625, 'latitude': 37.8629}, {'longitude': -122.2627, 'latitude': 37.8629}, {'longitude': -122.2627, 'latitude': 37.8627}, {'longitude': -122.2625, 'latitude': 37.8627}, {'longitude': -122.2625, 'latitude': 37.8629}]}}]}
<Response [200]>
{'constraints': [{'id': 'demo55',

In [262]:
# START_INDEX / END_INDEX bound the numeric suffixes of the constraint ids
# that are passed to set_fleet.
# NOTE(review): FLEET_ID is not used by the set_fleet call visible in this
# notebook, which passes a literal fleet id instead -- confirm which is intended.
FLEET_ID = "0"
START_INDEX = 0 
END_INDEX = index

In [280]:
# Ask the ride-hail operations API to blacklist the constraint ids numbered
# START_INDEX..END_INDEX (inclusive) for this fleet, then show the HTTP response.
fleet_response = set_fleet(headers, "sim_fleet_dee8f8", START_INDEX, END_INDEX)
print(fleet_response)

<Response [200]>
