# Boston_Potholes_DataRobot_Model

Boston Pothole age predictor

In [1]:
!pip install datarobot

[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import pandas as pd
import pandas_profiling as pdp
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
df = pd.read_csv("../../data/311__Service_Requests.csv", parse_dates=True, encoding='UTF-8', infer_datetime_format=True)
print('Loaded 311 Service Requests RAW')
print('(Rows,Columns):{}'.format(df.shape))

Loaded 311 Service Requests RAW
(Rows,Columns):(973993, 33)


In [4]:
df.head(1)

Unnamed: 0,CASE_ENQUIRY_ID,OPEN_DT,TARGET_DT,CLOSED_DT,OnTime_Status,CASE_STATUS,CLOSURE_REASON,CASE_TITLE,SUBJECT,REASON,...,precinct,land_usage,LOCATION_STREET_NAME,LOCATION_ZIPCODE,Property_Type,Property_ID,LATITUDE,LONGITUDE,Source,Geocoded_Location
0,101000493034,10/16/2012 02:45:25 PM,10/18/2012 02:45:24 PM,10/17/2012 07:36:59 AM,ONTIME,Closed,Case Resolved,Sticker Request,Public Works Department,Recycling,...,1602,,36 Park St,2122.0,,,42.3011,-71.0543,Self Service,


In [5]:
# Find the PotHoles 
potdf = df.loc[df['TYPE'].str.contains('Pot')]
potdf.shape

(77677, 33)

In [6]:
# Only care about potholes reported by Citizen sources
non_employee_source = ['Citizens Connect App', 'Constituent Call', 'Self Service', 'Twitter']

# Fixed Potholes Data Parsing for Citizens
drdf = potdf.loc[(potdf['Source'].isin(non_employee_source)) ]



In [7]:
# Drop Unused Data Columns
drdf= drdf.drop('TARGET_DT', 1,errors='ignore')
drdf= drdf.drop('expected_age', 1,errors='ignore')
drdf= drdf.drop('SubmittedPhoto', 1,errors='ignore')
drdf= drdf.drop('ClosedPhoto', 1,errors='ignore') # cleaned_df['ClosedPhoto'][147]
drdf= drdf.drop('age_in_days', 1,errors='ignore') # cleaned_df['ClosedPhoto'][147]
drdf= drdf.drop('land_usage', 1,errors='ignore')
drdf= drdf.drop('Property_Type', 1,errors='ignore')
drdf= drdf.drop('Property_ID', 1,errors='ignore')
drdf= drdf.drop('age', 1,errors='ignore')
drdf= drdf.drop('performance', 1,errors='ignore')
drdf= drdf.drop('LATITUDE', 1,errors='ignore')
drdf= drdf.drop('LONGITUDE', 1,errors='ignore')
drdf= drdf.drop('Geocoded_Location', 1,errors='ignore')

# non_employee_source = ['Citizens Connect App','Constituent Call', 'Self Service', 'Twitter']
# drdf = drdf.loc[(potdf['CASE_STATUS'] == 'Closed' ) &(potdf['Source'].isin(non_employee_source)) ]
drdf = drdf.drop('CLOSURE_REASON',1,errors='ignore')

drdf= drdf.drop('CASE_STATUS', 1, errors='ignore') #we dont need this anymore
drdf.shape

(20853, 22)

In [8]:
# Compute Age

def get_closed_age(df):
    return pd.to_datetime(df['CLOSED_DT']) - pd.to_datetime(df['OPEN_DT'])

def get_todays_age(df):
    return pd.to_datetime(df['CLOSED_DT']) - pd.to_datetime(df['OPEN_DT'])

potdf['age'] = get_closed_age(df)

drdf= drdf.drop('CLOSED_DT', 1,errors='ignore')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [9]:
# Make Label

closedpotdf = potdf['age']
drdf['age_in_days'] = closedpotdf.apply(lambda x: x.days if isinstance(x, pd.Timedelta) else 0)
drdf['age_in_days'] = drdf['age_in_days'] #.dropna()# potdf['expected_age']

drdf['age_in_hours'] = closedpotdf.apply(lambda x: int(x.to_pytimedelta().total_seconds() /60 /60 ) if isinstance(x, pd.Timedelta) else 0)
drdf['age_in_hours'] = drdf['age_in_hours'] #.dropna()# potdf['expected_age']

In [10]:
drdf = drdf.loc[(potdf['CASE_STATUS'] == 'Closed' )] # filter out closed

In [11]:
# Regression: Age in Hours Label

drdfHours = drdf.loc[(potdf['CASE_STATUS'] == 'Closed' )] # filter out closed
drdf.head(10)
drdf.to_csv('../../data/dr_potholes_hours.csv')

In [12]:
# Binary Age Classifier (24, 48, 72, 168 hours old)

drdrAgeLabel = drdf.loc[(potdf['CASE_STATUS'] == 'Closed' )] # filter out closed
drdrAgeLabel= drdrAgeLabel.drop('age', 1,errors='ignore')
drdrAgeLabel= drdrAgeLabel.drop('age_in_days', 1,errors='ignore')

age_thresholds = [24, 48, 72, 96, 120, 144, 168]

for thresholds in age_thresholds:
    drdrAgeLabel['fast_fix'] = drdf['age_in_hours'] <= thresholds
    drdrAgeLabel= drdrAgeLabel.drop('age_in_hours', 1,errors='ignore')

    drdrAgeLabel.to_csv('../../data/dr_potholes_{}hrs.csv'.format(thresholds))

In [13]:
import datarobot as dr
dr.Client(token='VAB03kNIRA4tyHqvuvm_sJPmhKu0LVqQ', endpoint='https://uniapp.datarobot.com/api/v2')

<datarobot.rest.RESTClientObject at 0x7f76acfc7668>

In [19]:
dr.Project.list()

[Project(Pothole_Fixed_In_168hr?),
 Project(Pothole_Fixed_In_144hr?),
 Project(Pothole_Fixed_In_120hr?),
 Project(Pothole_Fixed_In_96hr?),
 Project(Pothole_Fixed_In_72hr?),
 Project(Pothole_Fixed_In_48hr?),
 Project(Pothole_Fixed_In_24hr?),
 Project(BostonHousingData),
 Project(Pothole_Fixed_In_168hr?),
 Project(Pothole_Fixed_In_144hr?),
 Project(Pothole_Fixed_In_120hr?),
 Project(Pothole_Fixed_In_96hr?),
 Project(Pothole_Fixed_In_72hr?),
 Project(Pothole_Fixed_In_48hr?),
 Project(Pothole_Fixed_In_24hr?),
 Project(168hrFix-v1 Boston Potholes),
 Project(72hrFix-v1 Boston Potholes),
 Project(48hrFix-v1 Boston Potholes),
 Project(24hrFix-v1 Boston Potholes),
 Project(pots-v2 Accurate Boston Potholes),
 Project(pots-v1 Boston Potholes Age In Hours),
 Project(10k_),
 Project(cars)]

In [15]:
import time
projects = []
for threshold in age_thresholds:
    p = None
    print(threshold)
    file = "../../data/dr_potholes_{}hrs.csv".format(threshold)
    print(file)
    name ="Pothole_Fixed_In_{}hr?".format(threshold)
    print(name)
    p = dr.Project.start(file, "fast_fix", project_name=name)
    projects.append(p)

24
../../data/dr_potholes_24hrs.csv
Pothole_Fixed_In_24hr?
48
../../data/dr_potholes_48hrs.csv
Pothole_Fixed_In_48hr?
72
../../data/dr_potholes_72hrs.csv
Pothole_Fixed_In_72hr?
96
../../data/dr_potholes_96hrs.csv
Pothole_Fixed_In_96hr?
120
../../data/dr_potholes_120hrs.csv
Pothole_Fixed_In_120hr?
144
../../data/dr_potholes_144hrs.csv
Pothole_Fixed_In_144hr?
168
../../data/dr_potholes_168hrs.csv
Pothole_Fixed_In_168hr?


In [18]:
projects

[Project(Pothole_Fixed_In_24hr?),
 Project(Pothole_Fixed_In_48hr?),
 Project(Pothole_Fixed_In_72hr?),
 Project(Pothole_Fixed_In_96hr?),
 Project(Pothole_Fixed_In_120hr?),
 Project(Pothole_Fixed_In_144hr?),
 Project(Pothole_Fixed_In_168hr?)]

In [17]:
projects[0].get_features()

[Feature(neighborhood),
 Feature(CASE_ENQUIRY_ID),
 Feature(LOCATION_ZIPCODE),
 Feature(QUEUE),
 Feature(Source),
 Feature(Location),
 Feature(city_council_district),
 Feature(Department),
 Feature(OPEN_DT (Year)),
 Feature(ward),
 Feature(LOCATION_STREET_NAME),
 Feature(neighborhood_services_district),
 Feature(Unnamed: 0),
 Feature(OPEN_DT (Day of Month)),
 Feature(OnTime_Status),
 Feature(fast_fix),
 Feature(police_district),
 Feature(CASE_TITLE),
 Feature(OPEN_DT (Day of Week)),
 Feature(OPEN_DT),
 Feature(REASON),
 Feature(precinct),
 Feature(pwd_district),
 Feature(TYPE),
 Feature(OPEN_DT (Month)),
 Feature(fire_district),
 Feature(SUBJECT)]

In [21]:
!pip install ckanapi


Collecting ckanapi
  Downloading ckanapi-4.0.tar.gz
Collecting docopt (from ckanapi)
  Downloading docopt-0.6.2.tar.gz
Building wheels for collected packages: ckanapi, docopt
  Running setup.py bdist_wheel for ckanapi ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/bd/4e/04/39e24be1f24ec7c2ad568824672415b403f01787cc5195da6c
  Running setup.py bdist_wheel for docopt ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/b2/16/5f/c33a2bb5f2dce71205f8e65cbfd05647d79d441282be31fd82
Successfully built ckanapi docopt
Installing collected packages: docopt, ckanapi
Successfully installed ckanapi-4.0 docopt-0.6.2
[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [32]:
# Get data
from ckanapi import RemoteCKAN
ua = 'ckanapiexample/1.0 (+http://example.com/my/website)'

demo = RemoteCKAN('https://data.boston.gov', user_agent=ua)
groups = demo.action.datastore_search(id='2968e2c0-d479-49ba-a884-4ef523ada3c0')

{'fields': [{'id': '_id', 'type': 'int4'}, {'id': 'CASE_ENQUIRY_ID', 'info': {'label': '', 'notes': 'Case reference number, which is unique. Form of number may indicate software application ownership.'}, 'type': 'text'}, {'id': 'open_dt', 'info': {'label': '', 'notes': 'Date time case was created.'}, 'type': 'timestamp'}, {'id': 'target_dt', 'info': {'label': '', 'notes': "Due date service request is expected to be resolved. Set by various entities, including Mayor's Office and Commissioners."}, 'type': 'timestamp'}, {'id': 'closed_dt', 'info': {'label': '', 'notes': 'Date case was closed.'}, 'type': 'timestamp'}, {'id': 'OnTime_Status', 'info': {'label': '', 'notes': 'Indicates whether case was closed/resolved on time.'}, 'type': 'text'}, {'id': 'CASE_STATUS', 'info': {'label': '', 'notes': 'Indicates whether case is closed.'}, 'type': 'text'}, {'id': 'CLOSURE_REASON', 'info': {'label': '', 'notes': 'Gives reason why case was closed.'}, 'type': 'text'}, {'id': 'CASE_TITLE', 'info': {'

In [33]:
groups

{'_links': {'next': '/api/action/datastore_search?offset=100',
  'start': '/api/action/datastore_search'},
 'fields': [{'id': '_id', 'type': 'int4'},
  {'id': 'CASE_ENQUIRY_ID',
   'info': {'label': '',
    'notes': 'Case reference number, which is unique. Form of number may indicate software application ownership.'},
   'type': 'text'},
  {'id': 'open_dt',
   'info': {'label': '', 'notes': 'Date time case was created.'},
   'type': 'timestamp'},
  {'id': 'target_dt',
   'info': {'label': '',
    'notes': "Due date service request is expected to be resolved. Set by various entities, including Mayor's Office and Commissioners."},
   'type': 'timestamp'},
  {'id': 'closed_dt',
   'info': {'label': '', 'notes': 'Date case was closed.'},
   'type': 'timestamp'},
  {'id': 'OnTime_Status',
   'info': {'label': '',
    'notes': 'Indicates whether case was closed/resolved on time.'},
   'type': 'text'},
  {'id': 'CASE_STATUS',
   'info': {'label': '', 'notes': 'Indicates whether case is closed

In [34]:
groups2= demo.action.datastore_search(id='2968e2c0-d479-49ba-a884-4ef523ada3c0', offset=100)

In [35]:
groups2

{'_links': {'next': '/api/action/datastore_search?offset=200',
  'start': '/api/action/datastore_search'},
 'fields': [{'id': '_id', 'type': 'int4'},
  {'id': 'CASE_ENQUIRY_ID',
   'info': {'label': '',
    'notes': 'Case reference number, which is unique. Form of number may indicate software application ownership.'},
   'type': 'text'},
  {'id': 'open_dt',
   'info': {'label': '', 'notes': 'Date time case was created.'},
   'type': 'timestamp'},
  {'id': 'target_dt',
   'info': {'label': '',
    'notes': "Due date service request is expected to be resolved. Set by various entities, including Mayor's Office and Commissioners."},
   'type': 'timestamp'},
  {'id': 'closed_dt',
   'info': {'label': '', 'notes': 'Date case was closed.'},
   'type': 'timestamp'},
  {'id': 'OnTime_Status',
   'info': {'label': '',
    'notes': 'Indicates whether case was closed/resolved on time.'},
   'type': 'text'},
  {'id': 'CASE_STATUS',
   'info': {'label': '', 'notes': 'Indicates whether case is closed