# Boston 311 Tutorial

This notebook walks you through the basic usage of this package: it loads a trained logistic regression model for the Boston 311 data, uses it to predict the outcome of cases from the last 30 days, and saves the results to CSV files.

## Install the package from GitHub using pip

In [None]:
#This library is only needed for the Cox Regression Model, which is not included in this tutorial
! pip install lifelines

In [1]:
pwd()

'/home/briarmoss/Documents/Boston_311/models'

In [4]:
! pip install ../

Defaulting to user installation because normal site-packages is not writeable
Processing /home/briarmoss/Documents/Boston_311
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: boston311
  Building wheel for boston311 (pyproject.toml) ... [?25ldone
[?25h  Created wheel for boston311: filename=boston311-0.1.0-py3-none-any.whl size=19999 sha256=de53473995c392398a2cbcb68232c96c54c16d1f9dcd6f2a4025625f6f6947ce
  Stored in directory: /tmp/pip-ephem-wheel-cache-xs1j5d8c/wheels/3d/69/ee/0a6ac96b9c09c948fc0e74f2724a9703aa39749a41fa757c9e
Successfully built boston311
Installing collected packages: boston311
  Attempting uninstall: boston311
    Found existing installation: boston311 0.1
    Uninstalling boston311-0.1:
      Successfully uninstalled boston311-0.1
Successfully installed boston311-0.1.0


## Import the Boston311 model classes

In [2]:
! pip show boston311

Name: boston311
Version: 0.1.0
Summary: A package for training machine learning models on Boston 311 data
Home-page: https://github.com/mindfulcoder49/Boston_311
Author: Alex Alcivar
Author-email: alex.g.alcivar49@gmail.com
License: UNKNOWN
Location: /home/briarmoss/.local/lib/python3.10/site-packages
Requires: matplotlib, numpy, pandas, scikit-learn, tensorflow
Required-by: 


In [3]:
from boston311 import Boston311LogReg, Boston311EventDecTree, Boston311SurvDecTree

2023-09-05 14:34:21.624060: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 14:34:21.982172: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 14:34:21.986261: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Get the latest file URLs and current date ranges

In [4]:
# Create the logistic regression model wrapper and load its state from two files
# located in the current working directory.
# NOTE(review): presumably the JSON file holds the model's properties and the .h5 file
# the trained network weights — confirm against Boston311LogReg.load.
logregmodel = Boston311LogReg()
logregmodel.load('logregproperties.json', 'logreg.h5')

In [5]:
from datetime import datetime, timedelta

# The prediction window is the 30 days ending at the moment this cell is executed.
now = datetime.now()
thirty_days = timedelta(days=30)
thirty_days_ago = now - thirty_days

# Render both ends of the window as "YYYY-MM-DD" strings in a single pass.
today_datestring, thirty_days_ago_datestring = (
    moment.strftime("%Y-%m-%d") for moment in (now, thirty_days_ago)
)

print(today_datestring, thirty_days_ago_datestring)

2023-09-05 2023-08-06


In [6]:
# Tell the model which window to predict on: from thirty days ago through today,
# both given as "YYYY-MM-DD" strings.
logregmodel.predict_date_range = dict(
    start=thirty_days_ago_datestring,
    end=today_datestring,
)

In [7]:
# Run the prediction. The result is used as a pandas DataFrame in the following cells
# (.head(), column selection, .drop()).
# NOTE(review): presumably predict() fetches the cases inside predict_date_range before
# scoring them (it prints file/column diagnostics while running) — confirm in the package.
logregmodel_prediction = logregmodel.predict()

Files with different number of columns from File 0:  []
Files with same number of columns as File 0:  [0]
Files with different column order from File 0:  []
Files with same column order as File 0:  [0]


In [8]:
# Preview the first rows of the prediction frame; the model output is the last column,
# event_prediction.
logregmodel_prediction.head()

Unnamed: 0,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,reason,...,location_zipcode,latitude,longitude,geom_4326,source,survival_time,event,ward_number,survival_time_hours,event_prediction
5,101005033782,2023-09-04 12:39:53,2023-09-07 04:30:00,NaT,ONTIME,Open,,Improper Storage of Trash (Barrels),Public Works Department,Code Enforcement,...,2125.0,42.32328,-71.059691,0101000020E6100000D35D38F8D1C351C03B8A35396129...,Citizens Connect App,NaT,0,7,,1.0
6,101005033789,2023-09-04 12:48:09,2023-09-07 04:30:00,NaT,ONTIME,Open,,Improper Storage of Trash (Barrels),Public Works Department,Code Enforcement,...,2125.0,42.3185,-71.060351,0101000020E61000008CDA82C8DCC351C0B168AC97C428...,Citizens Connect App,NaT,0,7,,1.0
7,101005033798,2023-09-04 12:51:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,2126.0,42.26475,-71.109651,0101000020E61000008798AB8404C751C0CD869D50E321...,Constituent Call,NaT,0,18,,1.0
8,101005033801,2023-09-04 12:53:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,2120.0,42.32829,-71.107271,0101000020E6100000B2909B85DDC651C0E4CEA964052A...,Constituent Call,NaT,0,10,,1.0
29114,101004973346,2023-08-07 12:40:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,2124.0,42.29028,-71.074081,0101000020E61000000EB1B3BCBDC451C0D7B254E12725...,Constituent Call,NaT,0,17,,1.0


In [12]:
import pandas as pd

# Metadata describing the model that produced this batch of predictions.
ml_model_name = 'logregmodel'
ml_model_type = 'logistic regression'
# The model id is the prefix "1_" followed by today's date string.
ml_model_id = f'1_{today_datestring}'

# One-row frame of model metadata; note that the id is stored under the column name 'id'.
ml_model_df = pd.DataFrame([
    {
        'ml_model_name': ml_model_name,
        'ml_model_type': ml_model_type,
        'id': ml_model_id,
        'ml_model_date': today_datestring,
    }
])




In [13]:
# Build the predictions table: one row per case holding the case id and the model output
# (event_prediction, renamed to prediction), tagged with the id of the model that produced
# it and the date the prediction was made.
logregmodel_prediction_event = (
    logregmodel_prediction[['event_prediction', 'case_enquiry_id']]
    .rename(columns={'event_prediction': 'prediction'})
    .assign(ml_model_id=ml_model_id, prediction_date=today_datestring)
)


In [14]:
# Case records without the model output: every column except event_prediction,
# copied so later changes cannot touch logregmodel_prediction.
logregmodel_cases = logregmodel_prediction.drop(columns=['event_prediction']).copy()

In [19]:
# Remove the geom_4326 geometry column from the case records as well.
# The frame is rebuilt from logregmodel_prediction rather than from logregmodel_cases
# itself, so re-running this cell gives the same result instead of raising a KeyError
# for a column that was already dropped.
logregmodel_cases = logregmodel_prediction.drop(
    columns=['event_prediction', 'geom_4326']
).copy()

In [20]:
# Preview the case records to check that the prediction and geometry columns are gone.
logregmodel_cases.head()

Unnamed: 0,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,reason,...,precinct,location_street_name,location_zipcode,latitude,longitude,source,survival_time,event,ward_number,survival_time_hours
5,101005033782,2023-09-04 12:39:53,2023-09-07 04:30:00,NaT,ONTIME,Open,,Improper Storage of Trash (Barrels),Public Works Department,Code Enforcement,...,709,68 Harvest St,2125.0,42.32328,-71.059691,Citizens Connect App,NaT,0,7,
6,101005033789,2023-09-04 12:48:09,2023-09-07 04:30:00,NaT,ONTIME,Open,,Improper Storage of Trash (Barrels),Public Works Department,Code Enforcement,...,709,20 Dawes St,2125.0,42.3185,-71.060351,Citizens Connect App,NaT,0,7,
7,101005033798,2023-09-04 12:51:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,1806,274 Wood Ave,2126.0,42.26475,-71.109651,Constituent Call,NaT,0,18,
8,101005033801,2023-09-04 12:53:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,1006,168A Fisher Ave,2120.0,42.32829,-71.107271,Constituent Call,NaT,0,10,
29114,101004973346,2023-08-07 12:40:00,,NaT,ONTIME,Open,,Mattress Pickup,Public Works Department,Sanitation,...,1705,50 Southern Ave,2124.0,42.29028,-71.074081,Constituent Call,NaT,0,17,


## Save the prediction data

In [21]:
# Write the case records to 311.csv in the working directory, without the DataFrame index.
logregmodel_cases.to_csv('311.csv', index=False)


In [17]:

# Write the per-case predictions (prediction, case_enquiry_id, ml_model_id, prediction_date)
# to 311_predictions.csv, without the DataFrame index.
logregmodel_prediction_event.to_csv('311_predictions.csv', index=False)

In [18]:

# Write the one-row model metadata table to 311_ml_models.csv, without the DataFrame index.
ml_model_df.to_csv('311_ml_models.csv', index=False)