# Boston 311 Tutorial

This notebook walks you through the basic usage of this package: training three models on the Boston 311 data and using them to predict the outcome of cases from the last 30 days.

## Import the Boston 311 model modules

In [None]:
from boston311 import Boston311LogReg, Boston311EventDecTree, Boston311SurvDecTree

## Get the latest file URLs and current date ranges

In [None]:
# Look up the data-file URLs (the printed result is a dict keyed by year string);
# this mapping is passed to every model below as `files_dict`.
latest_URLS = Boston311LogReg.Boston311LogReg.get311URLs()

In [None]:
# Display the year -> URL mapping held in latest_URLS.
print(latest_URLS)

{'2023': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/e6013a93-1321-4f2a-bf91-8d8a02f1e62f/download/tmpfq283iq2.csv', '2022': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/81a7b022-f8fc-4da5-80e4-b160058ca207/download/tmph4izx_fb.csv', '2021': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/f53ebccd-bc61-49f9-83db-625f209c95f5/download/tmppgq9965_.csv', '2020': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/6ff6a6fd-3141-4440-a880-6f60a37fe789/download/script_105774672_20210108153400_combine.csv', '2019': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/ea2e4696-4a2d-429c-9807-d02eb92e0222/download/311_service_requests_2019.csv', '2018': 'https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2be28d90-3a90-4af1-a3f6-f28c1e25880a/download/311_service_requests_2018.csv', '2017': 'https://data.boston.gov/data

In [None]:
from datetime import datetime, timedelta

# Capture the current time once so both date strings derive from the same instant.
now = datetime.now()
thirty_days = timedelta(days=30)
thirty_days_ago = now - thirty_days

# YYYY-MM-DD strings; the model definitions below use them as the boundaries
# of their train_date_range / predict_date_range arguments.
today_datestring = f"{now:%Y-%m-%d}"
thirty_days_ago_datestring = f"{thirty_days_ago:%Y-%m-%d}"

print(today_datestring, thirty_days_ago_datestring)

2023-05-24 2023-04-24


In [None]:
# Base folder for saved models (relative to the notebook's working directory);
# the save() calls below build their target paths from this prefix.
MODEL_FOLDER = './daily_models'

## Define several models

In [None]:
# Boston311SurvDecTree model. The training range runs from 2022-01-01 to the
# date 30 days ago; the prediction range covers the 30 days up to today.
# Only the 'type' and 'queue' columns are used as features.
# NOTE(review): the variable is named "linear_tree_model" although the class is
# Boston311SurvDecTree.
# NOTE(review): 'survivalTimeFill' is a hard-coded date while every other date
# here is computed from the current day -- confirm whether it should track
# today_datestring instead.
linear_tree_model = Boston311SurvDecTree.Boston311SurvDecTree(
    train_date_range={'start': '2022-01-01', 'end': thirty_days_ago_datestring},
    predict_date_range={'start': thirty_days_ago_datestring, 'end': today_datestring},
    feature_columns=['type', 'queue'],
    scenario={
        # presumably excludes cases whose 'source' is one of these values -- TODO confirm
        'dropColumnValues': {'source': ['City Worker App', 'Employee Generated']},
        'survivalTimeMin': 0,
        'survivalTimeFill': '2023-05-22',
    },
    files_dict=latest_URLS,
)

In [None]:
# Boston311LogReg model. The training range runs from 2022-01-01 to the date
# 30 days ago; the prediction range covers the 30 days up to today.
# Only the 'type' and 'queue' columns are used as features.
logistic_model = Boston311LogReg.Boston311LogReg(
    train_date_range={'start': '2022-01-01', 'end': thirty_days_ago_datestring},
    predict_date_range={'start': thirty_days_ago_datestring, 'end': today_datestring},
    feature_columns=['type', 'queue'],
    scenario={
        # presumably excludes cases whose 'source' is one of these values -- TODO confirm
        'dropColumnValues': {'source': ['City Worker App', 'Employee Generated']},
        'survivalTimeMin': 0,
    },
    files_dict=latest_URLS,
)

In [None]:
# Boston311EventDecTree model. The training range runs from 2022-01-01 to the
# date 30 days ago; the prediction range covers the 30 days up to today.
# Only the 'type' and 'queue' columns are used as features.
logistic_tree_model = Boston311EventDecTree.Boston311EventDecTree(
    train_date_range={'start': '2022-01-01', 'end': thirty_days_ago_datestring},
    predict_date_range={'start': thirty_days_ago_datestring, 'end': today_datestring},
    feature_columns=['type', 'queue'],
    scenario={
        # presumably excludes cases whose 'source' is one of these values -- TODO confirm
        'dropColumnValues': {'source': ['City Worker App', 'Employee Generated']},
        'survivalTimeMin': 0,
    },
    files_dict=latest_URLS,
)

## Train and save the models

In [None]:
# Run the model's pipeline; per the captured output this checks the input files'
# columns, trains the model, and reports a testing accuracy and the training time.
logistic_tree_model.run_pipeline()

Files with different number of columns from File 0:  []
Files with same number of columns as File 0:  [0, 1]
Files with different column order from File 0:  []
Files with same column order as File 0:  [0, 1]
Starting Training at 2023-05-24 15:09:15.398923
Testing accuracy: 0.9493150684931507
Ending Training at 2023-05-24 15:09:51.630050
Training took 0:00:36.231127


In [None]:
# Save the model using the path MODEL_FOLDER/logtree.
# NOTE(review): the 2nd and 3rd arguments look like file-name stems for the model
# and its properties -- confirm against the save() signature.
logistic_tree_model.save(MODEL_FOLDER+'/logtree','logtree','logtreeproperties')

In [None]:
# Run the model's pipeline; per the captured output this checks the input files'
# columns, trains for 10 epochs, and reports a test accuracy and the training time.
logistic_model.run_pipeline()

Files with different number of columns from File 0:  []
Files with same number of columns as File 0:  [0, 1]
Files with different column order from File 0:  []
Files with same column order as File 0:  [0, 1]
Starting Training at 2023-05-24 15:14:47.774563
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.9486145973205566
Ending Training at 2023-05-24 15:17:17.238739
Training took 0:02:29.464176


In [None]:
# Save the model using the path MODEL_FOLDER/logreg.
# NOTE(review): the 2nd and 3rd arguments look like file-name stems for the model
# and its properties -- confirm against the save() signature.
logistic_model.save(MODEL_FOLDER+'/logreg','logreg','logregproperties')

In [None]:
# Run the pipeline for the Boston311SurvDecTree model defined earlier.
linear_tree_model.run_pipeline()

In [None]:
# Save the model using the path MODEL_FOLDER/linreg.
# NOTE(review): the 'linreg' names suggest linear regression, but this object is a
# Boston311SurvDecTree -- confirm the intended folder/file names.
linear_tree_model.save(MODEL_FOLDER+'/linreg','linreg','linregproperties')