# Boston 311 Tutorial

This notebook will run you through the basic usage of this package to train 3 models on the Boston 311 data and use them to predict the outcome of cases from the last 30 days.

In [1]:
! pip install ../

Defaulting to user installation because normal site-packages is not writeable
Processing /home/briarmoss/Documents/Boston_311
  Installing build dependencies ... [?25l

done
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: boston311
  Building wheel for boston311 (pyproject.toml) ... [?25ldone
[?25h  Created wheel for boston311: filename=boston311-0.2.0-py3-none-any.whl size=24770 sha256=8ab99acf6fd882135c669bdd3c554ed7d515c9b9639466601ee07508ec99cfd5
  Stored in directory: /tmp/pip-ephem-wheel-cache-ni3k3092/wheels/3d/69/ee/0a6ac96b9c09c948fc0e74f2724a9703aa39749a41fa757c9e
Successfully built boston311
Installing collected packages: boston311
  Attempting uninstall: boston311
    Found existing installation: boston311 0.2.0
    Uninstalling boston311-0.2.0:
      Successfully uninstalled boston311-0.2.0
Successfully installed boston311-0.2.0


## Import the Boston311KerasNN class

In [2]:
import os
import pandas as pd
import numpy as np
import pickle
import re
import sys
import time

In [3]:
from boston311 import Boston311KerasNN


2024-01-01 15:46:36.147052: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-01 15:46:36.147087: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-01 15:46:36.148142: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-01 15:46:36.153966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using TensorFlow backend


## Load extra features

In [4]:
# Three date strings supplied by the model class; per their names they stand for
# today, tomorrow and thirty days ago (exact format is defined by get_datestrings()).
(
    today_datestring,
    tomorrow_datestring,
    thirty_days_ago_datestring,
) = Boston311KerasNN().get_datestrings()

In [5]:
import glob
import os

def get_latest_model_files(model_dir="./daily_models/Boston311KerasNN/"):
    """Locate the most recent model definition and weights files in ``model_dir``.

    A definition file is any ``*.json``; a weights file is any ``*.h5`` or
    ``*.keras``. "Most recent" is judged by file modification time.

    The newest JSON file that has a weights file with the same base name is
    preferred, so the two returned files belong together (assuming files saved
    together are named alike). If no JSON file shares a base name with any
    weights file, the newest JSON and the newest weights file are returned
    independently of each other.

    Args:
        model_dir: Directory to search (not recursive).

    Returns:
        ``(json_path, weights_path)``, or ``(None, None)`` when the directory
        lacks either kind of file.
    """
    json_files = glob.glob(os.path.join(model_dir, "*.json"))
    weight_files = glob.glob(os.path.join(model_dir, "*.h5"))
    # .keras files count as weights files too
    weight_files.extend(glob.glob(os.path.join(model_dir, "*.keras")))

    if not json_files or not weight_files:
        return None, None

    # Newest first.
    json_files.sort(key=os.path.getmtime, reverse=True)
    weight_files.sort(key=os.path.getmtime, reverse=True)

    def stem(path):
        """File name without directory or final extension."""
        return os.path.splitext(os.path.basename(path))[0]

    # Because weight_files is newest-first, setdefault keeps the newest weights
    # file for each base name.
    newest_weights_by_stem = {}
    for weights_path in weight_files:
        newest_weights_by_stem.setdefault(stem(weights_path), weights_path)

    # Walk the JSON files newest-first and stop at the first one with a partner.
    for json_path in json_files:
        partner = newest_weights_by_stem.get(stem(json_path))
        if partner is not None:
            return json_path, partner

    # No base names line up: fall back to the newest file of each kind.
    return json_files[0], weight_files[0]

# Locate the newest saved model files and show which definition file was picked.
json_file, model_file = get_latest_model_files()
print(json_file)

if not (json_file and model_file):
    # Stop here with a clear message; otherwise KerasNN_model is never defined
    # and the following cells fail with a NameError.
    raise FileNotFoundError(
        "get_latest_model_files() found no saved model (a .json plus a .h5/.keras file)"
    )

KerasNN_model = Boston311KerasNN()
KerasNN_model.load(json_file, model_file)
# Set the end of the model's prediction date range to tomorrow's date string.
KerasNN_model.predict_date_range['end'] = tomorrow_datestring


./daily_models/Boston311KerasNN/2024-01-01_15-39-39_Boston311KerasNN.json


2024-01-01 15:46:37.820207: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [6]:
# File names handed to pkl_load_data: the CSV of 311 cases and a pickle file
# (presumably a cached copy of the same data; confirm against pkl_load_data).
case_data_csv = 'all_311_cases.csv'
case_data_file = 'case_data.pkl'
data = KerasNN_model.pkl_load_data(
    case_data_csv,
    case_data_file,
)

In [7]:

# Run the loaded model on the case data. predict() hands back two objects:
# the predictions and the data they were computed for.
predictions, prediction_data = KerasNN_model.predict(
    data=data,
)

columns in data before drop: Index(['case_enquiry_id', 'open_dt', 'sla_target_dt', 'closed_dt', 'on_time',
       'case_status', 'closure_reason', 'case_title', 'subject', 'reason',
       'type', 'queue', 'department', 'submitted_photo', 'closed_photo',
       'location', 'fire_district', 'pwd_district', 'city_council_district',
       'police_district', 'neighborhood', 'neighborhood_services_district',
       'ward', 'precinct', 'location_street_name', 'location_zipcode',
       'latitude', 'longitude', 'geom_4326', 'source', 'survival_time',
       'event', 'ward_number', 'survival_time_hours'],
      dtype='object')
columns to drop: Index(['case_status', 'case_title', 'city_council_district', 'closed_dt',
       'closed_photo', 'closure_reason', 'fire_district', 'geom_4326',
       'latitude', 'location', 'location_street_name', 'location_zipcode',
       'longitude', 'neighborhood', 'neighborhood_services_district',
       'on_time', 'open_dt', 'police_district', 'precinct', 'pwd_

  data = pd.concat([data, fake_df], ignore_index=True)




In [8]:
# Helper that renders a 1-D sequence as a single space-separated string.
def array_to_string(arr):
    """Return the items of ``arr``, each converted with ``str``, joined by single spaces."""
    return ' '.join(str(item) for item in arr)

# Turn every row of the 2D prediction array into one space-separated string.
# A plain loop is used instead of np.apply_along_axis: apply_along_axis allocates
# its output with the dtype of the first row's result, which for strings is a
# fixed-width type, so any later row with a longer string would be silently truncated.
string_predictions = np.array(
    [array_to_string(row) for row in predictions],
    dtype=object,
)

# string_predictions is a 1D NumPy array where each element is a string that
# contains all the elements from the corresponding row of the original 2D array.
print(string_predictions)

['0.98571086 0.01112131 0.0012720719 0.0003417297 0.00013077813 0.00011743487 9.166865e-05 5.3575404e-05 2.763312e-05 3.4524404e-05 4.5948305e-05 5.9111717e-05 3.9228988e-05 3.5233887e-05 2.8057626e-05 2.4534385e-05 4.087682e-05 4.7892867e-05 3.2318778e-05 3.650561e-05 2.3478264e-05 1.3950757e-05 1.3428576e-05 8.118847e-06 1.1144418e-05 7.967572e-06 1.0177317e-05 9.87482e-06 1.128973e-05 9.5391915e-06 8.593722e-06 1.2715714e-05 5.947659e-06 8.805407e-06 9.292074e-06 6.19861e-06 9.326769e-06 7.3965116e-06 6.510644e-06 5.409634e-06 6.2378676e-06 1.5275195e-05 8.363891e-06 7.600942e-06 9.738493e-06 1.0703754e-05 4.2763495e-06 7.524434e-06 1.1431113e-05 1.13314845e-05 1.0096672e-05 8.334788e-06 8.070384e-06 4.930613e-06 1.174393e-05 8.6625205e-06 4.618142e-06 7.757868e-06 8.466696e-06 8.427464e-06 0.00033580855'
 '0.27253813 0.09703063 0.061388515 0.04948826 0.037689645 0.026524302 0.028119074 0.01824459 0.014532071 0.014634336 0.012943782 0.010535295 0.008613946 0.008687455 0.007904848 0.

In [9]:
# Case identifiers of the rows returned by predict(), used to label each prediction.
case_enquiry_id = prediction_data['case_enquiry_id']

In [10]:
# Two-column frame: each case id next to its prediction string.
predictions_df = pd.DataFrame(
    {
        'case_enquiry_id': case_enquiry_id,
        'prediction': string_predictions,
    }
)

In [11]:
import pandas as pd

# Model name = JSON file name without directory or extension. The file name starts
# with a YYYY-MM-DD date, so its first 10 characters give the model date.
model_name = os.path.basename(json_file).split('.')[0]
ml_model_date = model_name[:10]

# Read the contents of json_file so they are stored with the model metadata.
with open(json_file, 'r') as f:
    ml_model_json = f.read()

# One-row frame describing the model. It is built directly rather than by
# concatenating onto an empty frame, and it includes the ml_model_json column
# that the recorded output of this cell shows.
ml_model_df = pd.DataFrame(
    [{
        'ml_model_name': model_name,
        'ml_model_type': KerasNN_model.model_type,
        'ml_model_date': ml_model_date,
        'ml_model_json': ml_model_json,
    }],
    columns=['ml_model_name', 'ml_model_type', 'ml_model_date', 'ml_model_json'],
)

print(ml_model_df)

                          ml_model_name     ml_model_type ml_model_date  \
0  2024-01-01_15-39-39_Boston311KerasNN  Boston311KerasNN    2024-01-01   

                                       ml_model_json  
0  {"feature_columns": ["queue", "subject", "reas...  


In [12]:
# Case table for export: the prediction data minus the columns listed here.
non_export_columns = ['geom_4326', 'survival_time_hours', 'survival_time', 'event']
all_model_cases = prediction_data.drop(columns=non_export_columns).copy()

In [13]:

# NOTE: this binds a second name to the same DataFrame (no copy is made), so columns
# assigned to all_model_predictions also appear on predictions_df.
all_model_predictions = predictions_df

In [14]:
# Label every prediction row with the model name derived from the JSON file name.
all_model_predictions['ml_model_name'] = model_name

In [15]:
# %%
#get current datetime in Boston timezone as string
from datetime import datetime
from pytz import timezone
import pytz
# Take ONE timestamp in Boston's timezone and derive both strings from it, so the
# date part and the time part of my_datetime can never fall on either side of midnight.
boston = timezone('US/Eastern')
now = datetime.now(boston)
# NOTE: this overwrites the today_datestring obtained from get_datestrings() earlier.
today_datestring = now.strftime("%Y-%m-%d")
# Time of day written with dashes so it can be used in a file name.
time_string = now.strftime("%H-%M-%S")
# Prefix shared by every exported file: YYYY-MM-DD_HH-MM-SS.
my_datetime = today_datestring + '_' + time_string

In [16]:
# Stamp every prediction row with the date string held in today_datestring.
all_model_predictions['prediction_date'] = today_datestring

In [17]:


# %%
# Write the three export tables to the working directory, each named
# <my_datetime> + suffix and saved without the index column.
export_frames = (
    ('_311_cases.csv', all_model_cases),
    ('_311_predictions.csv', all_model_predictions),
    ('_311_ml_models.csv', ml_model_df),
)
for suffix, frame in export_frames:
    frame.to_csv(my_datetime + suffix, index=False)

# %%
#create the <my_datetime>_manifest.txt file listing the exported CSV names, one per line
manifest_suffixes = ['_311_cases.csv', '_311_ml_models.csv', '_311_predictions.csv']
with open(my_datetime+'_manifest.txt', 'w') as f:
    f.writelines(my_datetime + suffix + '\n' for suffix in manifest_suffixes)

# %%
#create an export folder

SCP_COMMAND = "scp -i /home/briarmoss/.ssh/id_rsa_new" 
EXPORT_FOLDER = "briarmoss@10.0.0.81:/home/briarmoss/Documents/BODC-DEI-site/database/seeders/"

#copy the csv files to the export folder
! {SCP_COMMAND} {my_datetime}_311_cases.csv {EXPORT_FOLDER}
! {SCP_COMMAND} {my_datetime}_311_predictions.csv {EXPORT_FOLDER}
! {SCP_COMMAND} {my_datetime}_311_ml_models.csv {EXPORT_FOLDER}
! {SCP_COMMAND} {my_datetime}_manifest.txt {EXPORT_FOLDER}



# %% [markdown]
# **Copy the files to the production and/or staging server**

# %%
# Connection details shared by the scp and ssh commands below.
PROD_USER = 'u353344964'
PROD_HOSTNAME = '195.179.236.61'
PORT_NUMBER = 65002

# Application base folders on the server (production and staging).
PROD_BASE_FOLDER = '/home/u353344964/domains/bodc-dei.org/laravel'
STAGE_BASE_FOLDER = '/home/u353344964/domains/bodc-dei.org/stagelaravel'

# The seeder folders sit at database/seeders under each base folder.
PROD_EXPORT_FOLDER = f'{PROD_BASE_FOLDER}/database/seeders'
STAGE_EXPORT_FOLDER = f'{STAGE_BASE_FOLDER}/database/seeders'

# %%







2024-01-01_15-46-43_311_cases.csv               0%    0     0.0KB/s   --:-- ETA

2024-01-01_15-46-43_311_cases.csv             100% 9367KB   2.7MB/s   00:03    
2024-01-01_15-46-43_311_predictions.csv       100%   15MB   2.4MB/s   00:06    
2024-01-01_15-46-43_311_ml_models.csv         100% 9675     1.7MB/s   00:00    
2024-01-01_15-46-43_manifest.txt              100%  112    13.2KB/s   00:00    


In [18]:
import os


def scp_to_server(filename, user=PROD_USER, hostname=PROD_HOSTNAME, port=PORT_NUMBER, export_folder=PROD_EXPORT_FOLDER):
    """Copy a local file to a remote server with ``scp``.

    Args:
        filename: Path of the local file to copy.
        user: Remote login name.
        hostname: Remote host.
        port: SSH port, passed to ``scp -P``.
        export_folder: Destination directory on the remote host.

    Returns:
        The exit status of ``scp`` (0 on success). A failure is reported on
        stdout rather than raised, so a batch of copies keeps going.
    """
    import subprocess  # local import so this cell does not depend on another cell's imports

    command = f"scp -P {port} {filename} {user}@{hostname}:{export_folder}"
    print(f"Executing: {command}")

    # Pass the arguments as a list (no shell), so spaces or shell metacharacters
    # in a path cannot change the command that is run.
    argv = ["scp", "-P", str(port), str(filename), f"{user}@{hostname}:{export_folder}"]
    try:
        status = subprocess.run(argv).returncode
    except OSError as exc:
        # e.g. scp is not installed; 127 is the shell's "command not found" status
        print(f"Could not run scp: {exc}")
        return 127

    if status != 0:
        print(f"scp exited with status {status} while copying {filename}")
    return status

# Files produced by the export step above, in the order they are copied.
files_to_copy = [
    my_datetime + suffix
    for suffix in (
        "_311_cases.csv",
        "_311_ml_models.csv",
        "_311_predictions.csv",
        "_manifest.txt",
    )
]

# Switches selecting which environments receive the files.
copy_to_prod = False
copy_to_stage = True

# Destination folders for the enabled environments (production first, then staging).
destination_folders = []
if copy_to_prod:
    destination_folders.append(PROD_EXPORT_FOLDER)
if copy_to_stage:
    destination_folders.append(STAGE_EXPORT_FOLDER)

for file in files_to_copy:
    for folder in destination_folders:
        scp_to_server(file, export_folder=folder)


# %%
# Bare expression: displays the configured SSH port as the cell output.
PORT_NUMBER

65002

In [19]:

if copy_to_prod:
    ! ssh -p {PORT_NUMBER} {PROD_USER}@{PROD_HOSTNAME} 'cd {PROD_BASE_FOLDER}; php artisan db:seed --class=ThreeOneOneSeeder'

if copy_to_stage:
    ! ssh -p {PORT_NUMBER} {PROD_USER}@{PROD_HOSTNAME} 'cd {STAGE_BASE_FOLDER}; php artisan db:seed --class=ThreeOneOneSeeder'