# PART 3: NAVIGATOR APPLICATION

**Objective:** This notebook will implement technician-navigation to study the potential benefits in productivity and operational costs.

---

In [167]:
# Data Management
import numpy as np
import pandas as pd

# Modeling
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Data Visualization
import geopandas as gpd
import folium
import folium.plugins as plugins
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Utils
import ast
import datetime
import ipywidgets as widgets
import random
import warnings
from datetime import datetime, timedelta
from IPython.display import display, HTML
from ipywidgets import interact, interact_manual
import pprint
from random import choice, choices, shuffle, randrange
from time import sleep
%matplotlib inline
sns.set_style('white')
sns.set_color_codes()
plt.style.use('default')
warnings.filterwarnings("ignore")
pp = pprint.PrettyPrinter(indent=0)

---
---

## 3A: Setup

**Objective**: Read in the clinic data & necessary patient records files.

---

In [168]:
past_patients_df = pd.read_csv('./uc_past_patients.csv', index_col='pt_id')
# past_patients_df

In [169]:
new_patients_df = pd.read_csv('./uc_new_patients.csv', index_col='pt_id')
# new_patients_df

In [170]:
clinics_df = pd.read_csv('./uc_clinics.csv', index_col='branch_name')
# clinics_df

---
---

## 3B: Data Processing

**Objective**: Execute minor cleaning implementations due to lost data characteristics from CSV read-in.

---

#### Convert nearby_clinics info from str to original list format:

In [171]:
clinics_df['nearby_clinics'] = clinics_df.nearby_clinics.apply(lambda x: ast.literal_eval(x))  

In [172]:
clinics_df

Unnamed: 0_level_0,lat,lon,to_denver,to_edgewater,to_wheatridge,to_rino,to_lakewood,nearby_clinics
branch_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
denver,39.739064,-104.989697,0,12,14,7,14,"[(rino, 7.0), (edgewater, 12.0), (wheatridge, ..."
edgewater,39.753954,-105.067788,14,0,5,12,9,"[(wheatridge, 5.0), (lakewood, 9.0), (rino, 12..."
wheatridge,39.766857,-105.081983,14,5,0,10,9,"[(edgewater, 5.0), (lakewood, 9.0), (rino, 10...."
rino,39.767328,-104.981132,6,14,14,0,14,"[(denver, 6.0), (edgewater, 14.0), (wheatridge..."
lakewood,39.704552,-105.079883,12,8,8,12,0,"[(edgewater, 8.0), (wheatridge, 8.0), (denver,..."


#### Create appropriate datetime objects for visit date & check-in/out times:

In [173]:
def clean_pt_records(df):
    """Convers datetime info to appropriate formats from csv read-in data."""
    
    # Create unified date/time attribute for sorting purposes
    datetime_str = []
    for i in zip(df.visit_date, df.checkin_time):
        datetime_str.append(i[0] + ' ' + i[1])
    df['datetime'] = datetime_str

    # Sort based on datetime strings
    df = df.sort_values('datetime') #[50:200]

    # Rearrange column order
    df = df[['datetime'] + df.columns.tolist()[:-1]]
    
    # Convert visit date & check-in/out columns to datetime objects (csv converts them into strings)
    df['visit_date'] = df.visit_date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d')).apply(lambda x: x.date())
    df['checkin_time'] = df.checkin_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    df['checkout_time'] = df.checkout_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    
    return df

In [174]:
past_patients_df = clean_pt_records(past_patients_df)
new_patients_df = clean_pt_records(new_patients_df)

In [175]:
past_patients_df

Unnamed: 0_level_0,datetime,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2000002,2021-05-01 08:00:14,Taylor Hamilton,1999-10-10,22,edgewater,cough,4,2021-05-01,Saturday,08:00:14,08:48:14,1,0.0,5,1
2000004,2021-05-01 08:00:49,Larry Hamilton,2011-04-04,11,edgewater,cough,4,2021-05-01,Saturday,08:00:49,08:57:49,2,4.0,5,1
2000009,2021-05-01 08:01:09,Connor Murray,1970-09-23,51,edgewater,cough,4,2021-05-01,Saturday,08:01:09,08:55:09,3,4.0,5,1
2000003,2021-05-01 08:03:05,Amanda Russell,1948-02-04,74,edgewater,cold/flu/fever,4,2021-05-01,Saturday,08:03:05,09:15:05,4,4.0,5,2
4000002,2021-05-01 08:03:35,Lucas Mccullough,2004-03-08,18,rino,drug-test,3,2021-05-01,Saturday,08:03:35,08:26:35,1,0.0,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023061,2022-04-30 19:42:24,Johnny Dean,1942-05-13,79,lakewood,weakness/dizziness,5,2022-04-30,Saturday,19:42:24,21:05:00,8,4.3,5,3
5023069,2022-04-30 19:42:38,Stacy Roman,1989-03-20,33,lakewood,cold/flu/fever,4,2022-04-30,Saturday,19:42:38,20:50:26,9,4.4,5,3
5023075,2022-04-30 19:45:20,Andrew Orozco,2015-11-01,6,lakewood,UTI,4,2022-04-30,Saturday,19:45:20,20:37:56,10,4.3,5,4
5023062,2022-04-30 19:47:30,Samuel Duffy,2001-07-20,20,lakewood,covid-test,4,2022-04-30,Saturday,19:47:30,20:17:06,11,4.3,5,4


---
---

## 3C: Technician Navigation

**Objective**: Implement technician navigation strategies based on past patient records.

---

Use past patients for ML part, feed new patients to model.

In [176]:
df = past_patients_df.copy()  # making copy to be used for test run below

### Re-calibrate scheduling to a lower number of assigned techs per clinic:

In [177]:
### APPROACH 1: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY SPECIFIED AMOUNT 

# Define variables based on desired specs
reduce_by = 1

# Reduce tech assignments by specified amount
df['new_num_techs'] = df.assigned_num_techs.apply(lambda x: x-1)

In [178]:
# ### APPROACH 2: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY DESIRED QUANTILE 

# # Define variables based on desired specs
# q = 0.75

# # Retrieve specified quantile rolling count based on each day/location's aggregated data
# schedule_df = df.groupby(['visit_location', 'visit_date', 'visit_day']) \
#     .quantile(q=q).reset_index(drop=False)[['visit_location', 'visit_date', 'rolling_ct']]

# # Compute new tech assignments based on desired quantile of rolling patient count
# schedule_df['new_num_techs'] = schedule_df.rolling_ct.apply(lambda x: int(x/3)+1 if x%3 != 0 else int(x/3))

# # Create dictionary object consisting of assigned number of techs per date&location
# schedule_zipper = zip(schedule_df.visit_date, schedule_df.visit_location, schedule_df.new_num_techs)
# schedule_dict = {}
# for i in schedule_zipper:
#     schedule_dict[(i[0], i[1])] = i[2]

# # Assign techs in patient logs based on created dictionary
# df['new_num_techs'] = df[['visit_date', 'visit_location']] \
#     .apply(lambda x: (x[0], x[1]), axis=1) \
#     .map(schedule_dict)

In [179]:
df = df[:100]
df

Unnamed: 0_level_0,datetime,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs,new_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000002,2021-05-01 08:00:14,Taylor Hamilton,1999-10-10,22,edgewater,cough,4,2021-05-01,Saturday,08:00:14,08:48:14,1,0.0,5,1,4
2000004,2021-05-01 08:00:49,Larry Hamilton,2011-04-04,11,edgewater,cough,4,2021-05-01,Saturday,08:00:49,08:57:49,2,4.0,5,1,4
2000009,2021-05-01 08:01:09,Connor Murray,1970-09-23,51,edgewater,cough,4,2021-05-01,Saturday,08:01:09,08:55:09,3,4.0,5,1,4
2000003,2021-05-01 08:03:05,Amanda Russell,1948-02-04,74,edgewater,cold/flu/fever,4,2021-05-01,Saturday,08:03:05,09:15:05,4,4.0,5,2,4
4000002,2021-05-01 08:03:35,Lucas Mccullough,2004-03-08,18,rino,drug-test,3,2021-05-01,Saturday,08:03:35,08:26:35,1,0.0,4,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1000024,2021-05-01 11:38:52,Dawn Jackson,1998-05-06,24,denver,cold/flu/fever,4,2021-05-01,Saturday,11:38:52,12:50:28,4,4.3,5,2,4
3000003,2021-05-01 11:39:03,Karen Tucker,1966-04-25,56,wheatridge,chest-pain,5,2021-05-01,Saturday,11:39:03,13:03:03,3,4.0,4,1,3
5000021,2021-05-01 11:42:38,Rodney Stark,2000-02-23,22,lakewood,rash/allergy,5,2021-05-01,Saturday,11:42:38,12:36:38,4,5.0,4,2,3
1000015,2021-05-01 11:47:01,Tracey Winters,1988-08-12,33,denver,UTI,4,2021-05-01,Saturday,11:47:01,12:37:01,3,4.0,5,1,4


### Simulate tech-navigation for past patient records:

Thoughts, Ideas, Areas for Improvement:

- Right now, tech count is being updated instantaenously, rather than realistic 10-15 min interval after (to account for drive time, etc.)

- Implement time-series ML?? If nearest clinic has only 1 available tech to send, then conduct time series to predict if patient count for that clinic will increase or decrease. If increase, then use next location to pull tech from, otherwise, use the current (nearest) location. ML not needed if available count is >1 because that clinic still has extra hands available and it would be a waste of computational power to conduct ML at that point

In [180]:
# Define tracker dict that holds updated values at each patient check-in log
d = {'denver': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'edgewater': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'wheatridge': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}, 
     'rino': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'lakewood': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}}

# Iterate through each record (evaluate situation at each patient check-in)
for indx, row in df.iterrows():
    
    # Update tracker dict with new corresponding values
    location = row['visit_location']
    checkin_time = row['checkin_time']
    num_techs = row['new_num_techs']
    needed_techs = row['needed_num_techs']
    d[location]['checkin_time'] = checkin_time
    d[location]['num_techs'] = num_techs + d[location]['num_movements']
    d[location]['needed_techs'] = needed_techs + d[location]['num_movements']
    available_techs = d[location]['num_techs'] - d[location]['needed_techs']
    d[location]['available_techs'] = available_techs
    # pp.pprint(d[location])
    
    # If location was already flagged for hitting patient:tech threshold
    if d[location]['flag'] == 1:
        
        # debug(to be removed)
        # print(location)
        # print('flag was 1')
        # pp.pprint(d[location])
        # print()
        
        # If location of current patient check-in log exceeds threshold
        if d[location]['num_techs'] < d[location]['needed_techs']:
            print(f"{checkin_time}: {location.capitalize()} needs {needed_techs - num_techs} tech!")
            # avail = [(i, d[i]['available_techs']) for i in d.keys() if d[i]['available_techs'] > 0]
            
            # Get technician availability from nearby clinics, sorted by distance
            available = []
            for clinic in clinics_df.loc[location, 'nearby_clinics']:
                avail_techs = d[clinic[0]]['available_techs']
                if avail_techs > 0:
                    available.append(clinic)
            print("Check to see what's available: ", available)
            
            # If there is an available technician
            if len(available) > 0:
                
                # Pull tech from nearby clinic
                pull_from = available[0][0]
                num_avail = d[pull_from]["available_techs"]
                if num_avail == 1:
                    print(True)
                    # What is the prob that in the next hour, there would be (# patients to break threshold) at (pull from), based on 
                    # past data
                    # what day is it? What hour is it?
                    # Look around that time - what was the average rolling count in that range. Compare to our current rollign count. Above 3 dont move, below 3 move
                print('Pull tech from:' , pull_from.capitalize(), f' ({num_avail} available)')

                departure_time = datetime.strptime(str(checkin_time), '%H:%M:%S') + timedelta(minutes=2, seconds=random.choice([i for i in range(60)]))

                print(f'Tech from {pull_from} left at {departure_time.time()}')
                print(f'Original amount of techs at {location.capitalize()}: {num_techs}')
                print(f'Original amount of techs at {pull_from.capitalize()}: {d[pull_from]["num_techs"]}')


                # Update tracker dict by subtracting 1 tech from pull_from 
                d[pull_from]['num_movements'] = -1
                # Update tracker dict by adding 1 to location
                d[location]['num_movements'] = 1
                
                travel_time = int(clinics_df.loc[pull_from, 'to_'+location]) + random.choice([0, 2, 3, 4, 5, 6])
                arrival_time = departure_time + timedelta(minutes=int(travel_time), seconds=random.choice([i for i in range(60)]))
                print(f"Tech from {pull_from} arrived at {location} at {arrival_time.time()}")
                print(f'New amount of techs at {location.capitalize()}: {num_techs + d[location]["num_movements"]}')
                print(f'New amount of techs at {pull_from.capitalize()}: {d[pull_from]["num_techs"] + d[pull_from]["num_movements"]}')
                print()
                
            else:
                print('No movement at this time; all clinics are busy.')

    else:
        # If location is not yet flagged, flag the first threshold hit
        if d[location]['num_techs'] == d[location]['needed_techs']:
            d[location]['flag'] = 1
            # print(location)
            # print('flag = 1')
            # pp.pprint(d[location])
            # print()

08:23:53: Edgewater needs 1 tech!
Check to see what's available:  [('wheatridge', 5.0), ('rino', 12.0)]
Pull tech from: Wheatridge  (2 available)
Tech from wheatridge left at 08:26:48
Original amount of techs at Edgewater: 4
Original amount of techs at Wheatridge: 3
Tech from wheatridge arrived at edgewater at 08:34:55
New amount of techs at Edgewater: 5
New amount of techs at Wheatridge: 2

08:27:40: Edgewater needs 1 tech!
Check to see what's available:  [('wheatridge', 5.0), ('rino', 12.0)]
Pull tech from: Wheatridge  (2 available)
Tech from wheatridge left at 08:30:22
Original amount of techs at Edgewater: 4
Original amount of techs at Wheatridge: 3
Tech from wheatridge arrived at edgewater at 08:39:32
New amount of techs at Edgewater: 5
New amount of techs at Wheatridge: 2

08:32:26: Edgewater needs 1 tech!
Check to see what's available:  [('wheatridge', 5.0), ('rino', 12.0)]
Pull tech from: Wheatridge  (2 available)
Tech from wheatridge left at 08:35:01
Original amount of techs a