# PART 3: NAVIGATOR APPLICATION

**Objective:** This notebook will implement technician-navigation to study the potential benefits in productivity and operational costs.

---

In [1]:
# Data Management
import numpy as np
import pandas as pd

# Modeling
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

# Data Visualization
import geopandas as gpd
import folium
import folium.plugins as plugins
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Utils
import ast
import datetime
import ipywidgets as widgets
import random
import warnings
from datetime import datetime, timedelta
from IPython.display import display, HTML
from ipywidgets import interact, interact_manual
import pprint
from random import choice, choices, shuffle, randrange
from time import sleep
%matplotlib inline
sns.set_style('white')
sns.set_color_codes()
plt.style.use('default')
warnings.filterwarnings("ignore")
pp = pprint.PrettyPrinter(indent=0)

---
---

## 3A: Setup

**Objective**: Read in the clinic data & necessary patient records files.

---

In [2]:
past_patients_df = pd.read_csv('./uc_past_patients.csv', index_col='pt_id')
# past_patients_df

In [3]:
new_patients_df = pd.read_csv('./uc_new_patients.csv', index_col='pt_id')
# new_patients_df

In [4]:
clinics_df = pd.read_csv('./uc_clinics.csv', index_col='branch_name')
# clinics_df

---
---

## 3B: Data Processing

**Objective**: Execute minor cleaning implementations due to lost data characteristics from CSV read-in.

---

#### Convert nearby_clinics info from str to original list format:

In [5]:
clinics_df['nearby_clinics'] = clinics_df.nearby_clinics.apply(lambda x: ast.literal_eval(x))  

In [6]:
clinics_df

Unnamed: 0_level_0,lat,lon,to_denver,to_edgewater,to_wheatridge,to_rino,to_lakewood,nearby_clinics
branch_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
denver,39.739064,-104.989697,0.0,5.1,6.3,2.0,8.0,"[(rino, 2.0), (edgewater, 5.1), (wheatridge, 6..."
edgewater,39.753954,-105.067788,5.0,0.0,2.0,7.7,4.4,"[(wheatridge, 2.0), (lakewood, 4.4), (denver, ..."
wheatridge,39.766857,-105.081983,6.3,2.0,0.0,7.8,4.8,"[(edgewater, 2.0), (lakewood, 4.8), (denver, 6..."
rino,39.767328,-104.981132,1.9,7.8,7.5,0.0,10.7,"[(denver, 1.9), (wheatridge, 7.5), (edgewater,..."
lakewood,39.704552,-105.079883,7.5,4.3,12.0,11.0,0.0,"[(edgewater, 4.3), (denver, 7.5), (rino, 11.0)..."


#### Create appropriate datetime objects for visit date & check-in/out times:

In [7]:
def clean_pt_records(df):
    """Convers datetime info to appropriate formats from csv read-in data."""
    
    # Create unified date/time attribute for sorting purposes
    datetime_str = []
    for i in zip(df.visit_date, df.checkin_time):
        datetime_str.append(i[0] + ' ' + i[1])
    df['datetime'] = datetime_str

    # Sort based on datetime strings
    df = df.sort_values('datetime') #[50:200]

    # Rearrange column order
    df = df[['datetime'] + df.columns.tolist()[:-1]]
    
    # Convert visit date & check-in/out columns to datetime objects (csv converts them into strings)
    df['visit_date'] = df.visit_date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d')).apply(lambda x: x.date())
    df['checkin_time'] = df.checkin_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    df['checkout_time'] = df.checkout_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    
    return df

In [8]:
past_patients_df = clean_pt_records(past_patients_df)
new_patients_df = clean_pt_records(new_patients_df)

In [9]:
past_patients_df

Unnamed: 0_level_0,datetime,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
4000008,2021-05-01 08:06:42,Amy Everett,2008-01-26,14,rino,injury/accident,5,2021-05-01,Saturday,08:06:42,09:20:42,1,0.0,5,1
4000019,2021-05-01 08:09:46,Elijah Bryant,1988-11-12,33,rino,stomach-pain,5,2021-05-01,Saturday,08:09:46,09:36:46,2,5.0,5,1
3000015,2021-05-01 08:11:35,Jerry Trevino,1970-05-25,51,wheatridge,vaccination,3,2021-05-01,Saturday,08:11:35,08:33:35,1,0.0,5,1
5000001,2021-05-01 08:15:27,Sheila Campbell,1976-02-07,46,lakewood,cold/flu/fever,4,2021-05-01,Saturday,08:15:27,09:16:27,1,0.0,4,1
4000011,2021-05-01 08:17:01,Linda Wong,1961-08-09,60,rino,stomach-pain,5,2021-05-01,Saturday,08:17:01,09:51:01,3,5.0,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023237,2022-04-30 19:27:59,Carl Mitchell DVM,1974-09-29,47,lakewood,rash/allergy,5,2022-04-30,Saturday,19:27:59,20:16:47,8,3.9,5,3
1026198,2022-04-30 19:35:37,Michael Payne,1992-11-23,29,denver,injury/accident,5,2022-04-30,Saturday,19:35:37,20:49:37,2,3.0,5,1
1026191,2022-04-30 19:39:02,John Sanders,2006-12-27,15,denver,cold/flu/fever,4,2022-04-30,Saturday,19:39:02,20:46:02,3,4.0,5,1
2017987,2022-04-30 19:47:23,Jessica Hickman,1989-10-29,32,edgewater,pink-eye,4,2022-04-30,Saturday,19:47:23,20:14:23,1,0.0,4,1


---
---

## 3C: Technician Navigation

**Objective**: Implement technician navigation strategies based on past patient records.

---

In [10]:
df = past_patients_df.copy()  # making copy to be used for test run below

### Re-calibrate scheduling to a lower number of assigned techs per clinic:

In [11]:
### APPROACH 1: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY SPECIFIED AMOUNT 

# Define variables based on desired specs
reduce_by = 1

# Reduce tech assignments by specified amount
df['new_num_techs'] = df.assigned_num_techs.apply(lambda x: x-1)

In [12]:
# ### APPROACH 2: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY DESIRED QUANTILE 

# # Define variables based on desired specs
# q = 0.75

# # Retrieve specified quantile rolling count based on each day/location's aggregated data
# schedule_df = df.groupby(['visit_location', 'visit_date', 'visit_day']) \
#     .quantile(q=q).reset_index(drop=False)[['visit_location', 'visit_date', 'rolling_ct']]

# # Compute new tech assignments based on desired quantile of rolling patient count
# schedule_df['new_num_techs'] = schedule_df.rolling_ct.apply(lambda x: int(x/3)+1 if x%3 != 0 else int(x/3))

# # Create dictionary object consisting of assigned number of techs per date&location
# schedule_zipper = zip(schedule_df.visit_date, schedule_df.visit_location, schedule_df.new_num_techs)
# schedule_dict = {}
# for i in schedule_zipper:
#     schedule_dict[(i[0], i[1])] = i[2]

# # Assign techs in patient logs based on created dictionary
# df['new_num_techs'] = df[['visit_date', 'visit_location']] \
#     .apply(lambda x: (x[0], x[1]), axis=1) \
#     .map(schedule_dict)

In [13]:
df

Unnamed: 0_level_0,datetime,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs,new_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
4000008,2021-05-01 08:06:42,Amy Everett,2008-01-26,14,rino,injury/accident,5,2021-05-01,Saturday,08:06:42,09:20:42,1,0.0,5,1,4
4000019,2021-05-01 08:09:46,Elijah Bryant,1988-11-12,33,rino,stomach-pain,5,2021-05-01,Saturday,08:09:46,09:36:46,2,5.0,5,1,4
3000015,2021-05-01 08:11:35,Jerry Trevino,1970-05-25,51,wheatridge,vaccination,3,2021-05-01,Saturday,08:11:35,08:33:35,1,0.0,5,1,4
5000001,2021-05-01 08:15:27,Sheila Campbell,1976-02-07,46,lakewood,cold/flu/fever,4,2021-05-01,Saturday,08:15:27,09:16:27,1,0.0,4,1,3
4000011,2021-05-01 08:17:01,Linda Wong,1961-08-09,60,rino,stomach-pain,5,2021-05-01,Saturday,08:17:01,09:51:01,3,5.0,5,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023237,2022-04-30 19:27:59,Carl Mitchell DVM,1974-09-29,47,lakewood,rash/allergy,5,2022-04-30,Saturday,19:27:59,20:16:47,8,3.9,5,3,4
1026198,2022-04-30 19:35:37,Michael Payne,1992-11-23,29,denver,injury/accident,5,2022-04-30,Saturday,19:35:37,20:49:37,2,3.0,5,1,4
1026191,2022-04-30 19:39:02,John Sanders,2006-12-27,15,denver,cold/flu/fever,4,2022-04-30,Saturday,19:39:02,20:46:02,3,4.0,5,1,4
2017987,2022-04-30 19:47:23,Jessica Hickman,1989-10-29,32,edgewater,pink-eye,4,2022-04-30,Saturday,19:47:23,20:14:23,1,0.0,4,1,3


### Simulate tech-navigation for past patient records:

Thoughts, Ideas, Areas for Improvement:

- Right now, tech count is being updated instantaenously, rather than realistic 10-15 min interval after (to account for drive time, etc.)

- Implement time-series ML?? If nearest clinic has only 1 available tech to send, then conduct time series to predict if patient count for that clinic will increase or decrease. If increase, then use next location to pull tech from, otherwise, use the current (nearest) location. ML not needed if available count is >1 because that clinic still has extra hands available and it would be a waste of computational power to conduct ML at that point

In [14]:
# Define tracker dict that holds updated values at each patient check-in log
d = {'denver': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'edgewater': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'wheatridge': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}, 
     'rino': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
     'lakewood': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}}

# Iterate through each record (evaluate situation at each patient check-in)
for indx, row in df.iterrows():
    
    # Update tracker dict with new corresponding values
    location = row['visit_location']
    checkin_time = row['checkin_time']
    num_techs = row['new_num_techs']
    needed_techs = row['needed_num_techs']
    d[location]['checkin_time'] = checkin_time
    d[location]['num_techs'] = num_techs + d[location]['num_movements']
    d[location]['needed_techs'] = needed_techs + d[location]['num_movements']
    available_techs = d[location]['num_techs'] - d[location]['needed_techs']
    d[location]['available_techs'] = available_techs
    # pp.pprint(d[location])
    
    # If location was already flagged for hitting patient:tech threshold
    if d[location]['flag'] == 1:
        
        # debug(to be removed)
        # print(location)
        # print('flag was 1')
        # pp.pprint(d[location])
        # print()
        
        # If location of current patient check-in log exceeds threshold
        if d[location]['num_techs'] < d[location]['needed_techs']:
            print(f"{checkin_time}: {location.capitalize()} needs {needed_techs - num_techs} tech!")
            # avail = [(i, d[i]['available_techs']) for i in d.keys() if d[i]['available_techs'] > 0]
            
            # Get technician availability from nearby clinics, sorted by distance
            available = []
            for clinic in clinics_df.loc[location, 'nearby_clinics']:
                avail_techs = d[clinic[0]]['available_techs']
                if avail_techs > 0:
                    available.append(clinic)
            print("Check to see what's available: ", available)
            
            # If there is an available technician
            if len(available) > 0:
                
                # Pull tech from nearby clinic
                pull_from = available[0][0]
                
                print('Pull tech from:' , pull_from.capitalize(), f' ({d[pull_from]["available_techs"]} available)')
                print(f'Original amount of techs at {location.capitalize()}: {num_techs}')
                print(f'Original amount of techs at {pull_from.capitalize()}: {d[pull_from]["num_techs"]}')
                # print('Before move, original location: ', d[location])
                # print('Before move, pull from: ', d[pull_from])

                # Update tracker dict by subtracting 1 tech from pull_from 
                d[pull_from]['num_movements'] = -1
                # Update tracker dict by adding 1 to location
                d[location]['num_movements'] = 1
                
                # print('After move, original location: ', d[location])
                # print('After move, pulled from: ', d[pull_from])
                print(f'New amount of techs at {location.capitalize()}: {num_techs + d[location]["num_movements"]}')
                print(f'New amount of techs at {pull_from.capitalize()}: {d[pull_from]["num_techs"] + d[pull_from]["num_movements"]}')
                print()
                
            else:
                print('No movement at this time; all clinics are busy.')

    else:
        # If location is not yet flagged, flag the first threshold hit
        if d[location]['num_techs'] == d[location]['needed_techs']:
            d[location]['flag'] = 1
            # print(location)
            # print('flag = 1')
            # pp.pprint(d[location])
            # print()

09:14:59: Rino needs 1 tech!
Check to see what's available:  [('wheatridge', 7.5), ('lakewood', 10.7)]
Pull tech from: Wheatridge  (3 available)
Original amount of techs at Rino: 4
Original amount of techs at Wheatridge: 4
New amount of techs at Rino: 5
New amount of techs at Wheatridge: 3

11:07:27: Denver needs 1 tech!
Check to see what's available:  [('rino', 2.0), ('wheatridge', 6.3), ('lakewood', 8.0)]
Pull tech from: Rino  (2 available)
Original amount of techs at Denver: 3
Original amount of techs at Rino: 5
New amount of techs at Denver: 4
New amount of techs at Rino: 4

12:11:55: Edgewater needs 1 tech!
Check to see what's available:  [('lakewood', 4.4), ('denver', 5.0), ('rino', 7.7)]
Pull tech from: Lakewood  (1 available)
Original amount of techs at Edgewater: 2
Original amount of techs at Lakewood: 3
New amount of techs at Edgewater: 3
New amount of techs at Lakewood: 2

12:14:06: Edgewater needs 1 tech!
Check to see what's available:  [('lakewood', 4.4), ('denver', 5.0), 