# PART 3: NAVIGATOR APPLICATION

**Objective:** This notebook will implement technician-navigation to study the potential benefits in productivity and operational costs.

---

In [1]:
# Data Management
import numpy as np
import pandas as pd

# Modeling
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Data Visualization
import geopandas as gpd
import folium
import folium.plugins as plugins
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Utils
import ast
import datetime
import ipywidgets as widgets
import random
import warnings
from datetime import datetime, timedelta
from IPython.display import display, HTML
from ipywidgets import interact, interact_manual
import pprint
from random import choice, choices, shuffle, randrange
from time import sleep
%matplotlib inline
sns.set_style('white')
sns.set_color_codes()
plt.style.use('default')
warnings.filterwarnings("ignore")
pp = pprint.PrettyPrinter(indent=0)

---
---

## 3A: Setup

**Objective**: Read in the clinic data & necessary patient records files.

---

In [2]:
past_patients_df = pd.read_csv('./uc_past_patients.csv', index_col='pt_id')
past_patients_df.head()

Unnamed: 0_level_0,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1000010,Paul Hammond,1958-02-20,64,denver,weakness/dizziness,5,2021-05-01,Saturday,10:57:51,12:05:51,1,0.0,6,1
1000012,Jennifer Smith,2016-02-08,6,denver,covid-test,4,2021-05-01,Saturday,11:11:23,11:44:23,2,5.0,6,1
1000001,Kevin Howe,1994-02-17,28,denver,drug-test,3,2021-05-01,Saturday,11:31:16,12:08:16,3,4.5,6,1
1000017,Laura Wright,1947-08-08,74,denver,weakness/dizziness,5,2021-05-01,Saturday,11:38:16,13:00:16,3,4.0,6,1
1000007,Debra Huerta,1992-06-29,29,denver,pink-eye,4,2021-05-01,Saturday,11:38:24,12:16:00,4,4.3,6,2


In [3]:
new_patients_df = pd.read_csv('./uc_new_patients.csv', index_col='pt_id')
new_patients_df.head()

Unnamed: 0_level_0,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,checkout_time,rolling_ct,rolling_code,assigned_num_techs,needed_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1026609,James Casey,2017-06-12,4,denver,ache/pain,4,2022-05-01,Sunday,08:18:49,08:57:49,1,0.0,5,1
1026602,Ashley Mccarthy,1976-06-27,45,denver,ear-pain,4,2022-05-01,Sunday,08:26:46,09:04:46,2,4.0,5,1
1026611,Ana Benitez,2017-01-17,5,denver,lab-work,3,2022-05-01,Sunday,08:45:45,09:31:45,3,4.0,5,1
1026595,Miguel Peterson,1995-08-23,26,denver,cough,4,2022-05-01,Sunday,08:50:33,09:46:57,4,3.7,5,2
1026592,Margaret Horton,1984-03-02,38,denver,lab-work,3,2022-05-01,Sunday,09:00:58,09:39:58,3,3.5,5,1


In [4]:
clinics_df = pd.read_csv('./uc_clinics.csv', index_col='branch_name')
# clinics_df

---
---

## 3B: Data Processing

**Objective**: Execute minor cleaning implementations due to lost data characteristics from CSV read-in.

---

#### Convert nearby_clinics info from str to original list format:

In [5]:
clinics_df['nearby_clinics'] = clinics_df.nearby_clinics.apply(lambda x: ast.literal_eval(x))  

In [6]:
clinics_df

Unnamed: 0_level_0,lat,lon,to_denver,to_edgewater,to_wheatridge,to_rino,to_lakewood,nearby_clinics
branch_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
denver,39.739064,-104.989697,0,12,14,7,14,"[(rino, 7.0), (edgewater, 12.0), (wheatridge, ..."
edgewater,39.753954,-105.067788,14,0,5,12,9,"[(wheatridge, 5.0), (lakewood, 9.0), (rino, 12..."
wheatridge,39.766857,-105.081983,14,5,0,10,9,"[(edgewater, 5.0), (lakewood, 9.0), (rino, 10...."
rino,39.767328,-104.981132,6,14,14,0,14,"[(denver, 6.0), (edgewater, 14.0), (wheatridge..."
lakewood,39.704552,-105.079883,12,8,8,12,0,"[(edgewater, 8.0), (wheatridge, 8.0), (denver,..."


#### Create appropriate datetime objects for visit date & check-in/out times:

In [7]:
def clean_pt_records(df):
    """Convers datetime info to appropriate formats from csv read-in data."""
    
    # Create unified date/time attribute for sorting purposes
    datetime_str = []
    for i in zip(df.visit_date, df.checkin_time):
        datetime_str.append(i[0] + ' ' + i[1])
    df['datetime'] = datetime_str

    # Sort based on datetime strings
    df = df.sort_values('datetime') #[50:200]

    # Rearrange column order
    df = df[['datetime'] + df.columns.tolist()[:-1]]
    
    # Convert visit date & check-in/out columns to datetime objects (csv converts them into strings)
    df['visit_date'] = df.visit_date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d')).apply(lambda x: x.date())
    df['checkin_time'] = df.checkin_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    df['checkout_time'] = df.checkout_time.apply(lambda x: datetime.strptime(x, '%H:%M:%S')).apply(lambda x: x.time())
    
    return df

In [8]:
past_patients_df = clean_pt_records(past_patients_df)
new_patients_df = clean_pt_records(new_patients_df)


#### Feature engineer new columns

In [9]:
# Weekend column - 1 for weekend, 0 for weekenday
def feature_engineer(df):
    df['weekend'] = df['visit_day'].copy().apply(lambda x: 1 if x in ['Saturday', 'Sunday'] else 0)
    df['hour'] = df['checkin_time'].copy().apply(lambda x: x.hour)
    df = pd.concat([df, pd.get_dummies(df['visit_location'])], axis=1)
    df['hour'] = df.hour.astype('category')
    # print(df.dtypes)
    return df

In [10]:
new_patients_df = feature_engineer(new_patients_df)
past_patients_df = feature_engineer(past_patients_df)

In [11]:
past_patients_df

Unnamed: 0_level_0,datetime,pt_name,pt_dob,pt_age,visit_location,visit_reason,visit_code,visit_date,visit_day,checkin_time,...,rolling_code,assigned_num_techs,needed_num_techs,weekend,hour,denver,edgewater,lakewood,rino,wheatridge
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4000011,2021-05-01 08:05:11,Linda Wong,2010-10-29,11,rino,stomach-pain,5,2021-05-01,Saturday,08:05:11,...,0.0,4,1,1,8,0,0,0,1,0
2000004,2021-05-01 08:09:09,Larry Hamilton,1956-08-24,65,edgewater,vaccination,3,2021-05-01,Saturday,08:09:09,...,0.0,4,1,1,8,0,1,0,0,0
2000015,2021-05-01 08:10:05,Leah Wright,1946-07-29,75,edgewater,vaccination,3,2021-05-01,Saturday,08:10:05,...,3.0,4,1,1,8,0,1,0,0,0
2000008,2021-05-01 08:10:36,Jasmine James,1979-03-06,43,edgewater,injury/accident,5,2021-05-01,Saturday,08:10:36,...,3.0,4,1,1,8,0,1,0,0,0
4000007,2021-05-01 08:17:26,Drew Bush,1983-04-17,39,rino,cold/flu/fever,4,2021-05-01,Saturday,08:17:26,...,5.0,4,1,1,8,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023118,2022-04-30 19:41:51,Mariah White,1986-05-09,36,lakewood,covid-test,4,2022-04-30,Saturday,19:41:51,...,4.6,4,3,1,19,0,0,1,0,0
1026583,2022-04-30 19:45:58,Matthew Mccullough,1976-08-02,45,denver,cold/flu/fever,4,2022-04-30,Saturday,19:45:58,...,4.0,4,2,1,19,1,0,0,0,0
5023117,2022-04-30 19:50:39,Dennis Wade,1990-12-08,31,lakewood,sore-throat,4,2022-04-30,Saturday,19:50:39,...,4.6,4,4,1,19,0,0,1,0,0
5023120,2022-04-30 19:52:24,John Mccormick,2016-09-05,5,lakewood,covid-test,4,2022-04-30,Saturday,19:52:24,...,4.5,4,4,1,19,0,0,1,0,0


---
---

## 3C: Modeling

**Objective**: Construct, validate, and evaluate model to predict rolling count based on past patient records.

---

In [12]:
X = past_patients_df[['denver', 'edgewater', 'wheatridge', 'rino', 'lakewood', 'weekend', 'hour']].copy()
y = past_patients_df['needed_num_techs'].copy()

In [13]:
X

Unnamed: 0_level_0,denver,edgewater,wheatridge,rino,lakewood,weekend,hour
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4000011,0,0,0,1,0,1,8
2000004,0,1,0,0,0,1,8
2000015,0,1,0,0,0,1,8
2000008,0,1,0,0,0,1,8
4000007,0,0,0,1,0,1,8
...,...,...,...,...,...,...,...
5023118,0,0,0,0,1,1,19
1026583,1,0,0,0,0,1,19
5023117,0,0,0,0,1,1,19
5023120,0,0,0,0,1,1,19


In [14]:
y

pt_id
4000011    1
2000004    1
2000015    1
2000008    1
4000007    1
          ..
5023118    3
1026583    2
5023117    4
5023120    4
5023122    4
Name: needed_num_techs, Length: 109305, dtype: int64

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)

In [16]:
# lr = LinearRegression()
# lr.fit(X_train,y_train)
# y_train_pred = lr.predict(X_train)
# y_test_pred = lr.predict(X_test)
# np.sqrt(mean_squared_error(y_train, y_train_pred)), np.sqrt(mean_squared_error(y_test, y_test_pred))

In [17]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)
print("Train RMSE:", np.sqrt(mean_squared_error(y_train, y_train_pred)))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_test_pred)))


Train RMSE: 0.9534206187645924
Test RMSE: 0.9551495499336166


In [18]:
X_train.loc[1006918]

denver         1
edgewater      0
wheatridge     0
rino           0
lakewood       0
weekend        0
hour          11
Name: 1006918, dtype: int64

In [19]:
X_train[X_train.index == 1006918]

Unnamed: 0_level_0,denver,edgewater,wheatridge,rino,lakewood,weekend,hour
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1006918,1,0,0,0,0,0,11


In [20]:
rf.predict(X_train[X_train.index == 1006918])[0]

2.906637417104901

---
---

## 3D: Technician Navigation

**Objective**: Implement technician navigation strategies based on past patient records.

---

Use past patients for ML part, feed new patients to model.

### Re-calibrate scheduling to a lower number of assigned techs per clinic:

In [21]:
### APPROACH 1: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY SPECIFIED AMOUNT 

# Define variables based on desired specs
reduce_by = 1

# Reduce tech assignments by specified amount
past_patients_df['new_num_techs'] = past_patients_df.assigned_num_techs.apply(lambda x: x-1)
new_patients_df['new_num_techs'] = new_patients_df.assigned_num_techs.apply(lambda x: x-1)
new_patients_df[['visit_location', 'visit_date', 'assigned_num_techs', 'new_num_techs']]

Unnamed: 0_level_0,visit_location,visit_date,assigned_num_techs,new_num_techs
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4020227,rino,2022-05-01,4,3
4020228,rino,2022-05-01,4,3
4020229,rino,2022-05-01,4,3
4020232,rino,2022-05-01,4,3
4020230,rino,2022-05-01,4,3
...,...,...,...,...
3023134,wheatridge,2022-05-31,4,3
2019594,edgewater,2022-05-31,3,2
5025139,lakewood,2022-05-31,5,4
4021961,rino,2022-05-31,4,3


In [22]:
# ### APPROACH 2: REDUCE NUMBER OF CLIENT-BASED SCHEDULED TECHNICIANS ("old way") BY DESIRED QUANTILE 

# # Define variables based on desired specs
# q = 0.75

# # Retrieve specified quantile rolling count based on each day/location's aggregated data
# schedule_df = df.groupby(['visit_location', 'visit_date', 'visit_day']) \
#     .quantile(q=q).reset_index(drop=False)[['visit_location', 'visit_date', 'rolling_ct']]

# # Compute new tech assignments based on desired quantile of rolling patient count
# schedule_df['new_num_techs'] = schedule_df.rolling_ct.apply(lambda x: int(x/3)+1 if x%3 != 0 else int(x/3))

# # Create dictionary object consisting of assigned number of techs per date&location
# schedule_zipper = zip(schedule_df.visit_date, schedule_df.visit_location, schedule_df.new_num_techs)
# schedule_dict = {}
# for i in schedule_zipper:
#     schedule_dict[(i[0], i[1])] = i[2]

# # Assign techs in patient logs based on created dictionary
# df['new_num_techs'] = df[['visit_date', 'visit_location']] \
#     .apply(lambda x: (x[0], x[1]), axis=1) \
#     .map(schedule_dict)

In [23]:
cols = new_patients_df.columns.tolist()

### Simulate tech-navigation for past patient records:

Thoughts, Ideas, Areas for Improvement:

- Right now, tech count is being updated instantaenously, rather than realistic 10-15 min interval after (to account for drive time, etc.)

- Implement time-series ML?? If nearest clinic has only 1 available tech to send, then conduct time series to predict if patient count for that clinic will increase or decrease. If increase, then use next location to pull tech from, otherwise, use the current (nearest) location. ML not needed if available count is >1 because that clinic still has extra hands available and it would be a waste of computational power to conduct ML at that point

In [42]:
# Define tracker dict that holds updated values at each patient check-in log
movements = []
currentNumTechs = []

def TecNav(df):
    global movements
    d = {'denver': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
        'edgewater': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
        'wheatridge': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}, 
        'rino': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0},
        'lakewood': {'checkin_time': None, 'num_techs': 0,'needed_techs': 0, 'flag': 0, 'available_techs': 0, 'num_movements': 0}}

    # Iterate through each record (evaluate situation at each patient check-in)
    for indx, row in df.iterrows():
        
        # Update tracker dict with new corresponding values
        location = row['visit_location']
        checkin_time = row['checkin_time']
        num_techs = row['new_num_techs']
        needed_techs = row['needed_num_techs']
        d[location]['checkin_time'] = checkin_time
        current_num_techs = num_techs + d[location]['num_movements']
        currentNumTechs.append(current_num_techs)
        d[location]['num_techs'] = current_num_techs
        d[location]['needed_techs'] = needed_techs + d[location]['num_movements']
        available_techs = d[location]['num_techs'] - d[location]['needed_techs']
        d[location]['available_techs'] = available_techs
        
        # If location was already flagged for hitting patient:tech threshold
        if d[location]['flag'] == 1:
            
            # If location of current patient check-in log exceeds threshold
            if d[location]['num_techs'] < d[location]['needed_techs']:
                print(f"{checkin_time} - {location.capitalize()} Clinic needs a technician")
                
                # Get technician availability from nearby clinics, sorted by distance
                available = []
                for clinic in clinics_df.loc[location, 'nearby_clinics']:
                    avail_techs = d[clinic[0]]['available_techs']
                    if avail_techs > 0:
                        available.append(clinic)
                print("- Availability: ", [i[0].capitalize() for i in available])                  
                
                # If there is an available technician
                while len(available) > 0:

                    pull_from = available[0][0]
                    num_avail = d[pull_from]["available_techs"]

                    # If pull_from location has only 1 available tech, conduct extra assessment if taking away that tech is feasible
                    if num_avail == 1:
                        df = df[['denver', 'edgewater', 'wheatridge', 'rino', 'lakewood', 'weekend', 'hour']].copy()
                        needed_num_techs_pred = rf.predict(df[df.index == indx])[0]
                        print(f'- {pull_from.capitalize()} only has 1 technician available \n  Deploy ML model to assess if transfer is feasible:')
                        print(f"    - Predicted amount needed = {int(needed_num_techs_pred)} | Current amount needed = {d[pull_from]['needed_techs']}")

                        # If the 1 available tech should remain at the pull_from location, move on to the next available location to pull from
                        if needed_num_techs_pred > d[pull_from]['needed_techs']:
                            print(f"        - ML model recommends no transfer from {pull_from.capitalize()}")
                            available.pop(0)
                            if len(available) == 0:
                                break
                            else:
                                pull_from = available[0][0]
                                print(f'        - Assess if pulling from next nearest clinic at {pull_from.capitalize()} is feasible.')
                                
                        # If only 1 available and we can pull from there
                        else: 
                            print(f'- Model anticipates {pull_from.capitalize()} clinic to become less busy; feasible to pull from this location.')
                            # Look around that time - what was the average rolling count in that range. Compare to our current rollign count. Above 3 dont move, below 3 move
                            print(f"- Pull technician from nearest clinic: {pull_from.capitalize()}, {num_avail} available")

                            departure_time = datetime.strptime(str(checkin_time), '%H:%M:%S') + timedelta(minutes=2, seconds=random.choice([i for i in range(60)]))

                            print(f'- Technician from {pull_from.capitalize()} left at {departure_time.time()}')

                            # Update tracker dict by subtracting 1 tech from pull_from 
                            d[pull_from]['num_movements'] = -1
                            # Update tracker dict by adding 1 to location
                            d[location]['num_movements'] = 1
                            
                            travel_time = int(clinics_df.loc[pull_from, 'to_'+location]) + random.choice([0, 2, 3, 4, 5, 6])
                            arrival_time = departure_time + timedelta(minutes=int(travel_time), seconds=random.choice([i for i in range(60)]))
                            print(f"- Technician from {pull_from.capitalize()} arrived at {location.capitalize()} at {arrival_time.time()}")
                            print(f'- {location.capitalize()}: before count = {num_techs} | after count = {num_techs + d[location]["num_movements"]}')
                            print(f'- {pull_from.capitalize()}: before count = {d[pull_from]["num_techs"]} | after count = {d[pull_from]["num_techs"] + d[pull_from]["num_movements"]}')
                            print()
                            movements.append((pull_from, location))
                            d[location]['flag'] = 0
                            break

                    ### IF NUM AVAILABLE > 1
                    # Look around that time - what was the average rolling count in that range. Compare to our current rollign count. Above 3 dont move, below 3 move
                    print(f"- Pull technician from nearest clinic: {pull_from.capitalize()}, {num_avail} available")
                    departure_time = datetime.strptime(str(checkin_time), '%H:%M:%S') + timedelta(minutes=2, seconds=random.choice([i for i in range(60)]))
                    print(f'- Technician from {pull_from.capitalize()} left at {departure_time.time()}')
                
                    # Update tracker dict by subtracting 1 tech from pull_from 
                    d[pull_from]['num_movements'] = -1
                    # Update tracker dict by adding 1 to location
                    d[location]['num_movements'] = 1
                    
                    travel_time = int(clinics_df.loc[pull_from, 'to_'+location]) + random.choice([0, 2, 3, 4, 5, 6])
                    arrival_time = departure_time + timedelta(minutes=int(travel_time), seconds=random.choice([i for i in range(60)]))
                    print(f"- Technician from {pull_from.capitalize()} arrived at {location.capitalize()} at {arrival_time.time()}")
                    print(f'- {location.capitalize()}: before count = {num_techs} | after count = {num_techs + d[location]["num_movements"]}')
                    print(f'- {pull_from.capitalize()}: before count = {d[pull_from]["num_techs"]} | after count = {d[pull_from]["num_techs"] + d[pull_from]["num_movements"]}') 
                    print()
                    movements.append((pull_from, location))
                    d[location]['flag'] = 0
                    break

                else:
                    print('No movement at this time; all clinics are busy.')   ###  THESE INSTANCES ARE IMPORTANT TO TRACK TO DETERMINE ANY HIRING NEEDS

        else:
            # If location is not yet flagged, flag the first threshold hit
            if d[location]['num_techs'] == d[location]['needed_techs']:
                d[location]['flag'] = 1
  
TecNav(new_patients_df)

08:58:16 - Rino Clinic needs a technician
- Availability:  ['Denver']
- Pull technician from nearest clinic: Denver, 2 available
- Technician from Denver left at 09:01:07
- Technician from Denver arrived at Rino at 09:14:41
- Rino: before count = 3 | after count = 4
- Denver: before count = 4 | after count = 3

12:38:58 - Edgewater Clinic needs a technician
- Availability:  ['Wheatridge', 'Rino', 'Denver']
- Wheatridge only has 1 technician available 
  Deploy ML model to assess if transfer is feasible:
    - Predicted amount needed = 2 | Current amount needed = 2
        - ML model recommends no transfer from Wheatridge
        - Assess if pulling from next nearest clinic at Rino is feasible.
- Pull technician from nearest clinic: Rino, 1 available
- Technician from Rino left at 12:41:21
- Technician from Rino arrived at Edgewater at 13:01:09
- Edgewater: before count = 2 | after count = 3
- Rino: before count = 4 | after count = 3

12:50:08 - Rino Clinic needs a technician
- Availabi

In [25]:
new_patients_df['current_num_techs'] = currentNumTechs

In [26]:
new_patients_df['visit_date'] = new_patients_df['visit_date'].astype(str)
new_patients_df.loc[4020227,'visit_date']

'2022-05-01'

In [27]:
fig = go.Figure()
df = new_patients_df[['checkin_time', 'current_num_techs', 'needed_num_techs', 'visit_date', 'visit_location']].copy()
display(df)
df['checkin_time'] = df.checkin_time.astype('str').apply(lambda x: x[:-3])

# Create subset data for desired date and the two locations to compare
df = df[(df.visit_date == '2022-05-01') & (df.visit_location == 'denver')]

# Construct lineplot for tech-count by location & time
line1 = go.Scatter(
    x=df['checkin_time'], y=df['current_num_techs'], 
    name=f'Current Number of Techs @ Denver clinic', marker_color='blue', mode='lines'
)
line2 = go.Scatter(
    x=df['checkin_time'], y=df['needed_num_techs'], 
    name=f'Needed Number of Techs @ Denver clinic', marker_color='green', mode='lines'
)

# Add subplots to figure object
fig.add_trace(line1)
fig.add_trace(line2)
# Output figure with custom modifications
fig.update_traces(opacity=0.3)
fig.update_layout(height=600, width=1200, showlegend=True, legend_tracegroupgap=250)
fig.show()

Unnamed: 0_level_0,checkin_time,current_num_techs,needed_num_techs,visit_date,visit_location
pt_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4020227,08:02:46,3,1,2022-05-01,rino
4020228,08:03:44,3,1,2022-05-01,rino
4020229,08:07:53,3,1,2022-05-01,rino
4020232,08:08:58,3,2,2022-05-01,rino
4020230,08:15:17,3,2,2022-05-01,rino
...,...,...,...,...,...
3023134,19:47:32,4,2,2022-05-31,wheatridge
2019594,19:48:01,3,3,2022-05-31,edgewater
5025139,19:50:05,3,1,2022-05-31,lakewood
4021961,19:53:55,4,4,2022-05-31,rino


In [28]:
locations = new_patients_df.visit_location.unique().tolist()

# fig = go.Figure()
    
fig = make_subplots(
rows=5, cols=1, 
specs=[[{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}]], 
subplot_titles=["Denver", "Edgewater", "Wheatridge", "RINO", "Lakewood"]
)

# Plot for each location
for i in range(5):
    
    location = locations[i]
    
    # Convert specific times to grouped versions
    df = new_patients_df.copy()
    df['checkin_time'] = df.checkin_time.astype('str').apply(lambda x: x[:-3])

    # Create subset data for desired date and the two locations to compare
    df = df[(df.visit_date == '2021-05-01') & (df.visit_location == location)]

    # Construct lineplot for tech-count by location & time
    line1 = go.Scatter(
        x=df['checkin_time'], y=df['current_num_techs'], 
        name=f'Current Number of Techs @ {location.capitalize()} clinic', marker_color='red', legendgroup=i+1, mode='markers'
    )
    line2 = go.Scatter(
        x=df['checkin_time'], y=df['needed_num_techs'], 
        name=f'Needed Number of Techs @ {location.capitalize()} clinic', marker_color='green', legendgroup=i+1, mode='lines'
    )

    # Add subplots to figure object
    fig.add_trace(line1, row=i+1, col=1)
    fig.add_trace(line2, row=i+1, col=1)

# Output figure with custom modifications
fig.update_traces(opacity=0.3)
fig.update_layout(height=1500, width=1200, showlegend=True, legend_tracegroupgap=250)
fig.show()

In [29]:
# Create iterable objects that are desired for toggle menus
locations = new_patients_df.visit_location.unique().tolist()
dates = new_patients_df.visit_date.unique().tolist()
# fig = go.FigureWidget()

@interact(date=dates)
def generate_staff_count_visual(date=dates[0]):
    """Produces rolling staff-count needed that can be toggled through for specific locations & dates."""
    
    fig = go.FigureWidget()
    
    fig = make_subplots(
    rows=5, cols=1, 
    specs=[[{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}], [{'type':'xy'}]], 
    subplot_titles=["Denver", "Edgewater", "Wheatridge", "RINO", "Lakewood"]
    )
    
    # Plot for each location
    for i in range(5):
        
        location = locations[i]
        
        # Convert specific times to grouped versions
        df = new_patients_df.copy()
        df['checkin_time'] = df.checkin_time.astype('str').apply(lambda x: x[:-3])

        # Create subset data for desired date and the two locations to compare
        df = df[(df.visit_date == date) & (df.visit_location == location)]

        # Construct lineplot for tech-count by location & time
        line1 = go.Scatter(
            x=df['checkin_time'], y=df['current_num_techs'], 
            name=f'Current Number of Techs @ {location.capitalize()} clinic', marker_color='red', legendgroup=i+1, mode='markers'
        )
        line2 = go.Scatter(
            x=df['checkin_time'], y=df['needed_num_techs'], 
            name=f'Needed Number of Techs @ {location.capitalize()} clinic', marker_color='green', legendgroup=i+1, mode='lines'
        )

        # Add subplots to figure object
        fig.add_trace(line1, row=i+1, col=1)
        fig.add_trace(line2, row=i+1, col=1)

    # Output figure with custom modifications
    fig.update_traces(opacity=0.3)
    fig.update_layout(height=1500, width=1200, showlegend=True, legend_tracegroupgap=250)
    fig.show()

interactive(children=(Dropdown(description='date', options=('2022-05-01', '2022-05-02', '2022-05-03', '2022-05…

In [30]:
clinic_distances = {
    ('denver', 'rino'): 2, 
    ('wheatridge', 'edgewater'): 2,
    ('rino', 'denver'): 1.9, 
    ('edgewater', 'wheatridge'): 2,
    ('wheatridge', 'lakewood'): 4.8, 
    ('rino', 'wheatridge'): 7.5,
    ('edgewater', 'denver'): 5,  
    ('lakewood', 'wheatridge'): 12,
    ('wheatridge', 'denver'): 6.3,  
    ('edgewater', 'lakewood'): 4.4,
    ('denver', 'lakewood'): 8,  
    ('lakewood', 'edgewater'): 4.3,
    ('wheatridge', 'rino'): 7.8,  
    ('rino', 'lakewood'): 10.7,
    ('denver', 'edgewater'): 5.1,  
    ('lakewood', 'rino'): 11,
    ('rino', 'edgewater'): 7.8,  
    ('edgewater', 'rino'): 7.7,
    ('lakewood', 'denver'): 7.5,   
    ('denver', 'wheatridge'): 6.3
}
print('Number of movements total (May 2022):', len(movements))
distances = pd.DataFrame(pd.Series(movements).value_counts()).reset_index(drop=False).rename(columns={'index': 'routes', 0: 'count'})
distances['dist'] = distances.routes.map(clinic_distances)
distances['cum_route_dist'] = distances[['count', 'dist']].apply(lambda x: x[0] * x[1], axis=1)
total_distance = round(distances.cum_route_dist.sum())
print(total_distance)
display(distances)

Number of movements total (May 2022): 321
1581


Unnamed: 0,routes,count,dist,cum_route_dist
0,"(denver, rino)",37,2.0,74.0
1,"(wheatridge, edgewater)",33,2.0,66.0
2,"(rino, denver)",31,1.9,58.9
3,"(edgewater, wheatridge)",28,2.0,56.0
4,"(wheatridge, lakewood)",25,4.8,120.0
5,"(edgewater, denver)",21,5.0,105.0
6,"(lakewood, wheatridge)",21,12.0,252.0
7,"(edgewater, lakewood)",19,4.4,83.6
8,"(lakewood, edgewater)",18,4.3,77.4
9,"(rino, wheatridge)",18,7.5,135.0


In [31]:
avg_gas_price =  4.04  # avg gas price in Denver
avg_mpg = 24.2  # US Dept of Energy

print('Gas money reimbursements to technicians for navigation moves (for May 2022): ', round(total_distance / avg_mpg * avg_gas_price, 2))

Gas money reimbursements to technicians for navigation moves (for May 2022):  263.94


For these 5 clinics in the region, you can schedule 5 technicians less per day and still maintain productivity through TecNav. With an average wage of $21/hr for technicians, this would equate to $91980 saved in compensation over an entire year ($7665/month). Note: These savings only account for the wage compensation and not savings on any additional employee benefits. There were 357 amount of moves that occured through TecNav in a single month--this would lead to an estimated $263.94 of gas compensation per month, equating to about $9100 per year. So accounting for this, the urgent care chain would save about $88812.72 for an entire year. This money could be instead invested in further advanced diagnostic tools & imaging equipment that could help the client offer a more comprehensive set of health services or help minimize medical costs for the patients--both of which align with the overall mission of the client's urgent care chain. 