In [2]:
### import libraries
import streamlit as st
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
import plotly_express as px
from titlecase import titlecase
from sklearn.preprocessing import LabelEncoder

In [3]:
df = pd.read_csv("CHLA_clean_data_2024_Appointments.csv")
df['APPT_DATE'] = pd.to_datetime(df['APPT_DATE'])

In [4]:
df.columns

Index(['MRN', 'APPT_STATUS', 'APPT_DATE', 'BOOK_DATE', 'ZIPCODE', 'CLINIC',
       'SCHEDULE_ID', 'APPT_ID', 'LEAD_TIME', 'IS_REPEAT',
       'APPT_TYPE_STANDARDIZE', 'APPT_NUM', 'TOTAL_NUMBER_OF_CANCELLATIONS',
       'TOTAL_NUMBER_OF_RESCHEDULED',
       'TOTAL_NUMBER_OF_NOT_CHECKOUT_APPOINTMENT',
       'TOTAL_NUMBER_OF_SUCCESS_APPOINTMENT', 'TOTAL_NUMBER_OF_NOSHOW',
       'DAY_OF_WEEK', 'WEEK_OF_MONTH', 'NUM_OF_MONTH', 'HOUR_OF_DAY',
       'IS_NOSHOW', 'AGE', 'ETHNICITY_STANDARDIZE', 'RACE_STANDARDIZE'],
      dtype='object')

In [5]:
mask = (df['APPT_DATE'] >= '2024-01-02 15:00:00') & (df['APPT_DATE'] <= '2024-05-08 15:30:00')
df = df[mask]
df = df[df['CLINIC'] == 'ARCADIA CARE CENTER']

In [6]:
len(df)

9

In [7]:
fdf = df[[
    'MRN',
    'APPT_DATE',
    'AGE',
    'CLINIC',
    'TOTAL_NUMBER_OF_CANCELLATIONS',
    'LEAD_TIME',
    'TOTAL_NUMBER_OF_RESCHEDULED',
    'TOTAL_NUMBER_OF_NOSHOW',
    'TOTAL_NUMBER_OF_SUCCESS_APPOINTMENT',
    'HOUR_OF_DAY',
    'NUM_OF_MONTH'
]]

### slice predictive df
pdf = fdf.drop(['MRN', 'APPT_DATE'], axis=1)

In [8]:
print(len(fdf))
print(len(pdf))

9
9


In [9]:
### label encoding
le = LabelEncoder()
object_cols = ['CLINIC']
for col in object_cols:
    pdf[col] = le.fit_transform(pdf[col])

In [10]:
print(len(fdf))
print(len(pdf))

9
9


In [11]:
### load and run the predictor model
model = pickle.load(open('random_forest_model.pkl', 'rb'))
predictions = model.predict(pdf)

In [12]:
len(predictions)

9

In [13]:
predictions_series = pd.DataFrame(pd.Series(predictions))

In [14]:
column_renaming = {0: 'new_column_name'}
predictions_series = predictions_series.rename(columns=column_renaming)

In [15]:
predictions_series

Unnamed: 0,new_column_name
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0


In [16]:
predictions_series.shape

(9, 1)

In [19]:
fdf = fdf.reset_index(drop=True)

In [20]:
print('final_df', len(fdf))
print('final_df', len(predictions_series))

df.reset_index(drop=True)

final_df = pd.concat([fdf, predictions_series], axis=1)
print('final_df', len(final_df))
final_df.columns = [*final_df.columns[:-1], 'NO-SHOW (Y/N)']
final_df = final_df[['MRN', 'APPT_DATE', 'CLINIC', 'NO-SHOW (Y/N)']]
print('final_df', len(final_df))

final_df 9
final_df 9
final_df 9
final_df 9


In [17]:
no_show_mapping = {0: 'No', 1: 'Yes'}
final_df['NO-SHOW (Y/N)'] = final_df['NO-SHOW (Y/N)'].replace(no_show_mapping)
final_df = final_df.sort_values(by='CLINIC')
final_df

Unnamed: 0,MRN,APPT_DATE,CLINIC,NO-SHOW (Y/N)
0,1873606.0,2024-05-08 15:30:00,ARCADIA CARE CENTER,No
1,4133045.0,2024-05-07 09:00:00,ARCADIA CARE CENTER,No
2,4462967.0,2024-04-16 11:00:00,ARCADIA CARE CENTER,No
4,4457024.0,2024-04-02 12:00:00,ARCADIA CARE CENTER,No
10,4006420.0,2024-02-06 09:30:00,ARCADIA CARE CENTER,
15,4279873.0,2024-01-17 16:00:00,ARCADIA CARE CENTER,
16,4168621.0,2024-01-16 10:00:00,ARCADIA CARE CENTER,
17,4530370.0,2024-01-16 09:30:00,ARCADIA CARE CENTER,
18,1823388.0,2024-01-10 11:30:00,ARCADIA CARE CENTER,
3,,NaT,,No


In [18]:
final_df

Unnamed: 0,MRN,APPT_DATE,CLINIC,NO-SHOW (Y/N)
0,1873606.0,2024-05-08 15:30:00,ARCADIA CARE CENTER,No
1,4133045.0,2024-05-07 09:00:00,ARCADIA CARE CENTER,No
2,4462967.0,2024-04-16 11:00:00,ARCADIA CARE CENTER,No
4,4457024.0,2024-04-02 12:00:00,ARCADIA CARE CENTER,No
10,4006420.0,2024-02-06 09:30:00,ARCADIA CARE CENTER,
15,4279873.0,2024-01-17 16:00:00,ARCADIA CARE CENTER,
16,4168621.0,2024-01-16 10:00:00,ARCADIA CARE CENTER,
17,4530370.0,2024-01-16 09:30:00,ARCADIA CARE CENTER,
18,1823388.0,2024-01-10 11:30:00,ARCADIA CARE CENTER,
3,,NaT,,No
