In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Show the names of the matplotlib style sheets available in this environment.
plt.style.available

In [None]:
# sns.heatmap(correlation_matrix,
#             annot=True,
#             linewidths=0.4,
#             annot_kws={"size": 10})

### import data

In [None]:
appointments = pd.read_csv('Appointments.csv')

In [None]:
calls = pd.read_csv('CallsRingCentral.csv')

In [None]:
reason_for_visit = pd.read_csv('MeetingReasonForVisits.csv')

In [None]:
meeting_status = pd.read_csv('MeetingStatus.csv')

In [None]:
offices = pd.read_csv('Offices.csv')

In [None]:
providers_schedules = pd.read_csv('ProvidersSchedulesLastest.csv')

### explore data

In [None]:
appointments.info()

In [None]:
appointments.head().T

In [None]:
reason_for_visit.head()

In [None]:
reason_for_visit.info()

In [None]:
len(reason_for_visit['Name'].unique())

In [None]:
meeting_status.head()

In [None]:
meeting_status.info()

In [None]:
offices.head()

In [None]:
offices.info()

In [None]:
providers_schedules.head()

In [None]:
providers_schedules.info()

In [None]:
calls.head().T

In [None]:
calls.info()

### combine/merge dataframes

In [None]:
merged1 = pd.merge(left=appointments, right=reason_for_visit, how='left', left_on='MeetingReasonForVisitId',\
                  right_on='Id')

In [None]:
appointments.shape, reason_for_visit.shape, merged1.shape

In [None]:
merged1 = merged1.rename(columns={'MeetingReasonForVisitId': 'ReasonForVisitId', 'Name':'ReasonForVisitName', 'Description':'ReasonForVisitDescription'})

In [None]:
merged1.drop('Id', axis=1, inplace=True)

In [None]:
# merged1.head().T

In [None]:
# merge in office name from offices df
merged1 = pd.merge(left=merged1, right=offices, how='left', left_on='OfficeId', right_on='id')

In [None]:
merged1 = merged1.rename(columns={'Name':'OfficeName', 'id_x':'id'})

In [None]:
merged1.drop('id_y', axis=1, inplace=True)

In [None]:
merged1 = pd.merge(left=merged1, right=meeting_status, how='left', left_on='MeetingStatusId', right_on='Id')

In [None]:
merged1 = merged1.rename(columns={'Name':'MeetingStatusName', 'Description':'MeetingStatusDescription'})

In [None]:
merged1.drop('Id', axis=1, inplace=True)

In [None]:
merged1.info()

In [None]:
merged1.head().T

In [None]:
# rearrange column order to group releveant columns together
merged1.columns

In [None]:
ordered_columns = ['id', 'Patient', 'PatientAgeMeetingDate', 'PatientGender',
       'PatientState', 'PatientCity', 'PatientInsurance', 'Provider',
       'Specialty', 'AppointmentDate', 'AppointmentCreated', 'AppointmentDuration', 'ReasonForVisitId', 'ReasonForVisitName',
       'ReasonForVisitDescription','MeetingStatusId', 'MeetingStatusName',
       'MeetingStatusDescription', 'OfficeId',  'OfficeName', 'CreatedBy']

In [None]:
merged1 = merged1[ordered_columns]

In [None]:
# remove time component from AppointmentDate and AppointmentCreated columns

In [None]:
merged1['AppointmentCreated'] = merged1['AppointmentCreated'].str.slice(start=0, stop=10)

In [None]:
merged1['AppointmentDate'] = merged1['AppointmentDate'].str.slice(start=0, stop=10)

In [None]:
# merged1.head().T

In [None]:
# pd.value_counts(merged1['PatientState'])

In [None]:
# pd.value_counts(merged1['PatientGender'])

In [None]:
# pd.value_counts(merged1['PatientCity'])

In [None]:
# pd.value_counts(merged1['MeetingStatusName'])

In [None]:
pd.value_counts(merged1['AppointmentDuration'])

In [None]:
pd.value_counts(merged1['Specialty'])

In [None]:
# convert date columns to datetime 
merged1['AppointmentDate'] = pd.to_datetime(merged1['AppointmentDate']).apply(lambda x: x.date()) #,format='%Y-%m-%d')

In [None]:
merged1['AppointmentCreated'] = pd.to_datetime(merged1['AppointmentCreated']).apply(lambda x: x.date()) #, format='%Y-%m-%d')

In [None]:
# calculate time between AppointmentCreated and AppointmentDate
merged1['DaysFromAppointmentCreatedToVisit'] = (merged1['AppointmentDate'] - merged1['AppointmentCreated']).dt.days

In [None]:
merged1.head().T

In [None]:
merged1.info()

In [None]:
merged1['Specialty'].isnull()
merged1.isnull().sum()

In [None]:
# pd.value_counts(merged1['DaysFromAppointmentCreatedToVisit'])

In [None]:
merged1[merged1['DaysFromAppointmentCreatedToVisit'] < 0][['DaysFromAppointmentCreatedToVisit', 'AppointmentCreated', 'AppointmentDate']]

In [None]:
merged1['AppointmentDate'] = pd.to_datetime(merged1['AppointmentDate'])
merged1['AppointmentCreated'] = pd.to_datetime(merged1['AppointmentCreated'])

In [None]:
merged1.dtypes

In [None]:
merged1.columns

In [None]:
pd.value_counts(merged1['Specialty'])

In [None]:
medical = merged1[merged1['Specialty'] == 'Medical']

In [None]:
pd.value_counts(medical['ReasonForVisitName'])

In [None]:
medical1 = medical.set_index('AppointmentDate')

In [None]:
# medical1.groupby('ReasonForVisitName').agg({'ReasonForVisitName': 'count'})

In [None]:
# medical1.groupby('ReasonForVisitId').agg({'ReasonForVisitId': 'count'})

In [None]:
merged2 = merged1.set_index('AppointmentDate')

In [None]:
merged_index_month = merged2.index.month

In [None]:
merged_index_year = merged2.index.year

In [None]:
merged2.index.date

In [None]:
specialty_counts = merged2.groupby([merged_index_year, merged_index_month, 'Specialty'])['Specialty'].count()

In [None]:
specialty_counts = merged2.groupby([merged_index_year, merged_index_month, 'Specialty'])['Specialty'].count()

In [None]:
specialty_counts

In [None]:
type(specialty_counts)

In [None]:
specialty = merged2[['Specialty']].dropna(axis=0)

In [None]:
specialty.isnull().sum()

In [None]:
a_index_year = specialty.index.year
a_index_month = specialty.index.month

In [None]:
a_index_month

In [None]:
specialty_df = pd.DataFrame(specialty.groupby([a_index_year, a_index_month, 'Specialty'])['Specialty'].count().unstack('Specialty'))

In [None]:
specialty_df

In [None]:
# Back-fill gaps with the next available value in each column.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill') spelling.
specialty_bfilled = specialty_df.bfill()

In [None]:
cols = list(specialty_df.columns)
means = specialty_df[cols].mean()
means.index

In [None]:
# fill missing values with column mean
for idx in means.index:
    specialty_df[idx].fillna(means[idx], inplace=True)

In [None]:
specialty_df

In [None]:
# using bfill to fill NaN values
ax5 = specialty_bfilled.plot(figsize=(17,14), colormap='Dark2')
ax5.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax5.set_xlabel('Date', fontsize=25)
ax5.set_ylabel('Number of Appointments', fontsize=25)
ax5.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# if NaNs are filled by column mean
ax4 = specialty_df.plot(figsize=(17,14), colormap='Dark2')
ax4.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax4.set_xlabel('Date', fontsize=25)
ax4.set_ylabel('Number of Appointments', fontsize=25)
ax4.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# plot number of appoints by specialty for each month, adding multiple years
ax3 = specialty.groupby([a_index_year, a_index_month, 'Specialty'])['Specialty'].count().unstack('Specialty').plot(figsize=(17,15),\
                                                    colormap='Dark2')
ax3.set_ylim(0, 1200)
ax3.set_xlim(0,28)
ax3.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax3.set_xlabel('Date', fontsize=25)
ax3.set_ylabel('Number of Appointments', fontsize=25)
ax3.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# plot number of appoints by specialty for each month, adding multiple years
ax2 = specialty.groupby([a_index_year, a_index_month, 'Specialty'])['Specialty'].count().unstack('Specialty').plot.area(figsize=(17,15),\
                                                    colormap='Dark2')
ax2.set_ylim(0, 3000)
ax2.set_xlim(0,28)
ax2.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax2.set_xlabel('Date', fontsize=25)
ax2.set_ylabel('Number of Appointments', fontsize=25)
ax2.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# plot number of appoints by specialty for each month, adding months from multiple years together
ax = merged2.groupby([merged_index_month, 'Specialty'])['Specialty'].count().unstack().plot(figsize=(17,15),\
                                                    colormap='Dark2')
ax.set_ylim(0, 1200)
ax.set_xlim(0,13)
ax.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax.set_xlabel('Month', fontsize=25)
ax.set_ylabel('Number of Appointments', fontsize=25)
ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# plot number of appoints by specialty for each month, adding months from multiple years together
ax1 = merged2.groupby([merged_index_month, 'Specialty'])['Specialty'].count().unstack().plot.area(figsize=(20,15),\
                                                                                                colormap='Accent')
ax1.set_ylim(0, 5000)
ax1.set_xlim(1, 12)
ax1.set_title('Number of Appointments per Month by Specialty', fontsize=30)
ax1.set_xlabel('Month', fontsize=25)
ax1.set_ylabel('Number of Appointments', fontsize=25)
ax1.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# One subplot per specialty: appointments per calendar month.
ax = (
    merged2
    .groupby([merged_index_month, 'Specialty'])['Specialty']
    .count()
    .unstack()
    .plot(subplots=True, colormap='Dark2', figsize=(20, 15), layout=(5, 2),
          sharex=False, sharey=False)
)
# With subplots=True `ax` is an array of Axes. plt.xlabel() would label only the
# current (last) one, and sharex=False means every panel shows its own x axis,
# so label each panel explicitly.
for subplot_ax in ax.flat:
    subplot_ax.set_xlabel('Month')
plt.tight_layout()
plt.show()

In [None]:
merged2.head(2).T

In [None]:
time_spent_by_month = merged2.groupby([merged_index_year, merged_index_month, 'Specialty'])['AppointmentDuration'].sum()

In [None]:
time_spent_by_month

In [None]:
merged2.groupby([merged_index_year, merged_index_month])['AppointmentDuration'].sum().plot(figsize=(10,6))
plt.xlabel('Date')
plt.ylabel('Time (min)')
plt.title('Sum of time spent for all Specialties together')
plt.show()

In [None]:
ax6 = merged2.groupby([merged_index_year, merged_index_month, 'Specialty'])['AppointmentDuration'].sum().unstack().plot(figsize=(17,15),\
                                                    colormap='Dark2', linewidth=3)
ax6.set_ylim(0, 32000)
ax6.set_xlim(0,25)
ax6.set_title('Time per Month by Specialty', fontsize=30)
ax6.set_xlabel('Date', fontsize=25)
ax6.set_ylabel('Time (minutes)', fontsize=25)
ax6.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# Area plot of time spent for each specialty
ax7 = merged2.groupby([merged_index_year, merged_index_month, 'Specialty'])['AppointmentDuration']\
.sum().unstack().plot.area(figsize=(17,15), colormap='Dark2', linewidth=3)
                                                   
ax7.set_ylim(0, 130000)
ax7.set_xlim(0,25)
ax7.set_title('Time per Month by Specialty', fontsize=30)
ax7.set_xlabel('Date', fontsize=25)
ax7.set_ylabel('Time (minutes)', fontsize=25)
ax7.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
# Total AppointmentDuration per (index value, specialty), grouping on the index
# object itself.
date = merged2.index
merged2.groupby([date,'Specialty'])['AppointmentDuration'].sum()

In [None]:
# Parse the index as datetimes using the YYYY-MM-DD format.
date = pd.to_datetime(merged2.index,format='%Y-%m-%d')

In [None]:
# The bare `date` below is not the last expression of the cell, so it displays nothing.
date
# Copies the parsed index into a regular column. merged2 was built with
# set_index('AppointmentDate'), so after this line the label 'AppointmentDate'
# refers to both the index level and a column.
# NOTE(review): recent pandas treats such a label as ambiguous when it is used
# as a groupby key -- confirm against the pandas version in use.
merged2['AppointmentDate'] = date

In [None]:
merged2.head().T

In [None]:
type(merged2['AppointmentDate'])

In [None]:
# Total AppointmentDuration per (appointment date, specialty).
# Group on the index object rather than the label 'AppointmentDate': that label
# names both the index level and a column of merged2, and pandas rejects such a
# key as ambiguous.
merged2.groupby([merged2.index, 'Specialty'])['AppointmentDuration'].sum()

In [None]:
# Stacked area chart of total AppointmentDuration per appointment date and specialty.
date = pd.to_datetime(merged2.index)
# Group on the parsed index itself: the label 'AppointmentDate' names both the
# index level and a column of merged2, which pandas rejects as an ambiguous key.
ax8 = (
    merged2
    .groupby([date, 'Specialty'])['AppointmentDuration']
    .sum()
    .unstack()
    .plot.area(figsize=(30, 30), colormap='Dark2', linewidth=3)
)

# ax8.set_ylim(0, 32000)
# ax8.set_xlim(0,25)
# The data are aggregated per appointment date, so the title says "Day", not "Month".
ax8.set_title('Time per Day by Specialty', fontsize=30)
ax8.set_xlabel('Date', fontsize=25)
ax8.set_ylabel('Time (minutes)', fontsize=25)
# ax8.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize=20)
plt.show()

In [None]:
plt.rcParams.keys()

In [None]:
params = {'legend.fontsize': 20, 'legend.handlelength': 2, 'axes.labelsize': 'medium'}
plt.rcParams.update(params)
fig = merged2.groupby(['AppointmentDate', 'Specialty'])['AppointmentDuration'].sum().unstack().plot.area(subplots=True,\
    colormap='Dark2', figsize=(20,60), layout=(10,1), sharex=True, sharey=True, linewidth=3, fontsize=20)
plt.xlabel('Date')
plt.ylabel('Time (minutes)', fontsize=20)
plt.tight_layout()
plt.show()

In [None]:
medical1['id'].groupby(medical1.index.month).count()

In [None]:
medical1.columns

In [None]:
index_month = medical1.index.month
counts = medical1['id'].groupby(index_month).count()
ax = counts.plot()
ax.set_ylim(0, 1200)
ax.set_xlim(0, 13)
ax.set_title('Number of Appointments per Month (Medical)', fontsize=15)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Number of Appointments', fontsize=12)
plt.show()

In [None]:
medical1.info()

In [None]:
number_cols = ['Provider', 'ReasonForVisitId', 'MeetingStatusId', 'OfficeId', 'DaysFromAppointmentCreatedToVisit']
nummeric = merged2[number_cols]

In [None]:
nummeric.head()

In [None]:
days_until_appointment = nummeric['DaysFromAppointmentCreatedToVisit']

In [None]:
days_until_appointment.head()

In [None]:
# ax = days_until_appointment.plot(x=days_until_appointment.index, kind='hist', figsize=(12,6))
# ax.set_xlabel('Appointment Date', fontsize=16)
# # ax.set_ylabel('Count of each visit type')
# plt.show()

In [None]:
nummeric.index