## Import packages

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib notebook

## Read Data from csv files

In [2]:
# Billing tables are loaded as-is.
bill_amt = pd.read_csv('bill_amount.csv')
bill_id = pd.read_csv('bill_id.csv')

# Clinical records: rename the key column to `patient_id` (the name used
# by the joins further down) and turn 'Yes'/'No' answers into 1/0.
clinical_data = (
    pd.read_csv('clinical_data.csv')
    .rename(columns={'id': 'patient_id'})
    .replace(['Yes', 'No'], [1, 0])
)

# Demographics: map the short / inconsistent spellings onto the
# canonical labels ('Female', 'Male', 'Indian', 'Chinese').
demo_data = (
    pd.read_csv('demographics.csv')
    .replace(['f', 'm'], ['Female', 'Male'])
    .replace(['India', 'chinese'], ['Indian', 'Chinese'])
)

## Join Dataframes 

In [3]:
# Attach the billing metadata in bill_id to every bill amount. The right
# join keeps every row of bill_id even when no amount matches.
df = pd.merge(bill_amt, bill_id, how='right', on='bill_id')

# Total billed amount per (patient, admission date). The string 'sum' is
# used instead of np.sum: passing NumPy callables to groupby aggregation is
# deprecated in recent pandas and emits a FutureWarning.
df = df.groupby(['patient_id', 'date_of_admission'])['amount'].sum().reset_index()

# Clinical records enriched with demographics (all clinical rows are kept).
df1 = pd.merge(clinical_data, demo_data, how='left', on='patient_id')

# Final analysis frame: one row per row of df1, with the billed total
# attached where a matching (patient, admission date) pair exists.
df2 = pd.merge(df, df1, how='right', on=['patient_id', 'date_of_admission'])

## Convert date columns to DateTime format and handle missing data values

In [4]:
# Parse the three date columns once.
for date_col in ('date_of_discharge', 'date_of_admission', 'date_of_birth'):
    df2[date_col] = pd.to_datetime(df2[date_col])

# Length of stay in whole days. The columns are already datetimes at this
# point, so they can be subtracted directly.
df2['days_in_hospital'] = (df2['date_of_discharge'] - df2['date_of_admission']).dt.days

# Replace missing values with 0 in numeric columns only. A blanket
# df2.fillna(0) would also write the integer 0 into date and text columns,
# which is not a meaningful date or category and can break the `.dt`
# accessor used later on the date columns.
numeric_cols = df2.select_dtypes(include='number').columns
df2[numeric_cols] = df2[numeric_cols].fillna(0)

In [101]:
# Medical-history indicator columns to summarise. A row of df2 belongs to
# a column's cohort when that column equals 1.
med_hist = [
    'medical_history_1', 'medical_history_2', 'medical_history_3',
    'medical_history_4', 'medical_history_5', 'medical_history_6',
    'medical_history_7',
]

# Echo the position and name of each column being processed.
for position, column in enumerate(med_hist):
    print(position, column)

# One filtered frame per medical-history column.
history_cohorts = [df2[df2[column] == 1] for column in med_hist]

# Cohort size (number of matching rows of df2), mean billed amount and
# mean length of stay, in the same order as med_hist.
no_patient_mh = [len(cohort['patient_id']) for cohort in history_cohorts]
avg_bill_amt = [
    np.sum(cohort['amount']) / len(cohort['patient_id'])
    for cohort in history_cohorts
]
avg_no_days_spent = [
    np.sum(cohort['days_in_hospital']) / len(cohort['patient_id'])
    for cohort in history_cohorts
]

0 medical_history_1
1 medical_history_2
2 medical_history_3
3 medical_history_4
4 medical_history_5
5 medical_history_6
6 medical_history_7


In [102]:
#y_pos = np.arange(len(med_hist))
#figure1 = plt.figure()
#plt.bar(y_pos,avg_bill_amt, align='center', alpha=0.5)
#plt.xticks(y_pos,list(range(1,8)))
#plt.ylabel('Average number of days spent per patient')
#plt.xlabel('Type of medical history')
#plt.title('Cost as a factor of Average number of days per patient spent in hospital for type of medical history') 


#figure2 = plt.figure()
#plt.bar(y_pos,no_patient_mh, align='center', alpha=0.5)
#plt.xticks(y_pos,list(range(1,8)))
##plt.ylabel('Total number of Patients')
#plt.xlabel('Type of medical history')
#plt.title('Number of Patients admitted for type of medical historyCost as a factor of Average number of days per patient spent in hospital for type of medical history') 
#plt.show()

In [103]:

# Grouped bar chart with two y-axes: cohort size (blue, left axis) and
# average cost (red, right axis) for each medical-history column.
x_labels = ['med_hist1', 'med_hist2', 'med_hist3', 'med_hist4', 'med_hist5', 'med_hist6', 'med_hist7']

# Bar positions are defined here so the cell runs on a fresh kernel;
# previously `y_pos` was only assigned inside commented-out code.
y_pos = np.arange(len(x_labels))
width = 0.3

fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(111)
ax1.bar(y_pos, no_patient_mh, color='b', width=width, align='center')
ax1.set_ylabel('Total number of patients', color='b')
ax1.set_xlabel('Type of medical history')
# The two bars of a group are centred at y_pos and y_pos + width, so the
# group label belongs halfway between them.
ax1.set_xticks(y_pos + width / 2)
ax1.set_xticklabels(x_labels)

# Second y-axis sharing the same x-axis, for the cost bars.
ax2 = ax1.twinx()
ax2.bar(y_pos + width, avg_bill_amt, color='r', width=width, align='center')
ax2.set_ylabel('Average cost per patient', color='r')

# Colour each axis' tick labels to match its bars.
ax1.tick_params(axis='y', labelcolor='b')
ax2.tick_params(axis='y', labelcolor='r')

#plt.savefig('images/two-scales-5.png')


<IPython.core.display.Javascript object>

## Create Age column from DOB data and create age groups 


In [65]:
# Display labels for the five age buckets, in bucket order
# (age_group1 .. age_group5). The bounds mirror age_group_fun, which bins
# on 24 < age <= 34, ..., age > 64; for whole-year ages that is 25-34 up
# to 65 and over.
age_group = ['25-34', '35-44', '45-54', '55-64', '65+']
def age_group_fun(dl):
    """Return the age-bucket key for an age `dl`.

    Buckets are half-open on the left: (24, 34] -> 'age_group1',
    (34, 44] -> 'age_group2', (44, 54] -> 'age_group3',
    (54, 64] -> 'age_group4' and anything above 64 -> 'age_group5'.
    Values that fall in no bucket (24 or below, or NaN) give the
    string 'None'.
    """
    # Exclusive lower bounds, checked from the oldest bucket downwards so
    # the first bound exceeded identifies the bucket.
    lower_bounds = (
        (64, 'age_group5'),
        (54, 'age_group4'),
        (44, 'age_group3'),
        (34, 'age_group2'),
        (24, 'age_group1'),
    )
    for bound, bucket in lower_bounds:
        if dl > bound:
            return bucket
    return 'None'
# Age in completed years at the time of admission. The age was previously
# derived from today's year, which made the result depend on when the
# notebook is run and did not reflect the patient's age when care was given.
admitted_on = df2['date_of_admission']
born_on = df2['date_of_birth']

# True where the birthday had not yet occurred in the admission year, in
# which case one year is subtracted from the plain year difference.
birthday_pending = (
    (admitted_on.dt.month < born_on.dt.month)
    | ((admitted_on.dt.month == born_on.dt.month) & (admitted_on.dt.day < born_on.dt.day))
)
df2['age'] = admitted_on.dt.year - born_on.dt.year - birthday_pending.astype(int)

# Bucket key ('age_group1' .. 'age_group5', or 'None') for every row.
df2['age_group'] = df2['age'].map(age_group_fun)

## Age of the patients as the driver of the cost

In [95]:
# Number of rows and total billed amount per age bucket, then the average
# cost per row. The string 'sum' replaces np.sum because passing NumPy
# callables to groupby aggregation is deprecated in recent pandas.
rows_by_age_group = df2.groupby(['age_group'])
no_pat_group = rows_by_age_group.size()
total_cost_agegrp = rows_by_age_group.agg({'amount': 'sum'})

# Both operands are indexed by age_group, so the division aligns on it.
avg_cost = total_cost_agegrp['amount'] / no_pat_group

In [100]:
# avg_cost is indexed by whichever bucket keys occur in the data (possibly
# including 'None', possibly missing a bucket), so plotting it positionally
# against the five labels can mislabel bars or fail on a length mismatch.
# Select the five real buckets explicitly, in label order; a bucket with
# no rows becomes NaN and is simply drawn without a bar.
age_group_keys = ['age_group1', 'age_group2', 'age_group3', 'age_group4', 'age_group5']
avg_cost_by_group = avg_cost.reindex(age_group_keys)
bar_positions = np.arange(len(age_group))

figure2, ax = plt.subplots()
ax.bar(bar_positions, avg_cost_by_group, align='center', alpha=0.5)
ax.set_xticks(bar_positions)
ax.set_xticklabels(age_group)
ax.set_ylabel('Average cost per patient')
ax.set_xlabel('Age group of the patients')
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('Age of the patients as the driver of the cost of care');

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7fb01da401d0>

## Preop Medication as the driver of the cost

In [104]:
# Pre-operative medication indicator columns. A row of df2 belongs to a
# medication's cohort when that column equals 1.
preop_med = [
    'preop_medication_1', 'preop_medication_2',
    'preop_medication_3', 'preop_medication_4', 'preop_medication_5',
    'preop_medication_6',
]

# One filtered frame per medication column.
medication_cohorts = [df2[df2[column] == 1] for column in preop_med]

# Cohort size (number of matching rows) and mean billed amount, in the
# same order as preop_med.
no_patient_med = [len(cohort['patient_id']) for cohort in medication_cohorts]
avg_bill_med = [
    np.sum(cohort['amount']) / len(cohort['patient_id'])
    for cohort in medication_cohorts
]

0 preop_medication_1
1 preop_medication_2
2 preop_medication_3
3 preop_medication_4
4 preop_medication_5
5 preop_medication_6


In [106]:
# Grouped bar chart with two y-axes: cohort size (blue, left axis) and
# average cost (red, right axis) for each pre-operative medication.
x_labels = ['med_1', 'med_2', 'med_3', 'med_4', 'med_5', 'med_6']
bar_positions = np.arange(len(x_labels))
width = 0.3

fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(111)
ax1.bar(bar_positions, no_patient_med, color='b', width=width, align='center')
ax1.set_ylabel('Total number of patients', color='b')
# This chart is about medications; the label previously read
# 'Type of medical history' (copied from the medical-history chart).
ax1.set_xlabel('Type of preop medication')
# The two bars of a group are centred at bar_positions and
# bar_positions + width, so the group label belongs halfway between them.
ax1.set_xticks(bar_positions + width / 2)
ax1.set_xticklabels(x_labels)

# Second y-axis sharing the same x-axis, for the cost bars.
ax2 = ax1.twinx()
ax2.bar(bar_positions + width, avg_bill_med, color='r', width=width, align='center')
ax2.set_ylabel('Average cost per patient', color='r')

# Colour each axis' tick labels to match its bars.
ax1.tick_params(axis='y', labelcolor='b')
ax2.tick_params(axis='y', labelcolor='r')


<IPython.core.display.Javascript object>

In [114]:
# Gender values to compare, in the order used by the charts below.
gender = ['Male', 'Female']

# One filtered frame per gender value.
gender_cohorts = [df2[df2['gender'] == value] for value in gender]

# Cohort size (number of matching rows) and mean billed amount per gender.
no_pat_gender = [len(cohort['patient_id']) for cohort in gender_cohorts]
avg_bill_gender = [
    np.sum(cohort['amount']) / len(cohort['patient_id'])
    for cohort in gender_cohorts
]

0 Male
1 Female


In [135]:
# Two side-by-side pies: share of rows per gender (left) and share of the
# average bill per gender (right).
labels = gender

# Convert the Python lists to arrays explicitly before the element-wise
# division. Values are scaled to degrees (fractions of 360) as before.
size_patient_gender = (np.asarray(no_pat_gender) / np.sum(no_pat_gender)) * 360
size_bill_gender = (np.asarray(avg_bill_gender) / np.sum(avg_bill_gender)) * 360
explode = (0, 0.1)  # pull the second slice out of the pie

fig4, axes = plt.subplots(nrows=1, ncols=2)
ax0, ax1 = axes.flatten()

ax0.pie(size_patient_gender, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True)
ax0.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
# Axes titles are set with set_title(); `ax.title` is a Text object, not a
# callable, which is why the earlier attempts had been commented out.
ax0.set_title('Ratio of female and male patients admitted in hospital')

ax1.pie(size_bill_gender, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax1.set_title('Share of average bill amount by gender')

plt.show()

<IPython.core.display.Javascript object>

In [126]:
# Race values to compare, in the order used by the chart below.
race = ['Chinese', 'Indian', 'Malay', 'Others']

# One filtered frame per race value.
race_cohorts = [df2[df2['race'] == value] for value in race]

# Cohort size (number of matching rows) and mean billed amount per race.
no_pat_race = [len(cohort['patient_id']) for cohort in race_cohorts]
avg_bill_race = [
    np.sum(cohort['amount']) / len(cohort['patient_id'])
    for cohort in race_cohorts
]

0 Chinese
1 Indian
2 Malay
3 Others


In [128]:

# Pie chart of each race's share of the average bill amount.
labels = race

# Normalise by the race total. The denominator was previously
# np.sum(avg_bill_gender), copied from the gender chart, so the values were
# not fractions of this chart's own total. Values are scaled to degrees
# (fractions of 360) as before.
sizes = (np.asarray(avg_bill_race) / np.sum(avg_bill_race)) * 360
explode = (0, 0, 0.1, 0)  # pull out the third slice ('Malay')

fig4, ax1 = plt.subplots()
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax1.set_title('Share of average bill amount by race')

plt.show()

<IPython.core.display.Javascript object>