In [1]:
# Libraries used throughout the analysis (standard library first, then third-party).
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling applied to every seaborn/matplotlib figure below.
sns.set_style('darkgrid')

# Render every column and every row when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# No `pd.option_context('mode.use_inf_as_na', True)` here: option_context only
# takes effect inside a `with` block, so a bare call changes nothing.

# Silence library warnings so they do not clutter cell output.
warnings.filterwarnings('ignore')

In [2]:
# Load the raw dataset. The path is a raw string so the Windows backslashes are
# taken literally instead of being parsed as (invalid) escape sequences.
healthcare = pd.read_csv(r'E:\Project for Resume\Python Healthcare Analysis Project\Healthcare Dataset\healthcare_dataset.csv')
healthcare.head()

In [3]:
# Checking for null values
healthcare.isnull().sum()

In [4]:
# checking for Blank Values
healthcare.isna().sum()

In [5]:
# Number of rows that are exact duplicates of an earlier row.
healthcare.duplicated().sum()

In [6]:
# Preview the first rows of the frame.
healthcare.head()

In [7]:
# Column labels available for the analysis.
healthcare.columns

In [8]:
# Per-column dtype and non-null summary.
healthcare.info()

In [9]:
healthcare['Date of Admission'] = pd.to_datetime(healthcare['Date of Admission'],format = '%Y-%m-%d')
healthcare.info()

In [10]:
healthcare['Discharge Date'] = pd.to_datetime(healthcare['Discharge Date'])
healthcare['Discharge Date'].info()

In [11]:
healthcare['Billing Amount']=round(healthcare[['Billing Amount']],2)
healthcare.head()

In [12]:
healthcare.describe()

In [13]:
healthcare['Age'].mean()
print('The Average age of overall pateints is {}.'.format(round(healthcare['Age'].mean(),2)))

In [14]:
avg_age_by_gender = round(healthcare[['Gender','Age']].groupby('Gender')[['Age']].mean(),2)
avg_age_by_gender

In [15]:
healthcare.head()

In [16]:
male_female_patients =  healthcare[['Gender']].value_counts()
male_female_patients

In [17]:
healthcare.head()

In [18]:
healthcare['Blood Type'].unique()
print('Different Blood Types are {}.'.format(healthcare['Blood Type'].unique()))

In [19]:
healthcare[['Hospital']].nunique()

In [20]:
# Patient count per hospital, largest first. The dataset has thousands of
# distinct hospitals (8639 in the author's run), so only the 20 busiest are shown.
(
    healthcare.groupby('Hospital')[['Name']]
    .count()
    .rename(columns={'Name': 'Patient Count'})
    .sort_values('Patient Count', ascending=False)
    .head(20)
    .reset_index()
)

In [21]:
healthcare.head()

In [22]:
# Non-null count of 'Medical Condition' within each medical-condition group,
# i.e. how many rows carry each condition.
common_medical_patients = healthcare.groupby('Medical Condition')[['Medical Condition']].count()
common_medical_patients

In [23]:
# Patients per medical condition, bars ordered from most to least common.
plt.figure(figsize=(12, 4))
ax = sns.countplot(data=healthcare,
                   x='Medical Condition',
                   palette='GnBu_r',
                   order=healthcare['Medical Condition'].value_counts().index)
# Annotate every bar container with its count. Each container yielded by the
# loop is labelled; labelling only containers[0] would miss the others whenever
# seaborn creates more than one container.
for bars in ax.containers:
    ax.bar_label(bars)

In [24]:
healthcare.head()

In [25]:
# Distinct insurance providers present in the data.
healthcare['Insurance Provider'].unique()

In [26]:
# Number of rows per insurance provider.
healthcare['Insurance Provider'].value_counts()

In [27]:
# Patients per insurance provider, bars ordered from most to least common.
plt.figure(figsize=(12, 4))
ax = sns.countplot(data=healthcare,
                   x='Insurance Provider',
                   palette='Purples',
                   order=healthcare['Insurance Provider'].value_counts().index)
# Annotate every bar container with its count (each container from the loop,
# not only the first one).
for bars in ax.containers:
    ax.bar_label(bars);

In [28]:
healthcare.head()

In [29]:
avg_billing_amt = round(healthcare[['Admission Type','Billing Amount']].groupby('Admission Type').mean(),2)
avg_billing_amt.rename(columns={'Billing Amount':'Average Billing Amount'},inplace = True)
avg_billing_amt = avg_billing_amt.sort_values('Average Billing Amount',ascending = False)
avg_billing_amt

In [30]:
plt.figure(figsize =(6,3))

ax = sns.barplot(data = avg_billing_amt , 
                 x = avg_billing_amt.index,
                 y = 'Average Billing Amount',
                 palette = 'gist_earth_r' )
for bars in ax.containers:
    ax.bar_label(bars)

In [31]:
healthcare.head()

In [32]:
# How many rows share each (Name, Room Number) combination.
distribution = pd.DataFrame(healthcare[['Name','Room Number']].value_counts() )
distribution

In [33]:
# Combinations that occur at least twice. Relies on the counts column being
# named 'count'.
distribution[distribution['count']>=2]

In [34]:
healthcare.head()

In [35]:
# Non-null 'Name' count per doctor, renamed to 'Counts'; only doctors with at
# least two are kept, sorted from most to fewest.
doctor = pd.DataFrame(healthcare[['Doctor','Name']].groupby('Doctor')['Name'].count())
doctor.rename(columns={'Name':'Counts'},inplace= True)
doctor = doctor[doctor['Counts']>=2].sort_values('Counts',ascending = False)
doctor

In [36]:
# Doctors with between 2 and 7 counts inclusive. The lower bound repeats the
# filter already applied when `doctor` was built.
doctor[(doctor['Counts']>=2) & (doctor['Counts']<=7)]

In [37]:
healthcare.info()

In [38]:
# Derive month and year of admission as separate columns for grouping.
healthcare['doa_month'] = healthcare['Date of Admission'].dt.month
healthcare['doa_year'] = healthcare['Date of Admission'].dt.year
healthcare.info()

In [39]:
# Admissions per year: non-null 'Date of Admission' count within each doa_year.
admission_by_year = healthcare[['Date of Admission','doa_year']].groupby(['doa_year']).count()
admission_by_year

In [40]:
# Line chart of yearly admissions; 'doa_year' is the index of admission_by_year.
plt.figure(figsize=(10,4))
sns.lineplot(data = admission_by_year,y='Date of Admission',x='doa_year')
plt.xlabel('Year');

In [41]:
# Admissions per (year, month), flattened to columns with the count column
# renamed to 'Counts'.
year = healthcare[['Date of Admission','doa_year','doa_month']].groupby(['doa_year','doa_month']).count()
year.reset_index(inplace = True)
year.rename(columns={'Date of Admission': 'Counts'},inplace = True)
year

In [42]:
year_2018 = year[year['doa_year'] == 2018]

year_2019 = year[year['doa_year'] == 2019]

year_2020 = year[year['doa_year'] == 2020]

year_2021 = year[year['doa_year'] == 2021]

year_2022 = year[year['doa_year'] == 2022]

year_2023 = year[year['doa_year'] == 2023]

In [43]:
# creating subplots
plt.figure(figsize=(12,15))

plt.subplot(3,3,1)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2018')
sns.lineplot(data = year_2018,x='doa_month', y='Counts');

plt.subplot(3,3,2)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2019')
sns.lineplot(data = year_2019,x='doa_month', y='Counts');

plt.subplot(3,3,3)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2020')
sns.lineplot(data = year_2020,x='doa_month', y='Counts');

plt.subplot(3,3,4)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2021')
sns.lineplot(data = year_2021,x='doa_month', y='Counts');

plt.subplot(3,3,5)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2022')
sns.lineplot(data = year_2022,x='doa_month', y='Counts');

plt.subplot(3,3,6)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2023')
sns.lineplot(data = year_2023,x='doa_month', y='Counts');

In [44]:
healthcare.head()

In [45]:
healthcare.info()

In [46]:
# Correlation between patient age and billing amount (Series.corr with its
# default method).
Age_Billing_Amount_corr = healthcare['Age'].corr(healthcare['Billing Amount'])
print('Patients Age And Billing Amount has correlation of {}'.format(Age_Billing_Amount_corr))

In [47]:
healthcare.head()

In [48]:
# Mean number of whole days between admission and discharge, reported rounded
# to an integer.
average_stay_hospital = (healthcare['Discharge Date'] - healthcare['Date of Admission']).dt.days.mean()
print('Average Stay of patients in Hospital is {}.'.format(round(average_stay_hospital)))

In [49]:
healthcare.head()

In [50]:
# Mean billing amount per gender, rounded to two decimals.
avg_amt_paid_by_gender = round(healthcare[['Gender','Billing Amount']].groupby('Gender')[['Billing Amount']].mean(),2)
avg_amt_paid_by_gender.rename(columns={'Billing Amount':'Average Billing amount'},inplace = True)
avg_amt_paid_by_gender

In [51]:
healthcare.head()

In [52]:
# Rows per (admission type, year). The count column is renamed to 'Counts'
# before reset_index so it does not clash with the 'Admission Type' index level.
admission_type_trend = healthcare[['Admission Type','doa_year']].groupby(['Admission Type','doa_year'])[['Admission Type']].count()
admission_type_trend.rename(columns={'Admission Type':'Counts'},inplace = True)
admission_type_trend.reset_index(inplace = True)
admission_type_trend

In [53]:
# One yearly-count table per admission type, used by the bar panels that follow.
elective = admission_type_trend[admission_type_trend['Admission Type'] == 'Elective']

emergency = admission_type_trend[admission_type_trend['Admission Type'] == 'Emergency']

urgent = admission_type_trend[admission_type_trend['Admission Type'] == 'Urgent']
 

In [54]:

# Yearly admission counts for each admission type, drawn side by side on a
# shared y-axis so the three panels are directly comparable.
plt.figure(figsize=(10, 3))

panels = [('Elective', elective), ('Emergency', emergency), ('Urgent', urgent)]
axes = []
for position, (admission_type, yearly_counts) in enumerate(panels, start=1):
    # The second and third panels share the first panel's y-axis.
    plt.subplot(1, 3, position, sharey=axes[0] if axes else None)
    ax = sns.barplot(data=yearly_counts, x='doa_year', y='Counts', palette='afmhot')
    # Label each container yielded by the loop, not only containers[0].
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.xlabel('Year')
    plt.ylabel('Counts')
    plt.title('Admission Type - {}'.format(admission_type))
    axes.append(ax)

# Keep the per-panel axes available under their original names.
ax1, ax2, ax3 = axes

# Tighten the layout only after the axes exist; on an empty figure there is
# nothing to arrange.
plt.tight_layout();

In [55]:
healthcare.head()

In [56]:
# Descriptive statistics of the billing amount within each medical condition.
healthcare.groupby('Medical Condition')['Billing Amount'].describe()

In [57]:
# Mean billing amount per medical condition, highest first, rounded to two
# decimals. NOTE(review): this rebinds `avg_billing_amt`, which an earlier cell
# used for the per-admission-type averages.
avg_billing_amt = round(healthcare.groupby('Medical Condition')[['Billing Amount']].mean().rename(
    columns={'Billing Amount': 'Average Billing Amount'}).sort_values('Average Billing Amount',ascending = False).reset_index(),2)
avg_billing_amt

In [58]:
# Box plot of billing amount per medical condition.
ax = sns.boxplot(data = healthcare, x = 'Medical Condition', y = 'Billing Amount',palette='YlGn_r');

In [59]:
healthcare.head()

In [60]:
# Row counts for every (medical condition, medication) pair.
medication_pattern = healthcare.pivot_table(index ='Medical Condition',columns = 'Medication',aggfunc='size')
medication_pattern

In [61]:
# Heatmap of those counts with each cell annotated.
sns.heatmap(medication_pattern, annot = True,fmt ='g' );

In [62]:
healthcare.head()

In [63]:
# Rows per test result. The rename relies on the counts column being named 'count'.
pd.DataFrame(healthcare['Test Results'].value_counts()).rename(columns = {'count':'Counts'})

In [64]:
# Same distribution built via groupby: non-null 'Name' count per test result.
test_results_distribution = healthcare[['Test Results','Name']].groupby('Test Results')[['Name']].count().rename(columns={'Name':'Count'})
test_results_distribution

In [65]:
# Bar chart of the test-result distribution, each bar annotated with its count.
plt.figure(figsize = (6,4))
plt.title('Test Results Distribution')
plt.xlabel('Test Results')
ax = sns.barplot(data = test_results_distribution, x = test_results_distribution.index,y = 'Count' ,palette = 'viridis');
for bars in ax.containers:
    ax.bar_label(bars)

In [66]:
healthcare.head()

In [67]:
# Length of stay in whole days (discharge date minus admission date).
healthcare['Days in Hospital'] = (healthcare['Discharge Date'] - healthcare['Date of Admission']).dt.days

In [68]:
# Mean length of stay per medication.
healthcare[['Days in Hospital','Medication']].groupby('Medication')[['Days in Hospital']].mean()

In [69]:
# Descriptive statistics of length of stay per medication.
healthcare[['Days in Hospital','Medication']].groupby('Medication')[['Days in Hospital']].describe()

In [70]:
# Box plot of length of stay per medication.
plt.figure(figsize=(10,4))
sns.boxplot(data = healthcare, x = 'Medication', y='Days in Hospital', palette = 'GnBu');

In [71]:
healthcare.head()

In [72]:
# Mean billing amount per insurance provider, rounded to two decimals, highest first.
Insurance_Provider_avg_Billing_amt = round(healthcare[['Insurance Provider','Billing Amount']].groupby('Insurance Provider').mean(),2).rename(columns = {'Billing Amount':'Average Billing Amount'}).sort_values('Average Billing Amount',ascending = False)
Insurance_Provider_avg_Billing_amt

In [73]:
# Line chart of those averages, with the provider (the table's index) on the x-axis.
plt.figure(figsize=(6,4))
plt.title('Average Billing Amount by Insurance Provider')
sns.lineplot(data= Insurance_Provider_avg_Billing_amt,
             x =Insurance_Provider_avg_Billing_amt.index,
             y = 'Average Billing Amount');

In [74]:
healthcare.head()

In [75]:
seasonal_trend_2018 = healthcare[healthcare['doa_year']==2018]
seasonal_trend_2018 = seasonal_trend_2018[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2018

In [76]:
seasonal_trend_2019 = healthcare[healthcare['doa_year']==2019]
seasonal_trend_2019 = seasonal_trend_2019[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2019

In [77]:
seasonal_trend_2020 = healthcare[healthcare['doa_year']==2020]
seasonal_trend_2020 = seasonal_trend_2020[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2020

In [78]:
seasonal_trend_2021 = healthcare[healthcare['doa_year']==2021]
seasonal_trend_2021 = seasonal_trend_2021[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [79]:
seasonal_trend_2022 = healthcare[healthcare['doa_year']==2022]
seasonal_trend_2022 = seasonal_trend_2022[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [80]:
seasonal_trend_2023 = healthcare[healthcare['doa_year']==2023]
seasonal_trend_2023 = seasonal_trend_2023[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [81]:
plt.figure(figsize=(4,2))
plt.title('Seasonal Trend of Year 2018')
ax = sns.barplot(data= seasonal_trend_2018, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [82]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2019')
ax = sns.barplot(data= seasonal_trend_2019, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [83]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2020')
ax = sns.barplot(data= seasonal_trend_2020, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [84]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2021')
ax = sns.barplot(data= seasonal_trend_2021, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [85]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2022')
ax = sns.barplot(data= seasonal_trend_2022, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [86]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2023')
ax = sns.barplot(data= seasonal_trend_2023, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [87]:
# All six yearly seasonal-trend bar charts on a single 2x3 figure.
plt.figure(figsize=(15, 10))

yearly_trends = [
    (2018, seasonal_trend_2018),
    (2019, seasonal_trend_2019),
    (2020, seasonal_trend_2020),
    (2021, seasonal_trend_2021),
    (2022, seasonal_trend_2022),
    (2023, seasonal_trend_2023),
]
for slot, (admission_year, monthly_counts) in enumerate(yearly_trends, start=1):
    plt.subplot(2, 3, slot)
    plt.title('Seasonal Trend of Year {}'.format(admission_year))
    ax = sns.barplot(data=monthly_counts, x='doa_month', y='Patient_count', palette='gnuplot')
    # Annotate each bar with its patient count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.ylabel('Patient Counts')
    plt.xlabel('Month')

In [88]:
healthcare.head()

In [89]:
# Mean length of stay per medical condition, rounded to whole days and
# relabelled 'Average Count of Days'.
avg_days_in_hospital = round(healthcare[['Medical Condition','Days in Hospital']].groupby('Medical Condition')[['Days in Hospital']].mean()).rename(
columns = {'Days in Hospital':'Average Count of Days'})
# Displayed with the condition as a regular column; the stored table is unchanged.
avg_days_in_hospital.reset_index()

In [90]:
healthcare.head()

In [91]:
 (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare['zscore_Billing_Amount'] = (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare.head()

In [92]:
threshold_limit = 3
healthcare[healthcare['zscore_Billing_Amount'] > threshold_limit]

In [93]:
# IPython %notebook magic, given a target .ipynb path.
# NOTE(review): the path's opening double quote is never closed — confirm the intended filename.
%notebook "E:\Project for Resume\Python Healthcare Analysis Project\Healthcare Analysis Python Notebook\Healthcare Analysis.ipynb

In [94]:
# Libraries used throughout the analysis (standard library first, then third-party).
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling applied to every seaborn/matplotlib figure below.
sns.set_style('darkgrid')

# Render every column and every row when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# No `pd.option_context('mode.use_inf_as_na', True)` here: option_context only
# takes effect inside a `with` block, so a bare call changes nothing.

# Silence library warnings so they do not clutter cell output.
warnings.filterwarnings('ignore')

In [95]:
healthcare.head()

In [96]:
# Number of rows per gender.
healthcare[['Gender']].value_counts()

In [97]:
# Same counts wrapped in a DataFrame; the gender values stay in the index.
gender=pd.DataFrame(healthcare[['Gender']].value_counts())
gender

In [98]:
# Unlabelled pie of the counts. Relies on the counts column being named 'count'.
plt.pie(x=gender['count'])

In [99]:
# Pie labelled with the table's index entries.
plt.pie(x=gender['count'],labels=gender.index);

In [100]:
# Rebuild the table and move the index into regular columns.
gender=pd.DataFrame(healthcare[['Gender']].value_counts())
gender.reset_index(inplace =True)

In [101]:
# NOTE(review): after reset_index the index no longer holds the gender values,
# so these labels are presumably positional integers — looks like an abandoned attempt.
plt.pie(x=gender['count'],labels=gender.index);

In [102]:
# NOTE(review): passes the whole DataFrame as labels — looks like an
# exploratory attempt; confirm whether this cell should be kept.
plt.pie(x=gender['count'],labels=gender);

In [103]:
# NOTE(review): labels is the literal string 'gender' rather than one label per
# wedge — looks like an exploratory attempt.
plt.pie(x=gender['count'],labels='gender');

In [104]:
# Rebuild the flat gender table and display it.
gender=pd.DataFrame(healthcare[['Gender']].value_counts())
gender.reset_index(inplace =True)
gender

In [105]:
# NOTE(review): labels is the literal string 'Gender' rather than the 'Gender'
# column — looks like an exploratory attempt.
plt.pie(x=gender['count'],labels='Gender');

In [106]:
# Unlabelled pie of the gender counts (the column key's closing quote and
# bracket were missing, which made the cell a syntax error).
plt.pie(x=gender['count']);

In [107]:
# Unlabelled pie of the gender counts.
plt.pie(x=gender['count']);

In [108]:
# Pie labelled with the values of the 'Gender' column.
plt.pie(x=gender['count'],labels=gender['Gender']);

In [109]:
# Same pie with each wedge's percentage printed to one decimal place.
plt.pie(x=gender['count'],labels=gender['Gender'], autopct='%1.1f%%');

In [110]:
# Same pie at figure size (7, 8).
plt.figure(figsize = (7,8))
plt.pie(x=gender['count'],labels=gender['Gender'], autopct='%1.1f%%');

In [111]:
# Same pie at figure size (2, 2).
plt.figure(figsize = (2,2))
plt.pie(x=gender['count'],labels=gender['Gender'], autopct='%1.1f%%');

In [112]:
# Same pie at figure size (4, 5).
plt.figure(figsize = (4,5))
plt.pie(x=gender['count'],labels=gender['Gender'], autopct='%1.1f%%');

In [113]:
# Libraries used throughout the analysis (standard library first, then third-party).
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling applied to every seaborn/matplotlib figure below.
sns.set_style('darkgrid')

# Render every column and every row when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# No `pd.option_context('mode.use_inf_as_na', True)` here: option_context only
# takes effect inside a `with` block, so a bare call changes nothing.

# Silence library warnings so they do not clutter cell output.
warnings.filterwarnings('ignore')

In [114]:
# Load the raw dataset. The path is a raw string so the Windows backslashes are
# taken literally instead of being parsed as (invalid) escape sequences.
healthcare = pd.read_csv(r'E:\Project for Resume\Python Healthcare Analysis Project\Healthcare Dataset\healthcare_dataset.csv')
healthcare.head()

In [115]:
# Checking for null values
healthcare.isnull().sum()

In [116]:
# checking for Blank Values
healthcare.isna().sum()

In [117]:
# Number of rows that are exact duplicates of an earlier row.
healthcare.duplicated().sum()

In [118]:
# Preview the first rows of the frame.
healthcare.head()

In [119]:
# Column labels available for the analysis.
healthcare.columns

In [120]:
# Per-column dtype and non-null summary.
healthcare.info()

In [121]:
healthcare['Date of Admission'] = pd.to_datetime(healthcare['Date of Admission'],format = '%Y-%m-%d')
healthcare.info()

In [122]:
healthcare['Discharge Date'] = pd.to_datetime(healthcare['Discharge Date'])
healthcare['Discharge Date'].info()

In [123]:
healthcare['Billing Amount']=round(healthcare[['Billing Amount']],2)
healthcare.head()

In [124]:
healthcare.describe()

In [125]:
healthcare['Age'].mean()
print('The Average age of overall pateints is {}.'.format(round(healthcare['Age'].mean(),2)))

In [126]:
avg_age_by_gender = round(healthcare[['Gender','Age']].groupby('Gender')[['Age']].mean(),2)
avg_age_by_gender

In [127]:
gender=pd.DataFrame(healthcare[['Gender']].value_counts())
gender.reset_index(inplace =True)
gender

In [128]:
Gender

In [129]:
# Patient count per hospital, largest first. The dataset has thousands of
# distinct hospitals (8639 in the author's run), so only the 20 busiest are shown.
(
    healthcare.groupby('Hospital')[['Name']]
    .count()
    .rename(columns={'Name': 'Patient Count'})
    .sort_values('Patient Count', ascending=False)
    .head(20)
    .reset_index()
)

In [130]:
healthcare.head()

In [131]:
# Non-null count of 'Medical Condition' within each medical-condition group,
# i.e. how many rows carry each condition.
common_medical_patients = healthcare.groupby('Medical Condition')[['Medical Condition']].count()
common_medical_patients

In [132]:
# Patients per medical condition, bars ordered from most to least common.
plt.figure(figsize=(12, 4))
ax = sns.countplot(data=healthcare,
                   x='Medical Condition',
                   palette='GnBu_r',
                   order=healthcare['Medical Condition'].value_counts().index)
# Annotate every bar container with its count. Each container yielded by the
# loop is labelled; labelling only containers[0] would miss the others whenever
# seaborn creates more than one container.
for bars in ax.containers:
    ax.bar_label(bars)

In [133]:
healthcare.head()

In [134]:
# Distinct insurance providers present in the data.
healthcare['Insurance Provider'].unique()

In [135]:
# Number of rows per insurance provider.
healthcare['Insurance Provider'].value_counts()

In [136]:
# Patients per insurance provider, bars ordered from most to least common.
plt.figure(figsize=(12, 4))
ax = sns.countplot(data=healthcare,
                   x='Insurance Provider',
                   palette='Purples',
                   order=healthcare['Insurance Provider'].value_counts().index)
# Annotate every bar container with its count (each container from the loop,
# not only the first one).
for bars in ax.containers:
    ax.bar_label(bars);

In [137]:
healthcare.head()

In [138]:
avg_billing_amt = round(healthcare[['Admission Type','Billing Amount']].groupby('Admission Type').mean(),2)
avg_billing_amt.rename(columns={'Billing Amount':'Average Billing Amount'},inplace = True)
avg_billing_amt = avg_billing_amt.sort_values('Average Billing Amount',ascending = False)
avg_billing_amt

In [139]:
plt.figure(figsize =(6,3))

ax = sns.barplot(data = avg_billing_amt , 
                 x = avg_billing_amt.index,
                 y = 'Average Billing Amount',
                 palette = 'gist_earth_r' )
for bars in ax.containers:
    ax.bar_label(bars)

In [140]:
healthcare.head()

In [141]:
# How many rows share each (Name, Room Number) combination.
distribution = pd.DataFrame(healthcare[['Name','Room Number']].value_counts() )
distribution

In [142]:
# Combinations that occur at least twice. Relies on the counts column being
# named 'count'.
distribution[distribution['count']>=2]

In [143]:
healthcare.head()

In [144]:
# Non-null 'Name' count per doctor, renamed to 'Counts'; only doctors with at
# least two are kept, sorted from most to fewest.
doctor = pd.DataFrame(healthcare[['Doctor','Name']].groupby('Doctor')['Name'].count())
doctor.rename(columns={'Name':'Counts'},inplace= True)
doctor = doctor[doctor['Counts']>=2].sort_values('Counts',ascending = False)
doctor

In [145]:
# Doctors with between 2 and 7 counts inclusive. The lower bound repeats the
# filter already applied when `doctor` was built.
doctor[(doctor['Counts']>=2) & (doctor['Counts']<=7)]

In [146]:
healthcare.info()

In [147]:
# Derive month and year of admission as separate columns for grouping.
healthcare['doa_month'] = healthcare['Date of Admission'].dt.month
healthcare['doa_year'] = healthcare['Date of Admission'].dt.year
healthcare.info()

In [148]:
# Admissions per year: non-null 'Date of Admission' count within each doa_year.
admission_by_year = healthcare[['Date of Admission','doa_year']].groupby(['doa_year']).count()
admission_by_year

In [149]:
# Line chart of yearly admissions; 'doa_year' is the index of admission_by_year.
plt.figure(figsize=(10,4))
sns.lineplot(data = admission_by_year,y='Date of Admission',x='doa_year')
plt.xlabel('Year');

In [150]:
# Admissions per (year, month), flattened to columns with the count column
# renamed to 'Counts'.
year = healthcare[['Date of Admission','doa_year','doa_month']].groupby(['doa_year','doa_month']).count()
year.reset_index(inplace = True)
year.rename(columns={'Date of Admission': 'Counts'},inplace = True)
year

In [151]:
year_2018 = year[year['doa_year'] == 2018]

year_2019 = year[year['doa_year'] == 2019]

year_2020 = year[year['doa_year'] == 2020]

year_2021 = year[year['doa_year'] == 2021]

year_2022 = year[year['doa_year'] == 2022]

year_2023 = year[year['doa_year'] == 2023]

In [152]:
# creating subplots
plt.figure(figsize=(12,15))

plt.subplot(3,3,1)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2018')
sns.lineplot(data = year_2018,x='doa_month', y='Counts');

plt.subplot(3,3,2)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2019')
sns.lineplot(data = year_2019,x='doa_month', y='Counts');

plt.subplot(3,3,3)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2020')
sns.lineplot(data = year_2020,x='doa_month', y='Counts');

plt.subplot(3,3,4)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2021')
sns.lineplot(data = year_2021,x='doa_month', y='Counts');

plt.subplot(3,3,5)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2022')
sns.lineplot(data = year_2022,x='doa_month', y='Counts');

plt.subplot(3,3,6)
plt.xlabel('Months')
plt.ylabel('Counts')
plt.title('Year 2023')
sns.lineplot(data = year_2023,x='doa_month', y='Counts');

In [153]:
healthcare.head()

In [154]:
healthcare.info()

In [155]:
# Correlation between patient age and billing amount (Series.corr with its
# default method).
Age_Billing_Amount_corr = healthcare['Age'].corr(healthcare['Billing Amount'])
print('Patients Age And Billing Amount has correlation of {}'.format(Age_Billing_Amount_corr))

In [156]:
healthcare.head()

In [157]:
# Mean number of whole days between admission and discharge, reported rounded
# to an integer.
average_stay_hospital = (healthcare['Discharge Date'] - healthcare['Date of Admission']).dt.days.mean()
print('Average Stay of patients in Hospital is {}.'.format(round(average_stay_hospital)))

In [158]:
healthcare.head()

In [159]:
# Mean billing amount per gender, rounded to two decimals.
avg_amt_paid_by_gender = round(healthcare[['Gender','Billing Amount']].groupby('Gender')[['Billing Amount']].mean(),2)
avg_amt_paid_by_gender.rename(columns={'Billing Amount':'Average Billing amount'},inplace = True)
avg_amt_paid_by_gender

In [160]:
healthcare.head()

In [161]:
# Rows per (admission type, year). The count column is renamed to 'Counts'
# before reset_index so it does not clash with the 'Admission Type' index level.
admission_type_trend = healthcare[['Admission Type','doa_year']].groupby(['Admission Type','doa_year'])[['Admission Type']].count()
admission_type_trend.rename(columns={'Admission Type':'Counts'},inplace = True)
admission_type_trend.reset_index(inplace = True)
admission_type_trend

In [162]:
# One yearly-count table per admission type, used by the bar panels that follow.
elective = admission_type_trend[admission_type_trend['Admission Type'] == 'Elective']

emergency = admission_type_trend[admission_type_trend['Admission Type'] == 'Emergency']

urgent = admission_type_trend[admission_type_trend['Admission Type'] == 'Urgent']
 

In [163]:

# Yearly admission counts for each admission type, drawn side by side on a
# shared y-axis so the three panels are directly comparable.
plt.figure(figsize=(10, 3))

panels = [('Elective', elective), ('Emergency', emergency), ('Urgent', urgent)]
axes = []
for position, (admission_type, yearly_counts) in enumerate(panels, start=1):
    # The second and third panels share the first panel's y-axis.
    plt.subplot(1, 3, position, sharey=axes[0] if axes else None)
    ax = sns.barplot(data=yearly_counts, x='doa_year', y='Counts', palette='afmhot')
    # Label each container yielded by the loop, not only containers[0].
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.xlabel('Year')
    plt.ylabel('Counts')
    plt.title('Admission Type - {}'.format(admission_type))
    axes.append(ax)

# Keep the per-panel axes available under their original names.
ax1, ax2, ax3 = axes

# Tighten the layout only after the axes exist; on an empty figure there is
# nothing to arrange.
plt.tight_layout();

In [164]:
healthcare.head()

In [165]:
# Descriptive statistics of the billing amount within each medical condition.
healthcare.groupby('Medical Condition')['Billing Amount'].describe()

In [166]:
# Mean billing amount per medical condition, highest first, rounded to two
# decimals. NOTE(review): this rebinds `avg_billing_amt`, which an earlier cell
# used for the per-admission-type averages.
avg_billing_amt = round(healthcare.groupby('Medical Condition')[['Billing Amount']].mean().rename(
    columns={'Billing Amount': 'Average Billing Amount'}).sort_values('Average Billing Amount',ascending = False).reset_index(),2)
avg_billing_amt

In [167]:
# Box plot of billing amount per medical condition.
ax = sns.boxplot(data = healthcare, x = 'Medical Condition', y = 'Billing Amount',palette='YlGn_r');

In [168]:
healthcare.head()

In [169]:
# Row counts for every (medical condition, medication) pair.
medication_pattern = healthcare.pivot_table(index ='Medical Condition',columns = 'Medication',aggfunc='size')
medication_pattern

In [170]:
# Heatmap of those counts with each cell annotated.
sns.heatmap(medication_pattern, annot = True,fmt ='g' );

In [171]:
healthcare.head()

In [172]:
# Rows per test result. The rename relies on the counts column being named 'count'.
pd.DataFrame(healthcare['Test Results'].value_counts()).rename(columns = {'count':'Counts'})

In [173]:
# Same distribution built via groupby: non-null 'Name' count per test result.
test_results_distribution = healthcare[['Test Results','Name']].groupby('Test Results')[['Name']].count().rename(columns={'Name':'Count'})
test_results_distribution

In [174]:
# Bar chart of the test-result distribution, each bar annotated with its count.
plt.figure(figsize = (6,4))
plt.title('Test Results Distribution')
plt.xlabel('Test Results')
ax = sns.barplot(data = test_results_distribution, x = test_results_distribution.index,y = 'Count' ,palette = 'viridis');
for bars in ax.containers:
    ax.bar_label(bars)

In [175]:
healthcare.head()

In [176]:
# Length of stay in whole days (discharge date minus admission date).
healthcare['Days in Hospital'] = (healthcare['Discharge Date'] - healthcare['Date of Admission']).dt.days

In [177]:
# Mean length of stay per medication.
healthcare[['Days in Hospital','Medication']].groupby('Medication')[['Days in Hospital']].mean()

In [178]:
# Descriptive statistics of length of stay per medication.
healthcare[['Days in Hospital','Medication']].groupby('Medication')[['Days in Hospital']].describe()

In [179]:
# Box plot of length of stay per medication.
plt.figure(figsize=(10,4))
sns.boxplot(data = healthcare, x = 'Medication', y='Days in Hospital', palette = 'GnBu');

In [180]:
healthcare.head()

In [181]:
# Mean billing amount per insurance provider, rounded to two decimals, highest first.
Insurance_Provider_avg_Billing_amt = round(healthcare[['Insurance Provider','Billing Amount']].groupby('Insurance Provider').mean(),2).rename(columns = {'Billing Amount':'Average Billing Amount'}).sort_values('Average Billing Amount',ascending = False)
Insurance_Provider_avg_Billing_amt

In [182]:
# Line chart of those averages, with the provider (the table's index) on the x-axis.
plt.figure(figsize=(6,4))
plt.title('Average Billing Amount by Insurance Provider')
sns.lineplot(data= Insurance_Provider_avg_Billing_amt,
             x =Insurance_Provider_avg_Billing_amt.index,
             y = 'Average Billing Amount');

In [183]:
healthcare.head()

In [184]:
seasonal_trend_2018 = healthcare[healthcare['doa_year']==2018]
seasonal_trend_2018 = seasonal_trend_2018[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2018

In [185]:
seasonal_trend_2019 = healthcare[healthcare['doa_year']==2019]
seasonal_trend_2019 = seasonal_trend_2019[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2019

In [186]:
seasonal_trend_2020 = healthcare[healthcare['doa_year']==2020]
seasonal_trend_2020 = seasonal_trend_2020[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})
seasonal_trend_2020

In [187]:
seasonal_trend_2021 = healthcare[healthcare['doa_year']==2021]
seasonal_trend_2021 = seasonal_trend_2021[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [188]:
seasonal_trend_2022 = healthcare[healthcare['doa_year']==2022]
seasonal_trend_2022 = seasonal_trend_2022[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [189]:
seasonal_trend_2023 = healthcare[healthcare['doa_year']==2023]
seasonal_trend_2023 = seasonal_trend_2023[['doa_month','Name']].groupby('doa_month')[['Name']].count().reset_index().rename(columns ={'Name':'Patient_count'})

In [190]:
plt.figure(figsize=(4,2))
plt.title('Seasonal Trend of Year 2018')
ax = sns.barplot(data= seasonal_trend_2018, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [191]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2019')
ax = sns.barplot(data= seasonal_trend_2019, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [192]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2020')
ax = sns.barplot(data= seasonal_trend_2020, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [193]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2021')
ax = sns.barplot(data= seasonal_trend_2021, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [194]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2022')
ax = sns.barplot(data= seasonal_trend_2022, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [195]:
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2023')
ax = sns.barplot(data= seasonal_trend_2023, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patiet Counts')
plt.xlabel('Month');

In [196]:
# All six yearly seasonal-trend bar charts on a single 2x3 figure.
plt.figure(figsize=(15, 10))

yearly_trends = [
    (2018, seasonal_trend_2018),
    (2019, seasonal_trend_2019),
    (2020, seasonal_trend_2020),
    (2021, seasonal_trend_2021),
    (2022, seasonal_trend_2022),
    (2023, seasonal_trend_2023),
]
for slot, (admission_year, monthly_counts) in enumerate(yearly_trends, start=1):
    plt.subplot(2, 3, slot)
    plt.title('Seasonal Trend of Year {}'.format(admission_year))
    ax = sns.barplot(data=monthly_counts, x='doa_month', y='Patient_count', palette='gnuplot')
    # Annotate each bar with its patient count.
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.ylabel('Patient Counts')
    plt.xlabel('Month')

In [197]:
healthcare.head()

In [198]:
# Mean length of stay per medical condition, rounded to whole days and
# relabelled 'Average Count of Days'.
avg_days_in_hospital = round(healthcare[['Medical Condition','Days in Hospital']].groupby('Medical Condition')[['Days in Hospital']].mean()).rename(
columns = {'Days in Hospital':'Average Count of Days'})
# Displayed with the condition as a regular column; the stored table is unchanged.
avg_days_in_hospital.reset_index()

In [199]:
healthcare.head()

In [200]:
 (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare['zscore_Billing_Amount'] = (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare.head()

In [201]:
threshold_limit = 3
healthcare[healthcare['zscore_Billing_Amount'] > threshold_limit]

In [202]:
# IPython %notebook magic, given a target .ipynb path.
# NOTE(review): the path's opening double quote is never closed — confirm the intended filename.
%notebook "E:\Project for Resume\Python Healthcare Analysis Project\Healthcare Analysis Python Notebook\Healthcare Analysis.ipynb

In [203]:
# Libraries used throughout the analysis (standard library first, then third-party).
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling applied to every seaborn/matplotlib figure below.
sns.set_style('darkgrid')

# Render every column and every row when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# No `pd.option_context('mode.use_inf_as_na', True)` here: option_context only
# takes effect inside a `with` block, so a bare call changes nothing.

# Silence library warnings so they do not clutter cell output.
warnings.filterwarnings('ignore')

In [204]:
# Importing Dataset
# Raw string: the Windows path contains backslash sequences (\P, \H, \h) that are
# not valid Python escapes; a raw literal yields the same path without relying on
# that and without the invalid-escape warning.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
healthcare = pd.read_csv(r'E:\Project for Resume\Python Healthcare Analysis Project\Healthcare Dataset\healthcare_dataset.csv')
healthcare.head()

In [205]:
# Checking for null values: number of missing (NaN/None/NaT) entries in each column.
healthcare.isnull().sum()

In [206]:
# Checking for blank values: per column, count cells whose text is empty or
# whitespace-only. isna() is only an alias of isnull(), so it cannot detect blanks.
healthcare.apply(lambda col: col.astype(str).str.strip().eq('').sum())

In [207]:
# Number of rows flagged by duplicated(), i.e. rows that repeat an earlier row in full.
healthcare.duplicated().sum()

In [208]:
healthcare.head()

In [209]:
# Column labels of the dataset.
healthcare.columns

In [210]:
# Column dtypes and non-null counts as loaded, before any type conversion in this run.
healthcare.info()

In [211]:
# Parse the admission date text (year-month-day) into datetime values, in place.
healthcare['Date of Admission'] = pd.to_datetime(
    healthcare['Date of Admission'],
    format='%Y-%m-%d',
)
healthcare.info()

In [212]:
# Parse the discharge date column into datetime values (format left to pandas to infer).
discharge_column = 'Discharge Date'
healthcare[discharge_column] = pd.to_datetime(healthcare[discharge_column])
healthcare[discharge_column].info()

In [213]:
# Keep billing amounts at two decimal places.
healthcare['Billing Amount'] = healthcare['Billing Amount'].round(2)
healthcare.head()

In [214]:
# Summary statistics produced by DataFrame.describe() for the dataset.
healthcare.describe()

In [215]:
# Overall mean patient age, rounded to two decimals.
print('The Average age of overall patients is {}.'.format(round(healthcare['Age'].mean(),2)))

In [216]:
# Mean age per gender, two decimals.
avg_age_by_gender = (
    healthcare
    .groupby('Gender')[['Age']]
    .mean()
    .round(2)
)
avg_age_by_gender

In [217]:
# Patient count per gender as a flat frame (one row per gender, plus its count).
gender = healthcare[['Gender']].value_counts().reset_index()
gender

In [218]:
# Share of patients by gender.
plt.figure(figsize=(4, 5))
plt.pie(
    gender['count'],
    labels=gender['Gender'],
    autopct='%1.1f%%',
);

In [219]:
healthcare.head()

In [220]:
# Number of rows for each value of the 'Gender' column.
male_female_patients =  healthcare[['Gender']].value_counts()
male_female_patients

In [221]:
healthcare.head()

In [222]:
# Distinct blood types present in the data.
print('Different Blood Types are {}.'.format(healthcare['Blood Type'].unique()))

In [223]:
# Number of distinct values in the 'Hospital' column.
healthcare[['Hospital']].nunique()

In [224]:
# There are 8639 hospitals; show only the top 20 by number of patient records.
# head(20) enforces the "top 20" limit; without it every hospital is printed,
# because display.max_rows is set to None.
(
    healthcare
    .groupby('Hospital')[['Name']]
    .count()
    .rename(columns={'Name': 'Patient Count'})
    .sort_values('Patient Count', ascending=False)
    .reset_index()
    .head(20)
)

In [225]:
healthcare.head()

In [226]:
# Record count per medical condition (the counted column is the grouping key itself).
condition_groups = healthcare.groupby('Medical Condition')
common_medical_patients = condition_groups[['Medical Condition']].count()
common_medical_patients

In [227]:
# Patient records per medical condition, most frequent first.
plt.figure(figsize =(12,4))
ax= sns.countplot(data= healthcare,
                  x = 'Medical Condition',
                  palette = 'GnBu_r',
                  order = healthcare['Medical Condition'].value_counts().index)
# Label each bar container in turn (not ax.containers[0] repeatedly) so that every
# bar gets its count even when the bars are split across several containers.
for bars in ax.containers:
    ax.bar_label(bars)

In [228]:
healthcare.head()

In [229]:
# Distinct values of the 'Insurance Provider' column.
healthcare['Insurance Provider'].unique()

In [230]:
# Number of rows per insurance provider.
healthcare['Insurance Provider'].value_counts()

In [231]:
# Patient records per insurance provider, most frequent first.
plt.figure(figsize =(12,4))
ax = sns.countplot(data = healthcare,
                   x='Insurance Provider',
                   palette = 'Purples',
                   order = healthcare['Insurance Provider'].value_counts().index)
# Label each bar container in turn (not ax.containers[0] repeatedly) so that every
# bar gets its count even when the bars are split across several containers.
for bars in ax.containers:
    ax.bar_label(bars);

In [232]:
healthcare.head()

In [233]:
# Mean billing amount per admission type (two decimals), highest first.
avg_billing_amt = (
    healthcare
    .groupby('Admission Type')[['Billing Amount']]
    .mean()
    .round(2)
    .rename(columns={'Billing Amount': 'Average Billing Amount'})
    .sort_values('Average Billing Amount', ascending=False)
)
avg_billing_amt

In [234]:
# Bar chart of the average billing amount, one bar per index entry of avg_billing_amt.
plt.figure(figsize=(6, 3))
ax = sns.barplot(
    data=avg_billing_amt,
    x=avg_billing_amt.index,
    y='Average Billing Amount',
    palette='gist_earth_r',
)
for container in ax.containers:
    ax.bar_label(container)

In [235]:
healthcare.head()

In [236]:
# How often each (patient name, room number) pair occurs.
name_room_counts = healthcare[['Name', 'Room Number']].value_counts()
distribution = name_room_counts.to_frame()
distribution

In [237]:
# Name/room pairs that appear at least twice.
distribution.loc[distribution['count'].ge(2)]

In [238]:
healthcare.head()

In [239]:
# Patient records per doctor; keep doctors with two or more, busiest first.
doctor = healthcare.groupby('Doctor')['Name'].count().to_frame('Counts')
doctor = doctor.loc[doctor['Counts'] >= 2].sort_values('Counts', ascending=False)
doctor

In [240]:
# Doctors whose record count lies between 2 and 7 (both ends included).
doctor[doctor['Counts'].between(2, 7)]

In [241]:
# Column dtypes and non-null counts at this point in the run.
healthcare.info()

In [242]:
# Derive admission month and year columns from 'Date of Admission'
# (requires that column to already be datetime typed, since .dt is used).
healthcare['doa_month'] = healthcare['Date of Admission'].dt.month
healthcare['doa_year'] = healthcare['Date of Admission'].dt.year
healthcare.info()

In [243]:
# Admissions per year; the 'Date of Admission' column holds the count.
admission_by_year = (
    healthcare
    .groupby('doa_year')[['Date of Admission']]
    .count()
)
admission_by_year

In [244]:
# Yearly admission trend. The plotted column is called 'Date of Admission' but it
# holds the number of admissions, so the y axis is labelled explicitly.
plt.figure(figsize=(10,4))
sns.lineplot(data = admission_by_year,y='Date of Admission',x='doa_year')
plt.title('Admissions per Year')
plt.ylabel('Number of Admissions')
plt.xlabel('Year');

In [245]:
# Admissions per (year, month) as a flat frame with a 'Counts' column.
year = (
    healthcare
    .groupby(['doa_year', 'doa_month'])[['Date of Admission']]
    .count()
    .reset_index()
    .rename(columns={'Date of Admission': 'Counts'})
)
year

In [246]:
# One monthly-count slice per admission year.
year_2018 = year.loc[year['doa_year'].eq(2018)]
year_2019 = year.loc[year['doa_year'].eq(2019)]
year_2020 = year.loc[year['doa_year'].eq(2020)]
year_2021 = year.loc[year['doa_year'].eq(2021)]
year_2022 = year.loc[year['doa_year'].eq(2022)]
year_2023 = year.loc[year['doa_year'].eq(2023)]

In [247]:
# creating subplots: one monthly-admissions line chart per year on a 3x3 grid
plt.figure(figsize=(12,15))

yearly_frames = {
    2018: year_2018,
    2019: year_2019,
    2020: year_2020,
    2021: year_2021,
    2022: year_2022,
    2023: year_2023,
}
for position, (label, frame) in enumerate(yearly_frames.items(), start=1):
    plt.subplot(3, 3, position)
    # Labels are set before drawing, in the same order as for every panel.
    plt.xlabel('Months')
    plt.ylabel('Counts')
    plt.title('Year {}'.format(label))
    sns.lineplot(data=frame, x='doa_month', y='Counts')

In [248]:
healthcare.head()

In [249]:
# Column dtypes and non-null counts at this point in the run.
healthcare.info()

In [250]:
# Correlation between patient age and billing amount (Series.corr default method).
Age_Billing_Amount_corr = healthcare['Age'].corr(healthcare['Billing Amount'])
print(f'Patients Age And Billing Amount has correlation of {Age_Billing_Amount_corr}')

In [251]:
healthcare.head()

In [252]:
# Mean number of days between admission and discharge, printed as a whole number.
average_stay_hospital = (
    healthcare['Discharge Date']
    .sub(healthcare['Date of Admission'])
    .dt.days
    .mean()
)
print(f'Average Stay of patients in Hospital is {round(average_stay_hospital)}.')

In [253]:
healthcare.head()

In [254]:
# Mean billing amount per gender, two decimals.
avg_amt_paid_by_gender = (
    healthcare
    .groupby('Gender')[['Billing Amount']]
    .mean()
    .round(2)
    .rename(columns={'Billing Amount': 'Average Billing amount'})
)
avg_amt_paid_by_gender

In [255]:
healthcare.head()

In [256]:
# Record count per (admission type, admission year) as a flat frame.
admission_type_trend = (
    healthcare
    .groupby(['Admission Type', 'doa_year'])[['Admission Type']]
    .count()
    .rename(columns={'Admission Type': 'Counts'})
    .reset_index()
)
admission_type_trend

In [257]:
# Yearly counts split by admission type.
elective = admission_type_trend.loc[admission_type_trend['Admission Type'].eq('Elective')]
emergency = admission_type_trend.loc[admission_type_trend['Admission Type'].eq('Emergency')]
urgent = admission_type_trend.loc[admission_type_trend['Admission Type'].eq('Urgent')]
 

In [258]:

# Yearly admission counts, one panel per admission type, sharing the y axis so the
# three panels can be compared directly.
plt.figure(figsize=(10,3))

ax1 = plt.subplot(1,3,1)
ax = sns.barplot(data = elective,x = 'doa_year', y = 'Counts', palette = 'afmhot')
# Label each container in turn so every bar is annotated.
for bars in ax.containers:
    ax.bar_label(bars)
plt.xlabel('Year')
plt.ylabel('Counts')
plt.title('Admission Type - Elective');

ax2 = plt.subplot(1,3,2,sharey=ax1)
ax = sns.barplot(data = emergency,x = 'doa_year', y = 'Counts', palette = 'afmhot')
for bars in ax.containers:
    ax.bar_label(bars)
plt.xlabel('Year')
plt.ylabel('Counts')
plt.title('Admission Type - Emergency');

ax3 = plt.subplot(1,3,3,sharey =ax1)
ax = sns.barplot(data = urgent,x = 'doa_year', y = 'Counts', palette = 'afmhot')
for bars in ax.containers:
    ax.bar_label(bars)
plt.xlabel('Year')
plt.ylabel('Counts')
plt.title('Admission Type - Urgent')

# tight_layout only has something to arrange once the axes exist, so it runs last.
plt.tight_layout();

In [259]:
healthcare.head()

In [260]:
# describe() summary of 'Billing Amount' within each medical condition.
healthcare.groupby('Medical Condition')['Billing Amount'].describe()

In [261]:
# Mean billing amount per medical condition, highest first, two decimals.
# NOTE(review): this rebinds avg_billing_amt, which an earlier cell fills with the
# per-admission-type averages; re-running that earlier plot afterwards would use this frame.
avg_billing_amt = (
    healthcare
    .groupby('Medical Condition')[['Billing Amount']]
    .mean()
    .rename(columns={'Billing Amount': 'Average Billing Amount'})
    .sort_values('Average Billing Amount', ascending=False)
    .reset_index()
    .round(2)
)
avg_billing_amt

In [262]:
# Box plot of billing amount for each medical condition.
ax = sns.boxplot(data = healthcare, x = 'Medical Condition', y = 'Billing Amount',palette='YlGn_r');

In [263]:
healthcare.head()

In [264]:
# Cross-table of row counts (aggfunc='size'): medical conditions as rows, medications as columns.
medication_pattern = healthcare.pivot_table(index ='Medical Condition',columns = 'Medication',aggfunc='size')
medication_pattern

In [265]:
# Heatmap of the condition-by-medication counts, with each cell annotated by its value.
sns.heatmap(medication_pattern, annot = True,fmt ='g' );

In [266]:
healthcare.head()

In [267]:
# Row count per test result, shown as a one-column frame.
healthcare['Test Results'].value_counts().to_frame().rename(columns={'count': 'Counts'})

In [268]:
# Number of patient names recorded per test result.
test_results_distribution = (
    healthcare
    .groupby('Test Results')[['Name']]
    .count()
    .rename(columns={'Name': 'Count'})
)
test_results_distribution

In [269]:
# Bar chart of record counts per test result, each bar annotated with its value.
plt.figure(figsize=(6, 4))
plt.title('Test Results Distribution')
plt.xlabel('Test Results')
ax = sns.barplot(
    data=test_results_distribution,
    x=test_results_distribution.index,
    y='Count',
    palette='viridis',
)
for container in ax.containers:
    ax.bar_label(container)

In [270]:
healthcare.head()

In [271]:
# Length of stay in whole days: discharge date minus admission date.
healthcare['Days in Hospital'] = (
    healthcare['Discharge Date'].sub(healthcare['Date of Admission']).dt.days
)

In [272]:
# Mean length of stay for each medication.
healthcare.groupby('Medication')[['Days in Hospital']].mean()

In [273]:
# describe() summary of length of stay for each medication.
healthcare.groupby('Medication')[['Days in Hospital']].describe()

In [274]:
# Box plot of length of stay for each medication.
plt.figure(figsize=(10, 4))
sns.boxplot(
    data=healthcare,
    x='Medication',
    y='Days in Hospital',
    palette='GnBu',
);

In [275]:
healthcare.head()

In [276]:
# Mean billing amount per insurance provider (two decimals), highest first.
Insurance_Provider_avg_Billing_amt = (
    healthcare
    .groupby('Insurance Provider')[['Billing Amount']]
    .mean()
    .round(2)
    .rename(columns={'Billing Amount': 'Average Billing Amount'})
    .sort_values('Average Billing Amount', ascending=False)
)
Insurance_Provider_avg_Billing_amt

In [277]:
# Line plot of the average billing amount, with the frame's index (insurance provider) on the x axis.
# NOTE(review): providers are categories, so the connecting line implies no real ordering - consider bars.
plt.figure(figsize=(6,4))
plt.title('Average Billing Amount by Insurance Provider')
sns.lineplot(data= Insurance_Provider_avg_Billing_amt,
             x =Insurance_Provider_avg_Billing_amt.index,
             y = 'Average Billing Amount');

In [278]:
healthcare.head()

In [279]:
# Patient records per admission month for 2018.
seasonal_trend_2018 = (
    healthcare.loc[healthcare['doa_year'] == 2018, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)
seasonal_trend_2018

In [280]:
# Patient records per admission month for 2019.
seasonal_trend_2019 = (
    healthcare.loc[healthcare['doa_year'] == 2019, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)
seasonal_trend_2019

In [281]:
# Patient records per admission month for 2020.
seasonal_trend_2020 = (
    healthcare.loc[healthcare['doa_year'] == 2020, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)
seasonal_trend_2020

In [282]:
# Patient records per admission month for 2021.
seasonal_trend_2021 = (
    healthcare.loc[healthcare['doa_year'] == 2021, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)

In [283]:
# Patient records per admission month for 2022.
seasonal_trend_2022 = (
    healthcare.loc[healthcare['doa_year'] == 2022, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)

In [284]:
# Patient records per admission month for 2023.
seasonal_trend_2023 = (
    healthcare.loc[healthcare['doa_year'] == 2023, ['doa_month', 'Name']]
    .groupby('doa_month')[['Name']]
    .count()
    .reset_index()
    .rename(columns={'Name': 'Patient_count'})
)

In [285]:
# Monthly patient counts for 2018, each bar annotated with its value.
# Figure size matches the 8x4 used by the plots for the other years, so the bar
# labels have room.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2018')
ax = sns.barplot(data= seasonal_trend_2018, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [286]:
# Monthly patient counts for 2019, each bar annotated with its value.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2019')
ax = sns.barplot(data= seasonal_trend_2019, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [287]:
# Monthly patient counts for 2020, each bar annotated with its value.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2020')
ax = sns.barplot(data= seasonal_trend_2020, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [288]:
# Monthly patient counts for 2021, each bar annotated with its value.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2021')
ax = sns.barplot(data= seasonal_trend_2021, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [289]:
# Monthly patient counts for 2022, each bar annotated with its value.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2022')
ax = sns.barplot(data= seasonal_trend_2022, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [290]:
# Monthly patient counts for 2023, each bar annotated with its value.
plt.figure(figsize=(8,4))
plt.title('Seasonal Trend of Year 2023')
ax = sns.barplot(data= seasonal_trend_2023, x = 'doa_month',y = 'Patient_count',palette = 'gnuplot');
for bars in ax.containers:
    ax.bar_label(bars)
plt.ylabel('Patient Counts')
plt.xlabel('Month');

In [291]:
# Monthly patient counts for 2018-2023 on one 2x3 grid, one panel per year.
plt.figure(figsize=(15,10))

seasonal_trends = {
    2018: seasonal_trend_2018,
    2019: seasonal_trend_2019,
    2020: seasonal_trend_2020,
    2021: seasonal_trend_2021,
    2022: seasonal_trend_2022,
    2023: seasonal_trend_2023,
}
# A single loop draws every panel, so titles and labels cannot drift apart.
for panel, (trend_year, trend) in enumerate(seasonal_trends.items(), start=1):
    plt.subplot(2, 3, panel)
    plt.title('Seasonal Trend of Year {}'.format(trend_year))
    ax = sns.barplot(data=trend, x='doa_month', y='Patient_count', palette='gnuplot')
    for bars in ax.containers:
        ax.bar_label(bars)
    plt.ylabel('Patient Counts')
    plt.xlabel('Month')

In [292]:
healthcare.head()

In [293]:
# Mean length of stay per medical condition, rounded to whole days.
avg_days_in_hospital = (
    healthcare
    .groupby('Medical Condition')[['Days in Hospital']]
    .mean()
    .round()
    .rename(columns={'Days in Hospital': 'Average Count of Days'})
)
avg_days_in_hospital.reset_index()

In [294]:
healthcare.head()

In [295]:
 (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare['zscore_Billing_Amount'] = (( healthcare['Billing Amount'] - healthcare['Billing Amount'].mean() )/healthcare['Billing Amount'].std()).abs()
healthcare.head()

In [296]:
# Rows whose absolute billing z-score exceeds the cut-off are shown as outliers.
threshold_limit = 3
healthcare.loc[healthcare['zscore_Billing_Amount'].gt(threshold_limit)]