# Data Interpretation

# 1. Patient Demographics

In [16]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind

In [2]:
# load data set
df = pd.read_excel("Hospital Readmission Data.xlsx")
df.head()

Unnamed: 0,PatientID,Age (in years),Gender,AdmissionType,Diagnosis,LengthOfStay (in days),TreatmentReceived,FollowUpSchedule,Readmitted
0,1,65,Male,Emergency,Heart Failure,7,Medication A,Yes,Yes
1,2,50,Female,Elective,Hip Replacement,5,Surgery,Yes,No
2,3,72,Male,Emergency,Pneumonia,10,Antibiotics,No,Yes
3,4,45,Female,Urgent,Appendicitis,3,Surgery,Yes,No
4,5,80,Female,Emergency,Stroke,12,Rehabilitation,Yes,Yes


In [3]:
# Apply Label hot encoding on a data set
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
categorical_columns = ['Gender', 'AdmissionType', 'Diagnosis', 'TreatmentReceived', 'FollowUpSchedule', 'Readmitted' ]
# Apply Label encoding to each categorical column
for columns in categorical_columns:
    df[columns] = le.fit_transform(df[columns])

In [4]:
df.head(1)

Unnamed: 0,PatientID,Age (in years),Gender,AdmissionType,Diagnosis,LengthOfStay (in days),TreatmentReceived,FollowUpSchedule,Readmitted
0,1,65,1,1,4,7,3,1,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   PatientID               10 non-null     int64
 1   Age (in years)          10 non-null     int64
 2   Gender                  10 non-null     int32
 3   AdmissionType           10 non-null     int32
 4   Diagnosis               10 non-null     int32
 5   LengthOfStay (in days)  10 non-null     int64
 6   TreatmentReceived       10 non-null     int32
 7   FollowUpSchedule        10 non-null     int32
 8   Readmitted              10 non-null     int32
dtypes: int32(6), int64(3)
memory usage: 612.0 bytes


In [6]:
readmission_rates = df.groupby('Age (in years)')['Readmitted'].mean().reset_index()
readmission_rates

Unnamed: 0,Age (in years),Readmitted
0,30,0.0
1,45,0.0
2,50,0.0
3,55,0.0
4,60,0.0
5,65,1.0
6,67,1.0
7,72,1.0
8,77,1.0
9,80,1.0


In [7]:
# Create a contingency table
contingency_table = pd.crosstab(df['Gender'], df['Readmitted'])
# Perform the chi-square test
chi2, p, dof, expected = chi2_contingency(contingency_table)
# Display the results
chi2, p

(1.2, 0.5488116360940265)

# 2). Medical History & Treatment

In [8]:
readmission_rates_by_diagnosis = df.groupby('Diagnosis')['Readmitted'].mean().reset_index()
# Sort the results by readmission rate in descending order
readmission_rates_by_diagnosis = readmission_rates_by_diagnosis.sort_values(by='Readmitted', ascending=False)
print(readmission_rates_by_diagnosis)

   Diagnosis  Readmitted
1          1         1.0
3          3         1.0
4          4         1.0
7          7         1.0
8          8         1.0
0          0         0.0
2          2         0.0
5          5         0.0
6          6         0.0
9          9         0.0


In [10]:
# Calculate readmission rates for each treatment
readmission_rates_by_treatment = df.groupby('TreatmentReceived')['Readmitted'].mean().reset_index()
# Display the readmission rates
print("Readmission rates by treatment:") 
print(readmission_rates_by_treatment)
# Step 2: Create a contingency table for the chi-square test
contingency_table = pd.crosstab(df['TreatmentReceived'], df['Readmitted'])

# Perform the chi-square test
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Display the chi-square test results
print("\nChi-square test results:")
print(f"Chi-square statistic: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

# Interpretation
if p < 0.05:
    print("\nThere is a statistically significant difference in readmission rates between treatments.")
else:
    print("\nThere is no statistically significant difference in readmission rates between treatments.")

Readmission rates by treatment:
   TreatmentReceived  Readmitted
0                  0         1.0
1                  1         1.0
2                  2         0.0
3                  3         1.0
4                  4         1.0
5                  5         1.0
6                  6         0.0

Chi-square test results:
Chi-square statistic: 10.0
P-value: 0.12465201948308108
Degrees of freedom: 6
Expected frequencies:
[[0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [2.  2. ]]

There is no statistically significant difference in readmission rates between treatments.


# 3. Admission Details

In [11]:
# Step 1: Calculate readmission rates for each type of admission
readmission_rates_by_admission_type = df.groupby('AdmissionType')['Readmitted'].mean().reset_index()

# Display the readmission rates
print("Readmission rates by admission type:")
print(readmission_rates_by_admission_type)

# Step 2: Create a contingency table for chi-square test
contingency_table = pd.crosstab(df['AdmissionType'], df['Readmitted'])

# Perform the chi-square test
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Display the chi-square test results
print("\nChi-square test results:")
print(f"Chi-square statistic: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

# Interpretation
if p < 0.05:
    print("\nThere is a statistically significant difference in readmission rates between admission types.")
else:
    print("\nThere is no statistically significant difference in readmission rates between admission types.")

Readmission rates by admission type:
   AdmissionType  Readmitted
0              0         0.0
1              1         1.0
2              2         0.0

Chi-square test results:
Chi-square statistic: 10.0
P-value: 0.006737946999085468
Degrees of freedom: 2
Expected frequencies:
[[1.5 1.5]
 [2.5 2.5]
 [1.  1. ]]

There is a statistically significant difference in readmission rates between admission types.


In [17]:
# Step 1: Calculate descriptive statistics
readmitted = df[df['Readmitted'] == 1]
not_readmitted = df[df['Readmitted'] == 0]

print("Descriptive statistics for Length of Stay:")
print(f"Mean length of stay for readmitted patients: {readmitted['LengthOfStay (in days)'].mean():.2f}")
print(f"Mean length of stay for non-readmitted patients: {not_readmitted['LengthOfStay (in days)'].mean():.2f}")
print(f"Standard deviation of length of stay for readmitted patients: {readmitted['LengthOfStay (in days)'].std():.2f}")
print(f"Standard deviation of length of stay for non-readmitted patients: {not_readmitted['LengthOfStay (in days)'].std():.2f}")

# Step 2: Perform t-test
t_stat, p_val = ttest_ind(readmitted['LengthOfStay (in days)'], not_readmitted['LengthOfStay (in days)'])

print("\nT-test results:")
print(f"T-statistic: {t_stat:.2f}")
print(f"P-value: {p_val:.2f}")

# Interpretation
if p_val < 0.05:
    print("\nThere is a statistically significant difference in length of stay between readmitted and non-readmitted patients.")
else:
    print("\nThere is no statistically significant difference in length of stay between readmitted and non-readmitted patients.")

Descriptive statistics for Length of Stay:
Mean length of stay for readmitted patients: 9.20
Mean length of stay for non-readmitted patients: 4.00
Standard deviation of length of stay for readmitted patients: 1.92
Standard deviation of length of stay for non-readmitted patients: 1.58

T-test results:
T-statistic: 4.67
P-value: 0.00

There is a statistically significant difference in length of stay between readmitted and non-readmitted patients.
