In [14]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_1samp
from scipy.stats import binom_test

# Read the heart disease dataset from disk into a DataFrame
heart = pd.read_csv('heart_disease.csv')

# Split the patients into two groups by the value of the diagnosis column
yes_hd = heart.loc[heart['heart_disease'].eq('presence')]
no_hd = heart.loc[heart['heart_disease'].eq('absence')]

# Preview the first rows of the full dataset
heart.head()

Unnamed: 0,age,sex,trestbps,chol,cp,exang,fbs,thalach,heart_disease
0,63,male,145,233,typical angina,0,1,150,absence
1,67,male,160,286,asymptomatic,1,0,108,presence
2,67,male,120,229,asymptomatic,1,0,129,presence
3,37,male,130,250,non-anginal pain,0,0,187,absence
4,41,female,130,204,atypical angina,0,0,172,absence


In [15]:
# Cholesterol column restricted to the heart disease (HD) group
chol_hd = yes_hd['chol']

# Average cholesterol across the HD group
chol_hd_mean = np.mean(a=chol_hd)

print(chol_hd_mean)

251.4748201438849


In [16]:
# One-sided, one-sample t-test: is the HD group's mean cholesterol
# greater than the 240 cut-off used for high cholesterol?
tstat, pval = ttest_1samp(
    chol_hd,
    240,
    alternative='greater',
)

print(pval)

0.0035411033905155703


It would appear that HD patients have an average cholesterol level that is significantly greater than 240 mg/dl. (Used a significance threshold of 0.05.)

In [17]:
# Cholesterol column restricted to the group without heart disease
chol_nohd = no_hd['chol']

# Average cholesterol across the non-HD group
chol_nohd_mean = np.mean(a=chol_nohd)

print(chol_nohd_mean)

242.640243902439


In [18]:
# One-sided, one-sample t-test: is the non-HD group's mean cholesterol
# greater than the 240 cut-off used for high cholesterol?
tstat, pval = ttest_1samp(
    chol_nohd,
    240,
    alternative='greater',
)

print(pval)

0.26397120232220506


It would appear that non-HD patients have an average cholesterol level that is NOT significantly greater than 240 mg/dl.

In [19]:
# Sample size: one row of `heart` per patient
num_patients = heart.shape[0]

print(num_patients)

303


In [20]:
# Count patients with FBS > 120 by summing the `fbs` column
# (the column holds 0/1 values in the preview rows, so the sum is a count)
num_highfbs_patients = np.sum(heart['fbs'])

print(num_highfbs_patients)

45


In [21]:
# Number of patients that 8% of the sample size corresponds to

print(num_patients * 0.08)

24.240000000000002


In [22]:
# Run a one-sided binomial test: is the share of patients with high FBS in
# this sample greater than the 8% rate being compared against?
#
# `scipy.stats.binom_test` was deprecated in SciPy 1.10 and removed in 1.12.
# Its replacement, `binomtest`, returns a result object whose `.pvalue`
# attribute holds the same p-value.
# NOTE: on SciPy >= 1.12 the `binom_test` import at the top of the notebook
# must also be dropped, since that name no longer exists.
from scipy.stats import binomtest

pval = binomtest(k = num_highfbs_patients, n = num_patients, p = 0.08, alternative = 'greater').pvalue

print(pval)

4.6894719514488777e-05


It would appear that this sample likely comes from a population where more than 8% of people have FBS > 120 mg/dl.