In [1]:
import pandas as pd
from io import StringIO

# Inline, tab-separated sample of the heart-disease records.
# The header row names 12 columns while every data row carries 13 fields, so
# pandas takes the leading field of each row (0-9) as the row index instead of
# treating it as a data column.
data = """
Age	Sex	ChestPainType	RestingBP	Cholesterol	FastingBS	RestingECG	MaxHR	ExerciseAngina	Oldpeak	ST_Slope	HeartDisease
0	52	M	ASY	145	250	0	Normal	160	Y	1.2	Flat	1
1	43	F	NAP	130	220	0	ST	140	N	0.0	Up	0
2	57	M	ATA	138	300	1	Normal	150	N	0.8	Flat	1
3	60	F	ASY	150	270	0	Normal	132	Y	2.3	Down	1
4	41	M	NAP	125	210	0	Normal	168	N	0.0	Up	0
5	46	F	ATA	135	240	0	ST	155	N	0.4	Up	0
6	55	M	ASY	160	290	1	Normal	120	Y	1.6	Flat	1
7	38	F	NAP	118	200	0	Normal	172	N	0.0	Up	0
8	49	M	ATA	132	260	0	ST	145	N	0.0	Up	0
9	63	F	ASY	170	310	1	Normal	110	Y	2.0	Down	1
"""

df = pd.read_csv(StringIO(data), sep="\t")
print(df)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the cell output.
df.info()

   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   52   M           ASY        145          250          0     Normal    160   
1   43   F           NAP        130          220          0         ST    140   
2   57   M           ATA        138          300          1     Normal    150   
3   60   F           ASY        150          270          0     Normal    132   
4   41   M           NAP        125          210          0     Normal    168   
5   46   F           ATA        135          240          0         ST    155   
6   55   M           ASY        160          290          1     Normal    120   
7   38   F           NAP        118          200          0     Normal    172   
8   49   M           ATA        132          260          0         ST    145   
9   63   F           ASY        170          310          1     Normal    110   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              Y      1.2     Flat             1  
1     

In [2]:
# Summary statistics (count, mean, std, quartiles, extremes) of the Oldpeak column.
oldpeak_stats = df["Oldpeak"].describe()
oldpeak_stats

Unnamed: 0,Oldpeak
count,10.0
mean,0.83
std,0.894489
min,0.0
25%,0.0
50%,0.6
75%,1.5
max,2.3


In [3]:
# Show the column labels of the loaded frame.
column_labels = df.columns
column_labels

Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [4]:
def bp_risk(bp, normal_below=120, high_from=140):
    """Bucket a single resting blood-pressure reading into a risk label.

    Parameters
    ----------
    bp : int or float
        One blood-pressure reading (a scalar, as passed by ``Series.apply``).
    normal_below : int or float, default 120
        Readings strictly below this value are labelled ``'Normal'``.
    high_from : int or float, default 140
        Readings at or above this value are labelled ``'High'``.

    Returns
    -------
    str or None
        ``'Normal'``, ``'Elevated'`` or ``'High'``; ``None`` when ``bp`` is
        missing (``None`` / ``NaN`` / ``pd.NA``).
    """
    # Every comparison against NaN is False, so without this guard a missing
    # reading would fall through to the last branch and be reported as 'High'.
    if pd.isna(bp):
        return None
    if bp < normal_below:
        return 'Normal'
    # Reaching this point already implies bp >= normal_below.
    if bp < high_from:
        return 'Elevated'
    return 'High'

def old_peak(op, high_above=2):
    """Bucket a single Oldpeak value into a stress label.

    Parameters
    ----------
    op : int or float
        One Oldpeak value (a scalar, as passed by ``Series.apply``).
    high_above : int or float, default 2
        Values strictly above this threshold are labelled ``'High Stress'``.

    Returns
    -------
    str or None
        ``'No Stress'`` for exactly 0, ``'Moderate Stress'`` for any other
        value up to and including ``high_above``, ``'High Stress'`` above it;
        ``None`` when ``op`` is missing (``None`` / ``NaN`` / ``pd.NA``).
    """
    # Every comparison against NaN is False, so without this guard a missing
    # value would fall through to the last branch and be reported as
    # 'High Stress'.
    if pd.isna(op):
        return None
    if op == 0:
        return 'No Stress'
    # NOTE(review): negative values are neither 0 nor above the threshold, so
    # they are labelled 'Moderate Stress' - confirm that this is intended.
    if op <= high_above:
        return 'Moderate Stress'
    return 'High Stress'

# Derive one label column per (source column, classifier) pair; apply() calls
# the classifier once for every element of the source column.
for source_col, label_col, classify in (
    ('RestingBP', 'BPRisk', bp_risk),
    ('Oldpeak', 'OldpeakRisk', old_peak),
):
    df[label_col] = df[source_col].apply(classify)

# Show each raw measurement next to the label derived from it.
df[['RestingBP', 'BPRisk', 'Oldpeak', 'OldpeakRisk']]

Unnamed: 0,RestingBP,BPRisk,Oldpeak,OldpeakRisk
0,145,High,1.2,Moderate Stress
1,130,Elevated,0.0,No Stress
2,138,Elevated,0.8,Moderate Stress
3,150,High,2.3,High Stress
4,125,Elevated,0.0,No Stress
5,135,Elevated,0.4,Moderate Stress
6,160,High,1.6,Moderate Stress
7,118,Normal,0.0,No Stress
8,132,Elevated,0.0,No Stress
9,170,High,2.0,Moderate Stress


In [5]:
from  sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split

# Columns routed through the numeric (scaling) branch of the preprocessor.
num_features = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'MaxHR',
    'Oldpeak',
]

# Columns routed through the categorical (one-hot) branch of the preprocessor.
# NOTE(review): the derived BPRisk / OldpeakRisk columns are not listed in
# either group - confirm whether they are meant to feed the model.
cat_features = [
    'Sex',
    'ChestPainType',
    'FastingBS',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]

In [6]:
# Numeric branch: standardise each column (StandardScaler defaults).
num_pipeline = Pipeline([
    ("scaler", StandardScaler()),
])

# Categorical branch: one-hot encode, dropping the first level of each feature.
# handle_unknown="ignore" stops transform()/predict() from raising when a row
# contains a category that was absent from the training rows, which is easy to
# hit with small splits; such a value is encoded as all zeros instead.
# Combining it with drop= requires scikit-learn >= 1.0.
cat_pipeline = Pipeline([
    ("ohe", OneHotEncoder(drop="first", handle_unknown="ignore")),
])

# Route each column group through its own branch. Columns named in neither
# list are discarded (ColumnTransformer's default remainder="drop").
preprocess = ColumnTransformer([
    ("num", num_pipeline, num_features),
    ("cat", cat_pipeline, cat_features),
])

# Full estimator: preprocessing followed by logistic regression.
clf = Pipeline([
    ("prep", preprocess),
    ("model", LogisticRegression(max_iter=1000)),
])



In [7]:
from sklearn.metrics import accuracy_score

# Separate the label column from the predictors.
target_col = "HeartDisease"
x = df.drop(columns=[target_col])
y = df[target_col]

# stratify=y keeps the class proportions of the target the same in the train
# and test partitions, so neither side can end up with a single class by
# chance of the shuffle.
x_train_pipe, x_test_pipe, y_train_pipe, y_test_pipe = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

clf.fit(x_train_pipe, y_train_pipe)

y_pred_pipe = clf.predict(x_test_pipe)
acc = accuracy_score(y_test_pipe, y_pred_pipe)
# With the 10-row inline sample, test_size=0.2 holds out only 2 rows, so this
# score can only be 0%, 50% or 100% and is not a reliable estimate.
print(f"Accuracy: {acc*100:.2f}%")

Accuracy: 100.00%
