In [2]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [3]:
#To install packages needed to run this code, use: pip install -r requirements.txt
from ucimlrepo import fetch_ucirepo 
import sklearn.model_selection
from sklearn.preprocessing import OneHotEncoder
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np

# fetch dataset 
data = fetch_ucirepo(id=45)

#Convert to binary classification (no heart disease vs heart disease)
y = data.data.targets.to_numpy().reshape(-1) 
y = np.clip(y, 0, 1)  

#Impute missing values with mode
df = data.data.features
df = df.fillna(df.mode().iloc[0])

#Get feature values  
X = df.to_numpy() 

#Reencode categorical variables
categorical_columns       = ['cp','restecg']
categorical_indices       = [i for i,c in enumerate(df.columns) if c in categorical_columns]
non_categorical_indices   = [i for i,c in enumerate(df.columns) if c not in categorical_columns]
encoder                   = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
Xonehot                   = encoder.fit_transform(X[:,categorical_indices])
Xnew                      = np.hstack([X[:,non_categorical_indices],Xonehot])

#Split into train and test sets
Xtrain, Xtest, ytrain, ytest = sklearn.model_selection.train_test_split(Xnew, y, test_size=0.20, random_state=589,stratify=y)

#Standardize features
scaler = StandardScaler()
Xtrain= scaler.fit_transform(Xtrain)
Xtest = scaler.fit_transform(Xtest)

In [4]:
print(f"Number of training cases: {Xtrain.shape[0]}")
print(f"Number of test cases: {Xtest.shape[0]}")
print(f"Number of feature dimensions: {Xtrain.shape[1]}")

Number of training cases: 242
Number of test cases: 61
Number of feature dimensions: 18


In [10]:
lr = LogisticRegression()
lr.fit(Xtrain, ytrain)

ytrain_pred = lr.predict(Xtrain)
ytest_pred  = lr.predict(Xtest)

train_error = 1- np.mean(ytrain_pred == ytrain)
test_error  = 1- np.mean(ytest_pred == ytest)

print(f"Training error rate: {train_error:}")
print(f"Test error rate: {test_error:}")

Training error rate: 0.12396694214876036
Test error rate: 0.16393442622950816
