In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 

In [None]:
# Read framingham.csv from the Kaggle input directory into a DataFrame.
csv_path = '/kaggle/input/heart-disease-prediction-using-logistic-regression/framingham.csv'
dataset = pd.read_csv(csv_path)
# Bare last expression: the notebook renders the loaded frame as the cell output.
dataset

In [None]:
# Discard every row that has at least one missing value; `dataset` now
# refers to the cleaned frame that the following cells slice into X and Y.
dataset = dataset.dropna(axis=0, how="any")

# The last column is the target, the rest are features

In [None]:
# Features are all columns except the last; the target is the last column.
# Both are taken as NumPy arrays in a single positional slice each.
X, Y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Split the data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# The split is seeded: without `random_state` every run shuffles differently,
# so the scores printed further down could not be reproduced after a
# Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Set up feature scaling for the training and test sets

In [None]:
from sklearn.preprocessing import StandardScaler, Normalizer

# Only construct the two candidate preprocessors here.
#
# X_train / X_test are deliberately left untouched: the model-comparison cell
# below applies `norm`, `sc`, or nothing to them itself. Standardizing them
# in this cell as well would make that comparison operate on already-scaled
# data (normalizing standardized features, standardizing twice, and labelling
# standardized features as "Not adjusting the data").
norm = Normalizer()      # rescales each sample (row) to unit norm
sc = StandardScaler()    # rescales each feature (column) to zero mean, unit variance

# Train a model for each preprocessing option, predict, and print the scores

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Candidate preprocessing steps to compare. `None` means: use X_train / X_test
# exactly as the earlier cells left them.
methods = [norm, sc, None]

# Track the most accurate model on the test set together with its confusion
# matrix, so that later cells report on the same model.
best_model = None
best_acc = 0
best_cm = None
for method in methods:
    if method is not None:
        # Fit the preprocessor on the training data only, then reuse the
        # fitted transform on the test data.
        x_train = method.fit_transform(X_train)
        x_test = method.transform(X_test)
        if method is norm:
            print("Using normalization:")
        else:
            print("Using standardization:")
    else:
        x_train = X_train
        x_test = X_test
        print("Not adjusting the data:")

    model = LogisticRegression(random_state = 42, max_iter = 10000)
    model.fit(x_train, Y_train)
    Y_pred = model.predict(x_test)

    # scikit-learn metrics take (y_true, y_pred) in that order. Accuracy and F1
    # are symmetric, but with the arguments swapped precision and recall trade
    # places and the confusion matrix comes out transposed.
    cm = confusion_matrix(Y_test, Y_pred)
    acc = accuracy_score(Y_test, Y_pred)
    f1 = f1_score(Y_test, Y_pred)
    recall = recall_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred)
    print("\tModel accuracy:", acc)
    print("\tF1 score:", f1)
    print("\tRecall:", recall)
    print("\tPrecision:", precision)

    # `best_model is None` guarantees a model is selected even if every
    # candidate scores an accuracy of 0.
    if best_model is None or best_acc < acc:
        best_acc = acc
        best_model = model
        best_cm = cm

# Leave `cm` holding the confusion matrix of `best_model` (rows = true labels,
# columns = predicted labels) rather than that of whichever candidate happened
# to run last, so that later cells using `cm` describe the selected model.
cm = best_cm

In [None]:
# Report the fitted parameters of the selected model: the intercept as B0,
# followed by one coefficient per feature column, numbered from B1.
print("Weights:")
intercept = best_model.intercept_[0]
print("\tB0 =", intercept)
for index, weight in enumerate(best_model.coef_[0], start=1):
    print("\tB{} ={}".format(index, weight))


In [None]:
def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : 2-D array-like
        The confusion matrix. Rows are drawn along the y axis (labelled
        'True label') and columns along the x axis ('Predicted label').
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool, default False
        If True, each row is divided by its row sum before the matrix is
        printed and plotted. Rows that sum to zero are left as zeros.
    title : str
        Title of the figure.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
        The matrix is printed to stdout and the figure is shown with
        ``plt.show()``.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing, so the heat-map colours and the numbers written
    # into the cells describe the same matrix.
    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are written as-is; fractional values are rounded to two
    # decimals so the cell annotations stay readable.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    # np.ndindex walks every (row, column) index pair of the matrix, so the
    # function does not depend on `itertools` being imported elsewhere.
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], fmt),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so they are included in the layout.
    plt.tight_layout()
    plt.show()

# Confusion Matrix

In [None]:
import itertools

# Draw the confusion matrix currently held in `cm`, labelling the two classes
# of the target on both axes.
plot_confusion_matrix(cm, classes=["No risk of CHD", "Risk of CHD"])