In [6]:
import numpy as np
import pandas as pd

# Define your LDA class
class LDA:
    def __init__(self):
        self.mu_0 = None
        self.mu_1 = None
        self.sigma = None
        self.pi1 = None
        self.pi0 = None

    def fit(self, X_train, Y_train):
        self.mu_0 = np.mean(X_train[Y_train == 0], axis=0)
        self.mu_1 = np.mean(X_train[Y_train == 1], axis=0)
        self.sigma = np.cov(X_train.T)
        self.pi1 = np.sum(Y_train == 1) / len(Y_train)
        self.pi0 = 1 - self.pi1

    def predict(self, X):
        Y_pred = []
        Y_prob = []

        for x in X:
            ln_prob0 = np.log(self.pi0) - 0.5 * np.log( np.linalg.det(2*np.pi*self.sigma)) - 0.5 * np.dot(np.dot((x - self.mu_0).T, np.linalg.inv(self.sigma)), (x - self.mu_0))
            ln_prob1 = np.log(self.pi1) - 0.5 * np.log(np.linalg.det(2*np.pi*self.sigma)) - 0.5 * np.dot(np.dot((x - self.mu_1).T, np.linalg.inv(self.sigma)), (x - self.mu_1))

            if ln_prob1 >= ln_prob0:
                Y_pred.append(1)
                Y_prob.append(np.exp(ln_prob1))
            else:
                Y_pred.append(0)
                Y_prob.append(np.exp(ln_prob0))

        return np.array(Y_pred), np.array(Y_prob)

# Load the datasets
train_data = pd.read_csv("breast_cancer_train.csv")
valid_data = pd.read_csv("breast_cancer_valid.csv")

# Assuming the last column is the target variable
x_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values

x_valid = valid_data.iloc[:, :-1].values
y_valid = valid_data.iloc[:, -1].values

# Initialize and train your LDA model
lda = LDA()
lda.fit(x_train, y_train)

# Predict on the validation set
y_pred, y_prob = lda.predict(x_valid)

# Evaluate your model
from sklearn.metrics import confusion_matrix, roc_auc_score

print("Confusion Matrix:")
print(confusion_matrix(y_valid, y_pred))
print("AUC ROC:", roc_auc_score(y_valid, y_prob))


Confusion Matrix:
[[35  5]
 [ 1 73]]
AUC ROC: 0.7574324324324324


In [10]:
def calc_conf_matrix(y, y_prob, threshold=0.5):
    y_pred = [1 if prob >= threshold else 0 for prob in y_prob]
    conf_matrix = {'fp':0, 'fn':0, 'tp':0, 'tn':0}
    for i in range(len(y)):
        if y[i] == y_pred[i]:
            if y[i]:
                conf_matrix['tp'] += 1
            else:
                conf_matrix['tn'] += 1
        else:
            if y_pred[i]:
                conf_matrix['fp'] += 1
            else: 
                conf_matrix['fn'] += 1
    return conf_matrix

calc_conf_matrix(y_valid, y_pred)

{'fp': 5, 'fn': 1, 'tp': 73, 'tn': 35}