In [1]:
import numpy as np
import pandas as pd
from FischersLinearDiscriminant import FischersLinearDiscriminant
import sys
sys.path.append("..")
from preprocessor import Preprocessor

In [None]:
# Read the raw table and discard the "id" column in a single chained
# expression, so the cell yields the same frame every time it is re-run.
dataset = pd.read_csv("../dataset.csv").drop(columns=["id"])

In [None]:
# Wrap the loaded frame in the project's Preprocessor. The second argument is
# the "diagnosis" column name, which the evaluation loop later separates out
# as the label vector.
preprocessor = Preprocessor(dataset, "diagnosis")
# The return value is discarded, so presumably this updates the preprocessor's
# internal state (and possibly `dataset` itself) — TODO confirm against
# preprocessor.py.
preprocessor.preprocess()

In [None]:
# Per-trial metric accumulators, filled by the evaluation loop below.
# One entry is appended per trial.
accuracies: list[float] = []
# Each entry is a (class 1, class -1) pair.
precisions: list[tuple[float, float]] = []
# Each entry is a (class 1, class -1) pair.
recalls: list[tuple[float, float]] = []

In [None]:
# Repeated hold-out evaluation of the discriminant on the preprocessed data.
# Each trial asks the preprocessor for folds, trains on the first
# N_TRAIN_FOLDS of them and tests on the remainder.
# NOTE(review): presumably get_folds() reshuffles on every call; if it does
# not, all trials are identical — verify in preprocessor.py.
N_TRIALS = 1000      # number of train/test repetitions
N_FOLDS = 100        # folds requested from the preprocessor per trial
N_TRAIN_FOLDS = 67   # leading folds used for training (the other 33 test)

for _ in range(N_TRIALS):
    splits = preprocessor.get_folds(N_FOLDS)

    train = pd.concat(splits[:N_TRAIN_FOLDS])
    test = pd.concat(splits[N_TRAIN_FOLDS:])

    X_train = train.drop(columns=["diagnosis"]).to_numpy()
    y_train = train["diagnosis"].to_numpy()
    X_test = test.drop(columns=["diagnosis"]).to_numpy()
    y_test = test["diagnosis"].to_numpy()

    fldm1 = FischersLinearDiscriminant()
    fldm1.fit(X_train, y_train)
    # Presumably confusion-matrix counts with class 1 as "positive" — confirm
    # against FischersLinearDiscriminant.score.
    tp, tn, fp, fn = fldm1.score(X_test, y_test)

    total = tp + tn + fp + fn
    predicted_pos, predicted_neg = tp + fp, tn + fn
    actual_pos, actual_neg = tp + fn, tn + fp

    # A split in which one class is never predicted (or never present) makes
    # a denominator zero. Record NaN for that metric instead of aborting the
    # whole run with ZeroDivisionError.
    accuracies.append((tp + tn) / total if total else float("nan"))
    precisions.append((
        tp / predicted_pos if predicted_pos else float("nan"),
        tn / predicted_neg if predicted_neg else float("nan"),
    ))
    recalls.append((
        tp / actual_pos if actual_pos else float("nan"),
        tn / actual_neg if actual_neg else float("nan"),
    ))


In [None]:
# Summarise the per-trial metrics as percentages (mean and standard deviation).
#
# Values are scaled to percent *before* rounding. Rounding the fraction first
# and multiplying by 100 afterwards re-introduces binary floating-point noise
# (e.g. 0.57 * 100 == 56.99999999999999) into the printed numbers.
#
# Column 0 holds the class 1 values and column 1 the class -1 values;
# reshape(-1, 2) keeps the indexing valid even when no trial has run yet.
precision_by_class = np.asarray(precisions, dtype=float).reshape(-1, 2)
recall_by_class = np.asarray(recalls, dtype=float).reshape(-1, 2)

print('--------Results--------')
print(f"Accuracy mean: {(np.mean(accuracies) * 100).round(2)}%, stdev: {(np.std(accuracies) * 100).round(2)}%")

print("----Class 1----")
print(f"Precision mean: {(np.mean(precision_by_class[:, 0]) * 100).round(2)}%, stdev: {(np.std(precision_by_class[:, 0]) * 100).round(2)}%")
print(f"Recall mean: {(np.mean(recall_by_class[:, 0]) * 100).round(2)}%, stdev: {(np.std(recall_by_class[:, 0]) * 100).round(2)}%")

print("----Class -1----")
print(f"Precision mean: {(np.mean(precision_by_class[:, 1]) * 100).round(2)}%, stdev: {(np.std(precision_by_class[:, 1]) * 100).round(2)}%")
print(f"Recall mean: {(np.mean(recall_by_class[:, 1]) * 100).round(2)}%, stdev: {(np.std(recall_by_class[:, 1]) * 100).round(2)}%")

In [None]:
# Randomly permute the column order (rows are left untouched): sampling
# 100% of the columns without replacement returns all of them, reordered.
shuffled_dataset = dataset.sample(frac=1.0, axis="columns")


In [None]:
# Display the original column order for comparison with the shuffled frame.
dataset.columns

In [None]:
# Display the permuted column order to confirm the shuffle took effect.
shuffled_dataset.columns

In [None]:
# Rebind `preprocessor` to the column-shuffled frame. From here on the earlier
# preprocessor (built on the original column order) is no longer reachable
# under this name.
preprocessor = Preprocessor(shuffled_dataset, "diagnosis")
# The return value is discarded, so presumably this updates the preprocessor's
# internal state — TODO confirm against preprocessor.py.
preprocessor.preprocess()

In [None]:
# Reset the metric accumulators so the shuffled-column run does not mix its
# results with those of the first experiment.
accuracies: list[float] = []
# Each entry is a (class 1, class -1) pair.
precisions: list[tuple[float, float]] = []
# Each entry is a (class 1, class -1) pair.
recalls: list[tuple[float, float]] = []

In [None]:
# Repeated hold-out evaluation of the discriminant on the column-shuffled
# data. Each trial asks the preprocessor for folds, trains on the first
# N_TRAIN_FOLDS of them and tests on the remainder.
# NOTE(review): presumably get_folds() reshuffles on every call; if it does
# not, all trials are identical — verify in preprocessor.py.
N_TRIALS = 1000      # number of train/test repetitions
N_FOLDS = 100        # folds requested from the preprocessor per trial
N_TRAIN_FOLDS = 67   # leading folds used for training (the other 33 test)

for _ in range(N_TRIALS):
    splits = preprocessor.get_folds(N_FOLDS)

    train = pd.concat(splits[:N_TRAIN_FOLDS])
    test = pd.concat(splits[N_TRAIN_FOLDS:])

    X_train = train.drop(columns=["diagnosis"]).to_numpy()
    y_train = train["diagnosis"].to_numpy()
    X_test = test.drop(columns=["diagnosis"]).to_numpy()
    y_test = test["diagnosis"].to_numpy()

    fldm2 = FischersLinearDiscriminant()
    fldm2.fit(X_train, y_train)
    # Presumably confusion-matrix counts with class 1 as "positive" — confirm
    # against FischersLinearDiscriminant.score.
    tp, tn, fp, fn = fldm2.score(X_test, y_test)

    total = tp + tn + fp + fn
    predicted_pos, predicted_neg = tp + fp, tn + fn
    actual_pos, actual_neg = tp + fn, tn + fp

    # A split in which one class is never predicted (or never present) makes
    # a denominator zero. Record NaN for that metric instead of aborting the
    # whole run with ZeroDivisionError.
    accuracies.append((tp + tn) / total if total else float("nan"))
    precisions.append((
        tp / predicted_pos if predicted_pos else float("nan"),
        tn / predicted_neg if predicted_neg else float("nan"),
    ))
    recalls.append((
        tp / actual_pos if actual_pos else float("nan"),
        tn / actual_neg if actual_neg else float("nan"),
    ))

In [None]:
# Summarise the per-trial metrics as percentages (mean and standard deviation).
#
# Values are scaled to percent *before* rounding. Rounding the fraction first
# and multiplying by 100 afterwards re-introduces binary floating-point noise
# (e.g. 0.57 * 100 == 56.99999999999999) into the printed numbers.
#
# Column 0 holds the class 1 values and column 1 the class -1 values;
# reshape(-1, 2) keeps the indexing valid even when no trial has run yet.
precision_by_class = np.asarray(precisions, dtype=float).reshape(-1, 2)
recall_by_class = np.asarray(recalls, dtype=float).reshape(-1, 2)

print('--------Results--------')
print(f"Accuracy mean: {(np.mean(accuracies) * 100).round(2)}%, stdev: {(np.std(accuracies) * 100).round(2)}%")

print("----Class 1----")
print(f"Precision mean: {(np.mean(precision_by_class[:, 0]) * 100).round(2)}%, stdev: {(np.std(precision_by_class[:, 0]) * 100).round(2)}%")
print(f"Recall mean: {(np.mean(recall_by_class[:, 0]) * 100).round(2)}%, stdev: {(np.std(recall_by_class[:, 0]) * 100).round(2)}%")

print("----Class -1----")
print(f"Precision mean: {(np.mean(precision_by_class[:, 1]) * 100).round(2)}%, stdev: {(np.std(precision_by_class[:, 1]) * 100).round(2)}%")
print(f"Recall mean: {(np.mean(recall_by_class[:, 1]) * 100).round(2)}%, stdev: {(np.std(recall_by_class[:, 1]) * 100).round(2)}%")