In [6]:
import numpy as np
import pandas as pd
import matplotlib as plt
from model import LogRegCCD
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, recall_score, f1_score, roc_auc_score, balanced_accuracy_score,precision_score

In [4]:
# Load the ARCENE train/validation splits.
# The .data files are whitespace-delimited with no header row; the regex
# separator is written as a raw string so that `\s` is not treated as an
# (invalid) string escape sequence by the Python parser.
arcene_train = pd.read_csv('ARCENE/arcene_train.data', header=None, sep=r'\s+')
arcene_valid = pd.read_csv('ARCENE/arcene_valid.data', header=None, sep=r'\s+')

# Each .labels file is read as a single-column frame and squeezed to a Series.
arcene_train_y = pd.read_csv('ARCENE/arcene_train.labels', header=None).squeeze()
arcene_valid_y = pd.read_csv('ARCENE/arcene_valid.labels', header=None).squeeze()

# Recode the {1, -1} labels to {0, 1} and convert to NumPy arrays.
# NOTE(review): this makes the original label -1 the positive class (1) for
# every metric computed later — confirm this is the intended orientation.
# Any label value not present in `mapping` becomes NaN after `.map`.
mapping = {1: 0, -1: 1}
arcene_train_y = arcene_train_y.map(mapping).to_numpy()
arcene_valid_y = arcene_valid_y.map(mapping).to_numpy()


In [None]:
# Reduce the number of features: draw 450 columns at random (fixed seed for
# reproducibility) from the training frame, then select exactly the same
# column labels from the validation frame. Sampling once and reusing the
# labels guarantees both splits contain identical features; if the validation
# frame lacks any of the chosen columns this raises a KeyError instead of
# silently producing mismatched feature sets.
selected_columns = arcene_train.sample(n=450, axis=1, random_state=42).columns
arcene_train = arcene_train.loc[:, selected_columns]
arcene_valid = arcene_valid.loc[:, selected_columns]

In [None]:
# Grid of ten evenly spaced penalty strengths, ordered from the largest (10)
# down to the smallest (0.01).
lambdas = np.linspace(start=10, stop=0.01, num=10)

# Build the project's LogRegCCD model on that grid, fit it on the training
# split, then run validation on the held-out split with the default measure.
model = LogRegCCD(lambdas)
model.fit(arcene_train, arcene_train_y)
model.validate(arcene_valid, arcene_valid_y)

In [None]:
# Show the value stored in `model.best_lambda_`
# (presumably selected by the preceding `validate` call — confirm in LogRegCCD).
print(model.best_lambda_)
# Plot the "precision" measure on the validation split.
# NOTE(review): unlike the balanced_accuracy / recall / f_measure cells,
# `validate` is not re-run with measure="precision" before plotting — confirm
# that is intentional.
model.plot(arcene_valid, arcene_valid_y, measure="precision")
# Plot the model's coefficients (exact content is defined by LogRegCCD).
model.plot_coefficients()

In [None]:
# Plot the "roc_auc" measure on the validation split.
# NOTE(review): `validate` is not re-run with measure="roc_auc" here, unlike
# the balanced_accuracy / recall / f_measure cells — confirm that is intended.
model.plot(arcene_valid, arcene_valid_y, measure="roc_auc")
# Plot the model's coefficients (exact content is defined by LogRegCCD).
model.plot_coefficients()

In [None]:
# Re-run validation on the held-out split using balanced accuracy as the
# measure, then plot that same measure and the model's coefficients.
# The order matters if `validate` updates state that the plots read
# (presumably the best lambda/coefficients — confirm in LogRegCCD).
model.validate(arcene_valid, arcene_valid_y, measure="balanced_accuracy")
model.plot(arcene_valid, arcene_valid_y, measure="balanced_accuracy")
model.plot_coefficients()

In [None]:
# Re-run validation on the held-out split using recall as the measure,
# then plot that same measure and the model's coefficients.
# The order matters if `validate` updates state that the plots read
# (presumably the best lambda/coefficients — confirm in LogRegCCD).
model.validate(arcene_valid, arcene_valid_y, measure="recall")
model.plot(arcene_valid, arcene_valid_y, measure="recall")
model.plot_coefficients()

In [None]:
# Re-run validation on the held-out split using the F-measure,
# then plot that same measure and the model's coefficients.
# The order matters if `validate` updates state that the plots read
# (presumably the best lambda/coefficients — confirm in LogRegCCD).
model.validate(arcene_valid, arcene_valid_y, measure="f_measure")
model.plot(arcene_valid, arcene_valid_y, measure="f_measure")
model.plot_coefficients()

In [None]:
# Baseline for comparison: scikit-learn logistic regression with no penalty
# term (penalty=None), fit on the same training features and labels.
# NOTE(review): all other solver settings are left at their defaults — check
# the cell output for a convergence warning before trusting the coefficients.
modelLR = LogisticRegression(penalty=None)
# Left as the last expression so the fitted estimator is displayed by the cell.
modelLR.fit(arcene_train, arcene_train_y)


In [None]:
# Print both coefficient sets for a side-by-side look: the unpenalised
# scikit-learn baseline first, then the LogRegCCD model's `best_coef_`
# (presumably the coefficients at the selected lambda — confirm in LogRegCCD).
print(modelLR.coef_)
print(model.best_coef_)

In [None]:
# The notebook's import cell binds `plt` to the top-level `matplotlib`
# package, which does not provide the pyplot plotting functions used below.
# Bind the pyplot interface here so this cell works on a fresh kernel.
import matplotlib.pyplot as plt

# Hard class predictions and the predicted probability of class 1 from the
# scikit-learn baseline, both on the validation split.
y_pred = modelLR.predict(arcene_valid)
y_prob = modelLR.predict_proba(arcene_valid)[:, 1]

# Threshold-based metrics use the hard predictions; ROC AUC uses the scores.
balanced_acc = balanced_accuracy_score(arcene_valid_y, y_pred)
recall = recall_score(arcene_valid_y, y_pred, average='binary')
f1 = f1_score(arcene_valid_y, y_pred, average='binary')
roc_auc = roc_auc_score(arcene_valid_y, y_prob)
precision = precision_score(arcene_valid_y, y_pred, average='binary')

metrics = ['Balanced Accuracy', 'Recall', 'F1-Score', 'Roc-Auc', 'Precision']
values = [balanced_acc, recall, f1, roc_auc, precision]

# One bar per metric on a fixed [0, 1] scale.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(metrics, values, color=['skyblue', 'lightgreen', 'salmon', 'pink', 'purple'])
ax.set_xlabel('Metrics')
ax.set_ylabel('Score')
ax.set_title('Model Performance Metrics')
ax.set_ylim(0, 1)
# 21 points over [0, 1] place the ticks at exact 0.05 steps
# (22 points would give an uneven-looking 1/21 spacing).
ax.set_yticks(np.linspace(0, 1, 21))

plt.show()

