In [1]:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, label_binarize
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Number of target classes; handed to the XGBoost classifier as `num_class`.
n_classes: int = 2

In [3]:
# Original note: "pneum retina breast" — presumably the dataset variants this
# notebook gets pointed at (TODO confirm); only the pneumonia files are read here.
data = np.load('pneumoniamnist.npz')

# Feature tables for the train and test splits, read from the BettiVectors CSVs.
X_train = pd.read_csv('train_PneumoniaMnist224_BettiVectors_100.csv')
X_test = pd.read_csv('test_PneumoniaMnist224_BettiVectors_100.csv')

# Label arrays for the same two splits, taken from the .npz archive.
train_labels, test_labels = data['train_labels'], data['test_labels']

In [4]:
# Turn the raw label arrays into integer class ids for the classifier.
#
# Each label row is collapsed into one string by concatenating its columns
# (e.g. a row [1, 0] becomes "10"), and that string is then integer-encoded.
train_labels_df = pd.DataFrame(train_labels)
test_labels_df = pd.DataFrame(test_labels)

train_labels_df['combined'] = train_labels_df.astype(str).apply(''.join, axis=1)
test_labels_df['combined'] = test_labels_df.astype(str).apply(''.join, axis=1)

# Select the column directly: it is always a Series, whereas wrapping it in a
# one-column DataFrame and calling .squeeze() yields a scalar for a single row.
train_df = train_labels_df['combined']
test_df = test_labels_df['combined']

label_encoder = LabelEncoder()

# Fit the label -> integer mapping on the training labels only, then reuse
# that same mapping for the test labels. Re-fitting on the test split could
# assign different integers to the same label if the two splits do not contain
# exactly the same set of classes. `transform` raises ValueError when the test
# split contains a label that was never seen during fitting.
y_train = label_encoder.fit_transform(train_df)
y_test = label_encoder.transform(test_df)

In [5]:
# Boosted-tree classifier: 100 estimators, softmax objective over `n_classes` classes.
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=n_classes,
    n_estimators=100,
)
model.fit(X_train, y_train)

# Hard class predictions on the held-out features, scored against the encoded test labels.
normal_prediction = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=normal_prediction)

# Report accuracy as a percentage with two decimals.
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 78.85%


In [6]:
# ROC AUC on the test split, computed from the fitted model's class scores.
class_probabilities = model.predict_proba(X_test)

if n_classes == 2:
    # Binary case: score with the column-1 (positive class) probability only.
    probabilities = class_probabilities[:, 1]
    auc = roc_auc_score(y_test, probabilities)
else:
    # More than two classes: roc_auc_score needs the full per-class score
    # matrix and an explicit multiclass strategy. One-vs-rest is used here;
    # NOTE(review): switch to 'ovo' if that is the intended metric.
    probabilities = class_probabilities
    auc = roc_auc_score(y_test, probabilities, multi_class='ovr')

# Report AUC as a percentage with two decimals.
print(f"AUC: {auc * 100:.2f}%")

AUC: 88.75%
