In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from core.data import PneumoniaDataModule, build_dataset_annotation
from joblib import dump
from PIL import Image
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from torch.nn import functional as F
from torchvision import models, transforms

In [2]:
# Build the pre-trained DenseNet-121 feature extractor and place it on the GPU.
backbone = models.densenet121(pretrained=True).cuda()

# Freeze every weight: the network is used for inference only, never fine-tuned.
backbone.requires_grad_(False)

# Switch to evaluation mode; the trailing semicolon hides the module repr.
backbone.eval();

In [10]:
data_dir = Path("./chest_xray").expanduser().resolve()

# Load images
dm = PneumoniaDataModule(
    data_dir,
    batch_size=64,
    num_workers=16,
)
dm.setup("fit")
dm.setup("test")


def extract_features(loaders):
    """Run ``backbone`` over every batch produced by ``loaders``.

    Parameters
    ----------
    loaders : iterable
        Dataloaders that each yield ``(imgs, labels)`` batches.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``, concatenated in iteration order so that row
        ``i`` of ``features`` corresponds to ``labels[i]``.
    """
    # Follow whichever device the backbone currently lives on instead of
    # hard-coding CUDA here.
    device = next(backbone.parameters()).device
    feats, targets = [], []
    # Pure inference: make sure no autograd state is recorded.
    with torch.no_grad():
        for loader in loaders:
            for imgs, labels in loader:
                # Move each batch of features to host memory right away so
                # the full feature matrix never accumulates on the device.
                feats.append(backbone(imgs.to(device)).cpu())
                targets.append(labels)
    return torch.cat(feats).numpy(), torch.cat(targets).cpu().numpy()


# Use the feature extractor to build the training feature matrix (a NumPy
# array with one row per image); the validation split is folded into the
# training set.
X_train, y_train = extract_features([dm.train_dataloader(), dm.val_dataloader()])

# Do the same thing to prepare the test set
X_test, y_test = extract_features([dm.test_dataloader()])

In [11]:
# Seed shared by every estimator that has a stochastic component, so that a
# fresh "Restart & Run All" reproduces the same fitted models.
SEED = 2

names = [
    "Nearest Neighbors",
    "Linear SVM",
    "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
]

# One estimator per entry of `names`, in the same order.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    GaussianProcessClassifier(1.0 * RBF(1.0), random_state=SEED),
    DecisionTreeClassifier(max_depth=5, random_state=SEED),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=SEED
    ),
    MLPClassifier(alpha=1, max_iter=1000, random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    GaussianNB(),
]

# Kept so the name stays defined for interactive use; the estimators above
# take the integer seed directly.
rng = np.random.RandomState(SEED)

# Fit the scaler on the raw training features and keep the scaled copy under
# its own name. Overwriting `X_train` in place would make this cell
# non-idempotent: on a re-run the scaler would be refit on already-scaled
# data and would no longer map raw features into the space the models expect.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)

# iterate over classifiers
# NOTE(review): this rebinds `models`, which the import cell bound to
# `torchvision.models`; re-running the backbone cell after this one requires
# re-running the imports first. The name is kept because the evaluation cell
# reads `models`.
models = {}
for name, clf in zip(names, classifiers):
    clf.fit(X_train_scaled, y_train)
    models[name] = clf

# Save models
dump(models, "sklearn_models.joblib")
# The classifiers only make sense on standardized inputs, so persist the
# fitted scaler next to them.
dump(X_scaler, "sklearn_scaler.joblib")

KeyboardInterrupt: 

In [None]:
# The classifiers were fit on standardized features, so the test features
# must go through the same fitted scaler before prediction.
X_test_scaled = X_scaler.transform(X_test)

# Print one confusion matrix per fitted classifier.
for name, model in models.items():
    print(name)
    print(confusion_matrix(y_test, model.predict(X_test_scaled)))