In [3]:
import torch
import torchvision
import numpy as np
import pandas as pd
import argparse
import os
import copy
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
from PIL import Image
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score

LABELS_Severity = {35: 0, 43: 0, 47: 1, 53: 1, 61: 2, 65: 2, 71: 2, 85: 2}

mean = (.1706)
std = (.2112)

normalize = transforms.Normalize(mean=mean, std=std)

transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    transforms.ToTensor(),
    normalize,
])

#Check if GPU is being used
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using {device} device')

#Define the dataloader class
class OCTDataset(Dataset):
    def __init__(self, subset='train', transform=None):
        if subset == 'train':
            self.annot = pd.read_csv('df_prime_train.csv')
        elif subset == 'test':
            self.annot = pd.read_csv('df_prime_test.csv')
        
        self.annot = self.annot.sample(frac=0.5, random_state=42)
        self.annot['Severity_Label'] = [LABELS_Severity[drss] for drss in copy.deepcopy(self.annot['DRSS'].values)]
        self.root = os.path.expanduser('/storage/home/hpaceice1/shared-classes/materials/ece8803fml/')
        self.transform = transform
        self.nb_classes=len(np.unique(list(LABELS_Severity.values())))
        self.path_list = self.annot['File_Path'].values
        self._labels = self.annot['Severity_Label'].values
        assert len(self.path_list) == len(self._labels)

    def __getitem__(self, index):
        img, target = Image.open(self.root+self.path_list[index]).convert("L"), self._labels[index]
        if self.transform is not None:
            img = self.transform(img)
        return img, target

    def __len__(self):
        return len(self._labels)  

Using cuda device


In [4]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, precision_score, confusion_matrix
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Define the dataset
dataset = OCTDataset(subset='train', transform=transform)

# Define the dataloader
batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# Prepare the data for training
X, y = [], []
for batch_idx, (data, target) in enumerate(dataloader):
    X_batch = data.detach().cpu().numpy()
    y_batch = target.detach().cpu().numpy()
    X.append(X_batch.reshape(X_batch.shape[0], -1))
    y.append(y_batch)
X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the classifier and fit to the training data
clf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

In [5]:
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score

# y_test and y_pred are defined as in the previous example

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Compute accuracy and balanced accuracy scores
acc = accuracy_score(y_test, y_pred)
bacc = balanced_accuracy_score(y_test, y_pred)


# Compute precision, recall, and f1-score
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Compute true positive rate (recall) and false positive rate (1 - specificity)
if cm.shape == (2, 2):  # binary classification
    tn, fp, fn, tp = cm.ravel()
else:  # multiclass classification
    tn, fp, fn, tp = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]
tpr = tp / (tp + fn)
fpr = fp / (fp + tn)


print("Confusion matrix:")
print(cm)
print("Accuracy: {:.4f}".format(acc))
print("Balanced accuracy: {:.4f}".format(bacc))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1-score: {:.4f}".format(f1))
print("True positive rate (recall): {:.4f}".format(tpr))
print("False positive rate (1 - specificity): {:.4f}".format(fpr))

Confusion matrix:
[[ 576  199    1]
 [ 147 1010   34]
 [  25  112  322]]
Accuracy: 0.7865
Balanced accuracy: 0.7639
Precision: 0.8122
Recall: 0.7639
F1-score: 0.7831
True positive rate (recall): 0.8729
False positive rate (1 - specificity): 0.2568


In [None]:
# Define the dataset
dataset = OCTDataset(subset='train', transform=transform)

#Define performance metrics as lists
precision = []
recall = []
f1 = []
acc = []
bacc = []
tpr = []
fpr = []

# Define batch size and dataloader
batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# Prepare the data for training
X, y = [], []
for batch_idx, (data, target) in enumerate(dataloader):
    X_batch = data.detach().cpu().numpy()
    y_batch = target.detach().cpu().numpy()
    X.append(X_batch.reshape(X_batch.shape[0], -1))
    y.append(y_batch)
X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Define 
n_estimators = list(range(10,205,25))

for i in n_estimators:
    # Define the classifier and fit to the training data
    clf = RandomForestClassifier(n_estimators=i, max_depth=None, random_state=42)
    clf.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = clf.predict(X_test)
    
    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    
    # Compute accuracy and balanced accuracy scores
    acc.append(accuracy_score(y_test, y_pred))
    bacc.append(balanced_accuracy_score(y_test, y_pred))
    
    # Compute precision, recall, and f1-score
    precision.append(precision_score(y_test, y_pred, average='macro'))
    recall.append(recall_score(y_test, y_pred, average='macro'))
    f1.append(f1_score(y_test, y_pred, average='macro'))
    
    # Compute true positive rate (recall) and false positive rate (1 - specificity)
    if cm.shape == (2, 2):  # binary classification
        tn, fp, fn, tp = cm.ravel()
    else:  # multiclass classification
        tn, fp, fn, tp = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]
    tpr.append(tp / (tp + fn))
    fpr.append(fp / (fp + tn))

In [None]:
import matplotlib.pyplot as plt

plt.grid()
plt.plot(n_estimators, acc, linestyle = '-', color='blue', label='Accuracy')
plt.plot(n_estimators, bacc, linestyle = '-', color='red', label='Balanced Accuracy')
plt.xlabel('Number of Estimators')
plt.ylabel('Score')
plt.legend()