In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os

# Report whether CUDA is usable, then select the matching torch device.
print(torch.cuda.is_available())
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Lays the samples out on a two-row grid; any number of samples is supported.
def show_samples(samples):
    """Plot ``(image, label)`` pairs on a two-row grid of subplots.

    Args:
        samples: Sequence whose items are indexable as ``item[0]`` (the image,
            passed to ``plt.imshow`` and expected to expose ``.shape``) and
            ``item[1]`` (the label). A truthy label is titled "Uninfected",
            a falsy one "Infected".

    An empty sequence is a no-op. For odd counts the last grid cell is simply
    left unused.
    """
    count = len(samples)
    if count == 0:
        return
    # Ceiling division: two rows must offer at least `count` cells, otherwise
    # plt.subplot rejects the index (or a zero-column grid for one sample).
    columns = (count + 1) // 2
    for position, sample in enumerate(samples, start=1):
        image = sample[0]
        plt.subplot(2, columns, position)
        plt.imshow(image)
        plt.title("Uninfected" if sample[1] else "Infected", loc='center')
        plt.ylabel(f"Height ({image.shape[0]})")
        plt.xlabel(f"Width ({image.shape[1]})")
    # Spacing applies to the whole figure, so set it once after all axes exist.
    plt.subplots_adjust(hspace=1, wspace=1)
    plt.show()

class MalariaDataset(Dataset):
    """Map-style dataset over an indexable collection of (image, label) records.

    Despite the attribute name, the only operations used on ``dataframe`` are
    ``len()`` and integer indexing, and each item must unpack into exactly two
    values.
    """

    def __init__(self, dataframe, transform=None):
        """Keep a reference to the records and the optional image transform.

        Args:
            dataframe: Indexable container of ``(image, label)`` records.
            transform: Optional callable applied to the image on every access.
        """
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return how many records the dataset holds."""
        records = self.dataframe
        return len(records)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, transforming the image if configured."""
        image, label = self.dataframe[index]
        if not self.transform:
            # No (truthy) transform configured: hand the record back untouched.
            return image, label
        return self.transform(image), label

def get_loader(df, batch_size=32, transform=None, shuffle=False):
    """Wrap ``df`` in a ``MalariaDataset`` and return a ``DataLoader`` over it.

    Args:
        df: Indexable collection of ``(image, label)`` records, forwarded to
            ``MalariaDataset``.
        batch_size: Number of samples per batch.
        transform: Optional callable applied to each image when it is fetched.
        shuffle: Whether the loader reshuffles the data at every epoch.
            Defaults to ``False`` so existing callers keep their fixed order;
            pass ``True`` for a training loader.

    Returns:
        A ``DataLoader`` iterating over the wrapped dataset.
    """
    dataset = MalariaDataset(df, transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

def evaluate(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    A sample is predicted as class 1 when its raw output (logit) is greater
    than 0, and as class 0 otherwise.

    Args:
        model: ``nn.Module`` called as ``model(images)`` and producing one
            logit per sample.
        loader: Iterable of ``(images, labels)`` batches. Batches are moved to
            the module-level ``device`` before the forward pass.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when the loader yields no
        samples (instead of dividing by zero).
    """
    total_correct = 0
    total_samples = 0
    # Evaluate in eval mode without autograd, then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                # Flatten both sides: squeeze() would leave (B,) vs (B, 1)
                # tensors that broadcast into a (B, B) comparison.
                predictions = (outputs > 0.0).reshape(-1).long()
                total_correct += int((predictions == labels.reshape(-1)).sum())
                total_samples += len(labels)
    finally:
        model.train(was_training)
    if total_samples == 0:
        return 0.0
    return float(total_correct) / total_samples

  from .autonotebook import tqdm as notebook_tqdm


True


In [2]:
# --- Load the pickled dataset for the chosen input resolution ---
input_size = 224
data_dir = f"../data/corrected_{input_size}.pickle"
df = pd.read_pickle(data_dir)

# Convert every image to a tensor and normalize each of the three channels
# with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)
df["image"] = df["image"].apply(lambda image: transform(image))

# --- Shuffle the rows once with a fixed seed so the split is repeatable ---
np_df = df.to_numpy()
np.random.seed(1000)
np.random.shuffle(np_df)

# --- 80% / 10% / 10% split into training, validation and test rows ---
num_samples = len(np_df)
a = int(num_samples * 0.8)
b = int(num_samples * 0.9)
df_trn, df_val, df_tst = np.split(np_df, [a, b])

In [13]:
batch_size = 32

# Random geometric augmentation, applied to training images only.
trn_transform = transforms.Compose([
    transforms.RandomPerspective(distortion_scale=0.1, p=0.25),
    transforms.RandomAffine(degrees=5, translate=(0.05, 0.05)),
    transforms.RandomRotation(180),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
])

# The training loader is built directly so it can reshuffle at every epoch;
# otherwise the optimizer sees the exact same batch sequence each epoch.
# NOTE: anything else that iterates trn_loader therefore sees a different
# sample order on each pass.
trn_loader = DataLoader(
    MalariaDataset(df_trn, transform=trn_transform),
    batch_size=batch_size,
    shuffle=True,
)
# Validation and test data keep a fixed order and get no augmentation.
val_loader = get_loader(df_val, batch_size=batch_size)
tst_loader = get_loader(df_tst, batch_size=batch_size)

In [14]:
import torchvision.models

# Pretrained AlexNet, moved to the selected device.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=`; left unchanged to stay compatible with the installed version.
alexnet = torchvision.models.alexnet(pretrained=True).to(device)
# No optimizer in this notebook updates the backbone, so put it in eval mode
# and freeze its parameters. Without this, every backward() through
# alexnet.features computes and accumulates gradients that are never used
# or cleared.
alexnet = alexnet.eval().requires_grad_(False)

In [7]:
# Create the feature cache directory if it does not exist yet. Unlike a bare
# `except` + `assert`, this does not swallow unrelated errors and still works
# when assertions are disabled (python -O).
os.makedirs("./alexnet_features", exist_ok=True)

def compute_features(name, loader):
    """Run every batch of ``loader`` through ``alexnet.features`` and pickle the results.

    Args:
        name: File stem; the output is written to
            ``./alexnet_features/{name}.pkl``.
        loader: Iterable yielding ``(images, labels)`` batches. The labels are
            ignored and are not stored.

    The pickle contains a list with one NumPy array per batch, in the order
    the loader produced them.
    """
    features = []
    # Only forward activations are needed, so skip autograd bookkeeping
    # instead of building (and then discarding) a graph for every batch.
    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            fts = alexnet.features(images)
            features.append(fts.cpu().detach().numpy())
    with open(f"./alexnet_features/{name}.pkl", 'wb') as f:
        pickle.dump(features, f)

In [8]:
# Cache backbone features for each split as ./alexnet_features/<split>.pkl.
for split_name, split_loader in (
    ("trn", trn_loader),
    ("val", val_loader),
    ("tst", tst_loader),
):
    compute_features(split_name, split_loader)

In [9]:
class Classifier(nn.Module):
    """Binary classification head applied to convolutional feature maps.

    Three 1x1 convolutions reduce the channel count
    (``in_channels`` -> 128 -> 64 -> 16), each followed by ReLU; the result is
    flattened and mapped to a single logit by a linear layer.

    Args:
        in_channels: Number of channels of the incoming feature map.
        feature_hw: ``(height, width)`` of the incoming feature map.

    The defaults reproduce the previously hard-coded 256 x 6 x 6 input, so
    ``Classifier()`` builds exactly the same architecture as before.
    """

    def __init__(self, in_channels=256, feature_hw=(6, 6)):
        super().__init__()
        height, width = feature_hw
        # Layer positions inside the Sequential are unchanged, so state_dict
        # keys (layers.0, layers.2, layers.4, layers.7) stay stable.
        self.layers = nn.Sequential(
            nn.Conv2d(in_channels, 128, 1),
            nn.ReLU(),
            nn.Conv2d(128, 64, 1),
            nn.ReLU(),
            nn.Conv2d(64, 16, 1),
            nn.ReLU(),
            nn.Flatten(),
            # 1x1 convolutions keep the spatial size, so the flattened vector
            # has 16 * height * width entries.
            nn.Linear(16 * height * width, 1)
        )

    def forward(self, x):
        """Return one raw logit per sample, shaped ``(batch, 1)``."""
        return self.layers(x)

In [10]:
# Hyper-parameters for training the classification head.
num_epochs = 300
learning_rate = 0.001

# Freshly initialized head, placed on the selected device.
model = Classifier()
model = model.to(device)

# Adam over the head's parameters, with a binary cross-entropy loss that
# takes raw logits.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
loss_function = nn.BCEWithLogitsLoss()
# Multiplies the learning rate by gamma each time scheduler.step() is called.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.99)

# Create the checkpoint directory if it does not exist yet. Unlike a bare
# `except` + `assert`, this does not swallow unrelated errors and still works
# when assertions are disabled (python -O).
os.makedirs("./alexnet_models", exist_ok=True)

## Training

In [None]:
# Switch the AlexNet module to evaluation mode before the training loop calls
# alexnet.features on each batch.
alexnet.eval()

In [None]:
# trn_features = pickle.load(open(f"./alexnet_features/trn.pkl", 'rb'))
# val_features = pickle.load(open(f"./alexnet_features/val.pkl", 'rb'))


def _run_epoch(loader, train):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: Iterable of ``(images, labels)`` batches.
        train: When True the head (``model``) is put in training mode and
            updated with ``optimizer``; when False it is put in eval mode and
            only measured, with autograd disabled.

    Returns:
        Sample-weighted mean loss and the fraction of correct predictions
        (logit > 0 means class 1). An empty loader yields ``(nan, 0.0)``.
    """
    model.train(train)
    loss_sum = 0.0
    correct = 0
    seen = 0
    for images, labels in loader:
        images = images.to(device)
        labels = labels.unsqueeze(1).float()
        labels = labels.to(device)
        # The backbone is never optimized here, so never track its graph.
        with torch.no_grad():
            features = alexnet.features(images)
        with torch.set_grad_enabled(train):
            outputs = model(features)
            loss = loss_function(outputs, labels)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        batch_count = len(labels)
        loss_sum += loss.item() * batch_count
        correct += int(((outputs > 0.0) == labels).sum())
        seen += batch_count
    if seen == 0:
        return float("nan"), 0.0
    return loss_sum / seen, float(correct) / seen


# Best validation result seen so far (epoch numbers are 1-based).
best = {
    "epoch": 0,
    "accuracy": 0
}

for epoch in range(num_epochs):
    training_loss, training_accuracy = _run_epoch(trn_loader, train=True)
    _, validation_accuracy = _run_epoch(val_loader, train=False)
    # Apply the exponential learning-rate decay once per epoch; without this
    # call the scheduler has no effect on the optimizer.
    scheduler.step()

    if validation_accuracy > best["accuracy"]:
        best["epoch"] = epoch + 1
        best["accuracy"] = validation_accuracy

    # The reported loss is the mean over the whole training epoch rather than
    # the value of whichever batch happened to come last.
    print(f"Epoch {epoch + 1}/{num_epochs} Loss: {training_loss:.6f} Training Accuracy: {training_accuracy:.6f} Validation Accuracy: {validation_accuracy:.6f}")
    # One checkpoint per epoch, so the best epoch can be reloaded afterwards.
    torch.save(model.state_dict(), f"alexnet_models/epoch_{epoch + 1}.pt")

print(f"Best Validation Accuracy : {best['accuracy']} at Epoch {best['epoch']}")