In [None]:
pwd

In [None]:
!pip3 install ax-platform 

In [None]:
!pip install -U albumentations

In [None]:
import torch
import numpy as np
import pandas as pd

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting
from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate, CNN

from torchvision import models, datasets, utils#,transforms
from collections import OrderedDict

from torch import nn, optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image

# Set up Ax's notebook plotting helper (from ax.utils.notebook.plotting) before any `render` call.
init_notebook_plotting()

In [None]:
# Seed PyTorch's default RNG so runs are repeatable.
torch.manual_seed(2020)

# Default floating point type used in this notebook.
dtype = torch.float32

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Transform

In [None]:
# --- Data configuration -----------------------------------------------------
IMAGE_SIZE = 300  # every image is resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 8

TRAIN_DIR = "../input/lipodata2/data/train"
VALID_DIR = "../input/lipodata2/data/val"

# Alternative data location (Google Drive mount):
#TRAIN_DIR = "gdrive/MyDrive/lipo/train"
#VALID_DIR = "gdrive/MyDrive/lipo/val"

# --- Augmentation pipelines (albumentations) --------------------------------
# Training: brightness/contrast jitter and random flips, then resize and
# conversion to a tensor.
train_transforms = A.Compose(
    [
        A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(0, 1.0)),
        A.VerticalFlip(),
        A.HorizontalFlip(),
        A.Resize(IMAGE_SIZE, IMAGE_SIZE),
        ToTensorV2(),
    ]
)

# Validation: deterministic resize only, using the same IMAGE_SIZE as training.
valid_transforms = A.Compose(
    [
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        ToTensorV2(),
    ]
)


def _as_folder_transform(augment):
    """Adapt an albumentations pipeline to torchvision's ``transform(pil_image)`` call.

    ``datasets.ImageFolder`` passes the loaded PIL image positionally, while an
    albumentations ``Compose`` must be called with the keyword ``image=`` on a
    NumPy array and returns a dict.  The result is divided by 255, the same
    scaling ``LipoDataset`` applies after its transform.
    """
    def apply(pil_image):
        return augment(image=np.array(pil_image))["image"] / 255

    return apply


# --- ImageFolder-based datasets and loaders ---------------------------------
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=_as_folder_transform(train_transforms))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataset = datasets.ImageFolder(root=VALID_DIR, transform=_as_folder_transform(valid_transforms))
# Shuffling does not change validation accuracy, so the validation loader keeps dataset order.
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class LipoDataset(torch.utils.data.Dataset):
    """Binary image dataset read from ``root_dir/positive`` and ``root_dir/negative``.

    Every entry found directly inside those two folders becomes one sample.
    Other entries of ``root_dir`` are ignored.

    Args:
        root_dir: Directory that contains the ``positive`` / ``negative`` folders.
        transform: Optional callable invoked as ``transform(image=ndarray)`` that
            returns a mapping with an ``"image"`` entry (the albumentations
            calling convention).

    Attributes:
        samples: List of ``(folder_name, filepath)`` tuples, sorted by folder
            name and then by file name so the sample order is reproducible.
    """

    # Folder name -> integer class label.
    LABEL_TO_INT = {"positive": 1, "negative": 0}

    def __init__(self, root_dir, transform=None):
        # Imported locally so the class does not depend on a later notebook cell importing `os`.
        import os

        self.transform = transform
        self.root_dir = root_dir

        # Listing root_dir first keeps the original failure mode: a missing
        # directory raises instead of silently producing an empty dataset.
        present = set(os.listdir(root_dir))

        self.samples = []
        for class_name in sorted(self.LABEL_TO_INT):
            if class_name not in present:
                continue
            folder = os.path.join(root_dir, class_name)
            # os.listdir returns entries in arbitrary order; sort for determinism.
            for file_name in sorted(os.listdir(folder)):
                self.samples.append((class_name, os.path.join(folder, file_name)))

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` where ``label`` is 1 for ``positive`` and 0 for
            ``negative``.  Without a transform, ``image`` is the RGB image as a
            NumPy array and is NOT rescaled.  With a transform, ``image`` is the
            transform's ``"image"`` output divided by 255.
        """
        class_name, filepath = self.samples[index]

        # Decode as RGB and close the file handle as soon as the pixels are copied.
        with Image.open(filepath) as handle:
            image = np.array(handle.convert("RGB"))

        label = self.LABEL_TO_INT[class_name]

        if self.transform is not None:
            image = self.transform(image=image)["image"]
            image = image/255

        return image, label

In [None]:
import os

In [None]:
# Smoke test for LipoDataset: print the number of training samples and the
# shape of the first image (no transform is applied here).
if __name__ == '__main__':
    dataset = LipoDataset(TRAIN_DIR)
    print(len(dataset))
    sample_image, _sample_label = dataset[0]
    print(sample_image.shape)

In [None]:
# Build the training / validation datasets with their respective transforms.
dataset = LipoDataset(TRAIN_DIR, transform=train_transforms)
v_dataset = LipoDataset(VALID_DIR, transform=valid_transforms)

# Wrap them in loaders; the batch size comes from the shared BATCH_SIZE constant
# so it stays in sync with the test loader. Only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = torch.utils.data.DataLoader(v_dataset, batch_size=BATCH_SIZE)

# Pull one batch and display its shape as a sanity check.
img, label = next(iter(train_loader))
img.shape

In [None]:
import matplotlib.pyplot as plt

# Tile the sampled batch into a single grid image and show it without axes.
sample_grid = utils.make_grid(img, padding=1)
fig, ax = plt.subplots(figsize=(20, 8))
ax.axis("off")
ax.set_title("Sample Training Images")
# The grid is channel-first; move channels last for imshow.
ax.imshow(np.transpose(sample_grid, (1, 2, 0)));

In [None]:
def weights_init(m, verbose = False):
    """Re-initialise the parameters of a single layer in place.

    Intended for ``module.apply(weights_init)``.  Layers other than
    ``Conv2d``, ``BatchNorm2d`` and ``Linear`` are left untouched.

    * ``Conv2d``: weights ~ N(0, 0.1), bias zeroed.
    * ``BatchNorm2d``: weight set to one, bias zeroed.
    * ``Linear``: Xavier-uniform weights, bias zeroed.

    Parameters that do not exist (``bias=False`` or ``affine=False``) are skipped.

    Args:
        m: The module to initialise.
        verbose: When true, print which kind of layer is being initialised.
    """
    if isinstance(m, nn.Conv2d):
        if verbose:
            print("Initializing weights of a Conv2d layer!")
        nn.init.normal_(m.weight, mean=0, std=0.1)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.BatchNorm2d):
        if verbose:
            print("Initializing weights of a batchnorm layer!")
        # affine=False batchnorm layers have neither weight nor bias.
        if m.weight is not None:
            nn.init.ones_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        if verbose:
            print("Initializing weights of a Linear layer!")
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

In [None]:
def make_model():
    """Build a VGG16-BN binary classifier on top of pretrained weights.

    The convolutional feature extractor is frozen, the last classifier layer
    is replaced by a single-logit ``Linear`` layer (for use with
    ``BCEWithLogitsLoss``), and only that new layer is re-initialised so the
    pretrained weights of every other layer are preserved.

    Returns:
        The modified ``vgg16_bn`` model (not yet moved to any device).
    """
    vgg16 = models.vgg16_bn(pretrained=True)

    # Freeze the feature extractor. The attribute is `requires_grad`; assigning
    # to any other name merely creates an unused attribute and freezes nothing.
    for param in vgg16.features.parameters():
        param.requires_grad = False

    # Newly created modules have requires_grad=True by default.
    num_features = vgg16.classifier[6].in_features
    head = nn.Linear(num_features, 1)  # one output logit
    layers = list(vgg16.classifier.children())[:-1]  # drop the original last layer
    layers.append(head)
    vgg16.classifier = nn.Sequential(*layers)

    # Seed right before initialising so the new head starts from the same
    # weights on every call. Only the head is initialised: applying
    # weights_init to the whole network would overwrite the pretrained weights.
    torch.manual_seed(2020)
    head.apply(weights_init)
    return vgg16

In [None]:
def train(model, train_loader, hyperparameters, epochs=20):
    """Train a single-logit binary classifier with Adam and BCE-with-logits loss.

    Args:
        model: Network whose output flattens to one logit per sample.
        train_loader: Iterable of ``(X, y)`` batches; ``y`` holds 0/1 labels.
        hyperparameters: Mapping with optional ``"lr"`` (default 0.001) and
            ``"beta1"`` (default 0.9) entries for Adam.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` instance, trained in place and left in train mode.
    """
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=hyperparameters.get("lr", 0.001),
                           betas=(hyperparameters.get("beta1", 0.9), 0.999))

    # Put batches on whatever device the model lives on, instead of relying on
    # a global `device` that may not match the model.
    model_device = next(model.parameters()).device

    # Make sure dropout / batchnorm behave as during training even when the
    # model was previously switched to eval mode.
    model.train()

    for _ in range(epochs):
        for X, y in train_loader:
            X = X.to(model_device, torch.float32)
            y = y.to(model_device, torch.float32)
            optimizer.zero_grad()
            y_hat = model(X).flatten()
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

    return model

In [None]:
def evaluate(model, valid_loader):
    """Compute and print the classification accuracy of a single-logit model.

    A sample is predicted positive when ``sigmoid(logit) > 0.5``.

    Args:
        model: Network whose output flattens to one logit per sample.
        valid_loader: Iterable of ``(X, y)`` batches; ``y`` holds 0/1 labels.

    Returns:
        Fraction of correctly classified samples among the samples actually
        iterated (``0.0`` when the loader yields nothing).  The model is left
        in eval mode.
    """
    model.eval()

    # Evaluate on the device the model lives on rather than a global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0.0
    seen = 0
    with torch.no_grad():  # no autograd graph is needed for inference
        for X, y in valid_loader:
            X = X.to(model_device, torch.float32)
            y = y.to(model_device, torch.float32)
            y_hat = model(X).flatten()
            y_hat_labels = torch.sigmoid(y_hat) > 0.5
            correct += (y_hat_labels == y).type(torch.float32).sum().item()
            seen += y.numel()

    # Divide by the samples seen so the result stays correct when the loader
    # drops a partial batch, and guard against an empty loader.
    accuracy = correct / seen if seen else 0.0
    print(f"Validation accuracy: {accuracy:.4f}")

    return accuracy

In [None]:
def train_evaluate(parameterization):
    """Train a fresh model with the given hyperparameters and score it.

    Used as the ``evaluation_function`` of the Ax ``optimize`` call in this
    notebook.

    Args:
        parameterization: Hyperparameter mapping forwarded to ``train``.

    Returns:
        Accuracy on ``valid_loader`` as reported by ``evaluate``.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = make_model()
    model.to(device)

    trained_model = train(model, train_loader, hyperparameters=parameterization, epochs=20)
    return evaluate(trained_model, valid_loader)

# Train

In [None]:
# Search over Adam's learning rate (log scale) and beta1, scoring each trial
# with train_evaluate; 25 trials in total.
best_parameters, values, experiment, model = optimize(
    parameters=[
        dict(name="lr", type="range", bounds=[1e-6, 0.4], log_scale=True, value_type='float'),
        dict(name="beta1", type="range", bounds=[0.5, 0.999], value_type='float'),
    ],
    evaluation_function=train_evaluate,
    objective_name='accuracy',
    total_trials=25,
)

In [None]:
# Display the first value returned by `optimize`: the recommended hyperparameters.
best_parameters

In [None]:
# `values` (second return value of `optimize`) unpacks into two parts, named means and covariances here.
means, covariances = values
means, covariances

In [None]:
# Per-trial objective in percent, as a 1 x n_trials array.
trial_scores = [trial.objective_mean*100 for trial in experiment.trials.values()]
best_objectives = np.array([trial_scores])

# Running maximum: best accuracy reached up to each trial.
running_best = np.maximum.accumulate(best_objectives, axis=1)

best_objective_plot = optimization_trace_single_method(
    y=running_best,
    title="Model performance vs. # of iterations",
    ylabel="Classification Accuracy, %",
)
render(best_objective_plot)

In [None]:
# Fetch the per-trial results and pick the arm with the highest observed mean.
data = experiment.fetch_data()
df = data.df

is_best_row = df['mean'] == df['mean'].max()
best_arm_name = df.arm_name[is_best_row].values[0]  # first arm on ties
best_arm = experiment.arms_by_name[best_arm_name]
best_arm

In [None]:
# Display the per-trial results frame fetched from the experiment.
df

# Test

In [None]:
# Retrain a fresh model with the hyperparameters of the best observed arm.
# train() builds its own loss and Adam optimizer from the dict it receives, so
# no criterion / optimizer is constructed here (objects created in this cell
# would never be used).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = make_model()
model.to(device)
model = train(model, train_loader, hyperparameters=best_arm.parameters, epochs=25)

In [None]:
# Held-out test split, preprocessed with the validation transforms.
TEST_DIR = "../input/lipodata2/data/test"
test_dataset = LipoDataset(root_dir=TEST_DIR, transform=valid_transforms)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [None]:
def get_test_accuracy(cnn, data_loader):
    """Return (and print) the accuracy of a single-logit model on a holdout set.

    A sample is predicted positive when ``sigmoid(logit) > 0.5``.

    Args:
        cnn: Network whose output flattens to one logit per sample.
        data_loader: Iterable of ``(X, y)`` batches; ``y`` holds 0/1 labels.

    Returns:
        Fraction of correctly classified samples among the samples actually
        iterated (``0.0`` when the loader yields nothing).  The model is left
        in eval mode.
    """
    cnn.eval()

    # Resolve the device once, from the model itself, instead of on every batch.
    first_param = next(cnn.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    test_batch_acc = 0.0
    seen = 0
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(model_device, torch.float32)
            y = y.to(model_device)
            y_hat = cnn(X).flatten()
            y_hat_labels = torch.sigmoid(y_hat) > 0.5
            test_batch_acc += (y_hat_labels == y).type(torch.float32).sum().item()
            seen += y.numel()

    # Divide by the samples seen and guard against an empty loader.
    test_accuracy = test_batch_acc / seen if seen else 0.0
    print(f"Test accuracy is {test_accuracy*100:.2f}%.")
    return test_accuracy

# Score the retrained model on the held-out test loader.
get_test_accuracy(model, test_loader)

In [None]:
# Save a checkpoint: only the model's state_dict is written to disk.
trained_state = model.state_dict()
torch.save(trained_state, 'vgg16.pth')