# Summary

### This project extends a previous project called CNN_Assignment. That project invovled building and training Convolutional Neural Networks with fully connnected layers to classify images of brains as 'Nondemented', 'Mild[ly]Demented', 'VeryMild[ly]Demented', and 'Demented.'

#Much of the code is provided by the tuning with Ray Tune Tutorial here: https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html, but I have adapted it and heavily commented it to show what is happening.

# imports

In [43]:
from functools import partial
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torchvision import transforms, utils
import matplotlib.pyplot as plt
import os

import zipfile
from zipfile import ZipFile

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

Steps of converting a torch training workflow into a tune workflow:

* 1) wrap data loading and training in functions
* 2) make some artificial neural network parameters configurable
* 3) add checkpointing
* 4) define the search space for model tuning


In [None]:
# Define 

# Load Data Function

In [26]:
def load_data(data_dir = "./data/Alzheimer_s Dataset"):

    cwd = os.getcwd()

#     with ZipFile("alzheimer's archive.zip", "r") as zip_ref:
#         zip_ref.extractall()
    
    train_data = ImageFolder(os.path.join(data_dir, '/train'),
                            transform = transforms.Compose([
                                transforms.Resize(140),
                                transforms.CenterCrop(128),
                                transforms.Grayscale(),
                                transforms.ToTensor()

                            ]))
    
    test_data = ImageFolder(os.path.join(data_dir, '/test'),
                        transform = transforms.Compose([
                            transforms.Resize(140),
                            transforms.CenterCrop(128),
                            transforms.Grayscale(),
                            transforms.ToTensor()
                            
                        ]))
    
    return train_data, test_data

# Define Model

#### In our definition of the model, we include two hyperparameters lin_1, corresponding to the number of nodes in the first hidden linear layer, and lin_2 corresponding t the number of nodes in the second hidden linear layer. The last layer remains 4 because that's the number of classification categories for our alzheimer's image data.

#### I create a general ConvNet class and then subclass it to a ConvNetwithDropout class.

In [35]:
class ConvNet(nn.Module):
    
    def __init__(self, lin_1 = 100, lin_2 = 200):
        super(ConvNet, self).__init__()
        #initialize the convolutional layers
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 8, kernel_size = 3, stride = 1, padding = 1)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels = 8, out_channels = 8, kernel_size = 3, stride = 1, padding = 1)

        
        ##initialize the fully connected layers
        self.fc1 = nn.Linear(in_features = 8*16*16, out_features = lin_1) #NOTE: we are using maxpool (with a kernel size of 2) thrice, so our 128 x 128 image becomes a 16x16 image.
        self.fc2 = nn.Linear(in_features = lin_1, out_features = lin_2)
        self.fc3 = nn.Linear(in_features = lin_2, out_features = 4)
        
    def forward(self, _x):
        #Print statements below are useful for debugging shape mismatches.
        #print("input shape is: {}".format(_x.shape)) 
        _x = self.pool(F.relu(self.conv1(_x)))
        #print("shape after one convolution is: {}".format(_x.shape))
        
        _x = self.pool(F.relu(self.conv2(_x)))
        #print("shape after two convolutions is: {}".format(_x.shape))
        
        #adding more layers for assignment:
        _x = self.pool(F.relu(self.conv2(_x)))
        #print("shape after three convolutions is: {}".format(_x.shape))
        
        _x = _x.view(-1, 8*16*16)
        #print(_x.shape)
        _x = F.relu(self.fc1(_x))
        _x = F.relu(self.fc2(_x))
        _x = self.fc3(_x)
        
        return _x

In [36]:
class ConvNetwithDropout(ConvNet):
    def __init__(self, lin_1 = 100, lin_2 = 200):
        super(ConvNetwithDropout, self).__init__()
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, _x):
        _x = self.pool(F.relu(self.conv1(_x)))        
        _x = self.pool(F.relu(self.conv2(_x)))
        _x = self.pool(F.relu(self.conv2(_x)))
        _x = _x.view(-1, 8*16*16)
        #print(_x.shape)
        _x = F.relu(self.fc1(_x))
        #DROPOUT ADDED HERE
        _x = self.dropout(_x)
        _x = F.relu(self.fc2(_x))
        _x = self.fc3(_x)
        
        return _x

# Config

### number of nodes in hidden layers range between 32 and 32768. These numbers are arbitrarily chosen. Learning rate is chosen from a reciprocal continuous random variable. Batch size is chosen from batches of size  10, 20, 40 or 80.

In [37]:
config = {
    "lin_1": tune.sample_from(lambda _: 2**np.random.randint(5, 15)),
    "lin_2": tune.sample_from(lambda _: 2**np.random.randint(5, 15)),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([10, 20, 40, 80])
}

# Make Model

In [38]:
net = ConvNetwithDropout(config['lin_1'], config['lin_2'])

# Define Train Method

#### The train_cnn method takes as arguments a config dictionary and two directories: checkpoint, and data. These directories will store the checkpoints and data for reference in code that prints out the training data.

### 

In [40]:
def train_cnn(config, checkpoint_dir=None, data_dir=None):

    #Send processes to gpu if it is available. Parallelize if there are multiple gpus.
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)
    
    #Define loss function/criterion and optimizer 
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)
    
    #If there is a checkpoint directory passed to the train_cnn function,
    #use it to load the net and optimizer with the states preserved in the checkpoint directory.
    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
        
    #load the trainset and testset from the data_directory passed to the train_cnn function
    trainset, testset = load_data(data_dir)
    
    #make train, test, and validation splits
    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs])
    
    
    #make train and validation loaders
    trainloader = torch.utils.data.DataLoader(
        train_subset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        num_workers=8)
        
    valloader = torch.utils.data.DataLoader(
        val_subset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        num_workers=8)
    
    
    #Begin Training loop!
    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]; send both inputs and labels to device
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            ## print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0
                
        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1
            
            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save((net.state_dict(), optimizer.state_dict()), path)
                
            tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
        print("Finished Training")


In [41]:
def test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [45]:
gpus_per_trial = 2
# ...
result = tune.run(
    partial(train_cnn, data_dir=data_dir),
    resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    progress_reporter=reporter,
    checkpoint_at_end=True)

NameError: name 'data_dir' is not defined