In [1]:
# Import relevant packages
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import os
import time
import warnings
warnings.filterwarnings("ignore", category = FutureWarning)

In [2]:
# Flags
DISABLE_CUDA = False

In [3]:
# Hyperparameters
input_dim = [32, 64, 128, 256, 512]
lr = [0.001, 0.0001, 0.00001]
train_test_ratio = 0.8

# Declare important file paths
notebook_path = os.path.abspath("Custom_CNN.ipynb")
data_path = os.path.dirname(notebook_path) + '/project/data/columbia-prcg-datasets/'
model_path = os.path.dirname(notebook_path) + '/project/model.pth'

In [4]:
# Select accelerator device
def get_default_device():
    """Returns device, is_cuda (bool)."""
    if not DISABLE_CUDA and torch.cuda.is_available():
        print("Running on CUDA!")
        return torch.device('cuda'), True
    else:
        print("Running on CPU!")
        return torch.device('cpu'), False
device, using_cuda = get_default_device()

Running on CPU!


In [5]:
def obtain_data(input_dim):
    # Transform the data
    transform = transforms.Compose([
                        transforms.Resize(input_dim),
                        transforms.ToTensor(),
                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Create training/testing dataloaders
    full_set = datasets.ImageFolder(root=data_path, transform=transform)
    train_size = int(train_test_ratio * len(full_set))
    val_size = int((len(full_set) - train_size) / 2)
    test_size = len(full_set) - train_size - val_size
    train_set, val_set, test_set = torch.utils.data.random_split(full_set, [train_size, val_size, test_size])

    train_loader = torch.utils.data.DataLoader(train_set, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set, shuffle=False)

    return train_loader, val_loader

    # train_data_in_memory = load_data_into_memory(train_loader)
    # val_data_in_memory = load_data_into_memory(val_loader)
    # return train_data_in_memory, val_data_in_memory 

In [6]:
# # Load data into memory to elimate read bottleneck
# def load_data_into_memory(data_loader):
#     output = []
#     for data in data_loader:
#         inputs = data[0].to(device, non_blocking=True)
#         labels = data[1].to(device, non_blocking=True)
#         output.append((inputs, labels))
#     return output

In [6]:
# Declare our model architecture
def declare_model(input_dim):
    class ConvNet(nn.Module):  # Convolutional Neural Network
        def __init__(self):
            super(ConvNet, self).__init__()
            self.layer1 = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2),  # (512, 512, 32) (256, 256, 32)
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2))  # (256, 256, 32)
            self.layer2 = nn.Sequential(
                nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),  # (256, 256, 64)
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2))  #  (128, 128, 64)
            self.layer3 = nn.Sequential(
                nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2),  # (512, 512, 64)
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2))  #  (64, 64, 64)
    #         self.drop_out = nn.Dropout(0.1)
            self.fc1 = nn.Linear(int(input_dim/16) * int(input_dim/16) * 128, 32)
            self.fc2 = nn.Linear(32, 1)
            self.sigmoid = nn.Sigmoid()
            
        def forward(self, x):
            print (x.shape)
            out = self.layer1(x)
            print (out.shape)
            out = self.layer2(out)
            print (out.shape)
            out = self.layer3(out)
            print (out.shape)
            out = out.reshape(out.size(0), -1)
            print (out.shape)
    #         out = self.drop_out(out)
            out = self.fc1(out)
            print (out.shape)
            out = self.fc2(out)
            out = self.sigmoid(out)
            return out

    model = ConvNet()
    model.to(device)
    return model

In [7]:
def train_model(model, loss_fn, optimizer, train_loader, val_loader, num_epochs):
    loss_list = []
    train_accuracy_list = []
    val_accuracy_list = []
    t = torch.Tensor([0.5]).to(device)  # 0.5 acts as threshold
    # highest_acc = 0.0

    torch.backends.cudnn.benchmark = True  # make training faster on Cuda

    # start_time = time.time()

    model.train()  # switch to train mode
        
    for epoch in range(num_epochs):
        # Train the model
        running_loss = 0.0
        train_correct = train_total = 0 
        for i, (inputs, labels) in enumerate(train_loader):
            labels = labels.view(-1,1)

            probs = model(inputs)

            outputs = (probs > t).float() * 1  # obtain train accuracies
            train_total += len(outputs)
            train_correct += (outputs == labels.float()).float().sum() / len(outputs)  # normalize batch size

            loss = loss_fn(probs, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # if False and (i + 1) % 400 == 0:
            #     print ('# Images: {:} | Loss: {:.6f} | Time: {:.6f}'.format(i + 1, running_loss / (i + 1), time.time() - start_time))
        train_accuracy = train_correct / train_total
            
        # Test current version of model to obtain accuracy    
        val_correct = val_total = 0 
        with torch.no_grad():
            for (inputs, labels) in val_loader:
                labels = labels.view(-1,1)

                probs = model(inputs)
                outputs = (probs > t).float() * 1
                val_total += len(outputs)
                val_correct += (outputs == labels.float()).float().sum() / len(outputs)  # normalize batch size
        val_accuracy = val_correct / val_total

        # if val_accuracy > highest_acc:  # save highest accuracy model
        #     highest_acc = val_accuracy
        #     torch.save(model.state_dict(), model_path)

        loss_list.append(running_loss)
        train_accuracy_list.append(train_accuracy)
        val_accuracy_list.append(val_accuracy)
        # print ('Epoch: {:} | Time (m): {:.6f} | Loss: {:.6f} | Train Accuracy: {:.8%} | Validation Accuracy: {:.8%}'.format(
        #     epoch, (time.time() - start_time)/60, running_loss, train_accuracy, val_accuracy))

    return loss_list, train_accuracy_list, val_accuracy_list

In [8]:
def run_experiment(input_dim, lr):
    train_loader, val_loader = obtain_data(input_dim)

    model = declare_model(input_dim)
    # Define the loss function and optimizer
    loss_fn = torch.nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr = lr)

    loss_list, train_accuracy_list, val_accuracy_list = train_model(model, loss_fn, optimizer, train_loader, val_loader, num_epochs=1)
    return loss_list, train_accuracy_list, val_accuracy_list

run_experiment(32, 0.0001)

torch.Size([1, 3, 32, 48])
torch.Size([1, 32, 8, 12])
torch.Size([1, 64, 2, 3])
torch.Size([1, 128, 1, 1])
torch.Size([1, 128])


RuntimeError: size mismatch, m1: [1 x 128], m2: [512 x 32] at ../aten/src/TH/generic/THTensorMath.cpp:197