In [1]:
from pathlib import Path
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
from torch import optim, cuda
from datetime import datetime
import pandas as pd

In [2]:
# Setting up paths
base_path = Path('./clean_data/').absolute()
raw_base_path = base_path / 'motorcycles'
raw_train_path = str(raw_base_path / 'train')
raw_val_path = str(raw_base_path / 'val')
raw_test_path = str(raw_base_path / 'test')
square_base_path = base_path / 'square_motorcycles'
square_train_path = str(square_base_path / 'train')
square_val_path = str(square_base_path / 'val')
square_test_path = str(square_base_path / 'test')

In [3]:
# parameters
batch_size = 256 # Make smaller if running out of memory
num_classes = 0 # Will update after we load in the data
num_inputs = 0 # Will update once our model is selected
num_epochs = 1



Train on gpu: True
1 gpus detected.


Here, we create our transforms. In this case, we will simply resize and center crop, then normalize using the same normalization settings as ResNet-34. Then we create our data and loader

In [4]:
raw_transforms = {
    'train': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Normalize using same mean, std as imagenet
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Normalize using same mean, std as ResNet
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [5]:
raw_data = {
    'train': datasets.ImageFolder(root=raw_train_path, transform = raw_transforms['train'] ),
    'valid': datasets.ImageFolder(root=raw_val_path, transform = raw_transforms['valid'])
}

raw_dataloaders = {
    'train': DataLoader(raw_data['train'], batch_size=batch_size, shuffle=True),
    'val': DataLoader(raw_data['valid'], batch_size=batch_size, shuffle=True)
}

num_classes = len(raw_data['train'].classes)

Here, we will create a CNN using the pretrained ResNet-34. We set all layers to autograd=False, so that we are not changing weights and biases on them. Then we will add a classifier. Later on, we will create a function to make all of this easier for playing with settings

## Creating the model
If we look at modle.fc, I believe we are seeing the final connected layer. This is the part of the model that applies a target. 
We will replace this with four new layers. These will be an additional convolution and a classifier. 

In [None]:
model = models.resnet34(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

print(model.fc)

num_inputs = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_inputs, 256),
                                   nn.ReLU(),
                                   nn.Dropout(0.4),
                                   nn.Linear(256, num_classes),
                                   nn.LogSoftmax(dim=1))
# Move to the GPU
model = model.to('cuda')

This is a very basic model, without great transforms, no hyperparameters, and little tracking. But, it should work to see if things are moving in the right direction. 

## Training the model

In [None]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters())

for epoch in range(num_epochs):
    print(f'Epoch: {epoch + 1}')
    train_loss = 0.0
    for data, targets in raw_dataloaders['train']:
        data = data.to('cuda')
        targets = targets.to('cuda')
        out = model(data)
        loss = criterion(out, targets)
        loss.backward()
        # Get loss for the batch
        batch_loss = loss.item() * data.size(0)
        train_loss += batch_loss
        optimizer.step()
        #data = None
        #targets = None
        #cuda.empty_cache()
    print(f'Train_loss: {train_loss}')
    results.append({'data': 'raw images', 'epoch': epoch + 1, 'train_loss': train_loss})
# model = None
# cuda.empty_cache()

Now on the data we made square

In [6]:
square_transforms = {
    'train': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Normalize using same mean, std as imagenet
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Normalize using same mean, std as imagenet
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

square_data = {
    'train': datasets.ImageFolder(root=square_train_path, transform = square_transforms['train'] ),
    'valid': datasets.ImageFolder(root=square_val_path, transform = square_transforms['valid'])
}

square_dataloaders = {
    'train': DataLoader(square_data['train'], batch_size=batch_size, shuffle=True),
    'val': DataLoader(square_data['valid'], batch_size=batch_size, shuffle=True)
}



In [None]:
num_classes = len(square_data['train'].classes)

model = models.resnet34(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

print(model.fc)

num_inputs = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_inputs, 256),
                                   nn.ReLU(),
                                   nn.Dropout(0.4),
                                   nn.Linear(256, num_classes),
                                   nn.LogSoftmax(dim=1))
# Move to the GPU
model = model.to('cuda')



In [None]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters())

for epoch in range(num_epochs):
    print(f'Epoch: {epoch + 1}')
    train_loss = 0.0
    for data, targets in square_dataloaders['train']:
        data = data.to('cuda')
        targets = targets.to('cuda')
        out = model(data)
        loss = criterion(out, targets)
        loss.backward()
        # Get loss for the batch
        batch_loss = loss.item() * data.size(0)
        train_loss += batch_loss
        optimizer.step()
        #data = None
        #targets = None
        #cuda.empty_cache()
        # Get cuda memory
        cuda_memory.append({
            'method': 'no clearing cache', 
            'timestamp': datetime.now().timestamp(),  
            'cuda_memory': cuda.memory_allocated()})
    print(f'Train_loss: {train_loss}')
    results.append({'data': 'square images', 'epoch': epoch + 1, 'train_loss': train_loss})
#model = None
#cuda.empty_cache()

In [7]:
def basic_train_model(data, dataloaders, epochs, clear_cuda_cache=True, name='basic model'):
    '''
    Very-early training function. Not much here, but the basics to train the
    model and report loss and cuda memory
    data: A pytorch dataset with train and val data
    dataloader: A Pytorch dataloader with train and validation datasets
    clear_cuda_cache: Boolean telling us to clear the cuda cache when possible
    name: String with a name to give the model.
    '''
    start_time = datetime.now()
    results = []
    cuda_memory = []
    num_classes = len(data['train'].classes)

    model = models.resnet34(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    num_inputs = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(num_inputs, 256),
                                       nn.ReLU(),
                                       # Get rid of dropout. I will re-evaluate later
                                       #nn.Dropout(0.4),
                                       nn.Linear(256, num_classes),
                                       nn.LogSoftmax(dim=1))
    # Move to the GPU
    model = model.to('cuda')

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    for epoch in range(epochs):
        print(f'Epoch: {epoch + 1}')
        train_loss = 0.0
        for data, targets in dataloaders['train']:
             #Get cuda memory
            cuda_memory.append({
                'name': name,
                'timestamp': datetime.now(),
                'cuda_memory': cuda.memory_allocated()})
            data = data.to('cuda')
            targets = targets.to('cuda')
            cuda_memory.append({
                'name': name,
                'timestamp': datetime.now(),
                'cuda_memory': cuda.memory_allocated()})
            # Clear the gradients
            optimizer.zero_grad()
            out = model(data)
            
            loss = criterion(out, targets)
            # clear the graidients or they will accumulate
            
            loss.backward()
            # Get loss for the batch
            batch_loss = loss.item() * data.size(0)
            train_loss += batch_loss
            optimizer.step()
            #Get cuda memory
            cuda_memory.append({
                'name': name,
                'timestamp': datetime.now(),
                'cuda_memory': cuda.memory_allocated()})
            # Clear the batch from cuda memory. It is no longer needed
            if clear_cuda_cache is True:
                data = None
                targets = None
                cuda.empty_cache()
            
        print(f'Train_loss: {train_loss}')
        results.append({
            'data': 'square images',
            'epoch': epoch + 1,
            'train_loss': train_loss})
    end_time = datetime.now()
    return {'model': model, 'name': name, 'train_loss': train_loss, 'cuda_memory': cuda_memory, 
            'run_time': end_time - start_time}

In [9]:
raw_results_clear_cache = basic_train_model(data=raw_data, dataloaders=raw_dataloaders, epochs=25, 
                                            name='Raw images with clear', clear_cuda_cache=True)
'''square_results_clear_cache = basic_train_model(data=square_data, dataloaders=raw_dataloaders, epochs=5, 
                                               name='Square images with clear', clear_cuda_cache=True)
raw_results = basic_train_model(data=raw_data, dataloaders=raw_dataloaders, epochs=5, 
                                name='Raw images no clear', clear_cuda_cache=False)
square_results = basic_train_model(data=square_data, dataloaders=raw_dataloaders, epochs=5, 
                                   name='Square images no clear', clear_cuda_cache=False)'''

Epoch: 1
Train_loss: 9178.373175621033
Epoch: 2
Train_loss: 9067.50984954834
Epoch: 3
Train_loss: 9053.591597557068
Epoch: 4
Train_loss: 9043.208864212036
Epoch: 5
Train_loss: 9033.444452285767
Epoch: 6
Train_loss: 9017.467531204224
Epoch: 7
Train_loss: 8988.775751113892
Epoch: 8
Train_loss: 8968.493178367615
Epoch: 9
Train_loss: 8950.81909942627
Epoch: 10
Train_loss: 8943.764300346375
Epoch: 11
Train_loss: 8918.916849136353
Epoch: 12
Train_loss: 8895.64979171753
Epoch: 13
Train_loss: 8873.114252090454
Epoch: 14
Train_loss: 8864.470151901245
Epoch: 15
Train_loss: 8839.355823516846
Epoch: 16
Train_loss: 8805.560286521912
Epoch: 17
Train_loss: 8805.328288078308
Epoch: 18
Train_loss: 8766.316045761108
Epoch: 19
Train_loss: 8707.695014953613
Epoch: 20
Train_loss: 8701.306518554688
Epoch: 21
Train_loss: 8712.221153259277
Epoch: 22
Train_loss: 8665.408208847046
Epoch: 23
Train_loss: 8643.866147994995
Epoch: 24
Train_loss: 8621.163217544556
Epoch: 25
Train_loss: 8611.817869186401


"square_results_clear_cache = basic_train_model(data=square_data, dataloaders=raw_dataloaders, epochs=5, \n                                               name='Square images with clear', clear_cuda_cache=True)\nraw_results = basic_train_model(data=raw_data, dataloaders=raw_dataloaders, epochs=5, \n                                name='Raw images no clear', clear_cuda_cache=False)\nsquare_results = basic_train_model(data=square_data, dataloaders=raw_dataloaders, epochs=5, \n                                   name='Square images no clear', clear_cuda_cache=False)"

In [None]:
print(raw_results['run_time'])
print(raw_results_clear_cache['run_time'])
print(square_results['run_time'])
print(square_results_clear_cache['run_time'])


Looking at memory usage

In [None]:
no_clear = pd.DataFrame(raw_results['cuda_memory'])
no_clear = no_clear.set_index('timestamp')
no_clear.plot()

In [None]:
with_clear = pd.DataFrame(raw_results_clear_cache['cuda_memory'])
with_clear = with_clear.set_index('timestamp')
with_clear.plot()

In [10]:
raw_results_clear_cache['model'].parameters()

<generator object Module.parameters at 0x000002631093D930>