In [21]:
from random import random
import os
import shutil

def copy_file(source_directory, destination_directory, filename):
    """
    Utility function used to copy a file from a source_directory to a destination_directory
    """
    destination_directory.mkdir(parents=True, exist_ok=True)
    shutil.copy(source_directory/filename, destination_directory/filename)
    
def organize_train_valid_dataset(root, dest, labels, valid_probability=0.1):
    """
    Creates the train, train_valid and valid folders respecting PyTorch's ImageDataset structure, performing
    train/validation split based on the given percentage
    """
    source_directory = root/'images'
    
    for index, row in labels.iterrows():
        img_index = row['filename'].split('.')[0]  # The filename is the name of the image except the extension
        img_class = row['quantity']

        # Randomly assign the image to the valid dataset with probability 'valid_probability'
        channel = Path('train') if random()>valid_probability else Path('valid')
        destination_directory = dest/channel/str(img_class)

        # Copy the image to either the train or valid folder, and also to the train_valid folder
        copy_file(source_directory, destination_directory, row['filename'])
        copy_file(source_directory, dest/'train_valid'/str(img_class), row['filename'])

def organize_test_dataset(root, dest, labels):
    """
    Creates the test folder respecting PyTorch's ImageDataset structure, using a dummy 'undefined' label
    """
    source_directory = root/'images'
        
    for index, row in labels.iterrows():
        img_index = row['filename'].split('.')[0]  # The index is the name of the image except the extension

        channel = Path('test')
        destination_directory = dest/channel/'undefined'
        
        try:
            copy_file(source_directory, destination_directory, row['filename'])
        except Exception as e:
            print(e)

In [22]:
import pandas as pd
from pathlib import Path

root = Path('./SageMaker')
dest = Path('./SageMaker')
dest.mkdir(exist_ok=True, parents=True)

# Read in the labels DataFrame with a label for each image
train = pd.read_csv(f'{root}/train.csv')
test = pd.read_csv(f'{root}/test.csv')

# Create the train/train_valid/valid folder structure
valid_probability = 0.1
organize_train_valid_dataset(root, dest, train, valid_probability)

# Create the test folder structure
organize_test_dataset(root, dest, test)

In [52]:
import torch, torchvision

train_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224,224)),
        torchvision.transforms.ToTensor(),
    ])

train_dataset, train_valid_dataset = [torchvision.datasets.ImageFolder(folder, transform=train_transforms) for folder in [root/'train', root/'train_valid']]


valid_transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224,224)),
        torchvision.transforms.ToTensor(),
    ])

valid_dataset, test_dataset = [torchvision.datasets.ImageFolder(folder, transform=valid_transforms) for folder in [root/'valid', root/'test']]

In [53]:
num_gpus = torch.cuda.device_count()
print(num_gpus)

1


In [54]:
# create dataloaders

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2*num_gpus, pin_memory=True)
train_valid_dataloader = torch.utils.data.DataLoader(train_valid_dataset, batch_size=128, shuffle=True, num_workers=2*num_gpus, pin_memory=True)

valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=256, shuffle=False, num_workers=2*num_gpus, pin_memory=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=2*num_gpus, pin_memory=True)

In [59]:
import timm
def get_net():
    convnext_tiny = timm.create_model('convnext_tiny', pretrained=True)
    num_ftrs = convnext_tiny.head.fc.in_features
    convnext_tiny.head.fc = torch.nn.Linear(in_features=num_ftrs, out_features=6, bias=True)
    
    #resnext = torchvision.models.vit_b_16(pretrained=True)
    
    #resnext.fc = torch.nn.Linear(num_ftrs, 6)
    #resnext.classifier[6] = torch.nn.Linear(resnext.classifier[6].in_features, 6)
    torch.nn.init.xavier_uniform_(convnext_tiny.head.fc.weight) # initialize the weights of the new layer
    return convnext_tiny

In [60]:
import time

def train(net, train_dataloader, valid_dataloader, criterion, optimizer, scheduler=None, epochs=10, device='cpu', checkpoint_epochs=10):
    start = time.time()
    print(f'Training for {epochs} epochs on {device}')
    
    for epoch in range(1,epochs+1):
        print(f"Epoch {epoch}/{epochs}")
        
        net.train()  # put network in train mode for Dropout and Batch Normalization
        train_loss = torch.tensor(0., device=device)  # loss and accuracy tensors are on the GPU to avoid data transfers
        train_accuracy = torch.tensor(0., device=device)
        for X, y in train_dataloader:
            X = X.to(device)
            y = y.to(device)
            preds = net(X)
            loss = criterion(preds, y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            with torch.no_grad():
                train_loss += loss * train_dataloader.batch_size
                train_accuracy += (torch.argmax(preds, dim=1) == y).sum()
        
        if valid_dataloader is not None:
            net.eval()  # put network in train mode for Dropout and Batch Normalization
            valid_loss = torch.tensor(0., device=device)
            valid_accuracy = torch.tensor(0., device=device)
            with torch.no_grad():
                for X, y in valid_dataloader:
                    X = X.to(device)
                    y = y.to(device)
                    preds = net(X)
                    loss = criterion(preds, y)

                    valid_loss += loss * valid_dataloader.batch_size
                    valid_accuracy += (torch.argmax(preds, dim=1) == y).sum()
        
        if scheduler is not None: 
            scheduler.step()
            
        print(f'Training loss: {train_loss/len(train_dataloader.dataset):.2f}')
        print(f'Training accuracy: {100*train_accuracy/len(train_dataloader.dataset):.2f}')
        
        if valid_dataloader is not None:
            print(f'Valid loss: {valid_loss/len(valid_dataloader.dataset):.2f}')
            print(f'Valid accuracy: {100*valid_accuracy/len(valid_dataloader.dataset):.2f}')
        
        if epoch%checkpoint_epochs==0:
            torch.save({
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, './checkpoint.pth.tar')
        
        print()
    
    end = time.time()
    print(f'Total training time: {end-start:.1f} seconds')
    return net

In [62]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr, epochs = 1e-4, 10

# get network and move it to GPU
net = get_net().to(device)

# standard CrossEntropy Loss for multi-class classification problems
criterion = torch.nn.CrossEntropyLoss()

# define the optimizer for the network parameters
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# start training process
net = train(net, train_dataloader, valid_dataloader, criterion, optimizer, None, epochs, device)

Training for 10 epochs on cuda
Epoch 1/10
Training loss: 1.34
Training accuracy: 37.80
Valid loss: 1.22
Valid accuracy: 43.58

Epoch 2/10
Training loss: 1.12
Training accuracy: 49.90
Valid loss: 1.14
Valid accuracy: 48.76

Epoch 3/10
Training loss: 0.94
Training accuracy: 59.01
Valid loss: 1.22
Valid accuracy: 46.06

Epoch 4/10
Training loss: 0.67
Training accuracy: 73.45
Valid loss: 1.32
Valid accuracy: 47.20

Epoch 5/10
Training loss: 0.31
Training accuracy: 89.85
Valid loss: 1.74
Valid accuracy: 46.73

Epoch 6/10
Training loss: 0.10
Training accuracy: 97.73
Valid loss: 2.28
Valid accuracy: 47.20

Epoch 7/10
Training loss: 0.04
Training accuracy: 99.22
Valid loss: 2.49
Valid accuracy: 46.75

Epoch 8/10
Training loss: 0.03
Training accuracy: 99.44
Valid loss: 2.60
Valid accuracy: 45.94

Epoch 9/10
Training loss: 0.03
Training accuracy: 99.27
Valid loss: 2.75
Valid accuracy: 45.56

Epoch 10/10
Training loss: 0.04
Training accuracy: 99.05
Valid loss: 2.87
Valid accuracy: 46.73

Total tr

In [63]:
# tensor to accumulate all predictions in
all_outputs = torch.tensor([], device=device)

net.eval()
with torch.no_grad():
    for X, _ in test_dataloader:
        X = X.to(device)
        pred = net(X).argmax(dim=1).type(torch.float32) # keep the class with highest probability
        all_outputs = torch.cat((all_outputs, pred), 0) # concatenate predictions to the list of all predictions

In [66]:
# read the submission dataframe
test = pd.read_csv('./SageMaker/solution.csv')
test['quantity'] = all_outputs.type(torch.int32).cpu().numpy() # move tensor to CPU and convert to numpy arrays

# write the csv file
test[['index', 'quantity']].to_csv('./SageMaker/pred.csv', index=False)