In [1]:
import torch
import numpy as np
import torchvision
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')


CUDA is available!  Training on GPU ...


## Data Preparation

In [3]:
TRAIN_DIR = "../DATASET/TRAIN/"
train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DIR)
print(f"Classes: {train_dataset.classes}")
print(f"Class count: {train_dataset.targets.count(0)}, {train_dataset.targets.count(1)}")
print(f"Samples:",len(train_dataset))
print(f"First sample: {train_dataset.samples[0]}")

Classes: ['O', 'R']
Class count: 12565, 9999
Samples: 22564
First sample: ('../DATASET/TRAIN/O\\O_1.jpg', 0)


`SubsetRandomSampler`: This is a PyTorch class used to sample data from a dataset based on provided indices. It ensures that the batches returned are from the specified subset of data (training or validation) and that the samples are randomly selected.

In [4]:
# Shuffling and Splitting Train data into train and valid data
valid_size = 0.2
num_train = len(train_dataset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# Creating Samplers
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)


## Preprocessing and Data Augmentation
`ImageFolder` loads our data using the `PIL` package. We need to convert it into tensors. Pre-processing will be done using `torchvision.transforms`.
1) Resizing: Ensure all images are of the same size, which is often reuqired for CNN
2) Normalization: Normalize images to have a mean and standard deviation of O and 1 respectively. Ensure the data is on the same scale as what the prer-trained model expects.
3) Random Horizontal Flip: Adds data augmentation to improve model generalization.
4) Random Rotation: Helps in generalizaing by rotating images randomly within a specified range.
5) Color Jitter: Slightly adjusts bightness, contrast, saturation, and hue to make model more robust


In [5]:
IMAGE_SIZE = (64, 64)

data_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DIR, transform=data_transforms)

Working with batches of data using `dataloader`

In [6]:
BATCH_SIZE = 64

train_loader = torch.utils.data.DataLoader(
    train_dataset,          # our raw data
    batch_size=BATCH_SIZE,  # the size of batches we want the dataloader to return
    sampler=train_sampler,  # indicates which sampler to use to get the batches of data
    drop_last=False         # don't drop the last batch even if it's smaller than batch_size
)

valid_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE, 
    sampler=valid_sampler,
    drop_last=False         # don't drop the last batch even if it's smaller than batch_size
)

## Preparing the test dataset
Only apply resize and normalization transformation for the test data and not any data augmentation.

In [7]:
TEST_DIR = "../DATASET/TEST/"
data_transforms_test = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_dataset = torchvision.datasets.ImageFolder(root=TEST_DIR, transform=data_transforms_test)

In [8]:
test_loader = torch.utils.data.DataLoader(
    test_dataset,          # our raw data
    batch_size=BATCH_SIZE,  # the size of batches we want the dataloader to return
    drop_last=False         # don't drop the last batch even if it's smaller than batch_size
)

## Using Transfer Learning 
Take advantage of pre-trained model on large dataset. We will use ResNet as feature extractor and add extra layers to the pre-trained network. We will customize the last classification layer to a binary classification. We will also fine tune the last two layers.,

In [9]:
# Load a pre-trained resnet34
model = models.resnet34(pretrained=True)

# Freeze all layers parameter
for param in model.parameters(): 
    param.requires_grad = False

# List the last classification layer, a fully connected layer outputting 1000 values
list(model.named_children())[-1]
num_features = model.fc.in_features

new_layers = nn.Sequential(
    nn.Linear(num_features,250),
    nn.ReLU(),
    nn.Linear(250,2)
)

model.fc = new_layers



## Set Up Training
Define the loss function and optimizer 

In [10]:
def trainer(model, criterion, optimizer, train_loader, valid_loader, device, epochs=5, verbose=True):
    """Simple training wrapper for PyTorch network."""
    
    train_accuracy = []
    valid_accuracy = []
    for epoch in range(epochs):  # for each epoch
        train_batch_loss = 0
        train_batch_acc = 0
        valid_batch_loss = 0
        valid_batch_acc = 0
        
        # Training
        model.train()
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(X)
            loss = criterion(y_hat, y.long())  # Ensure target labels are Long
            loss.backward()
            optimizer.step()
            train_batch_loss += loss.item()
            _, y_hat_labels = torch.max(y_hat, 1)
            train_batch_acc += (y_hat_labels == y).type(torch.float32).mean().item()
        train_accuracy.append(train_batch_acc / len(train_loader))
        
        # Validation
        model.eval()
        with torch.no_grad():
            for X, y in valid_loader:
                X, y = X.to(device), y.to(device)
                y_hat = model(X)
                loss = criterion(y_hat, y.long())  # Ensure target labels are Long
                valid_batch_loss += loss.item()
                _, y_hat_labels = torch.max(y_hat, 1)
                valid_batch_acc += (y_hat_labels == y).type(torch.float32).mean().item()
        valid_accuracy.append(valid_batch_acc / len(valid_loader))
        
        # Print progress
        if verbose:
            print(f"Epoch {epoch + 1}:",
                  f"Train Accuracy: {train_accuracy[-1]:.2f}",
                  f"Valid Accuracy: {valid_accuracy[-1]:.2f}")
    
    return {"train_accuracy": train_accuracy, "valid_accuracy": valid_accuracy}

In [11]:
# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

results = trainer(model, criterion, optimizer, train_loader, valid_loader, device, epochs=10)

Epoch 1: Train Accuracy: 0.80 Valid Accuracy: 0.82
Epoch 2: Train Accuracy: 0.82 Valid Accuracy: 0.83
Epoch 3: Train Accuracy: 0.83 Valid Accuracy: 0.83
Epoch 4: Train Accuracy: 0.83 Valid Accuracy: 0.84
Epoch 5: Train Accuracy: 0.84 Valid Accuracy: 0.84
Epoch 6: Train Accuracy: 0.84 Valid Accuracy: 0.84
Epoch 7: Train Accuracy: 0.84 Valid Accuracy: 0.84
Epoch 8: Train Accuracy: 0.84 Valid Accuracy: 0.83
Epoch 9: Train Accuracy: 0.85 Valid Accuracy: 0.84
Epoch 10: Train Accuracy: 0.84 Valid Accuracy: 0.85


In [13]:
torch.save(model.state_dict(), 'model_waste.pt')

## Evaluating the model with test data

In [12]:
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

#defining classes

classes=['O','R']

model.eval()
# iterate over test data
for data, target in test_loader:
    # move tensors to GPU if CUDA is available
    if train_on_gpu:
        data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)    
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    # calculate test accuracy for each object class
    for i in range(2):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(2):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.405638

Test Accuracy of     O: 97% (43/44)
Test Accuracy of     R: 80% (29/36)

Test Accuracy (Overall): 90% (72/80)
