Perform fine tuning on Resnet18. Data in `dataset` dir needs to be present. Train on Mac will take 10-20 mins

In [1]:
import os
import torch
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from copy import deepcopy
import glob

import numpy as np

In [2]:
data_path = '../dataset'

for path in os.listdir(data_path):
    if os.path.isdir(os.path.join(data_path, path)):
        dir_path = os.path.join(data_path, path)
        jpegs = glob.glob(str(dir_path) + "/*/*.jpg", recursive=True)
        print(f"{dir_path} contains {len(jpegs)} jpegs")

../dataset/test contains 150 jpegs
../dataset/train contains 2500 jpegs
../dataset/validation contains 750 jpegs


In [3]:
train_data_path = os.path.join(data_path, 'train')
test_data_path = os.path.join(data_path, 'test')
val_data_path = os.path.join(data_path, 'validation')

In [4]:
train_classes = dict()

for path in sorted(os.listdir(train_data_path)):
    if os.path.isdir(os.path.join(train_data_path, path)):
        train_classes.setdefault(len(train_classes), path)
        
train_classes

{0: 'chicken_curry',
 1: 'chicken_wings',
 2: 'fried_rice',
 3: 'grilled_salmon',
 4: 'hamburger',
 5: 'ice_cream',
 6: 'pizza',
 7: 'ramen',
 8: 'steak',
 9: 'sushi'}

In [5]:
test_classes = dict()

for path in sorted(os.listdir(test_data_path)):
    if os.path.isdir(os.path.join(test_data_path, path)):
        test_classes.setdefault(len(test_classes), path)
        
# test_classes

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [7]:
try:
    torch.cuda.get_device_name(0)
    torch.cuda.empty_cache()
except Exception as exc:
    print(exc)

Torch not compiled with CUDA enabled


In [8]:
train_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [9]:
val_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [10]:
test_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [11]:
train_dataset = ImageFolder(
    root=train_data_path,
    transform=train_transform
)

train_dataset

Dataset ImageFolder
    Number of datapoints: 2500
    Root location: ../dataset/train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [12]:
test_dataset = ImageFolder(
    root=test_data_path,
    transform=val_transform
)

test_dataset

Dataset ImageFolder
    Number of datapoints: 150
    Root location: ../dataset/test
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [13]:
val_dataset = ImageFolder(
    root=val_data_path,
    transform=val_transform
)

val_dataset

Dataset ImageFolder
    Number of datapoints: 750
    Root location: ../dataset/validation
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [14]:
BATCH_SIZE = 16

In [15]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
)

In [16]:
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
)

In [17]:
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
)

Load model

In [18]:
model = models.resnet18(pretrained=True)

In [19]:
for param in model.parameters():
    param.requires_grad = False

In [20]:
n_inputs = model.fc.in_features
n_outputs = 10

In [21]:
sequential_layers = nn.Sequential(
    nn.Linear(n_inputs, 128),
    nn.ReLU(),
    nn.Dropout(.2),
    nn.Linear(128, n_outputs),
    nn.LogSoftmax(dim=1)
)

In [22]:
# sequential_layers = nn.Linear(n_inputs, n_outputs)

In [23]:
model.fc = sequential_layers

In [24]:
try:
    model = model.to('cuda')
except Exception as exc:
    print(exc)

Torch not compiled with CUDA enabled


In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=.9)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=.1)

In [26]:
loaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}

In [27]:
dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset),
    'test': len(test_dataset)
}

print(dataset_sizes)

{'train': 2500, 'val': 750, 'test': 150}


In [None]:
%%time

EPOCHS = 15

for epoch in range(1, EPOCHS+1):
    best_acc = .0
    print(f"\nEpoch {epoch}/{EPOCHS}\n{'='*25}")
    for phase in ['train', 'val']:
        running_loss = .0
        running_corrects = .0
        if phase == 'train': model.train()
        if phase == 'val': model.eval()
        for inputs, labels in loaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        if phase == 'train': scheduler.step()
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_weights = deepcopy(model.state_dict())
        print(f"Loss ({phase}): {epoch_loss}, Acc ({phase}): {epoch_acc}")


Epoch 1/15
Loss (train): 2.279353141450882, Acc (train): 0.1816
Loss (val): 2.3057477111816405, Acc (val): 0.09866666666666667

Epoch 2/15
Loss (train): 2.3439768371582033, Acc (train): 0.0024
Loss (val): 2.3051966565450033, Acc (val): 0.092

Epoch 3/15
Loss (train): 2.3422943481445313, Acc (train): 0.0028
Loss (val): 2.3050144138336184, Acc (val): 0.092

Epoch 4/15
Loss (train): 2.3416239433288575, Acc (train): 0.002
Loss (val): 2.304685712814331, Acc (val): 0.09866666666666667

Epoch 5/15
Loss (train): 2.3415963554382326, Acc (train): 0.0028
Loss (val): 2.303981034596761, Acc (val): 0.096

Epoch 6/15
Loss (train): 2.3408355701446535, Acc (train): 0.0024


In [None]:
torch.save(best_model_weights, '../model/foodnet_resnet18.pth')

In [None]:
# model.eval()

In [None]:
%%time

for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels)
    
loss = running_loss / dataset_sizes['test']
acc = running_corrects.double() / dataset_sizes['test']

In [None]:
print(f"Test Loss: {epoch_loss}, Test Accuracy: {epoch_acc}")

Check we can load the model

---

In [None]:
import torch.nn as nn

from torchvision.models.resnet import ResNet, BasicBlock


class ImageClassifier(ResNet):
    def __init__(self):
        super(ImageClassifier, self).__init__(BasicBlock, [2,2,2,2], num_classes=10)

        self.fc = nn.Sequential(
            nn.Linear(512 * BasicBlock.expansion, 128),
            nn.ReLU(),
            nn.Dropout(.2),
            nn.Linear(128, 10),
            nn.LogSoftmax(dim=1)
        )

In [None]:
mod = ImageClassifier()

In [None]:
mod.load_state_dict(torch.load("../model/foodnet_resnet18.pth"))

In [None]:
mod.eval()