In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, models
from PIL import Image
import os
import time

from torch.utils.tensorboard import SummaryWriter

In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from shutil import copy
from collections import defaultdict
def prepare_data(filepath, src, dest):
  """Copy the images listed in a split file into per-class folders.

  Every non-blank line of ``filepath`` is read as ``<class>/<image_id>``
  (no file extension). For each entry, ``src/<class>/<image_id>.jpg`` is
  copied to ``dest/<class>/<image_id>.jpg``; class folders under ``dest``
  are created when they do not exist yet. Existing destination files are
  overwritten by the copy.

  Args:
    filepath: Text file with one ``<class>/<image_id>`` entry per line.
    src: Root directory containing one sub-directory of ``.jpg`` files per class.
    dest: Root directory that receives the per-class sub-directories.
  """
  classes_images = defaultdict(list)
  with open(filepath, 'r') as txt:
    # Stream the file line by line instead of materialising it with readlines().
    for line in txt:
      entry = line.strip()
      if not entry:
        # Blank lines carry no entry; splitting them would yield a single
        # empty field and make the food[1] lookup below raise IndexError.
        continue
      food = entry.split('/')
      classes_images[food[0]].append(food[1] + '.jpg')

  for food, images in classes_images.items():
    print("\nCopying images into ",food)
    class_dir = os.path.join(dest, food)
    # exist_ok replaces the separate exists() check and is safe on re-runs.
    os.makedirs(class_dir, exist_ok=True)
    for i in images:
      copy(os.path.join(src, food, i), os.path.join(class_dir, i))
  print("Copying Done!")

In [None]:
# Materialise the train/test splits as one-folder-per-class directory trees.
# The dataset root is "../data/food-101", the same location the ImageFolder
# datasets in this notebook read from; the earlier cwd-relative "food-101/..."
# paths wrote the copies to a place the loaders never look at.
food101_root = os.path.join("..", "data", "food-101")
food101_images = os.path.join(food101_root, "images")

print("Creating train data...")
prepare_data(os.path.join(food101_root, "meta", "train.txt"), food101_images, os.path.join(food101_root, "train"))
print("Creating test data...")
prepare_data(os.path.join(food101_root, "meta", "test.txt"), food101_images, os.path.join(food101_root, "test"))

In [None]:
# Local shorthand for the transforms namespace used throughout this cell.
transforms = torchvision.transforms

# Per-channel normalisation statistics (the standard ImageNet values that the
# torchvision pretrained models are documented to expect).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 64

# Training pipeline: random crop to 224x224 plus flip / rotation / affine /
# colour augmentation, then tensor conversion and normalisation.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(45),
    transforms.RandomAffine(45),
    transforms.ColorJitter(),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Alternative evaluation pipeline (ten-crop); enabling it also requires the
# matching "TenCrop validation" lines inside training().
# test_transforms = transforms.Compose([
#     transforms.Resize(256),
#     transforms.TenCrop(224),
#     transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
#     transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)(crop) for crop in crops])),
# ])

# Alternative training pipeline without the heavy augmentation.
# train_transforms = transforms.Compose([
#     transforms.RandomResizedCrop((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
# ])

# Evaluation pipeline: deterministic resize, tensor conversion, normalisation.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

train_data = datasets.ImageFolder("../data/food-101/train", transform=train_transforms)
test_data = datasets.ImageFolder("../data/food-101/test", transform=test_transforms)
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Training dataset size    {}".format(train_data_size))
print("Testing dataset size     {}".format(test_data_size))

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation only sums loss/accuracy over the whole split, so sample order does
# not matter; no shuffling keeps the pass deterministic and avoids a needless
# permutation every epoch.
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# init tensorboard writer
writer = SummaryWriter("../logs")


In [None]:
def training(model, optimizer, scheduler, last_epoch, epochs, best_valid_loss, model_name, writer):
    """Train ``model`` and validate it after every epoch.

    Runs epochs numbered ``last_epoch + 1`` through ``epochs`` (inclusive).
    Each epoch does one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``test_loader``, steps the scheduler with the
    validation loss, logs validation metrics to ``writer`` and saves a
    checkpoint whenever the validation loss is not worse than the best seen.

    Reads these notebook globals: ``train_loader``, ``test_loader``,
    ``loss_fn``, ``device``, ``train_data_size``, ``test_data_size``.

    Reported losses are per-sample averages: each batch loss is weighted by
    its batch size before dividing by the dataset size (this assumes
    ``loss_fn`` returns the batch mean, as ``nn.CrossEntropyLoss()`` does by
    default). An earlier version of this loop divided the sum of batch means
    by the dataset size, which yields values smaller by roughly the batch
    size; keep that in mind when passing ``best_valid_loss`` taken from a
    checkpoint written by that version.

    Args:
        model: Network to train; expected to already live on ``device``.
        optimizer: Optimizer updating the trainable parameters.
        scheduler: Object whose ``step(metric)`` is called once per epoch with
            the average validation loss (e.g. ``ReduceLROnPlateau``), or
            ``None`` to train without a scheduler.
        last_epoch: Number of the last completed epoch; training resumes at
            ``last_epoch + 1``.
        epochs: Number of the final epoch to run.
        best_valid_loss: Validation loss a new epoch must match or beat for a
            checkpoint to be written.
        model_name: Path prefix for checkpoints; files are written as
            ``"<model_name>_<epoch>.pth"``.
        writer: TensorBoard ``SummaryWriter`` receiving the validation scalars.
    """
    for i in range(last_epoch, epochs):
        start_time = time.time()
        epoch = i + 1
        print("==================== epoch {} ====================".format(epoch))
        train_loss = 0.0
        train_correct = 0
        valid_loss = 0.0
        valid_correct = 0

        model.train()
        # `inputs` rather than `input` so the Python built-in is not shadowed.
        for inputs, target in train_loader:
            inputs, target = inputs.to(device), target.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, target)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by the batch size so the epoch total
            # is a true per-sample sum (the last batch may be smaller).
            train_loss += loss.item() * inputs.size(0)
            # .item() keeps the running count a plain Python int instead of a
            # device tensor that would later be pickled into the checkpoint.
            train_correct += (outputs.argmax(1) == target).sum().item()

        model.eval()
        print("start validation")
        with torch.no_grad():
            for inputs, target in test_loader:
                inputs, target = inputs.to(device), target.to(device)
                # TenCrop validation
                # https://pytorch.org/vision/main/generated/torchvision.transforms.TenCrop.html
                # bs, ncrops, c, h, w = inputs.size()
                # temp_output = model(inputs.view(-1, c, h, w))
                # outputs = temp_output.view(bs, ncrops, -1).mean(1)
                outputs = model(inputs)
                loss = loss_fn(outputs, target)
                valid_loss += loss.item() * inputs.size(0)
                valid_correct += (outputs.argmax(1) == target).sum().item()

        avg_train_loss = train_loss / train_data_size
        avg_train_accuracy = train_correct / train_data_size
        avg_valid_loss = valid_loss / test_data_size
        avg_valid_accuracy = valid_correct / test_data_size

        if scheduler is not None:
            scheduler.step(avg_valid_loss)

        print("epoch {}\t train loss {:.4f}\t train accuracy {:.4f}\t validation loss {:.4f}\t validation accuracy {:.4f}".format(epoch, avg_train_loss, avg_train_accuracy, avg_valid_loss, avg_valid_accuracy))
        writer.add_scalar("avg_valid_loss", avg_valid_loss, epoch)
        writer.add_scalar("avg_valid_accuracy", avg_valid_accuracy, epoch)

        if avg_valid_loss <= best_valid_loss:
            best_valid_loss = avg_valid_loss
            # The full model and loss objects are kept in the checkpoint for
            # compatibility with existing readers; "model_state" is what the
            # resume cells actually load.
            checkpoint = {
                "model": model,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "loss_fn": loss_fn,
                "epochs": epoch,
                "avg_train_loss": avg_train_loss,
                "avg_train_accuracy": avg_train_accuracy,
                "avg_valid_loss": avg_valid_loss,
                "avg_valid_accuracy": avg_valid_accuracy,
            }
            model_location = "{}_{}.pth".format(model_name, epoch)
            # torch.save does not create missing parent directories.
            checkpoint_dir = os.path.dirname(model_location)
            if checkpoint_dir:
                os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(checkpoint, model_location)
            print("checkpoint saved as {}".format(model_location))
        writer.flush()
        # Wait for queued GPU work so the timing below is accurate; calling
        # synchronize() without CUDA raises, so guard it for CPU-only runs.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        end_time = time.time()
        elapsed = end_time - start_time
        print("time elapsed {:.2f}".format(elapsed))
        


In [None]:

# Phase 1: train only a new classification head on a frozen, pretrained ResNet-34.
model = models.resnet34(progress=True, pretrained=True)

# Freeze every pretrained parameter; the head assigned afterwards is created
# fresh and is therefore trainable.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

# Replace the final fully connected layer with a 101-way classifier.
model.fc = nn.Linear(512, 101, bias=True)
model = model.to(device)

# Loss function, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Only the head's parameters are handed to the optimizer.
# optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-2)
optimizer = torch.optim.SGD(model.fc.parameters(), momentum=0.9, lr=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)

# Epochs 1..3, with a very large initial "best" loss so the first epoch always checkpoints.
training(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    last_epoch=0,
    epochs=3,
    best_valid_loss=1_000_000.,
    model_name="../models/freeze",
    writer=writer,
)


In [None]:
# Freeze continue: resume head-only training from a saved checkpoint.

# NOTE(review): the checkpoint epoch is hard-coded; the file must already
# exist under ../models from an earlier run.
model_name = "../models/freeze_{}.pth".format(4)
checkpoint = torch.load(model_name, map_location="cpu")

# Weights come from the checkpoint, so no pretrained download is needed.
model = models.resnet34(pretrained=False)

# freeze initial layers
for param in model.parameters():
    param.requires_grad = False

# The head is created after freezing, so it stays trainable.
model.fc = nn.Linear(in_features=512, out_features=101, bias=True)
model = model.to(device)
# Strict loading (the default): a checkpoint that does not match the
# architecture fails loudly instead of silently leaving layers at random init.
model.load_state_dict(checkpoint["model_state"])

# loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-2)
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
# training() takes the scheduler as its third argument; the call previously
# omitted it, which shifted every later argument and raised TypeError.
training(model, optimizer, scheduler, checkpoint["epochs"], 10, checkpoint["avg_valid_loss"], "../models/freeze", writer)

In [None]:
# Unfreeze: fine-tune the whole network starting from a head-only checkpoint.

# NOTE(review): the checkpoint epoch is hard-coded; the file must already
# exist under ../models from an earlier run.
model_name = "../models/freeze_{}.pth".format(6)
checkpoint = torch.load(model_name, map_location="cpu")

model = models.resnet34(pretrained=False)

# No layers are frozen in this phase: every parameter is trainable.
model.fc = nn.Linear(in_features=512, out_features=101, bias=True)
model = model.to(device)
# Strict loading (the default): a checkpoint that does not match the
# architecture fails loudly instead of silently leaving layers at random init.
model.load_state_dict(checkpoint["model_state"])

# loss function
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
# training() requires a scheduler as its third argument. It was commented out
# here and missing from the call, which raised TypeError; it is restored bound
# to this phase's optimizer.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
training(model, optimizer, scheduler, checkpoint["epochs"], 30, checkpoint["avg_valid_loss"], "../models/unfreeze", writer)


In [None]:
# Unfreeze continue with data augmentation: epochs 31..40 of full fine-tuning.

# Restore the weights saved at epoch 18 of the unfreeze phase.
model_name = "../models/unfreeze_{}.pth".format(18)
checkpoint = torch.load(model_name, map_location="cpu")

# Rebuild the architecture (ResNet-34 with a 101-way head), load the saved
# weights, then move the network to the training device.
model = models.resnet34(pretrained=False)
model.fc = nn.Linear(512, 101, bias=True)
model.load_state_dict(checkpoint["model_state"], strict=False)
model = model.to(device)

# Loss function, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Fresh optimizer and plateau scheduler; the optimizer state stored in the
# checkpoint is not restored.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

# The message reports the checkpoint's own epoch, while the epoch numbering
# passed to training() below restarts after the literal 30.
print("start from epoch {} avg_valid_loss {}".format(checkpoint["epochs"], checkpoint["avg_valid_loss"]))
training(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    last_epoch=30,
    epochs=40,
    best_valid_loss=checkpoint["avg_valid_loss"],
    model_name="../models/unfreeze",
    writer=writer,
)

In [None]:
# Unfreeze continue with data augmentation: epochs 41..50 of full fine-tuning.

# Restore the weights saved at epoch 40 of the unfreeze phase.
model_name = "../models/unfreeze_{}.pth".format(40)
checkpoint = torch.load(model_name, map_location="cpu")

# Rebuild the architecture (ResNet-34 with a 101-way head), load the saved
# weights, then move the network to the training device.
model = models.resnet34(pretrained=False)
model.fc = nn.Linear(512, 101, bias=True)
model.load_state_dict(checkpoint["model_state"], strict=False)
model = model.to(device)

# Loss function, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Fresh optimizer and plateau scheduler; the optimizer state stored in the
# checkpoint is not restored.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

print("start from epoch {} avg_valid_loss {}".format(checkpoint["epochs"], checkpoint["avg_valid_loss"]))
training(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    last_epoch=40,
    epochs=50,
    best_valid_loss=checkpoint["avg_valid_loss"],
    model_name="../models/unfreeze",
    writer=writer,
)

In [None]:
# Unfreeze continue with data augmentation: epochs 51..60 at a lower learning rate.

# Restore the weights saved at epoch 48 of the unfreeze phase.
model_name = "../models/unfreeze_{}.pth".format(48)
checkpoint = torch.load(model_name, map_location="cpu")

# Rebuild the architecture (ResNet-34 with a 101-way head), load the saved
# weights, then move the network to the training device.
model = models.resnet34(pretrained=False)
model.fc = nn.Linear(512, 101, bias=True)
model.load_state_dict(checkpoint["model_state"], strict=False)
model = model.to(device)

# Loss function, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Fresh optimizer (learning rate 1e-3) and plateau scheduler; the optimizer
# state stored in the checkpoint is not restored.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

# The message reports the checkpoint's own epoch, while the epoch numbering
# passed to training() below restarts after the literal 50.
print("start from epoch {} avg_valid_loss {}".format(checkpoint["epochs"], checkpoint["avg_valid_loss"]))
training(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    last_epoch=50,
    epochs=60,
    best_valid_loss=checkpoint["avg_valid_loss"],
    model_name="../models/unfreeze",
    writer=writer,
)

In [None]:
# Unfreeze continue with data augmentation: epochs 56..60 at learning rate 1e-4.

# Restore the weights saved at epoch 55 of the unfreeze phase.
model_name = "../models/unfreeze_{}.pth".format(55)
checkpoint = torch.load(model_name, map_location="cpu")

# Rebuild the architecture (ResNet-34 with a 101-way head), load the saved
# weights, then move the network to the training device.
model = models.resnet34(pretrained=False)
model.fc = nn.Linear(512, 101, bias=True)
model.load_state_dict(checkpoint["model_state"], strict=False)
model = model.to(device)

# Loss function, placed on the same device as the model.
loss_fn = nn.CrossEntropyLoss().to(device)

# Fresh optimizer and plateau scheduler (verbose, so learning-rate reductions
# are reported); the optimizer state stored in the checkpoint is not restored.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, verbose=True)

print("start from epoch {} avg_valid_loss {}".format(checkpoint["epochs"], checkpoint["avg_valid_loss"]))
training(
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    last_epoch=55,
    epochs=60,
    best_valid_loss=checkpoint["avg_valid_loss"],
    model_name="../models/unfreeze",
    writer=writer,
)

In [None]:
# All training phases are done: close the TensorBoard SummaryWriter.
writer.close()