### First step, we load the data

In [1]:
import numpy as np
import pandas as pd

import os

from pathlib import Path

# Root folder of the extracted Food-101 archive, kept both as a plain
# string and as a Path object.
input_root_dir = "/home/jupyter/cs762/data/food-101/food-101"
input_root_path = Path(input_root_dir)
# Quick sanity check: list what is inside the dataset root.
print(os.listdir(input_root_dir))
image_dir_path = input_root_path.joinpath('images')

# Metadata files (class names and the train/test image lists), as strings.
class_path = f"{input_root_dir}/meta/classes.txt"
train_img_name_path = f"{input_root_dir}/meta/train.txt"
test_img_name_path = f"{input_root_dir}/meta/test.txt"

['README.txt', 'license_agreement.txt', 'images', 'meta']


In [2]:
def file2list(path):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        path: path of the text file to read.

    Returns:
        A list with one stripped string per line of the file (blank lines
        become empty strings).
    """
    # The context manager closes the handle even if reading fails; the
    # previous version left the file open.
    with open(path, 'r') as file1:
        return [line.strip() for line in file1]

In [3]:
from sklearn import preprocessing

# Read the class names and the train/test image lists from the meta files.
classes, train_data, test_data = (
    file2list(meta_file)
    for meta_file in (class_path, train_img_name_path, test_img_name_path)
)
# Fit the label encoder on the class names, then encode those same names.
le = preprocessing.LabelEncoder().fit(classes)
targets = le.transform(classes)

In [4]:
import cv2

from torch.utils.data import Dataset

class FoodData(Dataset):
    """Dataset that reads images from disk on access and pairs them with an encoded label.

    Args:
        img_path: sequence of "<label>/<image_name>" strings (no file extension).
        img_dir: dataset root; images are read from
            <img_dir>/images/<label>/<image_name>.jpg.
        size: side length in pixels; every image is resized to (size, size).
        transform: optional callable applied to the resized RGB image.
    """

    def __init__(self,img_path,img_dir,size,transform=None):
        self.img_path = img_path
        self.img_dir = img_dir
        self.transform = transform
        self.size = size

    def __len__(self):
        """Return the number of entries in ``img_path``."""
        return len(self.img_path)

    def __getitem__(self,index):
        """Load one sample.

        Returns:
            A tuple ``(img, label)`` where ``img`` is the resized RGB image
            (after ``transform`` if one was given) and ``label`` is a tensor
            holding the integer produced by the module-level encoder ``le``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        label,img_name = self.img_path[index].split('/')
        # os.path.join also accepts a pathlib.Path for img_dir.
        path = os.path.join(self.img_dir, 'images', label, img_name+'.jpg')
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(self.size,self.size))
        if self.transform is not None:
            img = self.transform(img)
        # `le` is the LabelEncoder defined at notebook level.
        return img, torch.tensor(le.transform([label])[0])

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Training images are resized to 256x256; no extra transform is applied.
train_dataset = FoodData(
    img_path=train_data,
    img_dir=input_root_dir,
    size=256,
    transform=None,
)

In [6]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

batch = 64
valid_size = 0.2
# Fixed seed so the train/validation partition is identical on every run;
# an unseeded shuffle makes accuracy numbers incomparable between runs.
split_seed = 0
num = len(train_data)
# Dividing the indices for train and cross validation
indices = np.random.default_rng(split_seed).permutation(num).tolist()
split = int(np.floor(valid_size*num))
# The first `split` shuffled indices form the validation set, the rest train.
train_idx,valid_idx = indices[split:], indices[:split]

#Create Samplers
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Both loaders read from the same dataset; the samplers keep them disjoint.
train_loader = DataLoader(train_dataset, batch_size = batch, sampler = train_sampler)
valid_loader = DataLoader(train_dataset, batch_size = batch, sampler = valid_sampler)

In [7]:
# Build the test dataset from the file list on disk rather than from
# `test_data`: this cell rebinds `test_data` to the dataset, so re-running it
# would otherwise wrap a FoodData inside another FoodData.
test_dataset = FoodData(file2list(test_img_name_path),input_root_dir,256,transform=None)
# Kept so later cells that expect `test_data` to be the dataset still work.
test_data = test_dataset
test_loader = DataLoader(test_dataset, batch_size=batch, shuffle=False)

# Attempt to train ResNet

### Define and train a big, teacher neural net

First we import the PyTorch libraries, then create the neural network.

In [8]:
import torch
import torch.nn as nn
from torch.optim import Adam
from models import *
from plot_funcs import *

In [9]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
# Display the selected device as the cell output.
device

device(type='cuda', index=0)

In [11]:
# Create our teacher model
import torchvision

# Size the classifier head to the dataset's classes; resnet50() would
# otherwise be built with its default of 1000 outputs. Weights are randomly
# initialised (no pretrained weights are requested).
big_model = torchvision.models.resnet50(num_classes=len(classes)).to(device)

In [None]:
# Train the teacher model

from tqdm import tqdm

# Loss function
loss_fn = nn.CrossEntropyLoss()
# Create optimizer
lr = 5e-3
optimizer = Adam(big_model.parameters(), lr=lr)
epochs = 2
train_loss = []   # one entry per optimisation step
train_acc = []    # one entry per periodic evaluation
val_acc = []
it = 0            # global iteration counter across epochs
title = "progress"
for epoch in range(epochs):
    it_per_epoch = 0
    # Make sure the teacher is in training mode at the start of every epoch.
    big_model.train()
    for features, labels in tqdm(train_loader):
        # Batches arrive channels-last; move channels to dim 1 for the CNN.
        # NOTE(review): permute(0, 3, 2, 1) also swaps the two spatial axes;
        # permute(0, 3, 1, 2) would keep the orientation. Left unchanged so it
        # stays consistent with whatever evaluate() does -- confirm.
        features = features.permute(0, 3, 2, 1).float().to(device)
        labels = labels.to(device)
        scores = big_model(features)
        loss = loss_fn(scores, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        # Periodic book-keeping. The previous condition also required
        # len(train_acc) > 1, which could never hold because train_acc is only
        # appended inside this branch, so no evaluation ever ran.
        if it % 100 == 0 and it != 0:
            train_acc.append(evaluate(big_model, train_loader, max_ex=100))
            # The validation loader defined in this notebook is `valid_loader`.
            val_acc.append(evaluate(big_model, valid_loader))
            # Only plot once there is more than one accuracy point to draw.
            if len(train_acc) > 1:
                plot_loss(train_loss, it, it_per_epoch, base_name="loss_"+title, title=title)
                plot_acc(train_acc, val_acc, it, it_per_epoch, base_name="acc_"+title, title=title)
            # evaluate() may have switched the model to eval mode; restore.
            big_model.train()
        it += 1
        it_per_epoch += 1
#perform last book keeping
train_acc.append(evaluate(big_model, train_loader, max_ex=100))
val_acc.append(evaluate(big_model, valid_loader))

 19%|█▉        | 182/947 [03:27<14:32,  1.14s/it]

In [None]:
# Final accuracy of the teacher on each split.
train_acc = evaluate(big_model, train_loader)
print("\nTrain accuracy: %.2f%%" % train_acc)
# The validation loader defined in this notebook is `valid_loader`
# (`val_loader` is never created here).
val_acc = evaluate(big_model, valid_loader)
print("Validation accuracy: %.2f%%" % val_acc)
test_acc = evaluate(big_model, test_loader)
print("Test accuracy: %.2f%%" % test_acc)

### Define and train a small, student neural net

In [None]:
class small_linear_net(nn.Module):
    """Small fully connected student network: Linear -> ReLU -> Linear.

    Args:
        in_features: size of the last dimension of the input (default 784).
        hidden_features: width of the hidden layer (default 50).
        num_classes: number of output scores (default 10).

    The defaults reproduce the original fixed 784-50-10 architecture, so
    existing checkpoints and no-argument calls keep working.
    """

    def __init__(self, in_features=784, hidden_features=50, num_classes=10):
        super().__init__()
        self.linear_1 = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.linear_2 = nn.Linear(hidden_features, num_classes)

    def forward(self, input):
        """Return the class scores (logits) for ``input``.

        ``input`` must have ``in_features`` as its last dimension; no
        flattening is done here.
        """
        scores = self.linear_1(input)
        scores = self.relu(scores)
        scores = self.linear_2(scores)
        return scores

In [None]:
# Create our student model
small_model = small_linear_net()
# Move the freshly initialised student onto the selected device.
small_model = small_model.to(device)

In [None]:
# Restore previously trained student weights from disk.
# Optional step: only needed to check the accuracy of the student that was
# trained directly on the original MNIST data.
# torch.load deserialises the file, so only open checkpoints you trust.
load_path = "small_linear_model/"
checkpoint = torch.load(f"{load_path}modelo", map_location="cpu")
small_model.load_state_dict(checkpoint["model_state_dict"])
# Switch to inference mode (also shows the model as the cell output).
small_model.eval()

In [None]:
# Accuracy of the student on each split.
train_acc = evaluate(small_model, train_loader)
print("\nTrain accuracy: %.2f%%" % train_acc)
# The validation loader defined in this notebook is `valid_loader`
# (`val_loader` is never created here).
val_acc = evaluate(small_model, valid_loader)
print("Validation accuracy: %.2f%%" % val_acc)
test_acc = evaluate(small_model, test_loader)
print("Test accuracy: %.2f%%" % test_acc)

### Distillation training

In [None]:
# Set output directory and create if needed
import os
output_dir = "small_linear_model_distill1/"
# exist_ok=True makes this a single idempotent call, with no gap between
# checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

In [None]:
### Define our custom loss function
softmax_op = nn.Softmax(dim=1)
mseloss_fn = nn.MSELoss()


def my_loss(scores, targets, T=5):
    """Distillation loss: MSE between temperature-softened distributions.

    Both ``scores`` (student logits) and ``targets`` (teacher logits) are
    divided by the temperature ``T`` and passed through a softmax over
    dim 1; the mean squared error between the two results is returned.
    """
    student_probs, teacher_probs = (
        softmax_op(logits / T) for logits in (scores, targets)
    )
    return mseloss_fn(student_probs, teacher_probs)

In [None]:
# Create a new student model to start training from zero
small_model = small_linear_net()
# Move the untrained student onto the selected device.
small_model = small_model.to(device)
from tqdm import tqdm
%matplotlib inline

# Hyperparameters
lr = 5e-3
epochs = 5
temp = 5

# Create optimizer
optimizer = Adam(small_model.parameters(), lr=lr)
val_acc = []
train_acc = []
train_loss = [0]  # loss at iteration 0
# Start counting from zero instead of inheriting `it` from the teacher cell,
# and define `it_per_epoch` here rather than relying on a leftover value.
it = 0
it_per_epoch = len(train_loader)
# The teacher only provides targets: keep it in inference mode.
big_model.eval()
for epoch in range(epochs):
    small_model.train()
    for features, labels in tqdm(train_loader):
        # Same preprocessing the teacher was trained with in this notebook
        # (channels moved to dim 1, float, on the model's device).
        features = features.permute(0, 3, 2, 1).float().to(device)
        # No gradients are needed through the teacher; this also avoids
        # building an autograd graph for it on every step.
        with torch.no_grad():
            targets = big_model(features)
        # NOTE(review): small_linear_net as defined in this notebook takes
        # 784 features and emits 10 scores; its input/output sizes must match
        # these batches and the teacher's output for this step to run.
        scores = small_model(features)
        loss = my_loss(scores, targets, T = temp)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Book-keeping
        train_loss.append(loss.item())
        if it % 100 == 0:
            train_acc.append(evaluate(small_model, train_loader, max_ex=100))
            # The validation loader defined in this notebook is `valid_loader`.
            val_acc.append(evaluate(small_model, valid_loader))
            # evaluate() may have switched the model to eval mode; restore.
            small_model.train()
        it += 1
#perform last book-keeping
train_acc.append(evaluate(small_model, train_loader, max_ex=100))
val_acc.append(evaluate(small_model, valid_loader))
plot_acc(train_acc, val_acc, it, it_per_epoch)

In [None]:
# Final accuracy of the distilled student on each split.
train_acc = evaluate(small_model, train_loader)
print("\nTrain accuracy: %.2f%%" % train_acc)
# The validation loader defined in this notebook is `valid_loader`
# (`val_loader` is never created here).
val_acc = evaluate(small_model, valid_loader)
print("Validation accuracy: %.2f%%" % val_acc)
test_acc = evaluate(small_model, test_loader)
print("Test accuracy: %.2f%%" % test_acc)