# Food Image Classifcation

### Dataset

We are going learn [food images](https://www.kaggle.com/kmader/food41) from kaggle

### Use the kaggle-cli to download the image

```kaggle datasets download -d kmader/food41```

In [1]:
import os
import torch

In [2]:
CUDA  = torch.cuda.is_available()

In [3]:
HOME = os.environ["HOME"]
DATA  = HOME+"/.kaggle/datasets/kmader/food41/"
META = DATA + "meta/meta/"
IMG = DATA+"images/"
VERSION = "0.0.4"

### Datasets

In [4]:
from torch.utils.data import DataLoader
from torch import nn
from ray.matchbox import Trainer
from torchvision.models.densenet import densenet121 as feature_extractor
from torch.nn import functional as F
from torch.optim import Adam
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms

In [5]:
SCALE = 224
transform = transforms.Compose([
    transforms.Resize((SCALE,SCALE)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
])

In [6]:
img_set = ImageFolder(IMG,transform = transform, )

#### Train /Valid Split

In [7]:
import numpy as np

In [8]:
trn_set = ImageFolder(IMG,transform = transform, )
val_set = ImageFolder(IMG,transform = transform, )

In [9]:
val_pick = np.random.rand(len(img_set.samples))>0.8
trn_pick = ~val_pick

In [10]:
trn_set.samples = np.array(img_set.samples)[trn_pick].tolist()
val_set.samples = np.array(img_set.samples)[val_pick].tolist()

In [11]:
trn_set.imgs = trn_set.samples
val_set.imgs = val_set.samples

In [12]:
len(trn_set),len(val_set)

(80755, 20245)

In [13]:
# gen = iter(DataLoader(trn_set,batch_size=2,shuffle=True))
# next(gen)

In [13]:
class Flatten(nn.Module):
    def forward(self, input):
        """
        a pytorch version of Flatten layer
        """
        return input.view(input.size(0), -1)

def argmax(x):
    """
    Arg max of a torch tensor (2 dimensional, dim=1)
    :param x:  torch tensor
    :return: index the of the max
    """
    return torch.max(x, dim=1)[1]

def accuracy(y_pred, y_true):
    """
    :param y_pred: predition of y (will be argmaxed)
    :param y_true: true label of y (index)
    :return:
    """
    return (argmax(y_pred) == y_true).float().mean()

In [14]:
def save_model(model,path):
    """
    model:pytorch model
    path:save to path, end with pkl
    """
    torch.save(model.state_dict(), path)
    
def load_model(model,path):
    model.load_state_dict(torch.load(path))

## Transfer Learning

In [16]:
conv_model = feature_extractor(pretrained=True)

  nn.init.kaiming_normal(m.weight.data)


In [18]:
conv_layers = conv_model.features

In [19]:
FEATURE_WIDTH = 1024

In [20]:
class  top_half(nn.Module):
    def __init__(self,ks = 7):
        super(top_half,self).__init__()
        self.ks = ks
        self.classifier = nn.Linear(FEATURE_WIDTH,len(img_set.classes),bias = True)
        self.flatten = Flatten()
        nn.init.constant_(self.classifier.weight, 1)
        nn.init.constant_(self.classifier.bias, 0)
        
    def forward(self,x):
        x = F.relu(x,inplace=True)
        x = F.avg_pool2d(x,kernel_size = self.ks, stride = 1 )
        x = self.flatten(x)
        x = self.classifier(x)
        return x

In [21]:
top_half_  = top_half()

if CUDA:
    top_half_.cuda()
    conv_layers.cuda()

In [25]:
def action(*args,**kwargs):
    """
    single training step, 
    take in data, spit out loss/ metric
    and 
    """
    x,y = args[0]
    y = torch.LongTensor(np.array(y).astype(int))
    if CUDA:
        x,y = x.cuda(),y.cuda()
    opt.zero_grad()
    y_ = top_half_(conv_layers(x))
    
    loss = loss_func(y_,y)
    acc = accuracy(y_,y)
    
    loss.backward()
    opt.step()
    
    if kwargs["ite"] % 10 ==9:
        save_model(conv_layers,"convlayers2.%s.npy"%(VERSION))
        save_model(top_half_,"food_top.%s.npy"%(VERSION))
    
    return {"loss":loss.item(),
            "acc":acc.item()}

def val_action(*args,**kwargs):
    x,y = args[0]
    y = torch.LongTensor(np.array(y).astype(int))
    
    if CUDA:
        x,y = x.cuda(),y.cuda()
    y_ = top_half_(conv_layers(x))
    
    loss = loss_func(y_,y)
    acc = accuracy(y_,y)
    
    return {"loss":loss.item(),
            "acc":acc.item()}

In [None]:
loss_func = nn.CrossEntropyLoss()
opt = Adam(list(top_half_.parameters()) + list(conv_layers.parameters()))

trainer = Trainer(trn_set, val_dataset = val_set, batch_size = 32, print_on = 5)

trainer.action = action
trainer.val_action = val_action

In [27]:
# load_model(dense_conv2,"food_dense_conv2.0.0.1.npy")
# load_model(top_half_,"food_top.0.0.1.npy")

In [28]:
trainer.train(2)

⭐[ep_0_i_2519]	acc	0.388✨	loss	2.457: 100%|██████████| 2524/2524 [41:57<00:00,  1.00it/s]
😎[val_ep_0_i_632]	acc	0.400😂	loss	2.394: 100%|██████████| 633/633 [05:57<00:00,  1.77it/s]
⭐[ep_1_i_2519]	acc	0.613✨	loss	1.588: 100%|██████████| 2524/2524 [42:48<00:00,  1.02s/it]
😎[val_ep_1_i_632]	acc	0.534😂	loss	1.786: 100%|██████████| 633/633 [07:19<00:00,  1.44it/s]


### Resnet

In [15]:
from torchvision.models.resnet import resnet101

In [16]:
conv_model = resnet101(pretrained = True)

In [17]:
conv_model.fc = nn.Linear(2048,len(img_set.imgs),bias = True)
nn.init.constant_(conv_model.fc.weight,1)
nn.init.constant_(conv_model.fc.bias,0)

Parameter containing:
tensor([ 0.,  0.,  0.,  ...,  0.,  0.,  0.])

In [18]:
torch.cuda.empty_cache()
if CUDA:
    conv_model.cuda()
loss_func = nn.CrossEntropyLoss()
opt = Adam(list(conv_model.layer4.parameters())+list(conv_model.fc.parameters()))

In [None]:
def action(*args,**kwargs):
    """
    single training step, 
    take in data, spit out loss/ metric
    and 
    """
    x,y = args[0]
    y = torch.LongTensor(np.array(y).astype(int))
    if CUDA:
        x,y = x.cuda(),y.cuda()
    opt.zero_grad()
    y_ = conv_model(x)
    
    loss = loss_func(y_,y)
    acc = accuracy(y_,y)
    
    loss.backward()
    opt.step()
    
    if kwargs["ite"] % 10 ==9:
        save_model(conv_model,"food_rn101.%s.npy"%(VERSION))
    
    return {"loss":loss.item(),
            "acc":acc.item()}

def val_action(*args,**kwargs):
    x,y = args[0]
    y = torch.LongTensor(np.array(y).astype(int))
    
    if CUDA:
        x,y = x.cuda(),y.cuda()
    y_ = conv_model(x)
    
    loss = loss_func(y_,y)
    acc = accuracy(y_,y)
    
    return {"loss":loss.item(),
            "acc":acc.item()}

trainer = Trainer(trn_set, val_dataset = val_set, batch_size = 16, print_on = 5)

trainer.action = action
trainer.val_action = val_action

In [None]:
trainer.train(2)

⭐[ep_0_i_5044]	acc	0.713✨	loss	1.227: 100%|██████████| 5048/5048 [1:38:33<00:00,  1.17s/it]
😎[val_ep_0_i_1265]	acc	0.651😂	loss	1.328: 100%|██████████| 1266/1266 [08:54<00:00,  2.37it/s]
⭐[ep_1_i_2919]	acc	0.738✨	loss	0.919:  58%|█████▊    | 2921/5048 [55:58<40:45,  1.15s/it]  

### Excercise

Please work on at least 2 of the following challenge

1. Optimize all the layers instead of only linear classifier
2. Optimize the last convblock(the conv block close the linear layer) and linear classifier
3. Try other image classify datasets, like [monekey image set](https://www.kaggle.com/slothkong/10-monkey-species) or [flower classifying problem](https://www.kaggle.com/alxmamaev/flowers-recognition) or [blood cell images](https://www.kaggle.com/paultimothymooney/blood-cells). You'll pretty soon find out convolutional neural network is a universal tool for this kind of problem
4. Try keras to work out a better accuaracy, keras import pretrained model by ```from keras.application import ... ```

how we break down a pytorch model to several pytorch models:

```python
dense_conv1 = nn.Sequential(*[getattr(conv_model.features,nn_name) for nn_name in ["conv0","norm0","relu0","pool0","denseblock1","transition1",
                                                                                   "denseblock2","transition2","denseblock3","transition3",]])

dense_conv2 = nn.Sequential(*[getattr(conv_model.features,nn_name) for nn_name in ["denseblock4","norm5"]])
```

