# Food Classification with Food-101 Dataset

## Part 1: Feature Extraction with pretrained AlexNet

The Food-101 dataset is designed for a classification challenge. It contains 101 food classes with 1,000 images per class: 750 images per class are designated for training and the remaining 250 for validation.

Because the full dataset may be too large for us to process using only the GPU available on Google Colab, we decided to keep only 55 of the food classes. This notebook shows the feature-extraction step, which uses the pretrained AlexNet model.

## 1.1 Exploration of the Dataset

Display the name of each food class.

In [0]:
# Read the class names, one per line, from the dataset's label file.
with open('drive/My Drive/CV Project/food-101/meta/labels.txt', 'r') as f:
    labels = f.readlines()

# Normalise every label to lowercase with underscores instead of spaces.
# Only the newline terminator is stripped (rather than blindly dropping the
# last character), so a final line without a trailing newline keeps its
# last letter.
foodClasses = [label.rstrip("\n").replace(" ", '_').lower() for label in labels]

print("Food Classes:")
print(" ")
for food in foodClasses:
    print(food)

Food Classes:
 
apple_pie
baby_back_ribs
baklava
beef_carpaccio
beef_tartare
beet_salad
beignets
bibimbap
bread_pudding
breakfast_burrito
bruschetta
caesar_salad
cannoli
caprese_salad
carrot_cake
ceviche
cheesecake
cheese_plate
chicken_curry
chicken_quesadilla
chicken_wings
chocolate_cake
chocolate_mousse
churros
clam_chowder
club_sandwich
crab_cakes
creme_brulee
croque_madame
cup_cakes
deviled_eggs
donuts
dumplings
edamame
eggs_benedict
escargots
falafel
filet_mignon
fish_and_chips
foie_gras
french_fries
french_onion_soup
french_toast
fried_calamari
fried_rice
frozen_yogurt
garlic_bread
gnocchi
greek_salad
grilled_cheese_sandwich
grilled_salmon
guacamole
gyoza
hamburger
hot_and_sour_soup


In [0]:
# Report how many entries the normalised class list holds.
print("number of food class in the dataset: ", len(foodClasses))

number of food class in the dataset:  55


## 1.2 Feature Extraction with pretrained AlexNet

In [0]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
import torchvision.models as models
import cv2


In [14]:
from google.colab import drive
# Mount Google Drive at /content/drive; the notebook reads and writes its
# data through paths under 'drive/My Drive/'.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load Pretrained AlexNet

In [8]:
# Instantiate torchvision's AlexNet with pretrained weights. As the last
# expression of the cell, the model's layer structure is displayed below.
models.alexnet(pretrained=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth
100%|██████████| 233M/233M [00:02<00:00, 83.8MB/s]


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [0]:
class Extractor(nn.Module):
    """Pretrained AlexNet whose classifier is cut down to its first six modules.

    The forward pass therefore returns the output of the sixth classifier
    module instead of the output of the full classifier.
    """

    def __init__(self):
        super().__init__()

        backbone = models.alexnet(pretrained=True)
        # Keep classifier modules 0..5 and discard everything after them.
        # NOTE(review): presumably this removes only the final classification
        # layer, leaving 4096-d features -- confirm against the full model repr.
        kept_layers = [backbone.classifier[idx] for idx in range(6)]
        backbone.classifier = nn.Sequential(*kept_layers)
        self.net = backbone

    def forward(self, images):
        """Run ``images`` through the truncated network and return its output."""
        return self.net(images)

### Load Testing and Training Data

In [0]:
# Resizing the images in the dataset

def read_img(img_path):
    """Load an image file as a (3, 224, 224) array with values divided by 255.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The image resized to 224x224, forced to three channels, divided by
        255 and transposed from height-width-channel to
        channel-height-width order.
    """
    img = cv2.resize(plt.imread(img_path), (224, 224))
    if img.ndim == 2:
        # Grayscale files decode to a 2-D array; replicate the single channel
        # so the transpose below and the 3-channel network input still work.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 4:
        # Drop the alpha channel; only the three colour channels are used.
        img = img[:, :, :3]
    # NOTE(review): torchvision's pretrained models are documented to expect
    # ImageNet mean/std normalisation; this only divides by 255 -- confirm
    # whether that is intended before changing it, since it alters features.
    img = img / 255
    img = img.transpose((2, 0, 1))
    return img
    

In [0]:
# Handling the dataset

# Read the train/test image lists and the class labels (one entry per line).
with open("drive/My Drive/CV Project/food-101/meta/train.txt", "r") as f:
    training_list = f.readlines()
with open("drive/My Drive/CV Project/food-101/meta/test.txt", "r") as f:
    test_list = f.readlines()

with open('drive/My Drive/CV Project/food-101/meta/labels.txt', 'r') as f:
    labels = f.readlines()

# Turn every list entry into an image path by prefixing the images directory
# and appending ".jpg". Only the newline terminator is stripped, so a final
# line without a trailing newline is not truncated, and blank lines are
# skipped because they cannot name an image.
training_list = [
    "drive/My Drive/CV Project/food-101/images/" + path.rstrip("\n") + ".jpg"
    for path in training_list
    if path.strip()
]
test_list = [
    "drive/My Drive/CV Project/food-101/images/" + path.rstrip("\n") + ".jpg"
    for path in test_list
    if path.strip()
]
# Normalise labels to lowercase with underscores; a label's position in this
# list is later used as its integer class id.
labels = [label.rstrip("\n").replace(" ", '_').lower() for label in labels]

### Data Preprocessing with AlexNet

In [9]:
# Build the feature extractor and move its parameters to the GPU.
net = Extractor()
net.cuda()
# Switch to evaluation mode so the Dropout layers are disabled; otherwise
# every extracted feature vector would be randomly masked. eval() returns the
# module itself, so the cell still displays the network structure.
net.eval()

Extractor(
  (net): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5, inplace=False)
      (1)

#### Testing Data

In [0]:
import pickle

# Extract one feature array per test image and pair it with its class id.
test_data = []
counter = 0  # not used in this cell; kept in case other cells read it

net.eval()  # disable Dropout so the extracted features are deterministic
with torch.no_grad():  # inference only: skip building the autograd graph
    for img_path in test_list:
        img = read_img(img_path)
        x = torch.FloatTensor(img).view(1, 3, 224, 224).cuda()
        feature = net(x)
        # The class name is the parent-directory component of the image path;
        # its position in `labels` is the integer class id.
        label_id = labels.index(img_path.split("/")[-2])
        test_data.append((label_id, feature.cpu().numpy()))

In [0]:
 # Persist the extracted test features as a pickle file on Google Drive.
 # NOTE(review): this cell carries a one-space leading indent; presumably the
 # notebook tolerates it, but a plain Python script would not -- confirm.
 with open("drive/My Drive/CV Project/test_data_alexnet.pkl", "wb") as f:
     pickle.dump(test_data, f)

In [0]:
# Number of (label_id, feature) entries collected for the test set.
len(test_data)

13750

#### Training Data

In [0]:
import pickle

# Extract one feature array per training image and pair it with its class id.
training_data = []
counter = 0  # not used in this cell; kept in case other cells read it

net.eval()  # disable Dropout so the extracted features are deterministic
with torch.no_grad():  # inference only: skip building the autograd graph
    for img_path in training_list:
        img = read_img(img_path)
        x = torch.FloatTensor(img).view(1, 3, 224, 224).cuda()
        feature = net(x)
        # The class name is the parent-directory component of the image path;
        # its position in `labels` is the integer class id.
        label_id = labels.index(img_path.split("/")[-2])
        training_data.append((label_id, feature.cpu().numpy()))

In [0]:
# Number of (label_id, feature) entries collected for the training set.
len(training_data)

41248

In [0]:
 # Persist the extracted training features as a pickle file on Google Drive.
 # NOTE(review): this cell carries a one-space leading indent; presumably the
 # notebook tolerates it, but a plain Python script would not -- confirm.
 with open("drive/My Drive/CV Project/train_data_alexnet.pkl", "wb") as f:
     pickle.dump(training_data, f)

In [31]:
# NOTE(review): `testing_data2` is not defined anywhere in the visible
# notebook, so this cell raises NameError on a fresh top-to-bottom run.
# It looks like a leftover debugging cell -- confirm and remove or define it.
print (len(testing_data2))

9
