In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the bare file name of every file found anywhere under the
# validation image directory (os.walk descends into sub-directories).
val_names = []
for _dirpath, _subdirs, files_here in os.walk('/kaggle/input/ifood-data/val_set'):
    val_names.extend(files_here)

# Number of validation files found.
count = len(val_names)
print(count)

In [None]:
# Collect the bare file name of every file found anywhere under the
# training image directory (os.walk descends into sub-directories).
train_names = []
for _dirpath, _subdirs, files_here in os.walk('/kaggle/input/ifood-data/train_set'):
    train_names.extend(files_here)

# `count` is recomputed here instead of being carried over from an earlier
# cell, so the number printed is the training-set size alone and re-running
# this cell always prints the same value.
count = len(train_names)
print(count)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import torch.nn.functional as F
import numpy as np
import os

from PIL import Image 
from torch.utils.data import Dataset

# Placeholder counters for the two labelled splits, both initialised to zero.
NUM_TRAIN, NUM_VAL = 0, 0

class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose samples also carry the image file path.

    Extends torchvision.datasets.ImageFolder: every item is whatever the
    parent class returns for that index, with the file path appended as
    one extra trailing element.
    """

    def __getitem__(self, index):
        """Return the parent's sample tuple for ``index`` plus its file path."""
        # Let the parent class build its usual sample tuple first.
        sample = super().__getitem__(index)
        # Each entry of self.imgs starts with the path of the image file.
        file_path = self.imgs[index][0]
        return (*sample, file_path)

class MyDataset(Dataset):
    """Custom dataset object for loading images and image labels.

    Extends torch.utils.data.Dataset.

    Parameters
    ----------
    image_path : str
        Location of the folder where the images are stored.
    image_names : list of str
        File names of the images inside ``image_path``; item ``i`` of the
        dataset is the image named ``image_names[i]``.
    label_path : str
        Path to the CSV holding the label of each image. It is read with the
        column names ``img_name`` and ``label`` supplied explicitly, so the
        first line of the file is treated as data, not as a header.
    transform : callable, optional
        Applied to each loaded image before it is returned.
    """

    def __init__(self,
                 image_path,
                 image_names,
                 label_path,  transform=None):

        self.image_path = image_path
        self.image_names = image_names
        self.transform = transform
        # reads a csv of all the labels
        self.labels = pd.read_csv(label_path,
                                  names=["img_name", "label"])
        # Build the name -> label mapping once. Filtering the whole frame for
        # every sample costs a full scan per image, i.e. quadratic work per
        # epoch. Only the first row for a repeated name is kept, matching a
        # "first match wins" lookup on the frame.
        first_rows = self.labels.drop_duplicates(subset="img_name", keep="first")
        self._label_by_name = dict(zip(first_rows["img_name"].to_numpy(),
                                       first_rows["label"].to_numpy()))

    def get_class_label(self, image_name):
        """Return the label recorded for ``image_name`` in the label CSV.

        Raises
        ------
        KeyError
            If ``image_name`` has no row in the label CSV. (An IndexError is
            deliberately avoided: raised from ``__getitem__`` it would be
            taken as "end of data" by a plain ``for`` loop over the dataset.)
        """
        try:
            return self._label_by_name[image_name]
        except KeyError:
            raise KeyError("no label found for image %r" % (image_name,)) from None

    def __getitem__(self, index):
        """Load image ``index`` and return ``(image, label)``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        path = self.image_path + "/" + self.image_names[index]
        # Force three channels: the Normalize steps used with this dataset are
        # given three per-channel means/stds, so grayscale, palette or RGBA
        # files would otherwise fail. The `with` block closes the file handle
        # once the pixel data has been copied by convert().
        with Image.open(path) as img:
            x = img.convert("RGB")
        y = self.get_class_label(path.split('/')[-1])

        # apply transformations
        if self.transform is not None:
            x = self.transform(x)
        return x, y

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_names)

# Image pre-processing pipelines, one per data split.
# All three apply Resize(255), CenterCrop(224), ToTensor and the same
# per-channel Normalize; only the training pipeline adds a random
# horizontal flip (placed between the crop and the tensor conversion).
def _make_transforms(random_flip=False):
    """Build the pre-processing pipeline for one split.

    random_flip: when True, insert RandomHorizontalFlip after the crop.
    """
    steps = [transforms.Resize(255), transforms.CenterCrop(224)]
    if random_flip:
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize((0.768986940, .6641706, 0.5923363),
                                      (0.18613161, 0.22524446, 0.23932885)))
    return transforms.Compose(steps)


# transformations for the training set
train_transforms = _make_transforms(random_flip=True)

# transformations for the validation set
val_transforms = _make_transforms()

# transformations for the test set
test_transforms = _make_transforms()

# Build the dataset objects for the three splits.
# `path` is the shared root: train_set, val_set and test_set are all
# assumed to be stored underneath it.
path = "/kaggle/input/ifood-data/"

# Training images: folder, list of file names, label csv, transformations.
train_data = MyDataset(
    image_path=path + "train_set/train_set",
    image_names=train_names,
    label_path="/kaggle/input/ifood-rice/train_info.csv",
    transform=train_transforms,
)

# Validation images, set up the same way with their own csv and transforms.
val_data = MyDataset(
    image_path=path + "val_set/val_set",
    image_names=val_names,
    label_path="/kaggle/input/ifood-rice/val_info.csv",
    transform=val_transforms,
)

# Test images are read through the ImageFolder subclass that also
# returns each image's file path.
test_data = ImageFolderWithPaths(path + "test_set", transform=test_transforms)

In [None]:
batch_size = 16  # edit to change batch size

# Mini-batch loaders for training and validation; both are created with
# shuffle=True.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=True,
)

# The test set is served one image per batch.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1)

In [None]:
# Shell escape: run the `nvidia-smi` command to show the GPU status of this machine.
!nvidia-smi

In [None]:
USE_GPU = True  # set this to False if you want to use CPU

# Floating-point type that input tensors are cast to.
dtype = torch.float32

# Use CUDA only when it is both requested and actually available.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Constant to control how frequently we print train loss
print_every = 11800
print('using device:', device)

In [None]:
def check_accuracy(loader, model):
    """Calculate, print and return model accuracy on a data loader.
    ----------
    loader: iterable of batches to check on, e.g. train_loader, val_loader,
        test_loader. Each batch must start with (inputs, labels); any further
        elements (such as the file paths yielded by test_loader) are ignored.
    model: a pytorch deep-learning model that maps inputs to per-class scores
    ----------
    output: prints the accuracy and returns it as a float between 0 and 1
        (0.0 when the loader yields no samples)
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode

    with torch.no_grad():
        for batch in loader:
            # Index instead of unpacking so batches that carry extra fields
            # after (inputs, labels) do not raise "too many values to unpack".
            x, y = batch[0], batch[1]
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            # identify the class with the highest score for each sample
            _, preds = scores.max(1)
            # .item() keeps the running total a plain Python int
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)

    # Guard against an empty loader, which would otherwise divide by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

def train(model, optimizer, epochs):
    """Fit the model on train_loader by mini-batch gradient descent.
    ------------
    model: a pytorch model
    optimizer: optimizer from pytorch.optim
    epochs: the number of passes to make over train_loader
    """
    model = model.to(device=device)  # parameters live on the CPU/GPU in use
    for _epoch in range(epochs):
        for step, (inputs, targets) in enumerate(train_loader):
            # Re-enter training mode on every step: the validation check
            # further down may have left the model in evaluation mode.
            model.train()
            inputs = inputs.to(device=device, dtype=dtype)
            targets = targets.to(device=device, dtype=torch.long)

            # Clear the gradients of every variable the optimizer updates,
            # then run the forward pass and compute the loss.
            optimizer.zero_grad()
            loss = F.cross_entropy(model(inputs), targets)

            # Backward pass: gradient of the loss w.r.t. each parameter.
            loss.backward()

            # Apply the parameter update from the freshly computed gradients.
            optimizer.step()

            # Light-weight progress line every 100 steps.
            if step % 100 == 0:
                print(step, loss.item())
            # Full report, including validation accuracy, every print_every steps.
            if step % print_every == 0:
                print('Iteration %d, loss = %.4f' % (step, loss.item()))
                check_accuracy(val_loader, model)
                print()

In [None]:
# load a previously trained model from path
# NOTE: the file is used as a whole pickled model object (its .fc and
# .parameters() are accessed directly below). Unpickling can execute arbitrary
# code, so only load checkpoints from a source you trust.
# NOTE(review): newer PyTorch releases default torch.load to weights_only=True,
# which refuses whole-model pickles; pass weights_only=False if this call fails.
# map_location places the stored tensors on the device selected above, so a
# checkpoint saved from a GPU session still loads in a CPU-only session.
model = torch.load("/kaggle/input/workingresnet152-trainpth/resnet152_train4.pth",
                   map_location=device)

In [None]:
# Show the loaded model's `fc` attribute
# (presumably the final classification layer of the ResNet -- confirm).
print(model.fc)

In [None]:
# SGD with Nesterov momentum over all of the model's parameters;
# the learning rate and momentum can be edited here.
optimizer = optim.SGD(
    model.parameters(),
    lr=.0005,
    momentum=.9,
    nesterov=True,
)
# Train for six epochs.
train(model, optimizer, epochs=6)

In [None]:
# Put the model on the target device and in evaluation mode before it is
# used, so this cell also works when the model has not yet been moved there.
model.to(device=device)
model.eval()

# check accuracy
check_accuracy(val_loader, model)

# load testing images and write predicted labels into a txt file
# Each output line is "<file name>,<top-1> <top-2> <top-3>".
with open('submission_thurs1.txt', 'w') as file:
    file.write("img_name,label\n")
    with torch.no_grad():
        # The loop variable is `img_paths`, not `path`, so the module-level
        # data-root string `path` is not overwritten by this cell.
        for x, _label, img_paths in test_loader:
            # test_loader uses batch_size=1, so each batch holds one path.
            # basename() works for any file-name length, unlike a fixed
            # 15-character slice of the path.
            name = os.path.basename(img_paths[0])
            x = x.to(device=device, dtype=dtype)
            scores = model(x)
            # indices of the three highest scores for the single image
            top3 = torch.topk(scores, 3)[1][0].tolist()
            file.write(name + "," + " ".join(str(label) for label in top3) + "\n")
print("done writing")

In [None]:
# Save the entire model object (not just a state_dict) to the working directory.
torch.save(model, "/kaggle/working/resnet152_10epochs.pth")