In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Traverse the Kaggle input tree. The listing itself is disabled, so the walk
# has no visible effect; re-enable the print below to see every input file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        #print(os.path.join(dirname, filename))
        continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session



# This code is for generating predictions only

## For this code to work as intended, please make sure the following:

1. _exp_name is the same as in your training code
2. The classifier is the same as in your training code
3. Your trained model is saved as a dataset and loaded with + Add data (see the slides for details)
4. The path passed to model.load_state_dict({path}) is the correct path to the model

In [None]:
# Experiment name; the model-loading cell uses it to build the checkpoint
# file name f"{_exp_name}_best.ckpt", so it must match the training run.
_exp_name = "sample"

In [None]:
# Import necessary packages.
import numpy as np
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random
from datetime import datetime
# Timestamp (as a string) taken when this cell runs.
# NOTE(review): not referenced by any of the cells visible here — confirm it is still needed.
now = str(datetime.now())

In [None]:
myseed = 6666  # set a random seed for reproducibility
# Ask cuDNN for deterministic kernels and turn off its autotuner, which may
# otherwise select different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Seed every RNG this notebook imports: Python's `random`, NumPy and PyTorch.
# (`random` was previously imported but left unseeded.)
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

In [None]:
# Validation and testing normally need no augmentation: resizing the PIL image
# and converting it into a Tensor is all that is required.
test_tfm = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# Augmentation can nevertheless be applied at test time: train_tfm may be used
# to produce several variants of each image, which are then combined with
# ensemble methods.
train_tfm = transforms.Compose(
    [
        # Resize the image into a fixed shape (height = width = 224).
        transforms.Resize((224, 224)),
        # Additional transforms may be inserted here.
        # ToTensor() should remain the last transform.
        transforms.ToTensor(),
    ]
)



In [None]:
class FoodDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Each item is ``(transformed_image, label)``. The label is the integer
    prefix of the file name (the text before the first ``_``); when that prefix
    is not an integer, as for unlabeled test images, the label is ``-1``.

    Args:
        path: Directory scanned for ``*.jpg`` files when ``files`` is None.
        tfm: Callable applied to the opened PIL image.
        files: Optional explicit sequence of image paths. When given, it is
            used as-is and ``path`` is not scanned.
    """

    def __init__(self,path,tfm=test_tfm,files = None):
        super().__init__()
        self.path = path
        if files is not None:
            # Identity check: `!= None` misbehaves for array-like inputs.
            self.files = files
        else:
            # Sorted so that item order is stable across runs.
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )
        if len(self.files) > 0:
            print(f"One {path} sample",self.files[0])
        else:
            # Report an empty dataset instead of failing with an IndexError.
            print(f"No samples found for {path}")
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Load, transform and label the image stored at position ``idx``."""
        fname = self.files[idx]
        # The context manager releases the file handle as soon as the
        # transform has consumed the image.
        with Image.open(fname) as raw:
            im = self.transform(raw)
        try:
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            label = -1 # test has no label
        return im,label

### Loader

In [None]:
batch_size = 64
_dataset_dir = "../input/ml2022spring-hw3b/food11"
# The test images are read from the "test" sub-folder of the dataset directory.
test_set = FoodDataset(
    os.path.join(_dataset_dir, "test"),
    tfm=test_tfm,
)
# shuffle=False keeps the batches in the dataset's (sorted) file order, so the
# predictions line up with the ids written to the submission file.
test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

In [None]:
# Directories (attached Kaggle datasets) holding one trained checkpoint each.
_your_dataset_name = "../input/model18-2"
_your_dataset_name1 = "../input/model18-3"
_your_dataset_name2 = "../input/model18"
# Fail fast when any checkpoint directory was left blank. (The previous
# `assert ("...")` asserted a non-empty string and therefore could never fail.)
if not all((_your_dataset_name, _your_dataset_name1, _your_dataset_name2)):
    raise ValueError("Default name detected, please fill in the name of your dataset(model checkpoint)")
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_checkpoint_model(dataset_dir):
    """Build a ResNeXt-50 (32x4d) and load its trained weights.

    Args:
        dataset_dir: Directory containing ``{_exp_name}_best.ckpt``.

    Returns:
        The model, placed on ``device`` and switched to eval mode.
    """
    model = models.resnext50_32x4d(pretrained = False).to(device)
    # map_location remaps tensors saved on a GPU so the checkpoint also loads
    # on a CPU-only machine.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(f"{dataset_dir}/{_exp_name}_best.ckpt", map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model


# The three ensemble members share one architecture and differ only in weights.
model_best = _load_checkpoint_model(_your_dataset_name)
model_best1 = _load_checkpoint_model(_your_dataset_name1)
model_best2 = _load_checkpoint_model(_your_dataset_name2)

### Predict

#### Modify this part to do test time augmentation

In [None]:
# Ensemble inference: sum the raw outputs of the three models for every batch
# and take the arg-max class per image.
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        # Move the batch to the device once instead of once per model.
        data = data.to(device)
        test_pred = model_best(data)+model_best1(data)+model_best2(data)
        test_label = np.argmax(test_pred.cpu().numpy(), axis=1)
        # No squeeze(): for a final batch of size 1 it would collapse the array
        # to a scalar, and `prediction += <int>` raises TypeError.
        prediction += test_label.tolist()

In [None]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" characters to at least four characters."""
    return str(i).rjust(4, "0")
# Assemble the submission table: zero-padded 1-based ids in dataset order,
# paired with the predicted class of each test image.
df = pd.DataFrame(
    {
        "Id": [pad4(i) for i in range(1, len(test_set) + 1)],
        "Category": prediction,
    }
)
df.to_csv("submission.csv", index=False)