In [None]:
import os
import time

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Run on the GPU when PyTorch can see a CUDA device, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', DEVICE)

In [None]:
# When the working directory is Kaggle's /kaggle/working, read the competition
# data from the Kaggle input mount; anywhere else, expect a local ./input folder.
INPUT_DIR = (
    '/kaggle/input/plant-pathology-2020-fgvc7'
    if os.getcwd() == '/kaggle/working'
    else './input'
)

# Image files live in the "images" sub-directory of the input directory.
IMAGE_DIR = os.path.join(INPUT_DIR, 'images')

In [None]:
# Locate and load the train/test tables from the input directory.
train_csv = os.path.join(INPUT_DIR, 'train.csv')
test_csv = os.path.join(INPUT_DIR, 'test.csv')
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))

# Every training column after the first one is taken as a class name.
CLASSES = train_df.columns[1:].tolist()

print('Classes:', CLASSES)

In [None]:
def read_image_as_numpy(name):
    """Load ``<IMAGE_DIR>/<name>.jpg`` and return its pixels as a NumPy array.

    Args:
        name: Image file stem (without the ``.jpg`` extension).

    Returns:
        ``np.ndarray`` built from the decoded PIL image.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file is missing or cannot
        be identified as an image.
    """
    path = os.path.join(IMAGE_DIR, name + '.jpg')
    # The context manager closes the file even if decoding fails part-way;
    # np.array() forces the pixel data to be read before the file is closed.
    with Image.open(path) as img:
        return np.array(img)

In [None]:
class ImageDataset(torch.utils.data.Dataset):
    """Map-style dataset over the images listed in a DataFrame.

    The first column of ``df`` holds the image name (the file stem passed to
    ``read_image_as_numpy``). Any further columns are treated as 0/1 class
    indicator columns and are collapsed into one integer label per row.

    Two albumentations pipelines are built:

    * ``transform_numpy`` -- crop/resize (+ augmentation when ``tflag``),
      used by :meth:`get` to obtain an image suitable for display.
    * ``transform_torch`` -- the same steps followed by normalization and
      conversion to a tensor, used by :meth:`__getitem__`.

    Args:
        df: DataFrame whose first column is the image name.
        size: Optional ``(height, width)`` of the output image. When given,
            the image is first cropped to ``HCROP x WCROP`` and then resized
            if ``size`` differs from the crop size. ``None`` disables both.
        tflag: Training flag. When true the crop is random (instead of
            centered) and flip/brightness/contrast/gamma augmentations are
            appended.
    """

    # Presumably the native landscape size of the source images; these two
    # constants are not referenced by the methods of this class.
    H = 1365
    W = 2048
    # Size of the crop window applied before the optional resize.
    HCROP = 1344
    WCROP = 2016

    def __init__(self, df, size=None, tflag=False):
        super().__init__()
        self.df = df
        self.size = size
        self.tflag = tflag
        self.labels = self._encode_labels(df)

        steps = []
        if size is not None:
            h, w = size
            assert h <= self.HCROP and w <= self.WCROP
            crop = A.RandomCrop if tflag else A.CenterCrop
            steps.append(crop(self.HCROP, self.WCROP))
            if h != self.HCROP or w != self.WCROP:
                steps.append(A.Resize(h, w))
        if tflag:
            steps.extend([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                # RandomBrightness / RandomContrast are deprecated aliases of
                # RandomBrightnessContrast with the other limit fixed to 0.
                A.RandomBrightnessContrast(contrast_limit=0, p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0, p=0.5),
                A.RandomGamma(p=0.5),
            ])

        # Each Compose receives its own list: if Compose keeps a reference to
        # the list it is given, extending a shared list afterwards would also
        # add Normalize/ToTensorV2 to the "numpy" pipeline.
        self.transform_numpy = A.Compose(list(steps))
        self.transform_torch = A.Compose(steps + [
            # Per-channel statistics commonly used with ImageNet-pretrained
            # torchvision models.
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

    @staticmethod
    def _encode_labels(df):
        """Collapse the indicator columns of ``df`` into integer class ids.

        Column ``k`` (counting from the first column after the name column)
        contributes weight ``k``; for one-hot rows the weighted sum is the
        index of the active class.

        Returns:
            1-D array with one label per row, or ``None`` when ``df`` has no
            columns besides the name column (e.g. an unlabeled test set).
        """
        classes = df.columns[1:]
        if len(classes) == 0:
            return None
        indicators = df[classes].values
        return (indicators * np.arange(len(classes))).sum(axis=1)

    def __len__(self):
        """Number of rows (images) in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the transformed tensor, paired with its label when available."""
        image = self.get_oriented_image(idx)
        image = self.transform_torch(image=image)['image']
        if self.labels is None:
            return image
        return image, self.labels[idx]

    def get_name(self, idx):
        """Return the image name stored in the first column of row ``idx``."""
        return self.df.iloc[idx, 0]

    def get_oriented_image(self, idx):
        """Load image ``idx`` as an array, made landscape if it is portrait."""
        image = read_image_as_numpy(self.get_name(idx))
        # Portrait image (width < height): swap the two spatial axes so that
        # every image has its longer side along axis 1.
        if image.shape[1] < image.shape[0]:
            image = image.transpose(1, 0, 2)
        return image

    def get(self, idx):
        """Return image ``idx`` after the crop/resize/augmentation pipeline
        only, i.e. without normalization or tensor conversion."""
        image = self.get_oriented_image(idx)
        return self.transform_numpy(image=image)['image']

In [None]:
def fetch_from_batch(batch):
    """Split a DataLoader batch into ``(inputs, targets)``.

    A bare tensor is an unlabeled batch and yields ``(batch, None)``; anything
    else is unpacked as a two-element ``(inputs, targets)`` pair.
    """
    if not isinstance(batch, torch.Tensor):
        inputs, targets = batch
        return inputs, targets
    return batch, None

In [None]:
def make_features(model, nf, ds, batch_size=4):
    """Run ``model`` over every item of ``ds`` and collect its outputs.

    The model is switched to eval mode and moved to ``DEVICE`` (both changes
    persist after the call); inference runs without gradient tracking and in
    dataset order.

    Args:
        model: Module mapping an image batch to a ``(batch, nf)`` output.
        nf: Number of features the model emits per image.
        ds: Dataset yielding either image tensors or ``(image, label)`` pairs.
        batch_size: Images per forward pass (defaults to the original 4).

    Returns:
        ``np.ndarray`` of shape ``(len(ds), nf)`` with NumPy's default float
        dtype, row ``i`` holding the model output for item ``i``.
    """
    dl = torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=False)
    model.eval()
    model.to(DEVICE)
    features = np.empty([len(ds), nf])
    beg = 0
    with torch.no_grad():
        for batch in dl:
            # Labels, if any, are irrelevant for feature extraction.
            x, _ = fetch_from_batch(batch)
            batch_features = model(x.to(DEVICE))
            end = beg + len(batch_features)
            # Convert explicitly rather than relying on NumPy coercing a tensor.
            features[beg:end] = batch_features.cpu().numpy()
            beg = end
    return features

In [None]:
# Use a pretrained ResNet-18 as a fixed feature extractor: remember the width
# of the input to its final fully connected layer (512 for ResNet-18), then
# replace that layer with an identity so the network outputs those features.
rn18 = tv.models.resnet18(pretrained=True)
n_features = rn18.fc.in_features
rn18.fc = nn.Identity()

# (height, width) resolutions at which features are extracted.
sizes = [
    (224, 224),
    (224, 336),
    (336, 504),
    (448, 672),
    (672, 1008),
]

for size in sizes:
    # One output directory per resolution, e.g. "rn18-224-336".
    dirname = 'rn18-%d-%d' % size
    # Portable replacement for shelling out to `mkdir -p`; raises on failure
    # instead of silently continuing.
    os.makedirs(dirname, exist_ok=True)

    # Same deterministic (non-augmented) pipeline for both splits.
    for split_df, filename in ((train_df, 'train.txt'), (test_df, 'test.txt')):
        ds = ImageDataset(split_df, size, False)
        features = make_features(rn18, n_features, ds)
        path = os.path.join(dirname, filename)
        np.savetxt(path, features, fmt='%.9e')