In [1]:
%%bash
pip install transformers -q



In [2]:
import os
import cv2
import torch
import pickle
import numpy as np
import pandas as pd

from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from sklearn.preprocessing import LabelEncoder
from transformers import AutoFeatureExtractor, SwinForImageClassification, SwinModel, SwinConfig

In [3]:
def get_image(path: str, size: tuple = (384, 384)) -> np.ndarray:
    """Read an image from disk, convert it to RGB and resize it.

    Args:
        path: Location of the image file to read.
        size: Target size handed to ``cv2.resize`` as ``dsize``
            (``(width, height)``). Defaults to ``(384, 384)``, the value that
            was previously hard-coded.

    Returns:
        The image in RGB channel order, resized to ``size``.

    Raises:
        OSError: If OpenCV could not read or decode the file.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    if image is None:
        raise OSError(f"cv2.imread could not read or decode image: {path!r}")
    image = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB)
    return cv2.resize(src=image, dsize=size, interpolation=cv2.INTER_AREA)

# Encoder used to turn cultivar names into integer class ids.
le = LabelEncoder()

# Swin checkpoint names; each is prefixed with "microsoft/" when loaded.
NAMES = [
    "swin-tiny-patch4-window7-224",
    "swin-small-patch4-window7-224",
    "swin-base-patch4-window7-224",
    "swin-base-patch4-window12-384",
]

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Image array -> tensor, then per-channel normalisation.
# NOTE(review): these values match the widely used ImageNet mean/std - confirm
# they are the statistics the chosen checkpoint expects.
TRANSFORM = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [4]:
class DS(Dataset):
    """Dataset that loads images by filename and applies a transform to each.

    Args:
        base_path: Directory that is joined with every filename.
        filenames: Image filenames; a NumPy array or any other sized,
            indexable sequence (e.g. a list).
        transform: Callable applied to the array returned by ``get_image``.
    """

    def __init__(self, base_path: str, filenames: np.ndarray, transform):
        self.base_path = base_path
        self.filenames = filenames
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of filenames in the dataset."""
        # len() works for NumPy arrays and plain sequences alike, whereas
        # ``.shape[0]`` only exists on arrays.
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return its transformed form."""
        image_path = os.path.join(self.base_path, self.filenames[idx])
        return self.transform(get_image(image_path))

In [5]:
train_df = pd.read_csv("../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv")

# List the image directory once and keep it as a set: the previous version
# re-ran os.listdir for every CSV row and searched the resulting list linearly.
available_images = set(os.listdir("../input/sorghum-id-fgvc-9/train_images"))

# Rows whose image file is not on disk are dropped in a single masked selection.
image_is_present = train_df.image.isin(available_images)
broken_images = train_df.image[~image_is_present].tolist()
train_df = train_df.loc[image_is_present].copy()

filenames = train_df.image.copy().values
labels    = train_df.cultivar.copy().values

# Encode cultivar names as integer class ids and persist them.
labels = le.fit_transform(labels)
np.save("labels.npy", labels)

# Placeholder kept for compatibility; the feature-extraction cell reassigns
# `features` before it is read.
features = np.zeros((len(filenames), 1024))

In [6]:
# `pretrained` is not used in this cell; it is kept because it is a
# module-level name that other cells may rely on.
pretrained = AutoFeatureExtractor.from_pretrained("microsoft/" + NAMES[3])
model = SwinModel.from_pretrained("microsoft/" + NAMES[3]).to(DEVICE)
model.eval()  # inference only; makes the evaluation mode explicit

dataloader_setup = DS("../input/sorghum-id-fgvc-9/train_images", filenames, TRANSFORM)
# shuffle=False keeps the feature rows aligned with `filenames` / `labels`.
dataloader = DL(dataloader_setup, batch_size=64, shuffle=False)

# Collect per-batch results on the CPU and concatenate once at the end,
# instead of growing a GPU tensor with torch.cat on every iteration.
feature_chunks = []
with torch.no_grad():
    for X in dataloader:
        output = model(X.to(DEVICE))
        # Swin has no CLS token, so last_hidden_state[:, 0, :] is only the
        # first patch token. The image-level embedding is pooler_output
        # (the pooled final hidden states), with the same feature width.
        feature_chunks.append(output.pooler_output.cpu())

if feature_chunks:
    features = torch.cat(feature_chunks, dim=0).numpy()
else:
    # Empty dataset: keep the (0, 1024) result the original loop produced.
    features = np.zeros((0, 1024), dtype=np.float32)

np.save("features.npy", features)

Downloading:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/339M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/swin-base-patch4-window12-384 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
