# Library

# Configs

In [2]:
# Global settings read by the cells below.
CFG = dict(
    # Use the GPU when torch can see one, otherwise run on the CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Per-backbone settings: timm model identifier and the size handed to get_transforms().
    eva=dict(
        model_name="eva02_large_patch14_448.mim_in22k_ft_in22k_in1k",
        imgsz=(448, 448),
    ),
    caformer=dict(
        model_name="caformer_b36.sail_in22k_ft_in1k_384",
        imgsz=(384, 384),
    ),
    # Seed passed to seed_everything().
    seed=42,
    # The next four entries are not read by any visible inference cell.
    num_epochs=50,
    skf_n_splits=5,
    lr=1e-5,
    early_stop_count=5,
    # DataLoader settings.
    batch_size=16,
    num_workers=8,
)

# Fix Seed

In [3]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and configure cuDNN flags.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # cuDNN: request deterministic algorithms and turn the benchmark mode off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # PyTorch generators: CPU, current CUDA device, then every CUDA device (multi-GPU).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

# Seed all RNGs once, using the seed stored in the config.
seed_everything(CFG['seed'])

# Define categories & Number of Classes

In [4]:
# Training metadata; its `label` column is used below to derive the class list.
train_df = pd.read_csv('./data/train.csv')
train_df.head()

Unnamed: 0,img_path,upscale_img_path,label
0,./train/TRAIN_00000.jpg,./upscale_train/TRAIN_00000.png,Ruddy Shelduck
1,./train/TRAIN_00001.jpg,./upscale_train/TRAIN_00001.png,Gray Wagtail
2,./train/TRAIN_00002.jpg,./upscale_train/TRAIN_00002.png,Indian Peacock
3,./train/TRAIN_00003.jpg,./upscale_train/TRAIN_00003.png,Common Kingfisher
4,./train/TRAIN_00004.jpg,./upscale_train/TRAIN_00004.png,Common Kingfisher


In [5]:
# Number of distinct labels in the training set (missing values, if any, count as one label).
CFG['num_classes'] = train_df['label'].nunique(dropna=False)

In [6]:
# Label name -> class index, numbered in order of first appearance in train.csv.
categories = {label: index for index, label in enumerate(train_df['label'].unique())}

categories

{'Ruddy Shelduck': 0,
 'Gray Wagtail': 1,
 'Indian Peacock': 2,
 'Common Kingfisher': 3,
 'Common Rosefinch': 4,
 'Jungle Babbler': 5,
 'Common Tailorbird': 6,
 'White-Breasted Waterhen': 7,
 'Sarus Crane': 8,
 'Common Myna': 9,
 'Forest Wagtail': 10,
 'Indian Roller': 11,
 'Northern Lapwing': 12,
 'Indian Grey Hornbill': 13,
 'Hoopoe': 14,
 'Indian Pitta': 15,
 'Red-Wattled Lapwing': 16,
 'Cattle Egret': 17,
 'White-Breasted Kingfisher': 18,
 'Rufous Treepie': 19,
 'White Wagtail': 20,
 'House Crow': 21,
 'Coppersmith Barbet': 22,
 'Brown-Headed Barbet': 23,
 'Asian Green Bee-Eater': 24}

# Define Custom Model

In [7]:
# Use ML-Decoder
class CAFormerCustomModel(nn.Module):
    """timm CAFormer backbone whose classifier is replaced by an ML-Decoder head.

    Args:
        model_name: timm model identifier passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous behaviour). Pass ``False`` when a fine-tuned
            checkpoint is loaded right after construction, so timm does not
            fetch weights that are about to be overwritten.
    """

    def __init__(self, model_name, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained,
                                       num_classes=CFG['num_classes']).to(CFG['device'])
        # forward() only calls forward_features(), so the timm classifier is never used.
        self.model.head = nn.Identity()

        # Take the feature width from the backbone instead of hard-coding it, so other
        # CAFormer sizes work too; 768 (the previously hard-coded value) is the fallback.
        feature_dim = getattr(self.model, 'num_features', 768)
        self.ml_decoder_head = MLDecoder(num_classes=CFG['num_classes'],
                                         decoder_embedding=768,
                                         initial_num_features=feature_dim).to(CFG['device'])

    def forward(self, x):
        """Run the backbone's ``forward_features`` and feed the result to the ML-Decoder head."""
        x = self.model.forward_features(x)
        x = self.ml_decoder_head(x)

        return x

In [8]:
# Use ML-Decoder
class EVACustomModel(nn.Module):
    """timm EVA backbone whose classifier is replaced by an ML-Decoder head.

    Args:
        model_name: timm model identifier passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous behaviour). Pass ``False`` when a fine-tuned
            checkpoint is loaded right after construction, so timm does not
            fetch weights that are about to be overwritten.
    """

    def __init__(self, model_name, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained,
                                       num_classes=CFG['num_classes']).to(CFG['device'])
        # forward() only calls forward_features(), so the timm classifier is never used.
        self.model.head = nn.Identity()

        # The decoder's input width follows the backbone's embedding size.
        self.ml_decoder_head = MLDecoder(num_classes=CFG['num_classes'],
                                         decoder_embedding=768,
                                         initial_num_features=self.model.embed_dim).to(CFG['device'])

    def forward(self, x):
        """Run the backbone's ``forward_features`` and feed the result to the ML-Decoder head."""
        x = self.model.forward_features(x)
        x = self.ml_decoder_head(x)

        return x

# Define Test Dataset

In [9]:
# Datasets
class TestDataset(Dataset):
    """Unlabelled image dataset backed by a DataFrame with an ``img_path`` column.

    Args:
        df: DataFrame whose ``img_path`` values are joined onto ``root``.
        transform: Optional callable invoked as ``transform(image=array)['image']``.
        root: Directory the ``img_path`` values are relative to. Defaults to
            ``'./data'``, the previously hard-coded location.

    Raises:
        FileNotFoundError: from ``__getitem__`` when the image cannot be read.
    """

    def __init__(self, df, transform=None, root='./data'):
        self.df = df
        self.transform = transform
        self.root = root

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at position ``idx``."""
        # Index the column first so no full row Series is built per sample.
        full_path = os.path.join(self.root, self.df['img_path'].iloc[idx])
        image = cv2.imread(full_path)
        # cv2.imread signals an unreadable file by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {full_path}')

        # NOTE(review): cv2.imread yields BGR channel order and no conversion is done
        # here. Left unchanged on the assumption that training read images the same
        # way - confirm against the training pipeline.
        if self.transform:
            image = self.transform(image=image)['image']

        return image

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

# Transforms

In [10]:
def get_transforms(imgsz):
    """Build the test-time preprocessing pipeline.

    Args:
        imgsz: Two-element sequence; its entries are passed to ``A.Resize``
            as the first and second positional arguments.

    Returns:
        An ``A.Compose`` that resizes, normalizes and converts to a tensor.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    first_dim, second_dim = imgsz[0], imgsz[1]

    steps = [
        A.Resize(first_dim, second_dim, p=1.0),
        A.Normalize(mean=channel_mean, std=channel_std, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [11]:
# TTA(Test Time Augmentation)
# Horizontal flip is the only test-time augmentation in the pipeline.
tta_transforms = tta.Compose([tta.HorizontalFlip()])

# Make DataLoader

In [12]:
test_df = pd.read_csv('./data/test.csv')

# Options shared by all three loaders; shuffling is off so batches follow the row order of test_df.
shared_loader_kwargs = dict(num_workers=CFG['num_workers'], shuffle=False)

# CAFormer: resized to the size configured under CFG['caformer'].
caformer_test_dataset = TestDataset(test_df, transform=get_transforms(CFG['caformer']['imgsz']))
caformer_test_dataloader = DataLoader(caformer_test_dataset,
                                      batch_size=CFG['batch_size'],
                                      **shared_loader_kwargs)

# EVA: resized to the size configured under CFG['eva'].
eva_test_dataset = TestDataset(test_df, transform=get_transforms(CFG['eva']['imgsz']))
eva_test_dataloader = DataLoader(eva_test_dataset,
                                 batch_size=CFG['batch_size'],
                                 **shared_loader_kwargs)

# EfficientNetV2: fixed 256x256 input and twice the configured batch size.
efficientv2_test_dataset = TestDataset(test_df, transform=get_transforms((256, 256)))
efficientv2_test_dataloader = DataLoader(efficientv2_test_dataset,
                                         batch_size=CFG['batch_size']*2,
                                         **shared_loader_kwargs)

# Map numerical predictions back to category labels

In [13]:
# Inverse of `categories`: class index -> label name.
test_label = {class_index: label_name for label_name, class_index in categories.items()}

# Test(single model)

In [14]:
# model = EVACustomModel(CFG['eva']['model_name'])
# # model = nn.DataParallel(model)
# model.load_state_dict(torch.load('./for_ensemble/eva02_large_patch14_448.mim_in22k_ft_in22k_in1k_sss_fold1_best.pth'))
# tta_model = tta.ClassificationTTAWrapper(model, tta_transforms)

In [15]:
# all_predictions = []

# tta_model.eval()
# with torch.no_grad():
#     for inputs in tqdm.tqdm(eva_test_dataloader):
#         inputs = inputs.to(CFG['device'])

#         outputs = tta_model(inputs)
#         preds = torch.argmax(outputs, dim=-1)
        
#         all_predictions.extend(preds.detach().cpu().numpy())

# Test(ensemble)

In [16]:
# Collect the checkpoint files of each model family, sorted by path.
caformer_model_list = glob.glob('./for_ensemble/caformer_b36.sail_in22k_ft_in1k_384_sss_fold*.pth')
caformer_model_list.sort()

eva_model_list = glob.glob('./for_ensemble/eva02_large_patch14_448.mim_in22k_ft_in22k_in1k_*.pth')
eva_model_list.sort()

efficientv2_model_list = glob.glob('./for_ensemble/tf_efficientnetv2_xl.in21k_ft_in1k_sss_fold*.pth')
efficientv2_model_list.sort()

# Total checkpoint count across all families; each model's output is divided by it below.
num_models = sum(len(paths) for paths in (caformer_model_list, eva_model_list, efficientv2_model_list))

# Accumulator for the averaged outputs; stays None until the first checkpoint is evaluated.
all_predictions = None

In [None]:
# CAFormer
# Evaluates every CAFormer checkpoint on the test set and adds its TTA-averaged outputs,
# divided by num_models, to all_predictions. num_models counts the checkpoints of all
# families, so the sum is a true mean only once every ensemble cell has run (the later
# argmax does not depend on that scale). Re-running this cell without resetting
# all_predictions adds the same checkpoints a second time.
for i, path in enumerate(caformer_model_list):
    print(f'----- Step {i} -----')

    model = CAFormerCustomModel(CFG['caformer']['model_name'])
    # The state dict is loaded into the DataParallel wrapper, not the bare model.
    model = nn.DataParallel(model)
    # map_location keeps loading working when CFG['device'] fell back to 'cpu'.
    model.load_state_dict(torch.load(path, map_location=CFG['device']))
    tta_model = tta.ClassificationTTAWrapper(model, tta_transforms)

    fold_preds = []

    tta_model.eval()
    with torch.no_grad():
        for inputs in tqdm.tqdm(caformer_test_dataloader):
            inputs = inputs.to(CFG['device'])

            outputs = tta_model(inputs)

            # Outputs are stored as returned by the wrapper; no softmax is applied here.
            fold_preds.extend(outputs.detach().cpu().numpy())

    weighted_preds = np.array(fold_preds) / num_models
    if all_predictions is None:
        all_predictions = weighted_preds
    else:
        all_predictions += weighted_preds

    # tta_model also references the network, so both names must go before the
    # cached GPU memory can actually be released.
    del model, tta_model
    torch.cuda.empty_cache()

In [24]:
# EVA
# Evaluates every EVA checkpoint on the test set and adds its TTA-averaged outputs,
# divided by num_models, to all_predictions. num_models counts the checkpoints of all
# families, so the sum is a true mean only once every ensemble cell has run (the later
# argmax does not depend on that scale). Re-running this cell without resetting
# all_predictions adds the same checkpoints a second time.
for i, path in enumerate(eva_model_list):
    print(f'----- fold {i} -----')

    model = EVACustomModel(CFG['eva']['model_name'])
    # The state dict is loaded into the DataParallel wrapper, not the bare model.
    model = nn.DataParallel(model)
    # map_location keeps loading working when CFG['device'] fell back to 'cpu'.
    model.load_state_dict(torch.load(path, map_location=CFG['device']))
    tta_model = tta.ClassificationTTAWrapper(model, tta_transforms)

    fold_preds = []

    tta_model.eval()
    with torch.no_grad():
        for inputs in tqdm.tqdm(eva_test_dataloader):
            inputs = inputs.to(CFG['device'])

            outputs = tta_model(inputs)

            # Outputs are stored as returned by the wrapper; no softmax is applied here.
            fold_preds.extend(outputs.detach().cpu().numpy())

    weighted_preds = np.array(fold_preds) / num_models
    if all_predictions is None:
        all_predictions = weighted_preds
    else:
        all_predictions += weighted_preds

    # tta_model also references the network, so both names must go before the
    # cached GPU memory can actually be released.
    del model, tta_model
    torch.cuda.empty_cache()

----- fold 0 -----


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 425/425 [06:30<00:00,  1.09it/s]


----- fold 1 -----


100%|██████████| 425/425 [06:27<00:00,  1.10it/s]


----- fold 2 -----


100%|██████████| 425/425 [06:27<00:00,  1.10it/s]


----- fold 3 -----


100%|██████████| 425/425 [06:27<00:00,  1.10it/s]


----- fold 4 -----


100%|██████████| 425/425 [06:27<00:00,  1.10it/s]


----- fold 5 -----


100%|██████████| 425/425 [06:27<00:00,  1.10it/s]


In [None]:
# efficientnet
# Evaluates every EfficientNetV2 checkpoint on the test set and adds its TTA-averaged
# outputs, divided by num_models, to all_predictions. num_models counts the checkpoints
# of all families, so the sum is a true mean only once every ensemble cell has run (the
# later argmax does not depend on that scale). Re-running this cell without resetting
# all_predictions adds the same checkpoints a second time.
for i, path in enumerate(efficientv2_model_list):
    print(f'----- fold {i} -----')

    model = timm.create_model('tf_efficientnetv2_xl.in21k_ft_in1k', pretrained=True, num_classes=CFG['num_classes']).to(CFG['device'])
    # The state dict is loaded into the DataParallel wrapper, not the bare model.
    model = nn.DataParallel(model)
    # map_location keeps loading working when CFG['device'] fell back to 'cpu'.
    model.load_state_dict(torch.load(path, map_location=CFG['device']))
    tta_model = tta.ClassificationTTAWrapper(model, tta_transforms)

    fold_preds = []

    tta_model.eval()
    with torch.no_grad():
        # Iterate the loader built for this family (256x256 inputs, doubled batch size),
        # not the CAFormer loader.
        for inputs in tqdm.tqdm(efficientv2_test_dataloader):
            inputs = inputs.to(CFG['device'])

            outputs = tta_model(inputs)

            # Outputs are stored as returned by the wrapper; no softmax is applied here.
            fold_preds.extend(outputs.detach().cpu().numpy())

    weighted_preds = np.array(fold_preds) / num_models
    if all_predictions is None:
        all_predictions = weighted_preds
    else:
        all_predictions += weighted_preds

    # tta_model also references the network, so both names must go before the
    # cached GPU memory can actually be released.
    del model, tta_model
    torch.cuda.empty_cache()

In [25]:
# Index of the largest accumulated output along axis 1, one value per row.
ensemble_preds = np.asarray(all_predictions).argmax(axis=1)

# Submission

In [26]:
# Translate each predicted class index into its label name.
results = [test_label[class_index] for class_index in ensemble_preds]

In [27]:
# Load the sample submission; its `label` column is overwritten with predictions below.
submission = pd.read_csv('./data/sample_submission.csv')
submission.head()

Unnamed: 0,id,label
0,TEST_00000,Indian Roller
1,TEST_00001,Indian Roller
2,TEST_00002,Indian Roller
3,TEST_00003,Indian Roller
4,TEST_00004,Indian Roller


In [28]:
# Replace the placeholder labels with the ensemble predictions and write the file.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as test.csv - confirm.
submission['label'] = results
submission.to_csv('./submission.csv', index=False)