スコア : base で 10epc 1fold 0.03768 tiny で 10epc 5fold 0.03643  
派生 : mixup  
追加 : convnext
  

In [13]:
import os
if 'KAGGLE_URL_BASE' in set(os.environ.keys()) :
    # os.environ.pop('TPU_PROCESS_ADDRESSES')
    !pip install lightning

In [14]:
import os
import gc
import re
import sys
import time
import copy
import random 
import glob 
import zipfile
import shutil

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

import lightning.pytorch as pl  
from lightning.pytorch import seed_everything
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2   
from PIL import Image

import transformers

from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [15]:
class CFG :
    debug_one_epoch = False
    debug_one_fold = False
    only_infer = False
    num_workers = 16
    batch_size = 64
    num_epochs = 10
    lr = 1e-4
    early_stopping_round = 5
    warmup_prop = 0.1
    random_seed = 42
    n_splits = 5
    model_name = "convnext_tiny.in12k_ft_in1k" # timm で使うモデル名  convnext_small.fb_in22k
    pretrained_path = None
    train_dir = None # 学習データセットのパス
    test_dir = None # テストデータセットのパス
    optimizer = torch.optim.AdamW
    criterion = nn.BCEWithLogitsLoss()
    scheduler = transformers.get_linear_schedule_with_warmup
    input_imgsize = 224
    upscale_imsize = 256
    mixup_alpha = 1.0
    mixup_finish_epoch = 5
    data_dir = "../input/dogs-vs-cats-redux-kernels-edition/"
    kaggle_working_dir = "/kaggle/working/"
  
def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_torch(CFG.random_seed)

if  CFG.debug_one_epoch :
    CFG.num_epochs = 1

print('KAGGLE_URL_BASE' in set(os.environ.keys()))


False


In [16]:
submission = pd.read_csv(os.path.join(CFG.data_dir, "sample_submission.csv"))
if 'KAGGLE_URL_BASE' in set(os.environ.keys()) :
    kaggle_train_dir = os.path.join(CFG.kaggle_working_dir, "train")
    # すでに解凍されている場合は解凍しない
    if not os.path.exists(kaggle_train_dir) :
        shutil.unpack_archive(os.path.join(CFG.data_dir, "train.zip"), CFG.kaggle_working_dir)
    
    kaggle_test_dir = os.path.join(CFG.kaggle_working_dir, "test")
    if not os.path.exists(kaggle_test_dir) :
        shutil.unpack_archive(os.path.join(CFG.data_dir, "test.zip"), CFG.kaggle_working_dir)
        
    CFG.data_dir = CFG.kaggle_working_dir
    
CFG.train_dir = os.path.join(CFG.data_dir, "train")
CFG.test_dir = os.path.join(CFG.data_dir, "test")

train_list = glob.glob(os.path.join(CFG.data_dir, "train", "*.jpg"))
test_list = glob.glob(os.path.join(CFG.data_dir, "test", "*.jpg"))

In [17]:
train_df = pd.DataFrame(train_list, columns=["path"])
train_df["class"] = train_df["path"].apply(lambda x : x.split("/")[-1].split(".")[0])
train_df["class"] = train_df["class"].map({"dog" : 1, "cat" : 0})
test_df = pd.DataFrame(test_list, columns=["path"])
test_df["class"] = -1
# test_df に対しては path の数字が昇順であることを保証するために id を追加
test_df["id"] = test_df["path"].apply(lambda x : int(x.split("/")[-1].split(".")[0]))
test_df = test_df.sort_values("id").reset_index(drop=True)

In [18]:
train_transform = A.Compose([
    A.Resize(CFG.upscale_imsize, CFG.upscale_imsize),
    A.RandomCrop(CFG.input_imgsize, CFG.input_imgsize),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.Normalize(),
    ToTensorV2()
])
test_transform = A.Compose([
    A.Resize(CFG.upscale_imsize, CFG.upscale_imsize),
    A.CenterCrop(CFG.input_imgsize, CFG.input_imgsize),
    A.Normalize(),
    ToTensorV2()
])
tta_a_transform = A.Compose([
    A.Resize(CFG.upscale_imsize, CFG.upscale_imsize),
    A.CenterCrop(CFG.input_imgsize, CFG.input_imgsize),
    A.HorizontalFlip(p=1.0),
    A.Normalize(),
    ToTensorV2()
])

In [19]:
class DogsCatsDataset(Dataset) :
    def __init__(self, df, transform=None) :
        self.df = df # さっきの pandas dataframe を受け取る
        self.transform = transform # 画像の変換処理を受け取る

    def __len__(self) :
        return len(self.df)
    
    def __getitem__(self, idx) :
        img = Image.open(self.df.iloc[idx, 0])
        img = self.transform(image = np.array(img))["image"]
        label = self.df.iloc[idx, 1].astype(np.float32)
        return img, label

In [20]:
class GeM(nn.Module) :
    def __init__(self, p=3, eps=1e-6) :
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps
        
    def forward(self, x) :
        return self.gem(x, p=self.p, eps=self.eps)
    
    def gem(self, x, p=3, eps=1e-6) :
        return nn.functional.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)

In [21]:
class DogCatModel(nn.Module) :
    def __init__(self) :
        super(DogCatModel, self).__init__()
        self.model = timm.create_model(CFG.model_name, pretrained=True,num_classes=0,global_pool = "") # (bs,c,h,w)
        self.model.global_pool = nn.Identity()
        self.pool = GeM()
        self.fc1 = nn.Linear(self.model.num_features, 1)
        
    def forward(self, x) :
        x = self.model(x)
        x = self.pool(x).squeeze(3).squeeze(2)
        x = self.fc1(x)
        return x

In [22]:
class dog_vs_cats_pl_model(pl.LightningModule) :
    def __init__(self, model) :
        super(dog_vs_cats_pl_model, self).__init__()
        self.model = model
        self.criterion = CFG.criterion
        
    def forward(self, x) :
        return self.model(x)
    
    def training_step(self, batch, batch_idx) :
        img, label = batch
        # 6epoch 目までは mixup
        # timm で簡単に書き直せるらしいので、 todo
        if self.current_epoch < CFG.mixup_finish_epoch :
            mixup_alpha = CFG.mixup_alpha
            lam = np.random.beta(mixup_alpha, mixup_alpha)
            index = torch.randperm(img.size(0))
            img = lam*img + (1-lam)*img[index]
            label = lam*label + (1-lam)*label[index]
            
        output = self(img)
        loss = self.criterion(output.squeeze(-1), label)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log("lr", self.trainer.optimizers[0].param_groups[0]["lr"], on_step=True, on_epoch=False, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx) :
        img, label = batch
        output = self(img)
        loss = self.criterion(output.squeeze(-1), label)
        self.log("valid_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def predict_step(self, batch, batch_idx) :
        img, _ = batch
        output = self(img)
        output = torch.sigmoid(output).cpu().numpy()
        return output
        
    
    def configure_optimizers(self) :
        optimizer = CFG.optimizer(self.parameters(), lr=CFG.lr)
        num_training_steps = len(self.train_dataloader)*CFG.num_epochs
        num_warmup_steps = int(num_training_steps * CFG.warmup_prop)
        scheduler = {
            "scheduler" :  transformers.get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps),
            "interval" : "step",
            "frequency" : 1
        }
        
        return [optimizer], [scheduler]
    

In [23]:
def run_train_cv_pl(train, test):
    kf = KFold(n_splits=CFG.n_splits, shuffle=True, random_state=CFG.random_seed)
    oof = np.zeros((len(train), 1)) 
    predictions =[]
    test_df = test.copy()
    for fold, (train_idx, valid_idx) in enumerate(kf.split(train)) :
        print(f"====================fold : {fold}====================")
        train_df = train.iloc[train_idx].reset_index(drop=True)
        valid_df = train.iloc[valid_idx].reset_index(drop=True)
        
        train_dataset = DogsCatsDataset(train_df, transform=train_transform)
        valid_dataset = DogsCatsDataset(valid_df, transform=test_transform)
        test_dataset = DogsCatsDataset(test_df, transform=test_transform)
        tta_a_dataset = DogsCatsDataset(test_df, transform=tta_a_transform)
        
        train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True, num_workers=CFG.num_workers, drop_last=True, pin_memory=True)
        valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
        test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
        tta_a_loader = DataLoader(tta_a_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
        
        model = DogCatModel()
        lightning_model = dog_vs_cats_pl_model(model)
        lightning_model.train_dataloader = train_loader
        lightning_model.valid_dataloader = valid_loader

        early_stopping = EarlyStopping(
            monitor="valid_loss",
            mode="min", 
            patience=CFG.early_stopping_round,
        )
        checkpoint = ModelCheckpoint(
            monitor="valid_loss", 
            mode="min", 
            dirpath="checkpoints", 
            filename=f"{CFG.model_name}_fold{fold}", 
            save_top_k=1,
        )
        seed_everything(CFG.random_seed)
        logger = pl.loggers.TensorBoardLogger("logs", name=f"{CFG.model_name}_fold{fold}")
        trainer = pl.Trainer(max_epochs=CFG.num_epochs, accelerator="gpu", precision="16-mixed", logger=logger, callbacks=[early_stopping, checkpoint])
        trainer.fit(lightning_model, train_loader, valid_loader)

        # best_model = DogCatModel()
        # best_model.load_state_dict(torch.load(os.path.join("checkpoints", f"{CFG.model_name}_fold{fold}.ckpt"))["state_dict"])
        # best_model.eval()
        
        valid_preds_list = trainer.predict(lightning_model, valid_loader)
        valid_preds_arr = np.concatenate(valid_preds_list)
        oof[valid_idx] = valid_preds_arr
        
        test_preds_list = trainer.predict(lightning_model, test_loader)
        test_preds_arr = np.concatenate(test_preds_list)
        
        tta_a_preds_list = trainer.predict(lightning_model, tta_a_loader)
        tta_a_preds_arr = np.concatenate(tta_a_preds_list)
        
        test_preds_arr = (test_preds_arr + tta_a_preds_arr) / 2
        
        predictions.append(test_preds_arr)
        
        del model, lightning_model, trainer
        gc.collect()
        # torch.cuda.empty_cache()
        
        if CFG.debug_one_fold :
            break
    
    predictions = np.mean(predictions, axis=0)
    
    return {
        "oof" : oof,
        "predictions" : predictions
    }

In [24]:
def main() :
    if CFG.only_infer :
        test_dataset = DogsCatsDataset(test_df, transform=test_transform)
        test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
        model = DogCatModel()
        lightning_model = dog_vs_cats_pl_model(model)
        predictions = []
        for fold in range(CFG.n_splits) :
            model = lightning_model.load_from_checkpoint(f"checkpoints/{CFG.model_name}_fold{fold}.ckpt")
            trainer = pl.Trainer(accelerator="gpu", precision="16-mixed", logger=False)
            predictions.append(trainer.predict(model, test_loader))
        submission["label"] = np.mean(predictions, axis=0)
        submission.to_csv("submission.csv", index=False)
        
    else :
        result = run_train_cv_pl(train_df, test_df)
        oof_preds = result["oof"]
        predictions = result["predictions"]
        submission["label"] = predictions
        submission.to_csv("submission.csv", index=False)
        train_df["oof_preds"] = oof_preds   
        train_df.to_csv("oof_preds.csv", index=False)
        if CFG.debug_one_fold == False :
            print(f"oof log loss : {log_loss(train_df['class'], oof_preds)}")
        
if __name__ == "__main__" :
    main()



Seed set to 42


Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | DogCatModel       | 4.0 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.035    Total estimated model params size (MB)
339       Modules in trai

Epoch 0: 100%|██████████| 312/312 [01:16<00:00,  4.09it/s, v_num=0, train_loss_step=0.0225, lr=3.12e-8, valid_loss_step=0.000988, valid_loss_epoch=0.0256, train_loss_epoch=0.0796]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 312/312 [01:16<00:00,  4.07it/s, v_num=0, train_loss_step=0.0225, lr=3.12e-8, valid_loss_step=0.000988, valid_loss_epoch=0.0256, train_loss_epoch=0.0796]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 79/79 [00:06<00:00, 12.52it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:15<00:00, 12.77it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:16<00:00, 11.98it/s]


Seed set to 42
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | DogCatModel       | 4.0 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.035    Total estimated model params size (MB)
339       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 312/312 [01:10<00:00,  4.40it/s, v_num=0, train_loss_step=0.0311, lr=3.12e-8, valid_loss_step=0.00231, valid_loss_epoch=0.0305, train_loss_epoch=0.0796]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 312/312 [01:11<00:00,  4.38it/s, v_num=0, train_loss_step=0.0311, lr=3.12e-8, valid_loss_step=0.00231, valid_loss_epoch=0.0305, train_loss_epoch=0.0796]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 79/79 [00:07<00:00, 11.00it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:15<00:00, 12.65it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:17<00:00, 11.44it/s]


Seed set to 42
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | DogCatModel       | 4.0 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.035    Total estimated model params size (MB)
339       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 312/312 [01:09<00:00,  4.47it/s, v_num=0, train_loss_step=0.0691, lr=3.12e-8, valid_loss_step=0.0391, valid_loss_epoch=0.0278, train_loss_epoch=0.0796]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 312/312 [01:10<00:00,  4.43it/s, v_num=0, train_loss_step=0.0691, lr=3.12e-8, valid_loss_step=0.0391, valid_loss_epoch=0.0278, train_loss_epoch=0.0796]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 79/79 [00:07<00:00, 10.60it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:15<00:00, 12.33it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:16<00:00, 11.61it/s]


Seed set to 42
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | DogCatModel       | 4.0 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.035    Total estimated model params size (MB)
339       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 312/312 [01:07<00:00,  4.60it/s, v_num=0, train_loss_step=0.0288, lr=3.12e-8, valid_loss_step=0.000239, valid_loss_epoch=0.0356, train_loss_epoch=0.0747]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 312/312 [01:08<00:00,  4.56it/s, v_num=0, train_loss_step=0.0288, lr=3.12e-8, valid_loss_step=0.000239, valid_loss_epoch=0.0356, train_loss_epoch=0.0747]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 79/79 [00:06<00:00, 11.58it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:17<00:00, 11.22it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:18<00:00, 10.87it/s]


Seed set to 42
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | DogCatModel       | 4.0 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.035    Total estimated model params size (MB)
339       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 312/312 [01:08<00:00,  4.55it/s, v_num=0, train_loss_step=0.0347, lr=3.12e-8, valid_loss_step=0.00263, valid_loss_epoch=0.0301, train_loss_epoch=0.077]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 312/312 [01:09<00:00,  4.52it/s, v_num=0, train_loss_step=0.0347, lr=3.12e-8, valid_loss_step=0.00263, valid_loss_epoch=0.0301, train_loss_epoch=0.077]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 79/79 [00:06<00:00, 11.81it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:16<00:00, 11.79it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 196/196 [00:17<00:00, 11.53it/s]
oof log loss : 0.033991093324274024


In [25]:
submission = pd.read_csv('submission.csv')
submission['label'] = submission['label'].clip(0.01, 0.99)
submission.to_csv('submission_clip.csv', index=False)