In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action = 'ignore')

In [None]:
print(f"PyTorch version:{torch.__version__}") 

PyTorch version:1.13.0


In [None]:
#M1 Mac GPU
#Python 3.10 이상 & PyTorch 1.12.1 이상
device = torch.device('mps:0' if torch.backends.mps.is_available() else 'cpu') #
print (f"PyTorch version:{torch.__version__}") # 1.12.1 이상
print(f"MPS 장치를 지원하도록 build 되었는지: {torch.backends.mps.is_built()}") # True 여야 합니다.
print(f"MPS 장치가 사용 가능한지: {torch.backends.mps.is_available()}") # True 여야 합니다.
!python -c 'import platform;print(platform.platform())'

#device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

PyTorch version:1.13.0
MPS 장치를 지원하도록 build 되었는지: True
MPS 장치가 사용 가능한지: True
macOS-12.3-arm64-arm-64bit


device(type='mps', index=0)

### Hyperparameter Setting

In [None]:
CFG = {
    "IMG_SIZE" : 512,
    "EPOCHS" : 5,
    "LEARNING_RATE": 1e-4,
    "BATCH_SIZE": 16,
    "SEED" : 2
}

### Fixed RandomSeed

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(CFG["SEED"]) #Seed 고정

### Data Pre-processing

In [None]:
#Load Data
df_train = pd.read_csv("./data_original/data/train.csv")
df_test = pd.read_csv("./data_original/data/test.csv")

df_train["N_category"].value_counts(ascending = False)

1    514
0    486
Name: N_category, dtype: int64

#### 1. 암의 장경과 label

In [None]:
df_train[df_train["암의 장경"].isnull() & (df_train["N_category"] == 1)]
#임파선 전이를 하였지만 암의 장경이 결측값인 경우는 positive label(514)의 1% 미만인 4개 뿐임. >> 해당 데이터 제거

Unnamed: 0,ID,img_path,mask_path,나이,수술연월일,진단명,암의 위치,암의 개수,암의 장경,NG,...,ER_Allred_score,PR,PR_Allred_score,KI-67_LI_percent,HER2,HER2_IHC,HER2_SISH,HER2_SISH_ratio,BRCA_mutation,N_category
112,BC_01_0553,./train_imgs/BC_01_0553.png,-,40,2019-07-09,1,1,1,,2.0,...,8.0,1.0,7.0,34.25,0.0,2.0,0.0,1.11,,1
123,BC_01_0596,./train_imgs/BC_01_0596.png,-,49,2017-04-28,1,2,1,,2.0,...,5.0,1.0,7.0,30.0,0.0,1.0,,,,1
303,BC_01_1470,./train_imgs/BC_01_1470.png,-,37,2016-11-24,1,1,1,,3.0,...,7.0,1.0,6.0,10.74,0.0,1.0,,,0.0,1
424,BC_01_2056,./train_imgs/BC_01_2056.png,-,61,2015-09-14,1,1,1,,3.0,...,,0.0,,5.0,1.0,3.0,,,,1


In [None]:
df_train[(df_train["암의 장경"] <= 0) & (df_train["N_category"] == 1)]
#임파선 전이를 하였지만 암의 장경이 0인 경우는 positive label(514)의 1% 미만인 3개 뿐임. >> 해당 데이터 제거

Unnamed: 0,ID,img_path,mask_path,나이,수술연월일,진단명,암의 위치,암의 개수,암의 장경,NG,...,ER_Allred_score,PR,PR_Allred_score,KI-67_LI_percent,HER2,HER2_IHC,HER2_SISH,HER2_SISH_ratio,BRCA_mutation,N_category
109,BC_01_0547,./train_imgs/BC_01_0547.png,-,57,2019-04-22,1,2,1,0.0,2.0,...,,0.0,,28.27,1.0,3.0,,,,1
115,BC_01_0561,./train_imgs/BC_01_0561.png,-,54,2019-03-14,1,2,1,0.0,,...,,0.0,,42.81,0.0,1.0,,,,1
117,BC_01_0567,./train_imgs/BC_01_0567.png,-,42,2019-02-01,1,2,1,0.0,3.0,...,7.0,1.0,7.0,47.79,0.0,0.0,,,,1


In [None]:
drop_row_idx_01 = df_train[df_train["암의 장경"].isnull() & (df_train["N_category"] == 1)].index
drop_row_idx_02 = df_train[(df_train["암의 장경"] <= 0) & (df_train["N_category"] == 1)].index
df_train.drop(drop_row_idx_01, axis = 0, inplace = True)
df_train.drop(drop_row_idx_02, axis = 0, inplace = True)
df_train.reset_index(drop = True, inplace = True)
len(df_train)

993

In [None]:
#결측치 보완
df_train["암의 장경"] = df_train["암의 장경"].fillna(df_train["암의 장경"].mean())
df_train = df_train.fillna(0)

df_test["암의 장경"] = df_test["암의 장경"].fillna(df_test["암의 장경"].mean())
df_test = df_test.fillna(0)

#### 2. Tubule formation (HG_score_1)과 label

In [None]:
def get_ratio_featureVStarget(feature:str, ratio_criterion = 0.15):
    labels = [0, 1]
    feature_value_counts = df_train[feature].value_counts()
    drop_idx_label_list = []
    
    print(feature + "\n")
    print(feature_value_counts)
    for label in labels:
        print("\n<", label, ">")
        for feature_unique in feature_value_counts.index:
            feature_unique = int(feature_unique)
            result = len(df_train[(df_train[feature] == feature_unique) & (df_train["N_category"] == label)]) / feature_value_counts[feature_unique]
            if result < ratio_criterion:
                drop_idx_label_list.append([feature_unique, label])
            print(feature_unique, ":",  result)
    print("\n 10% 이하 feature index : label: ", drop_idx_label_list)
    return drop_idx_label_list

def delete_traindata(drop_lists:list, feature:str):
    for drop_list in drop_lists:
        drop_row_idx = df_train[(df_train[feature] == drop_list[0]) & (df_train["N_category"] == drop_list[1])].index
        df_train.drop(drop_row_idx, axis = 0, inplace = True)
        df_train.reset_index(drop = True, inplace = True)
        print(len(df_train))
        

In [None]:
feature_name = "HG_score_1"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

HG_score_1

3.0    615
2.0    206
0.0     89
1.0     83
Name: HG_score_1, dtype: int64

< 0 >
3 : 0.367479674796748
2 : 0.5145631067961165
0 : 0.9438202247191011
1 : 0.8433734939759037

< 1 >
3 : 0.6325203252032521
2 : 0.4854368932038835
0 : 0.056179775280898875
1 : 0.1566265060240964

 10% 이하 feature index : label:  [[0, 1]]
988


#### 3. Nuclear Pleomorphism (HG_score_2)과 label

In [None]:
feature_name = "HG_score_2"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

HG_score_2

2.0    497
3.0    268
1.0    139
0.0     84
Name: HG_score_2, dtype: int64

< 0 >
2 : 0.38832997987927564
3 : 0.33955223880597013
1 : 0.8489208633093526
0 : 1.0

< 1 >
2 : 0.6116700201207244
3 : 0.6604477611940298
1 : 0.1510791366906475
0 : 0.0

 10% 이하 feature index : label:  [[0, 1]]
988


#### 4. Mitotic Rate (HG_score_3)와 label

In [None]:
feature_name = "HG_score_3"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

HG_score_3

1.0    637
2.0    156
3.0    111
0.0     81
4.0      3
Name: HG_score_3, dtype: int64

< 0 >
1 : 0.47095761381475665
2 : 0.3525641025641026
3 : 0.42342342342342343
0 : 1.0
4 : 1.0

< 1 >
1 : 0.5290423861852434
2 : 0.6474358974358975
3 : 0.5765765765765766
0 : 0.0
4 : 0.0

 10% 이하 feature index : label:  [[0, 1], [4, 1]]
988
988


#### 5. T_category

In [None]:
feature_name = "T_category"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

T_category

1.0    597
2.0    305
0.0     67
3.0     15
4.0      4
Name: T_category, dtype: int64

< 0 >
1 : 0.5494137353433836
2 : 0.29180327868852457
0 : 1.0
3 : 0.13333333333333333
4 : 0.0

< 1 >
1 : 0.45058626465661644
2 : 0.7081967213114754
0 : 0.0
3 : 0.8666666666666667
4 : 1.0

 10% 이하 feature index : label:  [[3, 0], [4, 0], [0, 1]]
986
986
986


#### 6. Dual Probe ISH Group Definitions

In [None]:
feature_name = "HER2_IHC"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

HER2_IHC

2.0    322
1.0    310
0.0    270
3.0     84
Name: HER2_IHC, dtype: int64

< 0 >
2 : 0.4472049689440994
1 : 0.4774193548387097
0 : 0.5518518518518518
3 : 0.5119047619047619

< 1 >
2 : 0.5527950310559007
1 : 0.5225806451612903
0 : 0.44814814814814813
3 : 0.4880952380952381

 10% 이하 feature index : label:  []


#### 7. Mutation 여부

In [None]:
feature_name = "BRCA_mutation"
delete_traindata(get_ratio_featureVStarget(feature_name), feature_name)

BRCA_mutation

0.0    982
2.0      4
Name: BRCA_mutation, dtype: int64

< 0 >
0 : 0.49185336048879835
2 : 0.25

< 1 >
0 : 0.5081466395112016
2 : 0.75

 10% 이하 feature index : label:  []


#### 8. Confirm

In [None]:
df_train["N_category"].value_counts()

1    502
0    484
Name: N_category, dtype: int64

In [None]:
df_train.shape

(986, 28)

In [None]:
assert df_train.duplicated().sum() == 0
assert df_train.isnull().sum().sum() == 0

In [None]:
#Train / Validation Split
df_train, df_val, train_labels, val_labels = train_test_split(df_train.drop(columns = ["N_category"]),
                                                              df_train["N_category"],
                                                              test_size = 0.20,
                                                              random_state = CFG["SEED"])

In [None]:
len(df_train)

788

In [None]:
df_train.corr()

Unnamed: 0,나이,진단명,암의 위치,암의 개수,암의 장경,NG,HG,HG_score_1,HG_score_2,HG_score_3,...,ER,ER_Allred_score,PR,PR_Allred_score,KI-67_LI_percent,HER2,HER2_IHC,HER2_SISH,HER2_SISH_ratio,BRCA_mutation
나이,1.0,-0.038191,0.051735,-0.119127,0.127336,0.093297,0.038615,0.07534,0.051959,-0.009986,...,-0.06491,0.03402,-0.206476,-0.139083,-0.004382,0.001427,0.034529,0.049752,0.022949,-0.010448
진단명,-0.038191,1.0,-0.012793,0.113807,-0.046082,-0.140691,-0.135438,-0.13983,-0.129428,-0.101089,...,0.107956,0.193821,0.045481,0.124718,-0.039696,-0.092851,0.003772,-0.023692,-0.00386,-0.026806
암의 위치,0.051735,-0.012793,1.0,0.007784,-0.005584,0.083808,0.070736,0.065673,0.078235,0.069702,...,-0.017127,-0.012661,-0.012018,0.000366,0.032449,-0.008468,-0.001212,0.049344,-0.029634,0.023328
암의 개수,-0.119127,0.113807,0.007784,1.0,0.021251,0.122681,0.083197,0.132031,0.10204,-0.010854,...,0.080815,0.151459,0.059811,0.13684,0.103477,-0.111823,0.020915,0.055067,0.006976,0.023815
암의 장경,0.127336,-0.046082,-0.005584,0.021251,1.0,0.252928,0.280348,0.186273,0.239336,0.249179,...,-0.101798,-0.068172,-0.058886,-0.055193,0.255256,-0.006572,0.00251,0.067173,0.198497,-0.011374
NG,0.093297,-0.140691,0.083808,0.122681,0.252928,1.0,0.701167,0.51875,0.806051,0.499323,...,-0.226925,-0.052482,-0.208276,-0.118824,0.425087,0.095879,0.129152,0.145762,0.058263,0.026093
HG,0.038615,-0.135438,0.070736,0.083197,0.280348,0.701167,1.0,0.773355,0.843047,0.777704,...,-0.317607,-0.220544,-0.272869,-0.247039,0.465966,0.163884,0.112519,0.118223,0.056021,-0.019472
HG_score_1,0.07534,-0.13983,0.065673,0.132031,0.186273,0.51875,0.773355,1.0,0.713936,0.507668,...,-0.142104,-0.09629,-0.152026,-0.153974,0.285143,0.102676,0.048718,0.105913,0.020735,-0.046794
HG_score_2,0.051959,-0.129428,0.078235,0.10204,0.239336,0.806051,0.843047,0.713936,1.0,0.652483,...,-0.268358,-0.182635,-0.243917,-0.228803,0.415114,0.158998,0.12168,0.137943,0.054408,-0.016527
HG_score_3,-0.009986,-0.101089,0.069702,-0.010854,0.249179,0.499323,0.777704,0.507668,0.652483,1.0,...,-0.332231,-0.252695,-0.278104,-0.257236,0.452362,0.179882,0.11484,0.035212,0.06631,-0.003929


In [None]:
#Numeric Feature Scaling / Categorical Feature Label-Encoding

def get_values(value):
    return value.values.reshape(-1, 1)

numeric_cols = ["나이", "암의 장경", "ER_Allred_score", "PR_Allred_score", "KI-67_LI_percent", "HER2_SISH_ratio"]
ignore_cols = ["ID", "img_path", "mask_path", "수술연월일", "N_category"]

for col in df_train.columns:
    if col in ignore_cols:
        continue
    if col in numeric_cols:
        scaler = StandardScaler()
        df_train[col] = scaler.fit_transform(get_values(df_train[col]))
        df_val[col] = scaler.transform(get_values(df_val[col]))
        df_test[col] = scaler.transform(get_values(df_test[col]))
    else:
        le = LabelEncoder()
        df_train[col] = le.fit_transform(get_values(df_train[col]))
        df_val[col] = le.transform(get_values(df_val[col]))
        df_test[col] = le.transform(get_values(df_test[col]))

### CustomDataset

In [None]:
class CustomDataset(Dataset):
    def __init__(self, df_medical, labels, transforms = None):
        self.df_medical = df_medical
        self.transforms = transforms
        self.labels = labels
    
    def __getitem__(self, index):
        img_path_prefix = "./data_original/data"
        img_path = img_path_prefix + self.df_medical["img_path"].iloc[index][1:]
        print(img_path)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #BGR >> RGB
        
        if self.transforms is not None:
            image = self.transforms(image = image)["image"]
            
        if self.labels is not None:
            tabular = torch.Tensor(self.df_medical.drop(columns = ["ID", "img_path", "mask_path", "수술연월일"]).iloc[index])
            label = self.labels[index]
            return image, tabular, label
        else:
            tabular = torch.Tensor(self.df_medical.drop(columns = ["ID", "img_path", "수술연월일"]).iloc[index])
            return image, tabular
        
    def __len__(self):
        return len(self.df_medical)

In [None]:
img_path_prefix = "./data_original/data"
img_path = img_path_prefix + df_train["img_path"].iloc[1][1:]
img_path

'./data_original/data/train_imgs/BC_01_3076.png'

In [None]:
img_path = "./data_original/data/train_imgs/BC_01_0001.png"
image = cv2.imread(img_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image

array([[[244, 244, 244],
        [244, 244, 244],
        [244, 244, 244],
        ...,
        [244, 244, 244],
        [244, 244, 244],
        [244, 244, 244]],

       [[244, 244, 244],
        [244, 244, 244],
        [244, 244, 244],
        ...,
        [244, 244, 244],
        [244, 244, 244],
        [244, 244, 244]],

       [[244, 244, 244],
        [244, 244, 244],
        [244, 244, 244],
        ...,
        [244, 244, 244],
        [244, 244, 244],
        [244, 244, 244]],

       ...,

       [[242, 242, 242],
        [242, 242, 242],
        [243, 243, 243],
        ...,
        [240, 241, 241],
        [240, 241, 241],
        [240, 240, 241]],

       [[242, 242, 242],
        [242, 242, 242],
        [243, 243, 243],
        ...,
        [240, 241, 241],
        [240, 241, 241],
        [240, 240, 241]],

       [[233, 239, 242],
        [240, 243, 242],
        [240, 244, 242],
        ...,
        [240, 240, 240],
        [240, 240, 240],
        [240, 240, 240]]

In [None]:
train_transforms = A.Compose([
                A.HorizontalFlip(),
                A.VerticalFlip(),
                A.Rotate(limit = 90, border_mode = cv2.BORDER_CONSTANT, p = 0.3),
                A.Resize(CFG["IMG_SIZE"], CFG['IMG_SIZE']),
                A.Normalize(mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225), max_pixel_value = 255.0, always_apply = False, p = 1.0),
    ToTensorV2()
    ])

test_transforms = A.Compose([
    A.Resize(CFG["IMG_SIZE"], CFG['IMG_SIZE']),
    A.Normalize(mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225), max_pixel_value = 255.0, always_apply = False, p = 1.0),
    ToTensorV2()
])

In [None]:
train_dataset = CustomDataset(df_train, train_labels.values, train_transforms)
train_loader = DataLoader(train_dataset, batch_size = CFG["BATCH_SIZE"], shuffle = True, num_workers = 0)

val_dataset = CustomDataset(df_val, val_labels.values, test_transforms)
val_loader = DataLoader(val_dataset, batch_size = CFG["BATCH_SIZE"], shuffle = False, num_workers = 0)

### Model Architecture

In [None]:
class ImgFeatureExtraction(nn.Module):
    def __init__(self):
        super(ImgFeatureExtraction, self).__init__()
        self.backbone = models.efficientnet_b0(pretrained = True)
        self.embedding = nn.Linear(1000, 512)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.embedding(x)
        return x

In [None]:
class TabularFeatureExtractor(nn.Module):
    def __init__(self):
        super(TabularFeatureExtractor, self).__init__()
        self.embedding = nn.Sequential(
            nn.Linear(in_features = 23, out_features = 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(in_features = 64, out_features = 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(in_features = 128, out_features = 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(in_features = 256, out_features = 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(in_features = 512, out_features = 512)
        )
    
    def forward(self, x):
        x = self.embedding(x)
        return x

In [None]:
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.img_feature_extractor = ImgFeatureExtraction()
        self.tabular_feature_extractor = TabularFeatureExtractor()
        self.classifier = nn.Sequential(
            nn.Linear(in_features = 1024, out_features = 1),
            nn.Sigmoid(),
        )
        
    def forward(self, img, tabular):
        img_feature = self.img_feature_extractor(img)
        tabular_feature = self.tabular_feature_extractor(tabular)
        feature = torch.cat([img_feature, tabular_feature], dim = -1)
        output = self.classifier(feature)
        return output

### Train

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.BCEWithLogitsLoss().to(device)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG["EPOCHS"] + 1):
        model.train()
        train_loss = []
        for img, tabular, label in tqdm(iter(train_loader)):
            img = img.float().to(device)
            tabular = tabular.float().to(device)
            label = label.float().to(device)
            
            optimizer.zero_grad()
            
            model_pred = model(img, tabular)
            
            loss = criterion(model_pred, label.reshape(-1, 1))
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
            
        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_score)
            
        if best_score < val_score:
            best_score = val_score
            best_model = model
    
    return best_model


def validation(model, criterion, val_loader, device):
    model.eval()
    pred_labels = []
    true_labels = []
    val_loss = []
    threshold = 0.5
    with torch.no_grad():
        for img, tabular, label in tqdm(iter(val_loader)):
            true_labels += label.tolist()
            
            img = img.float().to(device)
            tabular = tabular.float().to(device)
            label = label.float().to(device)
            
            model_pred = model(img, tabular)
            
            loss = criterion(model_pred, label.reshape(-1, 1))
            
            val_loss.append(loss.item())
            
            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
        
        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_score = metrics.f1_score(y_true = true_labels, y_pred = pred_labels, average = "macro")
        return np.mean(val_loss), val_score

### Run

In [None]:
model = nn.DataParallel(ClassificationModel())
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = "max", factor = 0.5, patience = 1, threshold_mode = "abs", min_lr = 1e-8, verbose = True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/50 [00:00<?, ?it/s]

./data_original/data/train_imgs/BC_01_2268.png
./data_original/data/train_imgs/BC_01_1021.png
./data_original/data/train_imgs/BC_01_1864.png
./data_original/data/train_imgs/BC_01_1231.png
./data_original/data/train_imgs/BC_01_1202.png
./data_original/data/train_imgs/BC_01_1374.png
./data_original/data/train_imgs/BC_01_0923.png
./data_original/data/train_imgs/BC_01_3109.png


  0%|          | 0/50 [00:01<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "/Users/doukkim/miniforge3/envs/dacon_bc2/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/v3/4ynwqd612pg7gcxwzzh5d8km0000gn/T/ipykernel_9864/3326714095.py", line 6, in <cell line: 6>
    infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)
  File "/var/folders/v3/4ynwqd612pg7gcxwzzh5d8km0000gn/T/ipykernel_9864/3773371507.py", line 11, in train
    for img, tabular, label in tqdm(iter(train_loader)):
  File "/Users/doukkim/miniforge3/envs/dacon_bc2/lib/python3.9/site-packages/tqdm/std.py", line 1180, in __iter__
    for obj in iterable:
  File "/Users/doukkim/miniforge3/envs/dacon_bc2/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
    #      set before library resources we use are freed (which, at least in
  File "/Users/doukkim/miniforge3/envs/dacon_bc2/lib/python3.9/

In [None]:
test_dataset = CustomDataset(df_test, None, test_transforms)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    threshold = 0.5
    
    with torch.no_grad():
        for img, tabular in tqdm(iter(test_loader)):
            img = img.float().to(device)
            tabular = tabular.float().to(device)
            
            model_pred = model(img, tabular)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            
            preds += model_pred.tolist()
    
    preds = np.where(np.array(preds) > threshold, 1, 0)
    
    return preds

In [None]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/16 [00:00<?, ?it/s]

./data_original/data/test_imgs/BC_01_0011.png
./data_original/data/test_imgs/BC_01_0220.png
./data_original/data/test_imgs/BC_01_0233.png
./data_original/data/test_imgs/BC_01_0258.png
./data_original/data/test_imgs/BC_01_0260.png
./data_original/data/test_imgs/BC_01_0266.png
./data_original/data/test_imgs/BC_01_0269.png
./data_original/data/test_imgs/BC_01_0272.png
./data_original/data/test_imgs/BC_01_0275.png
./data_original/data/test_imgs/BC_01_0278.png
./data_original/data/test_imgs/BC_01_0279.png
./data_original/data/test_imgs/BC_01_0286.png
./data_original/data/test_imgs/BC_01_0296.png
./data_original/data/test_imgs/BC_01_0303.png
./data_original/data/test_imgs/BC_01_0310.png
./data_original/data/test_imgs/BC_01_0322.png


  6%|▋         | 1/16 [00:10<02:35, 10.38s/it]

./data_original/data/test_imgs/BC_01_0340.png
./data_original/data/test_imgs/BC_01_0344.png
./data_original/data/test_imgs/BC_01_0390.png
./data_original/data/test_imgs/BC_01_0488.png
./data_original/data/test_imgs/BC_01_0507.png
./data_original/data/test_imgs/BC_01_0511.png
./data_original/data/test_imgs/BC_01_0526.png
./data_original/data/test_imgs/BC_01_0632.png
./data_original/data/test_imgs/BC_01_0635.png
./data_original/data/test_imgs/BC_01_0712.png
./data_original/data/test_imgs/BC_01_0715.png
./data_original/data/test_imgs/BC_01_0721.png
./data_original/data/test_imgs/BC_01_0726.png
./data_original/data/test_imgs/BC_01_0790.png
./data_original/data/test_imgs/BC_01_0793.png
./data_original/data/test_imgs/BC_01_0801.png


 12%|█▎        | 2/16 [00:19<02:12,  9.47s/it]

./data_original/data/test_imgs/BC_01_0820.png
./data_original/data/test_imgs/BC_01_0848.png
./data_original/data/test_imgs/BC_01_0860.png
./data_original/data/test_imgs/BC_01_0884.png
./data_original/data/test_imgs/BC_01_0933.png
./data_original/data/test_imgs/BC_01_1011.png
./data_original/data/test_imgs/BC_01_1051.png
./data_original/data/test_imgs/BC_01_1053.png
./data_original/data/test_imgs/BC_01_1078.png
./data_original/data/test_imgs/BC_01_1079.png
./data_original/data/test_imgs/BC_01_1084.png
./data_original/data/test_imgs/BC_01_1109.png
./data_original/data/test_imgs/BC_01_1114.png
./data_original/data/test_imgs/BC_01_1139.png
./data_original/data/test_imgs/BC_01_1158.png
./data_original/data/test_imgs/BC_01_1160.png


 19%|█▉        | 3/16 [00:27<01:56,  8.96s/it]

./data_original/data/test_imgs/BC_01_1177.png
./data_original/data/test_imgs/BC_01_1183.png
./data_original/data/test_imgs/BC_01_1187.png
./data_original/data/test_imgs/BC_01_1196.png
./data_original/data/test_imgs/BC_01_1222.png
./data_original/data/test_imgs/BC_01_1246.png
./data_original/data/test_imgs/BC_01_1251.png
./data_original/data/test_imgs/BC_01_1257.png
./data_original/data/test_imgs/BC_01_1299.png
./data_original/data/test_imgs/BC_01_1304.png
./data_original/data/test_imgs/BC_01_1353.png
./data_original/data/test_imgs/BC_01_1361.png
./data_original/data/test_imgs/BC_01_1363.png
./data_original/data/test_imgs/BC_01_1391.png
./data_original/data/test_imgs/BC_01_1412.png
./data_original/data/test_imgs/BC_01_1438.png


 25%|██▌       | 4/16 [00:36<01:45,  8.76s/it]

./data_original/data/test_imgs/BC_01_1454.png
./data_original/data/test_imgs/BC_01_1460.png
./data_original/data/test_imgs/BC_01_1523.png
./data_original/data/test_imgs/BC_01_1531.png
./data_original/data/test_imgs/BC_01_1546.png
./data_original/data/test_imgs/BC_01_1550.png
./data_original/data/test_imgs/BC_01_1563.png
./data_original/data/test_imgs/BC_01_1566.png
./data_original/data/test_imgs/BC_01_1606.png
./data_original/data/test_imgs/BC_01_1614.png
./data_original/data/test_imgs/BC_01_1659.png
./data_original/data/test_imgs/BC_01_1660.png
./data_original/data/test_imgs/BC_01_1665.png
./data_original/data/test_imgs/BC_01_1678.png
./data_original/data/test_imgs/BC_01_1681.png
./data_original/data/test_imgs/BC_01_1689.png


 31%|███▏      | 5/16 [00:44<01:37,  8.82s/it]

./data_original/data/test_imgs/BC_01_1710.png
./data_original/data/test_imgs/BC_01_1742.png
./data_original/data/test_imgs/BC_01_1749.png
./data_original/data/test_imgs/BC_01_1751.png
./data_original/data/test_imgs/BC_01_1808.png
./data_original/data/test_imgs/BC_01_1828.png
./data_original/data/test_imgs/BC_01_1845.png
./data_original/data/test_imgs/BC_01_1857.png
./data_original/data/test_imgs/BC_01_1885.png
./data_original/data/test_imgs/BC_01_1896.png
./data_original/data/test_imgs/BC_01_1897.png
./data_original/data/test_imgs/BC_01_1902.png
./data_original/data/test_imgs/BC_01_1911.png
./data_original/data/test_imgs/BC_01_1912.png
./data_original/data/test_imgs/BC_01_1949.png
./data_original/data/test_imgs/BC_01_1955.png


 38%|███▊      | 6/16 [00:53<01:27,  8.74s/it]

./data_original/data/test_imgs/BC_01_1957.png
./data_original/data/test_imgs/BC_01_1994.png
./data_original/data/test_imgs/BC_01_2021.png
./data_original/data/test_imgs/BC_01_2041.png
./data_original/data/test_imgs/BC_01_2103.png
./data_original/data/test_imgs/BC_01_2131.png
./data_original/data/test_imgs/BC_01_2153.png
./data_original/data/test_imgs/BC_01_2191.png
./data_original/data/test_imgs/BC_01_2219.png
./data_original/data/test_imgs/BC_01_2227.png
./data_original/data/test_imgs/BC_01_2256.png
./data_original/data/test_imgs/BC_01_2258.png
./data_original/data/test_imgs/BC_01_2262.png
./data_original/data/test_imgs/BC_01_2263.png
./data_original/data/test_imgs/BC_01_2284.png
./data_original/data/test_imgs/BC_01_2315.png


 44%|████▍     | 7/16 [01:01<01:16,  8.52s/it]

./data_original/data/test_imgs/BC_01_2316.png
./data_original/data/test_imgs/BC_01_2321.png
./data_original/data/test_imgs/BC_01_2344.png
./data_original/data/test_imgs/BC_01_2356.png
./data_original/data/test_imgs/BC_01_2366.png
./data_original/data/test_imgs/BC_01_2400.png
./data_original/data/test_imgs/BC_01_2409.png
./data_original/data/test_imgs/BC_01_2410.png
./data_original/data/test_imgs/BC_01_2416.png
./data_original/data/test_imgs/BC_01_2422.png
./data_original/data/test_imgs/BC_01_2451.png
./data_original/data/test_imgs/BC_01_2452.png
./data_original/data/test_imgs/BC_01_2462.png
./data_original/data/test_imgs/BC_01_2495.png
./data_original/data/test_imgs/BC_01_2499.png
./data_original/data/test_imgs/BC_01_2501.png


 50%|█████     | 8/16 [01:10<01:07,  8.50s/it]

./data_original/data/test_imgs/BC_01_2508.png
./data_original/data/test_imgs/BC_01_2511.png
./data_original/data/test_imgs/BC_01_2522.png
./data_original/data/test_imgs/BC_01_2527.png
./data_original/data/test_imgs/BC_01_2550.png
./data_original/data/test_imgs/BC_01_2574.png
./data_original/data/test_imgs/BC_01_2581.png
./data_original/data/test_imgs/BC_01_2582.png
./data_original/data/test_imgs/BC_01_2595.png
./data_original/data/test_imgs/BC_01_2598.png
./data_original/data/test_imgs/BC_01_2622.png
./data_original/data/test_imgs/BC_01_2627.png
./data_original/data/test_imgs/BC_01_2629.png
./data_original/data/test_imgs/BC_01_2636.png
./data_original/data/test_imgs/BC_01_2637.png
./data_original/data/test_imgs/BC_01_2643.png


 56%|█████▋    | 9/16 [01:18<00:58,  8.38s/it]

./data_original/data/test_imgs/BC_01_2653.png
./data_original/data/test_imgs/BC_01_2662.png
./data_original/data/test_imgs/BC_01_2668.png
./data_original/data/test_imgs/BC_01_2672.png
./data_original/data/test_imgs/BC_01_2675.png
./data_original/data/test_imgs/BC_01_2677.png
./data_original/data/test_imgs/BC_01_2679.png
./data_original/data/test_imgs/BC_01_2685.png
./data_original/data/test_imgs/BC_01_2689.png
./data_original/data/test_imgs/BC_01_2692.png
./data_original/data/test_imgs/BC_01_2694.png
./data_original/data/test_imgs/BC_01_2697.png
./data_original/data/test_imgs/BC_01_2698.png
./data_original/data/test_imgs/BC_01_2707.png
./data_original/data/test_imgs/BC_01_2713.png
./data_original/data/test_imgs/BC_01_2714.png


 62%|██████▎   | 10/16 [01:26<00:49,  8.25s/it]

./data_original/data/test_imgs/BC_01_2715.png
./data_original/data/test_imgs/BC_01_2716.png
./data_original/data/test_imgs/BC_01_2718.png
./data_original/data/test_imgs/BC_01_2741.png
./data_original/data/test_imgs/BC_01_2742.png
./data_original/data/test_imgs/BC_01_2751.png
./data_original/data/test_imgs/BC_01_2759.png
./data_original/data/test_imgs/BC_01_2763.png
./data_original/data/test_imgs/BC_01_2764.png
./data_original/data/test_imgs/BC_01_2768.png
./data_original/data/test_imgs/BC_01_2770.png
./data_original/data/test_imgs/BC_01_2774.png
./data_original/data/test_imgs/BC_01_2781.png
./data_original/data/test_imgs/BC_01_2783.png
./data_original/data/test_imgs/BC_01_2784.png
./data_original/data/test_imgs/BC_01_2797.png


 69%|██████▉   | 11/16 [01:34<00:41,  8.25s/it]

./data_original/data/test_imgs/BC_01_2798.png
./data_original/data/test_imgs/BC_01_2803.png
./data_original/data/test_imgs/BC_01_2806.png
./data_original/data/test_imgs/BC_01_2810.png
./data_original/data/test_imgs/BC_01_2814.png
./data_original/data/test_imgs/BC_01_2818.png
./data_original/data/test_imgs/BC_01_2828.png
./data_original/data/test_imgs/BC_01_2850.png
./data_original/data/test_imgs/BC_01_2852.png
./data_original/data/test_imgs/BC_01_2855.png
./data_original/data/test_imgs/BC_01_2863.png
./data_original/data/test_imgs/BC_01_2865.png
./data_original/data/test_imgs/BC_01_2870.png
./data_original/data/test_imgs/BC_01_2874.png
./data_original/data/test_imgs/BC_01_2882.png
./data_original/data/test_imgs/BC_01_2883.png


 75%|███████▌  | 12/16 [01:42<00:33,  8.34s/it]

./data_original/data/test_imgs/BC_01_2885.png
./data_original/data/test_imgs/BC_01_2891.png
./data_original/data/test_imgs/BC_01_2892.png
./data_original/data/test_imgs/BC_01_2893.png
./data_original/data/test_imgs/BC_01_2894.png
./data_original/data/test_imgs/BC_01_2904.png
./data_original/data/test_imgs/BC_01_2905.png
./data_original/data/test_imgs/BC_01_2909.png
./data_original/data/test_imgs/BC_01_2910.png
./data_original/data/test_imgs/BC_01_2912.png
./data_original/data/test_imgs/BC_01_2914.png
./data_original/data/test_imgs/BC_01_2915.png
./data_original/data/test_imgs/BC_01_2919.png
./data_original/data/test_imgs/BC_01_2925.png
./data_original/data/test_imgs/BC_01_2933.png
./data_original/data/test_imgs/BC_01_2945.png


 81%|████████▏ | 13/16 [01:51<00:25,  8.38s/it]

./data_original/data/test_imgs/BC_01_2946.png
./data_original/data/test_imgs/BC_01_2949.png
./data_original/data/test_imgs/BC_01_2954.png
./data_original/data/test_imgs/BC_01_2955.png
./data_original/data/test_imgs/BC_01_2964.png
./data_original/data/test_imgs/BC_01_2970.png
./data_original/data/test_imgs/BC_01_2971.png
./data_original/data/test_imgs/BC_01_2973.png
./data_original/data/test_imgs/BC_01_2974.png
./data_original/data/test_imgs/BC_01_2975.png
./data_original/data/test_imgs/BC_01_2979.png
./data_original/data/test_imgs/BC_01_2980.png
./data_original/data/test_imgs/BC_01_2981.png
./data_original/data/test_imgs/BC_01_2995.png
./data_original/data/test_imgs/BC_01_2996.png
./data_original/data/test_imgs/BC_01_3003.png


 88%|████████▊ | 14/16 [02:01<00:17,  8.95s/it]

./data_original/data/test_imgs/BC_01_3009.png
./data_original/data/test_imgs/BC_01_3022.png
./data_original/data/test_imgs/BC_01_3023.png
./data_original/data/test_imgs/BC_01_3024.png
./data_original/data/test_imgs/BC_01_3027.png
./data_original/data/test_imgs/BC_01_3029.png
./data_original/data/test_imgs/BC_01_3052.png
./data_original/data/test_imgs/BC_01_3055.png
./data_original/data/test_imgs/BC_01_3057.png
./data_original/data/test_imgs/BC_01_3059.png
./data_original/data/test_imgs/BC_01_3074.png
./data_original/data/test_imgs/BC_01_3082.png
./data_original/data/test_imgs/BC_01_3114.png
./data_original/data/test_imgs/BC_01_3124.png
./data_original/data/test_imgs/BC_01_3167.png
./data_original/data/test_imgs/BC_01_3201.png


 94%|█████████▍| 15/16 [02:11<00:09,  9.15s/it]

./data_original/data/test_imgs/BC_01_3205.png
./data_original/data/test_imgs/BC_01_3211.png
./data_original/data/test_imgs/BC_01_3256.png
./data_original/data/test_imgs/BC_01_3257.png
./data_original/data/test_imgs/BC_01_3261.png
./data_original/data/test_imgs/BC_01_3328.png
./data_original/data/test_imgs/BC_01_3404.png
./data_original/data/test_imgs/BC_01_3418.png
./data_original/data/test_imgs/BC_01_3438.png
./data_original/data/test_imgs/BC_01_3446.png


100%|██████████| 16/16 [02:16<00:00,  8.52s/it]


In [None]:
preds

array([1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0])

### Submission

In [None]:
submit = pd.read_csv("./data_original/data/sample_submission.csv")

submit["N_category"] = preds
submit.to_csv("./submit/submission.csv", index = False)