## Import

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# 이미지 증강해주는 모델
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 

# Xception
import tensorflow as tf
# from tensorflow.keras.applications.xception import Xception

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1. 모델 세팅

## Hyperparameter Setting

In [None]:
# Hyper-parameters for the ResNet / EfficientNet experiments.
CFG = dict(
    IMG_SIZE=224,        # images are resized to IMG_SIZE x IMG_SIZE
    EPOCHS=20,           # number of training epochs (16 was the alternative setting)
    LEARNING_RATE=1e-4,  # initial optimizer learning rate
    BATCH_SIZE=20,       # batch size of every DataLoader
    SEED=41,             # seed for the RNGs and the train/validation split
)

## Fixed RandomSeed

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (the model is wrapped
    # in DataParallel). This is a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, so it
    # has to be disabled for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

# 2. 데이터 전처리

## Data Pre-processing
  - 1. Load Dataframe
  - 2. 결측치 보완
  - 3. Train / Validation Split
  - 4. Numeric Feature Scaling / Categorical Feature Label-Encoding

In [None]:
# Change the working directory to the team's code folder on Google Drive.
%cd '/content/drive/MyDrive/5팀(윤정준, 최애림, 진청아, 이정현a)/코드'

/content/drive/.shortcut-targets-by-id/1niQkF2oFYkuV2aahMfHbpa3kXs5yrcAd/5팀(윤정준, 최애림, 진청아, 이정현a)/코드


### 1) 데이터 불러오기

In [None]:
# Read the train and test tables from the current working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [None]:
# Print column dtypes and non-null counts of the training table.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 1000 non-null   object 
 1   img_path           1000 non-null   object 
 2   mask_path          1000 non-null   object 
 3   나이                 1000 non-null   int64  
 4   수술연월일              1000 non-null   object 
 5   진단명                1000 non-null   int64  
 6   암의 위치              1000 non-null   int64  
 7   암의 개수              1000 non-null   int64  
 8   암의 장경              931 non-null    float64
 9   NG                 949 non-null    float64
 10  HG                 914 non-null    float64
 11  HG_score_1         908 non-null    float64
 12  HG_score_2         908 non-null    float64
 13  HG_score_3         911 non-null    float64
 14  DCIS_or_LCIS_여부    1000 non-null   int64  
 15  DCIS_or_LCIS_type  126 non-null    float64
 16  T_category         996 no

-> 암의 장경, NG, HG, HG_score_1, HG_score_2, HG_score_3, DCIS_or_LCIS_type, T_category, ER, ER_Allred_score, PR, PR_Allred_score, KI-67_LI_percent, HER2, HER2_IHC, HER2_SISH, HER2_SISH_ratio, BRCA_mutation (18개 컬럼) null값 존재

In [None]:
import matplotlib.pyplot as plt

# Set matplotlib's default font family to NanumBarunGothic
# (presumably so the Korean column names render in plots - TODO confirm the font is installed).
plt.rc('font', family='NanumBarunGothic') 

유방암에서 겨드랑이 림프절 전이율은 약 3∼37%(7-11)으로 보고되는데, 림프절 전이에 영향을 주는 가장 중요한 인자는 종양의 크기이다.

다발성 종양은 상대위험도 5.93으로 단발성 종양에 비해 약 6배 더 높은 위험도를 보여주었고(P=0.022),

본 연구에서는 다발성 종양이 단발성 종양에 비해 상대위험도 5.92로서 림프절 전이의 위험이 매우 높았다.

https://synapse.koreamed.org/upload/synapsedata/pdfdata/0037jkss/jkss-80-10.pdf


### 2) 컬럼 전처리 
- null값 : 1) 암의 장경 69개 mean값으로 처리, 2) 연속형 컬럼(4개) mean값으로 처리 3) category 컬럼 0으로 채움


In [None]:
def age_(x):
    """Map an age in years to a decade bucket code.

    Returns 0 for ages below 30, 1..5 for the decades 30s..70s and 6 for
    ages of 80 and above. A value that matches no range (e.g. NaN) yields
    ``None``.
    """
    if x < 30:
        return 0
    # Decades 30-39 ... 70-79 map to the codes 1 ... 5.
    for code, decade_start in enumerate((30, 40, 50, 60, 70), start=1):
        if decade_start <= x < decade_start + 10:
            return code
    if x >= 80:
        return 6
    
# Replace the raw age by its decade bucket code in both frames.
# NOTE: not idempotent - re-running this cell re-buckets the codes (all < 30) to 0.
for frame in (train_df, test_df):
    frame['나이'] = frame['나이'].apply(age_)

In [None]:
# Columns whose missing values are filled with the overall training mean.
MEAN_IMPUTED_COLS = ['암의 장경', 'KI-67_LI_percent']

# (status flag column, score column): a missing score is filled with the mean
# score of the training rows that share the same 0/1 status.
GROUP_IMPUTED_COLS = [
    ('HER2_SISH', 'HER2_SISH_ratio'),
    ('ER', 'ER_Allred_score'),
    ('PR', 'PR_Allred_score'),
]


def _fit_imputation_stats(df):
    """Compute the fill values from ``df``.

    Returns:
        (overall, grouped): ``overall`` maps column -> mean; ``grouped`` maps
        (flag_col, value_col, flag) -> mean of ``value_col`` over the rows
        where ``flag_col == flag``.
    """
    overall = {col: df[col].mean() for col in MEAN_IMPUTED_COLS}
    grouped = {}
    for flag_col, value_col in GROUP_IMPUTED_COLS:
        for flag in (1, 0):
            rows = df[flag_col] == flag
            if value_col == 'HER2_SISH_ratio' and flag == 0:
                # Ratios >= 30 are excluded from the SISH-negative mean
                # (same rule as the original training-set imputation).
                rows = rows & (df[value_col] < 30)
            grouped[(flag_col, value_col, flag)] = df.loc[rows, value_col].mean()
    return overall, grouped


def _impute(df, overall, grouped):
    """Return a copy of ``df`` with missing values filled from the given statistics.

    Anything still missing afterwards (categorical columns, scores whose
    status flag is itself missing) is set to 0.
    """
    df = df.copy()
    for col, mean in overall.items():
        df[col] = df[col].fillna(mean)
    for (flag_col, value_col, flag), mean in grouped.items():
        rows = df[flag_col] == flag
        df.loc[rows, value_col] = df.loc[rows, value_col].fillna(mean)
    return df.fillna(0)


# Fit the statistics on the training frame only and apply them to both frames,
# the same way the scaler/encoder below is fitted on train and applied to test.
# Previously the test frame was filled with its own means and without the
# `< 30` filter, so train and test were imputed by different rules.
# NOTE(review): the statistics are still computed before the train/validation
# split, so validation rows contribute to them.
_overall_means, _group_means = _fit_imputation_stats(train_df)
train_df = _impute(train_df, _overall_means, _group_means)
test_df = _impute(test_df, _overall_means, _group_means)

### 3) 데이터 분리

In [None]:
# Hold out 20% of the labelled rows for validation; the target column
# 'N_category' is separated from the feature frame.
features = train_df.drop(columns=['N_category'])
target = train_df['N_category']
train_df, val_df, train_labels, val_labels = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=CFG['SEED'],
)

### 4) 컬럼 인코딩 (나이 범주형이라 카테고리로 들어감)
- StandardScaler : 연속형 값
- LabelEncoder : 카테고리 값

In [None]:
def get_values(value):
    """Return the column's values as an ``(n_samples, 1)`` array, the layout StandardScaler expects."""
    return value.values.reshape(-1, 1)

# Continuous columns are standardised; every other feature column is label-encoded.
numeric_cols = ['암의 장경', 'ER_Allred_score', 'PR_Allred_score', 'KI-67_LI_percent', 'HER2_SISH_ratio']
# Identifier / path / date / target columns that are not transformed.
ignore_cols = ['ID', 'img_path', 'mask_path', '수술연월일', 'N_category']

for col in train_df.columns:
    if col in ignore_cols:
        continue
    if col in numeric_cols:
        # Fit on the training split only, then apply to validation and test.
        # The scaler returns (n, 1) arrays; flatten them before assigning to a column.
        scaler = StandardScaler()
        train_df[col] = scaler.fit_transform(get_values(train_df[col])).ravel()
        val_df[col] = scaler.transform(get_values(val_df[col])).ravel()
        test_df[col] = scaler.transform(get_values(test_df[col])).ravel()
    else:
        # LabelEncoder expects 1-D input; passing (n, 1) column vectors only
        # works through an implicit ravel and emits a DataConversionWarning.
        # NOTE: transform() raises ValueError for a category that does not
        # occur in the training split.
        le = LabelEncoder()
        train_df[col] = le.fit_transform(train_df[col].values)
        val_df[col] = le.transform(val_df[col].values)
        test_df[col] = le.transform(test_df[col].values)

### 5) 데이터 텐서화

- 이미지 변경 : 증식 후 정규화, 텐서화한 값 출력 

- 인코딩된 컬럼 텐서화 

#### CustomDataset class 생성 
- input: 1) train_df, 2) label값, 3) 이미지 텐서화한 값 <br>
- output : 1) 이미지 텐서화된 값, 2) 인코딩된 컬럼 텐서화된 값, 3) label값

In [None]:
class CustomDataset(Dataset):
    """Dataset pairing each row's Canny edge image with its tabular features.

    Args:
        medical_df: DataFrame with an 'img_path' column plus the (already
            numeric) feature columns. The feature values are snapshotted at
            construction time.
        labels: Indexable sequence of targets aligned with ``medical_df`` by
            position, or ``None`` for unlabelled (test) data.
        transforms: Optional albumentations-style callable invoked as
            ``transforms(image=...)['image']``.

    Each item is ``(image, tabular, label)`` when labels are given, otherwise
    ``(image, tabular)``.
    """

    # Identifier / path / date columns that are not model inputs. Columns
    # absent from the frame (e.g. 'mask_path' in the test table) are skipped.
    NON_FEATURE_COLS = ['ID', 'img_path', 'mask_path', '수술연월일']
    # Lower / upper hysteresis thresholds passed to cv2.Canny.
    CANNY_THRESHOLDS = (100, 200)

    def __init__(self, medical_df, labels, transforms=None):
        self.medical_df = medical_df
        self.transforms = transforms
        self.labels = labels
        # Build the feature matrix once instead of dropping columns on every __getitem__.
        self.tabular = (
            medical_df
            .drop(columns=self.NON_FEATURE_COLS, errors='ignore')
            .to_numpy(dtype=np.float32)
        )

    def _load_image(self, img_path):
        """Read the image at ``img_path`` and return its Canny edge map as a 3-channel array.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        low, high = self.CANNY_THRESHOLDS
        edges = cv2.Canny(gray, low, high)
        # The edge map has a single channel; expand it to three channels so
        # the 3-channel normalisation and backbone can consume it.
        # (Alternative tried earlier: Otsu thresholding of the grayscale image.)
        return cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)

    def _tabular(self, index):
        """Return the feature row at position ``index`` as a float32 tensor."""
        return torch.tensor(self.tabular[index])

    def __getitem__(self, index):
        image = self._load_image(self.medical_df['img_path'].iloc[index])

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        tabular = self._tabular(index)
        if self.labels is not None:
            return image, tabular, self.labels[index]
        return image, tabular

    def __len__(self):
        return len(self.medical_df)

- 이미지 변경 : 증식 후 정규화, 텐서화한 값 출력 (위에 customdataset 함수에 같이 사용)

In [None]:
# Image augmentation, normalisation and tensor conversion pipelines.
def _resize_normalize_to_tensor():
    """Steps shared by the train and test pipelines: resize, normalise, convert to tensor."""
    return [
        # Resize to the configured square size.
        A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
        # Per-channel normalisation, always applied (p=1.0).
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        # Convert to a torch tensor.
        ToTensorV2(),
    ]


train_transforms = A.Compose([
    # Random horizontal flip.
    A.HorizontalFlip(),
    # Random vertical flip.
    A.VerticalFlip(),
    # With probability 0.3, rotate using limit=60 (degrees) and constant border fill.
    A.Rotate(limit=60, border_mode=cv2.BORDER_CONSTANT, p=0.3),
    *_resize_normalize_to_tensor(),
])

# Validation / test images get no random augmentation.
test_transforms = A.Compose(_resize_normalize_to_tensor())

- CustomDataset class 적용

In [None]:
# Training data: augmented and reshuffled every epoch.
train_dataset = CustomDataset(medical_df=train_df, labels=train_labels.values, transforms=train_transforms)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

# Validation data: deterministic transforms, fixed order.
val_dataset = CustomDataset(medical_df=val_df, labels=val_labels.values, transforms=test_transforms)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

# 3. 모델 생성

## 1) Model Architecture class 생성

### 1. ImgFeatureExtractor (이미지 데이터)

In [None]:
class ImgFeatureExtractor(nn.Module):
    """Image branch: pretrained EfficientNet-B0 followed by a linear projection to 512 features."""

    def __init__(self):
        super().__init__()
        # Pretrained EfficientNet-B0 backbone (ResNet18 was the alternative considered).
        self.backbone = models.efficientnet_b0(pretrained=True)
        # Projects the backbone's 1000-dimensional output to a 512-dimensional embedding.
        self.embedding = nn.Linear(in_features=1000, out_features=512)

    def forward(self, x):
        """Return the 512-dimensional embedding of the image batch ``x``."""
        return self.embedding(self.backbone(x))

### 2. TabularFeatureExtractor (컬럼)
- input : 모델
- output : 3개의 층 생성 후 embedding 처리 값

In [None]:
class TabularFeatureExtractor(nn.Module):
    """Tabular branch: an MLP that embeds the feature vector into 512 dimensions.

    Args:
        in_features: Number of tabular input features. Defaults to 23, the
            width previously hard-coded here; pass a different value when the
            set of feature columns changes.
    """

    def __init__(self, in_features=23):
        super().__init__()
        # Three Linear -> BatchNorm -> LeakyReLU stages, then a final linear layer.
        self.embedding = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=512)
        )

    def forward(self, x):
        """Return the 512-dimensional embedding of the ``(batch, in_features)`` input ``x``."""
        return self.embedding(x)

### 3. ClassificationModel (두 데이터 concat)
- input : 모델
- output : 이미지 특징과 컬럼 특징을 concat한 뒤 완전연결층(+Sigmoid)으로 출력

In [None]:
class ClassificationModel(nn.Module):
    """Two-branch classifier: image and tabular embeddings are concatenated and scored.

    The classifier ends in ``nn.Sigmoid``, so the output is a value in (0, 1)
    per sample, not a logit.
    """

    def __init__(self):
        super().__init__()
        self.img_feature_extractor = ImgFeatureExtractor()
        self.tabular_feature_extractor = TabularFeatureExtractor()
        # 512 image features + 512 tabular features -> one sigmoid output.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=1024, out_features=1),
            nn.Sigmoid(),
        )

    def forward(self, img, tabular):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for the image/tabular batch."""
        fused = torch.cat(
            (self.img_feature_extractor(img), self.tabular_feature_extractor(tabular)),
            dim=-1,
        )
        return self.classifier(fused)

# 4. train, validation 함수 생성

### 1) Train 
- input : 사용모델, optimizer, train_loader, val_loader, scheduler, device 
- output : bestmodel

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model`` for CFG['EPOCHS'] epochs and return it with its best weights loaded.

    After every epoch the model is scored on ``val_loader`` with
    ``validation``; the weights of the epoch with the highest validation
    score are snapshotted and restored before returning.

    Args:
        model: Network called as ``model(img, tabular)`` and returning
            probabilities of shape (batch, 1).
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Yields ``(img, tabular, label)`` batches.
        val_loader: Validation loader passed to ``validation``.
        scheduler: Optional scheduler stepped with the validation score.
        device: Device to train on.

    Returns:
        ``model`` itself, carrying the weights of the best-scoring epoch.
    """
    import copy  # local import: only needed for the weight snapshot below

    model.to(device)
    # The model's classifier already ends in nn.Sigmoid, so its outputs are
    # probabilities. BCELoss is the matching criterion; BCEWithLogitsLoss
    # would apply a second sigmoid on top.
    criterion = nn.BCELoss().to(device)

    # Start below any possible score so the first epoch is always captured.
    best_score = float('-inf')
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for img, tabular, label in tqdm(train_loader):
            img = img.float().to(device)
            tabular = tabular.float().to(device)
            label = label.float().to(device)

            optimizer.zero_grad()

            model_pred = model(img, tabular)
            loss = criterion(model_pred, label.reshape(-1,1))

            # Back-propagate and update the weights.
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            best_score = val_score
            # Snapshot the weights: keeping a reference to `model` would just
            # alias the object that later epochs keep updating.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

### 2) Validation
- input : 사용모델, criterion, val_loader, device 
- output : 평균 val_loss, val_score

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader``.

    Model outputs above 0.5 are counted as class 1, everything else as class 0.

    Returns:
        (mean batch loss, macro-averaged F1 score).
    """
    threshold = 0.5
    batch_losses, scores, targets = [], [], []

    model.eval()
    with torch.no_grad():
        for img, tabular, label in tqdm(iter(val_loader)):
            # Record the ground truth before the label is cast and moved to the device.
            targets.extend(label.tolist())

            img = img.float().to(device)
            tabular = tabular.float().to(device)
            label = label.float().to(device)

            output = model(img, tabular)
            batch_losses.append(criterion(output, label.reshape(-1,1)).item())
            scores.extend(output.squeeze(1).to('cpu').tolist())

    predicted = np.where(np.array(scores) > threshold, 1, 0)
    val_score = metrics.f1_score(y_true=targets, y_pred=predicted, average='macro')
    return np.mean(batch_losses), val_score

# 5. Run

In [None]:
model = nn.DataParallel(ClassificationModel())
model.eval()

# Optimizer: RMSprop is the active choice; Adam is the alternative.
optimizer = torch.optim.RMSprop(
    params=model.parameters(),
    lr=CFG["LEARNING_RATE"],
)
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])

# Halve the learning rate when the monitored score (mode='max') stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=1,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.63784] Val Loss : [0.62659] Val Score : [0.76495]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.59715] Val Loss : [0.59862] Val Score : [0.77494]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.59615] Val Loss : [0.60683] Val Score : [0.78304]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.58386] Val Loss : [0.61100] Val Score : [0.76852]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.58154] Val Loss : [0.60450] Val Score : [0.78260]
Epoch 00005: reducing learning rate of group 0 to 5.0000e-05.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.57202] Val Loss : [0.60069] Val Score : [0.77622]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.57610] Val Loss : [0.60392] Val Score : [0.76064]
Epoch 00007: reducing learning rate of group 0 to 2.5000e-05.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.57357] Val Loss : [0.60271] Val Score : [0.77082]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.56699] Val Loss : [0.60327] Val Score : [0.77622]
Epoch 00009: reducing learning rate of group 0 to 1.2500e-05.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.57011] Val Loss : [0.60243] Val Score : [0.78159]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.56354] Val Loss : [0.60201] Val Score : [0.78743]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.56568] Val Loss : [0.60477] Val Score : [0.78260]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.56562] Val Loss : [0.60589] Val Score : [0.77820]
Epoch 00013: reducing learning rate of group 0 to 6.2500e-06.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.56257] Val Loss : [0.60439] Val Score : [0.78788]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.56273] Val Loss : [0.60875] Val Score : [0.77858]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.56463] Val Loss : [0.60629] Val Score : [0.78260]
Epoch 00016: reducing learning rate of group 0 to 3.1250e-06.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.55708] Val Loss : [0.60547] Val Score : [0.76768]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.56355] Val Loss : [0.60270] Val Score : [0.77143]
Epoch 00018: reducing learning rate of group 0 to 1.5625e-06.


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.56501] Val Loss : [0.60719] Val Score : [0.77778]


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.56499] Val Loss : [0.60484] Val Score : [0.77198]
Epoch 00020: reducing learning rate of group 0 to 7.8125e-07.


# 6. Inference

In [None]:
# Unlabelled test data: with labels=None the dataset yields (image, tabular) pairs only.
test_dataset = CustomDataset(medical_df=test_df, labels=None, transforms=test_transforms)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
def inference(model, test_loader, device):
    """Predict binary labels for every sample in ``test_loader``.

    Args:
        model: Network called as ``model(img, tabular)``.
        test_loader: Yields ``(img, tabular)`` batches.
        device: Device to run on.

    Returns:
        NumPy array of 0/1 labels; 1 where the model output exceeds 0.5.
    """
    threshold = 0.5
    scores = []

    model.to(device)
    model.eval()
    with torch.no_grad():
        for img, tabular in tqdm(iter(test_loader)):
            batch_out = model(img.float().to(device), tabular.float().to(device))
            scores.extend(batch_out.squeeze(1).to('cpu').tolist())

    return np.where(np.array(scores) > threshold, 1, 0)

In [None]:
# Predict the test-set labels with the model returned by train().
preds = inference(infer_model, test_loader, device)

  0%|          | 0/13 [00:00<?, ?it/s]

# 7. Submission

In [None]:
# Load the sample submission file from the working directory.
submit = pd.read_csv('./sample_submission.csv')

- 파일 저장

In [None]:
# to_csv does not create missing directories, so make sure ./sub exists first.
os.makedirs('./sub', exist_ok=True)

# Fill in the predicted labels and write the submission file.
# NOTE(review): the file name says "otsu", but the dataset class currently uses
# Canny edges - confirm the name matches the run.
submit['N_category'] = preds
submit.to_csv('./sub/submit_efficient_otsu_rmsprop_lastnull_1130.csv', index=False)