In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import argparse
import pprint
import tqdm

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score, roc_auc_score
from dataset import get_dataloader
from model import get_model
import util
import util.config
import util.checkpoint
import util.metrics
import util.seed

import warnings
warnings.filterwarnings('ignore')

util.seed.seed_everything(2020)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
from dataset.dataset_factory import MelanomaDataset, get_transform
from model.model_factory import Model
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

In [5]:
img_sizes = [128, 192, 256, 384, 512, 768, 1024]
img_size= img_sizes[0]
input_dir = f'./input/{img_size}x{img_size}/'
train_img_dir = input_dir + 'train/'
test_img_dir = input_dir + 'test/'

In [12]:
# Read csv
#train_df = pd.read_csv(input_dir + 'train.csv')
train_df = pd.read_csv('train_new.csv')
test_df = pd.read_csv(input_dir + 'test.csv')

# Meta features
meta_features = ['sex', 'age_approx']

# Sex features
train_df['sex'] = train_df['sex'].map({'male': 1, 'female': 0})
test_df['sex'] = test_df['sex'].map({'male': 1, 'female': 0})
#train_df['sex'] = train_df['sex'].fillna(-1)
#test_df['sex'] = test_df['sex'].fillna(-1)

# Age features
train_df['age_approx'] /= 100.0
test_df['age_approx'] /= 100.0
#train_df['age_approx'] = train_df['age_approx'].fillna(0)
#test_df['age_approx'] = test_df['age_approx'].fillna(0)

# ID features
#train_df['patient_id'] = train_df['patient_id'].fillna(0)

KeyError: 'age_approx'

In [16]:
FOLDS = 5
SEED = 2020
skf = KFold(n_splits=FOLDS, shuffle=True, random_state=SEED)

for fold, (idxT, idxV) in enumerate(skf.split(np.arange(15))):
    print('=' * 20, 'Fold', fold + 1, '=' * 20)
    train_idx = train_df.loc[train_df['fold'].isin(idxT)].index
    valid_idx = train_df.loc[train_df['fold'].isin(idxV)].index



In [23]:
train_df.loc[train_df['fold'].isin(idxT)].reset_index(drop=True)

Unnamed: 0.1,Unnamed: 0,image_name,patient_id,sex,age,target,patient_code,fold
0,0,ISIC_2637011,IP_7279968,1,45.0,0,2,1
1,2,ISIC_0052212,IP_2842074,0,50.0,0,1551,7
2,3,ISIC_0068279,IP_6890425,0,45.0,0,505,2
3,5,ISIC_0074311,IP_2950485,0,40.0,0,40,1
4,6,ISIC_0074542,IP_4698288,1,25.0,0,495,4
...,...,...,...,...,...,...,...,...
26099,33119,ISIC_9998965,IP_3293337,1,50.0,0,1483,11
26100,33121,ISIC_9999134,IP_6526534,1,50.0,0,183,11
26101,33122,ISIC_9999320,IP_3650745,1,65.0,0,235,10
26102,33123,ISIC_9999515,IP_2026598,1,20.0,0,49,1


In [21]:
train_df.iloc[train_idx].reset_index(drop=True)

Unnamed: 0.1,Unnamed: 0,image_name,patient_id,sex,age,target,patient_code,fold
0,0,ISIC_2637011,IP_7279968,1,45.0,0,2,1
1,2,ISIC_0052212,IP_2842074,0,50.0,0,1551,7
2,3,ISIC_0068279,IP_6890425,0,45.0,0,505,2
3,5,ISIC_0074311,IP_2950485,0,40.0,0,40,1
4,6,ISIC_0074542,IP_4698288,1,25.0,0,495,4
...,...,...,...,...,...,...,...,...
26099,33119,ISIC_9998965,IP_3293337,1,50.0,0,1483,11
26100,33121,ISIC_9999134,IP_6526534,1,50.0,0,183,11
26101,33122,ISIC_9999320,IP_3650745,1,65.0,0,235,10
26102,33123,ISIC_9999515,IP_2026598,1,20.0,0,49,1


In [57]:
train_dataset = MelanomaDataset(train_df, train_img_dir, train=True, transforms=train_transforms, meta_features=meta_features)
valid_dataset = MelanomaDataset(train_df, train_img_dir, train=True, transforms=valid_transforms, meta_features=meta_features)
test_dataset = MelanomaDataset(test_df, test_img_dir, train=False, transforms=test_transforms, meta_features=meta_features)

In [60]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=16, shuffle=False, num_workers=2)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=2)

In [62]:
test_iter = iter(train_dataloader)
next(test_iter)

[[tensor([[[[ 1.5810,  1.5297,  1.4612,  ...,  1.5297,  1.5297,  1.5297],
            [ 1.5297,  1.5125,  1.4954,  ...,  1.4954,  1.4269,  1.3413],
            [ 1.5125,  1.5125,  1.5125,  ...,  1.4269,  1.4783,  1.5297],
            ...,
            [ 1.5125,  1.4440,  1.4440,  ...,  1.6324,  1.6324,  1.6153],
            [ 1.3584,  1.4612,  1.5125,  ...,  1.5639,  1.5639,  1.5639],
            [ 1.3584,  1.3584,  1.3927,  ...,  1.5468,  1.5468,  1.5468]],
  
           [[ 1.0630,  1.0105,  0.9755,  ...,  1.0630,  1.0630,  1.0630],
            [ 1.0455,  1.0280,  1.0105,  ...,  1.0105,  0.9405,  0.8529],
            [ 1.0630,  1.0630,  1.0630,  ...,  0.9405,  0.9930,  1.0280],
            ...,
            [ 1.0805,  1.0105,  1.0105,  ...,  1.1681,  1.1681,  1.1506],
            [ 0.9230,  1.0280,  1.0805,  ...,  1.0980,  1.0980,  1.0980],
            [ 0.9230,  0.9230,  0.9580,  ...,  1.0805,  1.0980,  1.0980]],
  
           [[ 1.1585,  1.1062,  1.0539,  ...,  0.8971,  0.8622,  0.862

In [40]:
from sklearn.model_selection import StratifiedKFold, GroupKFold
skf = GroupKFold(n_splits=5)

In [12]:
split = 'test'
if split in ['train', 'valid']:
    print('a')