# - changes
    model : resnext101_32x8d_wsl
    transforms
    optimizer -- SGD
    scheduler -- cosineannealing
---

In [1]:
import gc
import os
import time
import numpy as np
import pandas as pd
from glob import glob
import tqdm
import argparse
import tqdm
from collections import defaultdict, Counter
from PIL import Image
import cv2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch import nn, cuda
from torch.autograd import Variable 
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import CenterCrop

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from utils import count_parameters, seed_everything, AdamW, CosineAnnealingWithRestartsLR

from efficientnet_pytorch import EfficientNet
import torchvision.models as models

from model import Baseline, Resnet18, Resnet50, Resnext50, Resnext101
from customs import mixup_data, mixup_criterion
from transforms import get_transform
from dataloader import make_loader, TestDataset

from torchvision.models.resnet import ResNet, Bottleneck

def clahe(img, clip_limit=1.0, tile_grid_size=(16, 16)):
    if img.dtype != np.uint8:
        raise TypeError("clahe supports only uint8 inputs")

    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    if len(img.shape) == 2:
        img = clahe.apply(img)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
        img[:, :, 0] = clahe.apply(img[:, :, 0])
        img = cv2.cvtColor(img, cv2.COLOR_LAB2RGB)

    return img

def _resnext(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
#     state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
#     model.load_state_dict(state_dict)
    return model

def resnext101_32x8d_wsl(progress=True, **kwargs):
    """Constructs a ResNeXt-101 32x8 model pre-trained on weakly-supervised data
    and finetuned on ImageNet from Figure 5 in
    `"Exploring the Limits of Weakly Supervised Pretraining" <https://arxiv.org/abs/1805.00932>`_
    Args:
        progress (bool): If True, displays a progress bar of the download to stderr.
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnext('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], False, progress, **kwargs)

def to_numpy(t):
        return t.cpu().detach().numpy()

def make_folds(df, n_folds: int) -> pd.DataFrame:

    cls_counts = Counter([classes for classes in df['label']])
    fold_cls_counts = defaultdict()
    for class_index in cls_counts.keys():
        fold_cls_counts[class_index] = np.zeros(n_folds, dtype=np.int)

    df['fold'] = -1
    pbar = tqdm.tqdm(total=len(df))

    def get_fold(row):
        class_index = row['label']
        counts = fold_cls_counts[class_index]
        fold = np.argmin(counts)
        counts[fold] += 1
        fold_cls_counts[class_index] = counts
        row['fold']=fold
        pbar.update()
        return row

    df = df.apply(get_fold, axis=1)
    return df

def make_label(label):
    if label == 'MA':
        return 1
    elif label == 'GMA':
        return 2
    elif label == 'BOY':
        return 3
    elif label == 'FA':
        return 4
    elif label == 'GFA':
        return 5
    #elif label == 'GIRL':
    else:
        return 6


class TrainDataset(Dataset):
    def __init__(self, df, mode='train', transforms=None):
        self.df = df
        self.mode = mode
        self.transform = transforms[self.mode]

    def __len__(self):            
        return len(self.df)

    def __getitem__(self, idx):

        image = Image.open(IMAGES_PATH + '/' + self.df['filename'][idx]).convert("RGB")
        image = np.array(image)
        image = clahe((cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))
        image = Image.fromarray(image)

        if self.transform:
            image = self.transform(image)
        label = self.df['label'][idx]

        return image, label

def make_loader(df, transforms, batch_size=256, num_workers=0):

    dataset = TrainDataset(df, transforms2)
    # dataset = image, label from Glass_Dataset

    loader = DataLoader(dataset, # image, label
                        batch_size=batch_size,
                        shuffle=True, 
                        num_workers=num_workers, 
                        pin_memory=True)

    return loader

class TestDataset(Dataset):
        def __init__(self, df, transform=None):
            self.df = df
            self.transform = transform

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):

            image = Image.open(IMAGES_PATH+'/'+self.df['filename'][idx]).convert('RGB')
            image = np.array(image)
            image = clahe((cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))
            image = Image.fromarray(image)

            if self.transform:
                image = self.transform(image)

            return image

IMAGES_PATH = 'C:/SEOYILGUK/JOB_VISION/faces_images'
csv_path = 'C:/SEOYILGUK/JOB_VISION/train.csv'
train_df = pd.read_csv('C:/SEOYILGUK/JOB_VISION/train.csv')
test_df = pd.read_csv('C:/SEOYILGUK/JOB_VISION/test.csv')

def make_df(image_path, csv_path):
    csv_df = pd.read_csv(csv_path)
    path_df = pd.DataFrame()
    file_list = os.listdir(image_path)
    path_df['filename'] = file_list
    path_df = path_df.iloc[1:,:] # remove .DS_Store filename
    df = csv_df.merge(path_df, on='filename', how='left')
    df['label'] = df['label']-1
    print(df.label.value_counts())
    return df

#num_fold = 0 #select fold number for training
for num_fold in range(5):
    
    print("===============================================================================================")
    print("=================================༼ つ ◕_◕ ༽つ　 STARTS {}_th FOLD ༼ つ ◕_◕ ༽つ ==================".format(num_fold))
    print("===============================================================================================")
    train_csv = pd.read_csv("../JOB_VISION/train.csv")

    SEED = 42
    seed_everything(SEED)

    device = 'cuda'
    use_gpu = cuda.is_available()

    if use_gpu:
        print("enable gpu use")
    else:
        print("enable cpu for debugging")

    target_size = (128, 128)

    num_classes = 6
    model = resnext101_32x8d_wsl(num_classes=num_classes)
    model.cuda()

    #model = models.resnet50() 
    #model.fc = nn.Linear(2048, args.num_classes)

    # elif args.model == 'resnet18': 
    #     model = Resnet18(args.num_classes, dropout=False)
    # elif args.model == 'resnet50':
    #     model = Resnet50(args.num_classes, dropout=False)
    # elif args.model == 'efficient':
    #     model = EfficientNet.from_pretrained('efficientnet-b0')
    #     in_features = model._fc.in_features
    #     model._fc = nn.Linear(in_features, args.num_classes)
    # elif args.model == 'densenet201':
    #     model = models.densenet201(pretrained=True)
    #     model.classifier = nn.Linear(1920, args.num_classes)
    # elif args.model == 'resnext50':
    #     model = Resnext50(args.num_classes, dropout=False)
    # elif args.model == 'resnext101':
    #     model = Resnext101(args.num_classes, dropout=False)
    # elif args.model == 'mobilenet':
    #     assert args.input_size == 158
    #     model = torch.hub.load('pytorch/vision', 'mobilenet_v2', pretrained=True)
    #     model.classifier = nn.Sequential(nn.Dropout(0.2),
    #                         nn.Linear(1280, args.num_classes))
    # else:
    #     raise NotImplementedError

    if use_gpu:
        model = model.to(device)

    # optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=0.00025)
    #optimizer = AdamW(model.parameters(), 2.5e-4, weight_decay=0.000025)
    optimizer = optim.SGD(model.parameters(), 0.01, momentum=0.9, weight_decay=0.0025)

    ###### SCHEDULER #######
    #scheduler = ReduceLROnPlateau(optimizer, 'max', patience=5, factor=0.5)

    eta_min = 0.000001
    T_max = 10
    T_mult = 1
    restart_decay = 0.97
    scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=T_max, eta_min=eta_min, T_mult=T_mult, restart_decay=restart_decay)

    #scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

    criterion = nn.CrossEntropyLoss() 

    num_parameters = count_parameters(model)
    print("==== number of parameters that can be trained: {}".format(num_parameters))

    df = make_df(IMAGES_PATH, csv_path)

    # 5 stratifiedKFold
    folds = make_folds(df, 5)
    print("==== fold_df created")
    print(folds.head())

    train_df = folds[folds['fold'] != num_fold]
    valid_df = folds[folds['fold'] == num_fold]

    train_df = train_df[['filename', 'label']].reset_index(drop=True)
    valid_df = valid_df[['filename', 'label']].reset_index(drop=True)
    print("number of train images: {}".format(len(train_df)))
    print("number of valid images: {}".format(len(valid_df)))

    start_time = time.time()
    
    print("shape of the df: {}".format(df.shape))    
    
    data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(target_size, scale=(0.7, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5161, 0.4225, 0.3763], 
            [0.2442, 0.2184, 0.2164]
        )]),
    'valid': transforms.Compose([
        transforms.RandomResizedCrop(target_size, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.5161, 0.4225, 0.3763], 
            [0.2442, 0.2184, 0.2164]
        )])
    }
    
    batch_size = 32
    train_dataset = TrainDataset(train_df, mode='train', transforms=data_transforms)
    valid_dataset = TrainDataset(valid_df, mode='valid', transforms=data_transforms)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    print("number train batches: {}".format(len(train_loader)))
    print("number valid batches: {}".format(len(valid_loader)))
    elapsed = time.time() - start_time
    print("elapsed: {:.0f}".format(elapsed))

    best_val_acc = 0
    grad_clip_step = 100
    grad_clip = 100
    step = 0
    # accumulation_step = 2

    print("start training")

    for epoch_idx in range(1, 100 + 1):

        start_time = time.time()

        train_loss = 0
        train_total_correct = 0
        model.train()
        optimizer.zero_grad()

        for batch_idx, (image, labels) in enumerate(train_loader):
            if use_gpu:
                image = image.to(device)
                labels = labels.to(device)

            # print("shape of the image: {}".format(image.shape))
            # print("shape of the labels: {}".format(labels.shape))
            # print(image)
            # print(labels)

            #if args.mixup_loss:
            #    inputs, targets_a, targets_b, lam = mixup_data(image, labels, alpha=0.4, device=device)
            #    inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
            #    output = model(inputs)
            #    loss = mixup_criterion(criterion, output.to(device), targets_a.to(device), targets_b.to(device), lam)
            #else:
            output = model(image)
            loss = criterion(output, labels)

            # gradient explosion prevention
            if step > grad_clip_step:
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

            step += 1

            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            # output_prob = F.softmax(output, dim=1)

            train_loss += loss.item() / len(train_loader)

        # Validation Starts
        model.eval()
        valid_loss = 0
        valid_total_correct = 0

        with torch.no_grad():
            for batch_idx, (image, labels) in enumerate(valid_loader):
                if use_gpu:
                    image = image.to(device)
                    labels = labels.to(device)


                output = model(image)
                loss = criterion(output, labels)

                output_prob = F.softmax(output, dim=1)

                predict_vector = np.argmax(to_numpy(output_prob), axis=1)
                label_vector = to_numpy(labels)
                bool_vector = predict_vector == label_vector
                #print('val_pred', predict_vector[:5])
                #print('val_label', label_vector[:5])
                #print("shape of prediction vector: {}".format(predict_vector.shape))
                #print(predict_vector.head())
                #print("shape of label vector: {}".format(label_vector.shape))

                valid_loss += loss.item() / len(valid_loader)
                valid_total_correct += bool_vector.sum()


        elapsed = time.time() - start_time

        val_acc = valid_total_correct / len(valid_loader.dataset)

        # best val_acc checkpoint
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print("val_acc has improved !! ")
            torch.save(model.state_dict(), 'resnext101_32x8d_wsl_clahe_fold_{}_best_model.pt'.format(num_fold))
            print("================ ༼ つ ◕_◕ ༽つ BEST epoch : {}, Accuracy : {} ".format(epoch_idx, best_val_acc))
            #file_save_name = 'best_acc' + '_' + str(num_fold)
            #print(file_save_name)
        else:
            print("val acc has not improved")

        lr = [_['lr'] for _ in optimizer.param_groups]

        #if args.scheduler == 'plateau':
        scheduler.step(val_acc)
        #else:
        #    scheduler.step()

        # nsml.save(epoch_idx)

        print("Epoch {}/{}  train_loss: {:.5f}  valid_loss {:.5f}  valid_acc: {:.5f}  lr: {:.6f}  elapsed: {:.0f}".format(
               epoch_idx, 100, train_loss, valid_loss, val_acc, lr[0], elapsed))
                #epoch_idx, args.epochs, train_loss, valid_loss, val_acc, lr[0], elapsed

#    IMAGES_PATH = 'C:/SEOYILGUK/JOB_VISION/faces_images'
#    test_df = pd.read_csv('C:/SEOYILGUK/JOB_VISION/test.csv')

    batch_size = 32
    num_workers = 0

    # model = models.resnet18(pretrained=False) 
    # model.fc = nn.Linear(512, args.num_classes)

    # checkpoint = 'team012/KHD2019_FUNDUS/93'
    # nsml.load(checkpoint, load_fn=load)

    test_transforms = transforms.Compose([
                        transforms.RandomResizedCrop(target_size, scale=(0.8, 1.0)),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        transforms.Normalize(
                            [0.5161, 0.4225, 0.3763], 
                            [0.2442, 0.2184, 0.2164])
                    ])

    test_dataset = TestDataset(test_df, test_transforms)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False, 
                             num_workers=num_workers, 
                             pin_memory=True)
    
    print("============== ༼ つ ◕_◕ ༽つ BEST epoch : {}, Accuracy : {} ====================================".format(epoch_idx, best_val_acc))
    print("========================== ༼ つ ◕_◕ ༽つ Model Load {}_th FOLD =================================".format(num_fold))
    model.load_state_dict(torch.load('resnext101_32x8d_wsl_clahe_fold_{}_best_model.pt'.format(num_fold)))

    model.eval()
    # for _ in range(tta):
        # print("tta {} predict".format(_+1))
    predictions = np.zeros((len(test_loader.dataset), 6))

    with torch.no_grad():
        for i, image in enumerate(test_loader):
            image = image.to(device)
            output = model(image) # output shape (batch_num, num_classes)

            predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy()
    print("predict values check : ",predictions[0])
    np.savetxt("../JOB_VISION/resnext101_32x8d_wsl_submission/resnext101_32x8d_wsl_clahe_{}_fold.csv".format(num_fold), predictions, delimiter=",")
    print("===============================================================================================")
    print("=================================༼ つ ◕_◕ ༽つ　 SAVED {}_th FOLD ༼ つ ◕_◕ ༽つ ==================".format(num_fold))
    print("===============================================================================================")

enable gpu use
==== number of parameters that can be trained: 86754630
3    2568
0    1723
1     601
4     374
5     354
2     230
Name: label, dtype: int64


5851it [00:00, 7606.85it/s]                                                                                            


==== fold_df created
        filename  label  fold
0  face_3267.png      0     1
1   face_587.png      3     0
2  face_6585.png      3     1
3  face_3094.png      3     2
4  face_1170.png      3     3
number of train images: 4679
number of valid images: 1171
shape of the df: (5850, 3)
number train batches: 147
number valid batches: 37
elapsed: 0
start training
val_acc has improved !! 
Epoch 1/100  train_loss: 2.57137  valid_loss 1.46804  valid_acc: 0.43040  lr: 0.010000  elapsed: 83
val_acc has improved !! 
Epoch 2/100  train_loss: 1.53559  valid_loss 1.41016  valid_acc: 0.45431  lr: 0.009954  elapsed: 87
val_acc has improved !! 
Epoch 3/100  train_loss: 1.51124  valid_loss 1.34955  valid_acc: 0.49274  lr: 0.009949  elapsed: 87
val_acc has improved !! 
Epoch 4/100  train_loss: 1.40694  valid_loss 1.36218  valid_acc: 0.50726  lr: 0.009940  elapsed: 87
val_acc has improved !! 
Epoch 5/100  train_loss: 1.35765  valid_loss 1.30959  valid_acc: 0.51751  lr: 0.009937  elapsed: 87
val_acc has 

5851it [00:00, 7168.72it/s]                                                                                            


==== fold_df created
        filename  label  fold
0  face_3267.png      0     1
1   face_587.png      3     0
2  face_6585.png      3     1
3  face_3094.png      3     2
4  face_1170.png      3     3
number of train images: 4679
number of valid images: 1171
shape of the df: (5850, 3)
number train batches: 147
number valid batches: 37
elapsed: 0
start training
val_acc has improved !! 
Epoch 1/100  train_loss: 2.43347  valid_loss 1.54765  valid_acc: 0.45602  lr: 0.010000  elapsed: 82
val acc has not improved
Epoch 2/100  train_loss: 1.56966  valid_loss 1.42263  valid_acc: 0.44150  lr: 0.009949  elapsed: 87
val_acc has improved !! 
Epoch 3/100  train_loss: 1.46083  valid_loss 1.42207  valid_acc: 0.50726  lr: 0.009952  elapsed: 87
val acc has not improved
Epoch 4/100  train_loss: 1.39103  valid_loss 1.70119  valid_acc: 0.50299  lr: 0.009937  elapsed: 87
val_acc has improved !! 
Epoch 5/100  train_loss: 1.32162  valid_loss 1.31308  valid_acc: 0.53629  lr: 0.009938  elapsed: 88
val_acc has 

5851it [00:00, 7064.82it/s]                                                                                            


==== fold_df created
        filename  label  fold
0  face_3267.png      0     1
1   face_587.png      3     0
2  face_6585.png      3     1
3  face_3094.png      3     2
4  face_1170.png      3     3
number of train images: 4679
number of valid images: 1171
shape of the df: (5850, 3)
number train batches: 147
number valid batches: 37
elapsed: 0
start training
val_acc has improved !! 
Epoch 1/100  train_loss: 2.48104  valid_loss 1.53611  valid_acc: 0.29462  lr: 0.010000  elapsed: 83
val_acc has improved !! 
Epoch 2/100  train_loss: 1.53146  valid_loss 1.39620  valid_acc: 0.43980  lr: 0.009979  elapsed: 88
val_acc has improved !! 
Epoch 3/100  train_loss: 1.44315  valid_loss 1.29000  valid_acc: 0.53886  lr: 0.009952  elapsed: 88
val_acc has improved !! 
Epoch 4/100  train_loss: 1.34934  valid_loss 1.16201  valid_acc: 0.57558  lr: 0.009929  elapsed: 89
val_acc has improved !! 
Epoch 5/100  train_loss: 1.18016  valid_loss 0.88157  valid_acc: 0.68232  lr: 0.009918  elapsed: 89
val acc has 

5851it [00:00, 7151.20it/s]                                                                                            


==== fold_df created
        filename  label  fold
0  face_3267.png      0     1
1   face_587.png      3     0
2  face_6585.png      3     1
3  face_3094.png      3     2
4  face_1170.png      3     3
number of train images: 4680
number of valid images: 1170
shape of the df: (5850, 3)
number train batches: 147
number valid batches: 37
elapsed: 0
start training
val_acc has improved !! 
Epoch 1/100  train_loss: 2.53593  valid_loss 1.67760  valid_acc: 0.29402  lr: 0.010000  elapsed: 82
val_acc has improved !! 
Epoch 2/100  train_loss: 1.53971  valid_loss 1.41951  valid_acc: 0.43932  lr: 0.009979  elapsed: 88
val acc has not improved
Epoch 3/100  train_loss: 1.49149  valid_loss 1.96168  valid_acc: 0.40598  lr: 0.009952  elapsed: 88
val_acc has improved !! 
Epoch 4/100  train_loss: 1.45351  valid_loss 1.49800  valid_acc: 0.47863  lr: 0.009959  elapsed: 87
val_acc has improved !! 
Epoch 5/100  train_loss: 1.35705  valid_loss 1.31234  valid_acc: 0.54359  lr: 0.009944  elapsed: 88
val acc has 

5851it [00:00, 7275.70it/s]                                                                                            


==== fold_df created
        filename  label  fold
0  face_3267.png      0     1
1   face_587.png      3     0
2  face_6585.png      3     1
3  face_3094.png      3     2
4  face_1170.png      3     3
number of train images: 4683
number of valid images: 1167
shape of the df: (5850, 3)
number train batches: 147
number valid batches: 37
elapsed: 0
start training
val_acc has improved !! 
Epoch 1/100  train_loss: 2.36748  valid_loss 1.57851  valid_acc: 0.43616  lr: 0.010000  elapsed: 83
val_acc has improved !! 
Epoch 2/100  train_loss: 1.54235  valid_loss 1.47614  valid_acc: 0.44816  lr: 0.009953  elapsed: 86
val_acc has improved !! 
Epoch 3/100  train_loss: 1.46290  valid_loss 1.37636  valid_acc: 0.52442  lr: 0.009951  elapsed: 86
val_acc has improved !! 
Epoch 4/100  train_loss: 1.39509  valid_loss 1.20326  valid_acc: 0.57069  lr: 0.009932  elapsed: 86
val_acc has improved !! 
Epoch 5/100  train_loss: 1.27033  valid_loss 1.04067  valid_acc: 0.61354  lr: 0.009920  elapsed: 86
val_acc has 