# submissionを行う  
Kaggle datasetとして保存しておいた学習済みモデルのパラメータをこのnotebook内で読み込み、推論結果を提出する。  
このnotebookはmelanoma_tachyon_simplemodel_v1の続きとなっている。  

In [None]:
# Runtime switches for the notebook.
DEBUG = False  # when True, later cells truncate the train/test frames for a quick run
local = False  # when True, the EfficientNet package is resolved from a local drive path

In [None]:
#before import process
import sys
# Pick the location of the bundled EfficientNet-PyTorch sources depending on
# whether the notebook runs on a local machine or inside the Kaggle kernel.
if local:
    package_path = 'Q:/kaggle/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master'
else:
    package_path = '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master'

# Re-running this cell must not keep stacking the same entry onto sys.path.
if package_path not in sys.path:
    sys.path.append(package_path)

#imports
import os, warnings, random, time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from IPython.display import display
from tqdm import tqdm_notebook as tqdm

import torch
import albumentations
from torch import nn, optim
from torch.functional import F 
from torch.utils.data import Dataset, DataLoader
from efficientnet_pytorch import model as enet

#Data Augmentation用ライブラリ
import albumentations as A

%matplotlib inline
warnings.filterwarnings('ignore')

In [None]:
SEED = 69
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED``, and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also cover additional GPUs; this is harmless when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, which
    # defeats the determinism requested above, so it has to stay disabled.
    torch.backends.cudnn.benchmark = False

# Fix all RNG seeds before any randomised work happens.
seed_everything(SEED)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Switch for wrapping the inference loop in a tqdm progress bar.
Progress_Bar = True

In [None]:
# --- Inference configuration -------------------------------------------------
# Backbone / checkpoint identification.
enet_type = 'efficientnet-b0'
model_name = 'v1'
pretrained_models = "../input/melanoma-tachyon-simplemodel-v1-models"  # directory holding the saved weights

# Evaluation settings.
n_fold = 4
TTA = 1  # number of test-time-augmentation passes averaged per model

# Data pipeline settings.
image_size = 224  # side length images are resized to
batch_size = 64
num_workers = 2  # DataLoader worker processes

In [None]:
# Image folders and metadata tables of the resized (256x256) competition data.
train_images = '../input/jpeg-melanoma-256x256/train'
test_images = '../input/jpeg-melanoma-256x256/test'
train_df = pd.read_csv('/kaggle/input/jpeg-melanoma-256x256/train.csv')
test_df = pd.read_csv('/kaggle/input/jpeg-melanoma-256x256/test.csv')

# Debug runs only look at the head of each table.
if DEBUG:
    train_df, test_df = train_df[:1000], test_df[:500]

# One-hot encode anatom_site_general_challenge. Train and test are encoded
# together so both frames receive the same set of site_* columns (NaN included).
n_train = train_df.shape[0]
concat = pd.concat(
    [train_df['anatom_site_general_challenge'], test_df['anatom_site_general_challenge']],
    ignore_index=True,
)
dummies = pd.get_dummies(concat, dummy_na=True, dtype=np.uint8, prefix='site')
train_df = pd.concat([train_df, dummies.iloc[:n_train]], axis=1)
test_df = pd.concat([test_df, dummies.iloc[n_train:].reset_index(drop=True)], axis=1)

for frame in (train_df, test_df):
    # Sex: male -> 1, female -> 0, missing -> -1.
    frame['sex'] = frame['sex'].map({'male': 1, 'female': 0}).fillna(-1)
    # Age: scaled by the maximum age of the same frame, missing -> 0.
    frame['age_approx'] = (frame['age_approx'] / frame['age_approx'].max()).fillna(0)

# Missing patient ids become 0 (train only).
train_df['patient_id'] = train_df['patient_id'].fillna(0)

In [None]:
# Tabular features: sex, scaled age and the one-hot site_* columns.
# Matching on the 'site_' *prefix* selects only the dummy columns. A substring
# match would also pick up the raw 'anatom_site_general_challenge' column and
# then need a compensating remove(), which raises if that column is ever absent.
meta_features = ['sex', 'age_approx'] + [col for col in train_df.columns if col.startswith('site_')]

In [None]:
# Backbone name -> path of its weight file inside the efficientnet-pytorch
# dataset. Each file name embeds a short hash suffix.
_ENET_WEIGHTS_DIR = '../input/efficientnet-pytorch'
_ENET_WEIGHT_HASHES = (
    ('b0', '08094119'),
    ('b1', 'dbc7070a'),
    ('b2', '27687264'),
    ('b3', 'c8376fa2'),
    ('b4', 'e116e8b3'),
    ('b5', '586e6cc6'),
)
pretrained_model = {
    f'efficientnet-{variant}': f'{_ENET_WEIGHTS_DIR}/efficientnet-{variant}-{digest}.pth'
    for variant, digest in _ENET_WEIGHT_HASHES
}
model_save_path = False


class enetv2(nn.Module):
    """EfficientNet backbone followed by a single linear head.

    The backbone is built with ``EfficientNet.from_name`` and initialised from
    the weight file listed in ``pretrained_model``. Its original ``_fc`` layer
    is replaced by an identity so the backbone returns features, and ``myfc``
    maps those features to ``out_dim`` outputs.

    Args:
        backbone: Key of ``pretrained_model`` (e.g. ``'efficientnet-b0'``).
        out_dim: Number of output units of the linear head.
    """

    def __init__(self, backbone, out_dim=1):
        super().__init__()
        self.enet = enet.EfficientNet.from_name(backbone)
        # map_location='cpu' keeps loading independent of the device the
        # weight file was saved from, so it also works in a CPU-only session.
        self.enet.load_state_dict(torch.load(pretrained_model[backbone], map_location='cpu'))

        self.myfc = nn.Linear(self.enet._fc.in_features, out_dim)
        self.enet._fc = nn.Identity()
        # Not applied in forward(); kept so the module's attributes are unchanged.
        self.sigmoid = nn.Sigmoid()

    def extract(self, x):
        """Return the backbone output for ``x`` (the head is not applied)."""
        return self.enet(x)

    def forward(self, x):
        """Return the raw head output for ``x``; no sigmoid is applied here."""
        x = self.extract(x)
        x = self.myfc(x)
        return x

# モデルの読み込み  
まずモデルの構造(enetv2)のインスタンスを作成し、その後に保存しておいたパラメータを読み込ませる。  
ここで層の構成やパラメータの形状が保存時と合っていないとエラーが出るので注意。  
(ありがちなのは、output sizeが合っていないケース。)  

In [None]:
def load_models(model_files,backbone):
    """Build one ``enetv2`` per checkpoint file and load its saved weights.

    Args:
        model_files: File names located inside the ``pretrained_models`` directory.
        backbone: Backbone name passed on to ``enetv2``.

    Returns:
        List of models in evaluation mode, moved to ``device``, in the order
        of ``model_files``.
    """
    loaded = []
    for file_name in model_files:
        checkpoint_path = os.path.join(pretrained_models, file_name)
        net = enetv2(backbone, out_dim=1)
        # Deserialize the tensors onto the CPU first; the model is moved to
        # the target device afterwards.
        state = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        # strict=True: every key has to match the model definition exactly.
        net.load_state_dict(state, strict=True)
        net.eval()
        net.to(device)
        loaded.append(net)
        print(f'{checkpoint_path} loaded!')
    return loaded

In [None]:
# List everything stored in the checkpoint directory.
res = os.listdir(pretrained_models)
print("all:")
print(res)

# Split the files by extension: PyTorch state dicts (.pt) vs. pickled models (.pickle).
model_files_path = [name for name in res if name.endswith(".pt")]
model_files_pickle_path = [name for name in res if name.endswith(".pickle")]

print("pytorch:")
print(model_files_path)
print("pickle:")
print(model_files_pickle_path)

In [None]:
# The PyTorch checkpoints come in "best" and "final" flavours; keep only the
# ones without "best" in their name.
# Calling list.remove() while iterating the same list skips the element that
# follows each removal, so two adjacent "best" files would leave one behind.
# Build the filtered list instead, assigned in place so the same list object is kept.
model_files_path[:] = [name for name in model_files_path if "best" not in name]
print("pytorch:")
print(model_files_path)

In [None]:
# Instantiate one model per remaining PyTorch checkpoint and load its parameters.
models = load_models(model_files=model_files_path, backbone=enet_type)

# オマケ pickleでの読み込み  
今回、pickleにはパラメータだけでなくモデル全体を保存しているので、それを読み出してみる。  
モデル構造を読み込んで、そこにパラメータを当てはめていくという過程が無い分、ちょっと楽。  

In [None]:
import pickle

# Demonstration only: load the fully pickled models into their own list.
# They are deliberately NOT added to `models`. Doing so would put them into
# the prediction ensemble as well and would keep them alive after
# `models_from_pickle` is deleted.
models_from_pickle = []

for i in model_files_pickle_path:
    i = os.path.join(pretrained_models, i)

    # SECURITY: pickle.load executes arbitrary code from the file; only use it
    # on model files you produced yourself.
    with open(i, mode="rb") as fp:
        model = pickle.load(fp)
    model.eval()
    model.to(device)
    print(f'{i} loaded!')
    models_from_pickle.append(model)

以上。pickleの方のモデルはここで削除する。  
メモリ解放にはgcを用いる。

In [None]:
import gc
# Drop the list holding the pickle-loaded models, then trigger a garbage-collection pass.
# NOTE(review): the models are only freed if nothing else still references them — confirm.
del models_from_pickle
gc.collect()

In [None]:
class MelanomaDataset(Dataset):
    """Dataset that reads JPEG images listed in a metadata DataFrame.

    Args:
        df: Metadata table. The file stem is read from column ``image_id``
            when ``train`` is True and from ``image_name`` otherwise; in
            train mode the label is read from column ``target``.
        imfolder: Directory containing the ``<name>.jpg`` files.
        train: If True, items are ``(image, target)`` pairs; otherwise only
            the image is returned.
        transforms: Optional albumentations-style callable, invoked as
            ``transforms(image=...)`` and expected to return a mapping with
            an ``'image'`` entry.
    """

    def __init__(self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms = None):
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train

    def __getitem__(self, index):
        """Load item ``index`` as a channel-first array (plus target in train mode).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Fetch the row once instead of once per column access.
        row = self.df.iloc[index]
        name_column = 'image_id' if self.train else 'image_name'
        im_path = os.path.join(self.imfolder, row[name_column] + '.jpg')

        # NOTE(review): cv2.imread yields BGR channel order and no RGB conversion
        # is done here — presumably this matches the training notebook; confirm.
        x = cv2.imread(im_path)
        if x is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the path instead of deep inside the transforms.
            raise FileNotFoundError(f'Could not read image: {im_path}')

        if self.transforms:
            # albumentations returns a dict (it can also carry boxes, masks, ...);
            # only the transformed image is needed.
            x = self.transforms(image=x)
            x = x['image'].astype(np.float32)

        x = x.transpose(2, 0, 1)  # HWC -> CHW (channel first)

        if self.train:
            return x, row['target']
        return x

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.df)

In [None]:
# Augmentation pipeline applied to every test image: random transpose and
# flips, then a resize to the network input size. A.Normalize() is not part
# of the pipeline.
_augmentation_steps = [
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Resize(image_size, image_size),
]
transforms_train = A.Compose(_augmentation_steps)

# Test-set dataset: no targets are returned (train=False).
test = MelanomaDataset(test_df, test_images, train=False, transforms=transforms_train)

In [None]:
#(model数*TTA数)回すので注意
def get_predictions(model, iterator, device):
    
    preds = np.array([0.]*len(test_df))
    model.eval()
    bar = tqdm(iterator) if Progress_Bar else iterator
    
    with torch.no_grad():
        for tta in range(TTA):
            res = np.array([])
            for x in bar:
                x = torch.tensor(x, device=device, dtype=torch.float32)
                y_pred = model(x)
                y_pred = torch.sigmoid(y_pred)
                res = np.append(res, y_pred.detach().cpu().numpy())
            preds += res
    preds /= TTA
    return preds

In [None]:
# Average the per-model predictions into the final ensemble prediction.
if not models:
    # Dividing by len(models) == 0 would silently produce an all-NaN submission.
    raise RuntimeError('No models were loaded; nothing to predict with.')

# One loader is enough; it can be iterated again for every model.
# NOTE(review): batch size is hard-coded to 16 although a `batch_size` setting
# exists in the params cell — confirm which one is intended.
test_iterator = DataLoader(dataset=test, batch_size=16, shuffle=False, num_workers=num_workers)

prediction = np.zeros(len(test_df))
for fold_model in models:
    prediction += get_predictions(fold_model, test_iterator, device)
prediction /= len(models)

In [None]:
# Write the averaged predictions into the sample submission and save it.
sub_df = pd.read_csv('../input/siim-isic-melanoma-classification/sample_submission.csv')
if DEBUG:
    # In DEBUG mode only the head of the test set was predicted, so cut the
    # submission to exactly that many rows. A fixed row count would not match
    # the prediction length, and .copy() avoids assigning into a slice.
    sub_df = sub_df.iloc[:len(prediction)].copy()
sub_df['target'] = prediction

# index=False: otherwise the DataFrame index is written as an extra first column.
sub_df.to_csv('submission.csv', index=False)
sub_df.head()