# 概要
TensorFlow, Kerasによる画像分類の方法について説明します。<br>
このnotebookでは学習済みモデルを使用して予測結果の提出を行います。<br> 

学習 : https://www.kaggle.com/takuyatone/cassava-keras-tf-baseline-training/notebook

### 1. 準備 
- ファイル構成
- ライブラリのインポート
- 設定
- データの読み込み

### 2. 推論
- 分類モデルの定義
- 推論用データセットの作成
- 評価用データに対しての推論
- 提出物の作成
- 評価スコアの改善に向けて

# 1. 準備 

## ファイル構成

In [None]:
# IPython shell escape: list the contents of the competition input directory.
!ls /kaggle/input/cassava-leaf-disease-classification

## ライブラリのインポート

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model,load_model
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import cv2

In [None]:
# Fix the random seeds for reproducibility.
def seed_everything(seed=1234):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random`` module, the
            ``PYTHONHASHSEED`` environment variable, NumPy and TensorFlow.
    """
    # Imported locally because the notebook's import cell does not provide it.
    import random

    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so setting it here only
    # affects processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


seed_everything(seed=42)

In [None]:
# Check which devices (CPU/GPU) TensorFlow can see.
# Uses the public tf.config API rather than importing device_lib from the
# private tensorflow.python.client namespace.
print(tf.config.list_physical_devices())

## 設定

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes."""

    # --- general ---
    debug = True
    seed = 42

    # --- data / model ---
    size = 64  # image edge length; used for the model input shape and generator target size
    target_col = 'label'
    target_size = 5  # number of classes passed to the classifier head

    # --- batching / schedule ---
    # NOTE(review): epochs and batch_size are not referenced in this notebook;
    # presumably kept for parity with the training notebook - confirm.
    epochs = 8
    batch_size = 64
    val_batch_size = 128  # batch size of the test generator

    # --- cross-validation ---
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]  # fold indices whose weight files are used at inference

## データの読み込み

In [None]:
# ====================================================
# Directory settings
# ====================================================
if os.path.exists('/kaggle/input'):
    # Running on Kaggle
    DATA_DIR = '/kaggle/input/cassava-leaf-disease-classification/'
else:
    # Running locally
    DATA_DIR = '../../data/raw/'

OUTPUT_DIR = './'
# exist_ok=True creates the directory only when it is missing, without the
# separate check-then-create step.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Load the training table, the sample submission (used as the test table)
# and the label-number -> disease-name map.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))
label_map = pd.read_json(
    os.path.join(DATA_DIR, 'label_num_to_disease_map.json'),
    orient='index',
)

# Preview each table in the notebook output.
for frame in (train.head(), test.head(), label_map):
    display(frame)

# 2. 推論

## 分類モデルの定義

In [None]:
def vgg16_model(num_classes=None):
    """Build a VGG16-based classifier without pretrained weights.

    Args:
        num_classes: Number of output classes. Falls back to
            ``CFG.target_size`` when omitted.

    Returns:
        A Keras ``Model`` that takes ``(CFG.size, CFG.size, 3)`` inputs and
        ends in a softmax layer with ``num_classes`` units.
    """
    if num_classes is None:
        # A Dense layer cannot be built with units=None, so use the
        # configured class count instead of failing inside Keras.
        num_classes = CFG.target_size

    # weights=None: the backbone starts untrained; include_top=False with
    # pooling='avg' ends the backbone in a global-average-pooled feature vector.
    backbone = VGG16(
        weights=None,
        include_top=False,
        input_shape=(CFG.size, CFG.size, 3),
        pooling='avg',
    )
    class_probs = Dense(num_classes, activation='softmax')(backbone.output)
    return Model(backbone.input, class_probs)

## 推論用データセットの作成

In [None]:
# Build the network once; the weight files listed below are loaded into it
# one at a time during inference.
model = vgg16_model(num_classes=CFG.target_size)
weights_path = [f'../input/cassava-tf-vgg16/fold-{fold}.h5' for fold in CFG.trn_fold]

# Inference-time preprocessing: scale pixel values by 1/255 only.
test_datagen = ImageDataGenerator(rescale=1./255)

# class_mode=None makes the generator yield image batches only, which is the
# mode intended for model.predict(); the placeholder 'label' column therefore
# no longer has to be cast to str or passed as y_col.
# shuffle=False keeps predictions in the same order as the rows of `test`.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test,
    directory=DATA_DIR + "test_images",
    x_col='image_id',
    target_size=(CFG.size, CFG.size),
    color_mode="rgb",
    class_mode=None,
    batch_size=CFG.val_batch_size,
    shuffle=False,
)

In [None]:
# Display the list of weight file paths that will be loaded for inference.
weights_path

## 評価用データに対しての推論

In [None]:
def inference(model, weights_path, test_generator):
    """Average one model's predictions over several weight files.

    Args:
        model: Object exposing ``load_weights(path)`` and
            ``predict(data, verbose=...)``.
        weights_path: Iterable of weight file paths, loaded one after another.
        test_generator: Input data handed unchanged to ``model.predict``.

    Returns:
        Element-wise mean of the per-weight prediction arrays.

    Raises:
        ValueError: If ``weights_path`` contains no entries.
    """
    # Materialise once so generators are supported and emptiness can be checked.
    weight_files = list(weights_path)
    if not weight_files:
        # Averaging an empty list would yield NaN and fail later on.
        raise ValueError('weights_path must contain at least one weight file')

    preds = []
    for weight in weight_files:
        print('Loading best model...')
        model.load_weights(weight)
        print('Predicting Test...')
        preds.append(model.predict(test_generator, verbose=1))

    # Mean over the weight-file axis.
    return np.mean(preds, axis=0)

In [None]:
# Run inference with every weight file over the test generator; the trailing
# bare expression displays the resulting array in the notebook output.
predictions = inference(model, weights_path, test_generator)
predictions

## 提出物の作成

In [None]:
# Take the highest-scoring class index for each row, then write the
# image_id/label columns to the submission file.
test['label'] = np.argmax(predictions, axis=1)
test.to_csv(
    os.path.join(OUTPUT_DIR, 'submission.csv'),
    columns=['image_id', 'label'],
    index=False,
)

In [None]:
# Display the two columns that were written to the submission file.
test[['image_id', 'label']]

## 評価スコアの改善に向けて

・より高性能なモデルの採用<br>
・学習時のデータオーグメンテーション(データ水増し)の変更<br>
・損失関数・最適化手法の変更<br>
・推論時のデータオーグメンテーション(Test Time Augmentation)<br>
・複数モデルのアンサンブル<br>
・データ特有の課題への対応<br>