# Dogs vs. Cats Redux
- 環境はGoogle Colaboratoryです。
- kaggle apiを使用するため、事前にgdriveにkaggle.jsonをアップロードしておく必要があります。
- kerasの学習済みモデルを利用し、5foldを行います。

### gdriveへのアクセスとか 
- 参考:https://qiita.com/yukkyo/items/eb9bae0b82248f9abd28

In [None]:
import io
import os

from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

# Authenticate this Colab session before building the Drive API client.
auth.authenticate_user()

# Look up kaggle.json on Google Drive by file name; only the file ids are requested.
drive_service = build('drive', 'v3')
results = drive_service.files().list(
        q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
if not kaggle_api_key:
    # Fail with an actionable message instead of an IndexError on the lookup below.
    raise FileNotFoundError(
        "kaggle.json was not found on Google Drive; upload it before running this cell.")

filename = "/content/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Download the first match chunk by chunk. The context manager closes the
# file handle even if a chunk request raises.
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
with io.FileIO(filename, 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# File modes are octal: 0o600 means read/write for the owner only. The decimal
# literal 600 that was used before encodes a different, unintended mode.
os.chmod(filename, 0o600)

In [None]:
# Install a Drive FUSE wrapper.
# https://github.com/astrada/google-drive-ocamlfuse
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE(review): with `2>&1 > /dev/null` stderr is pointed at the original stdout
# *before* stdout is sent to /dev/null, so only stdout is silenced and error
# output is still shown. If both streams were meant to be hidden the order
# would have to be `> /dev/null 2>&1` - confirm the intent.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First pass: run headless with empty stdin and keep only the output line that
# contains "URL" (presumably the authorisation link to open in a browser).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code interactively without echoing it.
vcode = getpass.getpass()
# Second pass: feed the verification code to the tool on stdin.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

### gdriveマウント用ディレクトリ作成、マウント

In [11]:
# Work from /content so the relative mount point below resolves to /content/drive.
%cd /content
# Create the mount point; -p avoids an error when the directory already exists.
!mkdir -p drive
# Mount Google Drive on ./drive. `-o nonempty` is passed through as a mount
# option (presumably to allow mounting over a non-empty directory).
!google-drive-ocamlfuse -o nonempty drive

/content


### datasetのダウンロード

In [None]:
# Install the Kaggle CLI and download the competition archives.
# NOTE(review): the following unzip cell reads the archives from
# /content/.kaggle/competitions/<competition>/, so this presumably relies on
# the CLI's default download location - confirm for the installed version.
!pip install kaggle
!kaggle competitions download -c dogs-vs-cats-redux-kernels-edition

In [None]:
# Extract the train and test archives into the current working directory
# (no -d target is given). Later cells expect /content/train and /content/test,
# so this assumes the cell is run from /content.
!unzip /content/.kaggle/competitions/dogs-vs-cats-redux-kernels-edition/train.zip 
!unzip /content/.kaggle/competitions/dogs-vs-cats-redux-kernels-edition/test.zip 

### ディレクトリの作成と学習データの移動

In [6]:
# Create the (initially empty) validation tree with one sub-directory per class;
# the training loop moves each fold's held-out images into it.
%cd /content
%mkdir valid
%mkdir valid/dogs
%mkdir valid/cats

# Sort the training images into per-class sub-directories based on the
# "cat." / "dog." file-name prefix.
%cd /content/train
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

# Put all test images under a single "unknown" sub-directory; the prediction
# code later reads them via flow_from_directory('/content/test/') and parses
# ids from names of the form "unknown/<id>.jpg".
%cd /content/test
%mkdir unknown
%mv *.jpg unknown/
%cd /content

/content


### 本文

In [12]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Dropout, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.applications import inception_resnet_v2,xception,densenet,resnet50,inception_v3

Using TensorFlow backend.


In [13]:
# Pretrained ImageNet backbone. Exactly one of the lines below is active; the
# others are alternative backbones that can be swapped in.
#lay = VGG16(weights='imagenet', include_top=True)
lay = inception_resnet_v2.InceptionResNetV2(weights='imagenet', include_top=True)
#lay = xception.Xception(include_top=True, weights='imagenet')
#lay=densenet.DenseNet201(include_top=True, weights='imagenet')
#lay=resnet50.ResNet50(include_top=True, weights='imagenet')
#lay=inception_v3.InceptionV3(include_top=True, weights='imagenet')

# Freeze the weights of every pretrained layer so that only the new output
# layer is trained. The flag has to be set on each layer: the loop previously
# assigned `lay.trainable` (the backbone object) on every iteration and never
# touched the individual layers.
for layer in lay.layers:
    layer.trainable = False

# Add a 2-class (cat / dog) softmax output layer.
# NOTE(review): with include_top=True, lay.layers[-1] is presumably the
# backbone's own ImageNet prediction layer, so the new Dense layer is stacked
# on top of those class probabilities rather than on pooled features - confirm
# this is intended.
x = lay.layers[-1].output
x = Dense(2, activation='softmax', name='predictionsv2')(x)
model = Model(inputs=lay.inputs, outputs=x)

# Compile with SGD (Nesterov momentum) and categorical cross-entropy, matching
# the class_mode='categorical' one-hot labels produced by the data generators.
model.compile(optimizer=SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5


### 学習の実行
- Google Colaboratoryではバックグラウンド実行は推奨されていない。
- いつ切断されてもいいよう1fold毎にモデルと出力をgdriveに保存する

In [27]:
import gc
import os
import shutil
import pandas as pd
from glob import glob
import numpy as np
from sklearn.model_selection import KFold

def _move_files(names, src_dir, dst_dir, missing_ok=False):
    """Move every file in *names* from *src_dir* to *dst_dir*.

    With ``missing_ok=True`` files that are not present in *src_dir* are
    skipped, which lets the clean-up path run safely after a partial move.
    """
    for name in names:
        src = os.path.join(src_dir, name)
        if missing_ok and not os.path.exists(src):
            continue
        os.rename(src, os.path.join(dst_dir, name))


# Build the per-class file lists used for cross-validation. os.listdir returns
# names in arbitrary order, so sort them to make fold membership reproducible.
doglist = np.array(sorted(os.listdir("/content/train/dogs/")))
catlist = np.array(sorted(os.listdir("/content/train/cats/")))

# The same index split is applied to both classes, so size it from the data
# (smaller class) instead of hard-coding 12500. If the classes were unequal,
# the surplus files of the larger class would simply always stay in train.
X = np.arange(min(len(doglist), len(catlist)))
kf = KFold(n_splits=5)

# Train on 64 images per batch.
batch_size = 64

# Feed images at the resolution the model was built for instead of a fixed
# 224x224 (InceptionResNetV2 with include_top=True is not a 224x224 model).
# Assumes a channels_last model, i.e. input_shape == (None, H, W, C) - confirm
# if the backend/data format is changed.
target_size = tuple(model.input_shape[1:3])

# NOTE(review): no rescale / preprocessing_function is configured, so raw pixel
# values are fed to the network. The keras.applications models normally ship a
# matching preprocess_input - confirm whether it should be applied here.
gen = ImageDataGenerator()

# Snapshot the starting weights so that every fold is trained from the same
# state. Without the reset, folds after the first would validate on images the
# model has already been trained on. (Optimizer state is not reset by this.)
initial_weights = model.get_weights()

for fnum, (train_index, valid_index) in enumerate(kf.split(X), start=1):
    model.set_weights(initial_weights)

    valid_cats = catlist[valid_index]
    valid_dogs = doglist[valid_index]

    try:
        # Move this fold's held-out images from train/ to valid/.
        _move_files(valid_cats, "/content/train/cats", "/content/valid/cats")
        _move_files(valid_dogs, "/content/train/dogs", "/content/valid/dogs")

        train_batches = gen.flow_from_directory('/content/train/',
                                                target_size=target_size,
                                                class_mode='categorical',
                                                shuffle=True,
                                                batch_size=batch_size)

        val_batches = gen.flow_from_directory('/content/valid/',
                                              target_size=target_size,
                                              class_mode='categorical',
                                              shuffle=True,
                                              batch_size=batch_size)

        # Per-fold training history log.
        history_path = '/content/history_k{}.log'.format(fnum)
        logger = CSVLogger(history_path)

        # Train. validation_steps must be derived from the validation set;
        # it was previously computed from train_batches.
        model.fit_generator(
            train_batches,
            steps_per_epoch=int(np.ceil(train_batches.samples / batch_size)),
            epochs=5,
            validation_data=val_batches,
            validation_steps=int(np.ceil(val_batches.samples / batch_size)),
            callbacks=[logger])

        # Save the model after every fold and copy it to the mounted gdrive,
        # so that nothing is lost if the Colab session is disconnected.
        model_name = 'k{}_model_dogs_vs_cats.h5'.format(fnum)
        model.save(model_name)
        shutil.copyfile(model_name, "/content/drive/{}".format(model_name))

        # Predict on the test set. shuffle=False keeps the predictions
        # aligned with test_batches.filenames.
        test_batches = gen.flow_from_directory('/content/test/',
                                               target_size=target_size,
                                               class_mode='categorical',
                                               shuffle=False,
                                               batch_size=batch_size)
        # This takes a few minutes.
        preds = model.predict_generator(test_batches, int(np.ceil(test_batches.samples / batch_size)))
        # Column 1 is used as the "dog" probability.
        isdog = preds[:, 1]

        # Extract the numeric id from each file name:
        # unknown/8250.jpg => 8250
        filenames = np.array(test_batches.filenames)
        ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])

        # Join ids and predicted probabilities column-wise.
        subm = np.stack([ids, isdog], axis=1)

        # Write the submission file.
        out = pd.DataFrame(subm)
        out.columns = ["id", "label"]
        out.id = out.id.astype(int)
        result_name = "result_k{}.csv".format(fnum)
        out.to_csv(result_name, index=False)

        # Copy the submission and the training log to gdrive.
        shutil.copyfile(result_name, "/content/drive/{}".format(result_name))
        shutil.copyfile(history_path, "/content/drive/history_k{}.log".format(fnum))

        # Free memory before the next fold.
        gc.collect()
    finally:
        # Move the held-out images back into train/ for the next fold. This
        # runs even when the fold fails, so the dataset layout is not left
        # half-moved. All paths are absolute, so no directory change is
        # needed (the former `%cd /content/valid` only altered where later
        # folds wrote their relative output files).
        _move_files(valid_cats, "/content/valid/cats", "/content/train/cats", missing_ok=True)
        _move_files(valid_dogs, "/content/valid/dogs", "/content/train/dogs", missing_ok=True)

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Found 12500 images belonging to 1 classes.
/content/valid
