<a href="https://colab.research.google.com/github/nalgo-intern/team-a/blob/master/src/learning/transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **機械学習コード**

In [0]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the current Colab user first; the application-default
# credentials fetched below are then attached to the PyDrive auth object.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the PyDrive client used by the later cells to download the
# training archives and upload the trained model.
drive = GoogleDrive(gauth)

前処理済みの学習データである train フォルダと validation フォルダをそれぞれ圧縮して Google Drive にアップロードし、共有可能なリンクを取得する。リンク中の「id=」以降の文字列を、下の各セルの該当箇所にそれぞれ入力する。

In [0]:
# Fetch the zipped training set from Google Drive into the local file train.zip.
downloaded = drive.CreateFile({'id':'1WxOK8oyxZ_jJkaJ4zLa_nbfnmDijh0M9'}) # enter the part of the shareable link that follows "id"

downloaded.GetContentFile('train.zip') # the train folder that was zipped and uploaded to Drive

In [0]:
# Fetch the zipped validation set from Google Drive into the local file
# validation.zip (the 'id' value is the ID portion of its shareable link).
downloaded = drive.CreateFile({'id':'1oiDVeU8A14ukiD247w8G_xueVhwiy2uK'})

downloaded.GetContentFile('validation.zip')

In [0]:
# Unzip the training data (both the train and the validation archives)
!unzip train.zip
!unzip validation.zip

Archive:  train.zip
   creating: train/chimpanzee/
  inflating: train/chimpanzee/10037667855.jpg  
  inflating: train/chimpanzee/10059377785.jpg  
  inflating: train/chimpanzee/10803455094.jpg  
  inflating: train/chimpanzee/11581412744.jpg  
  inflating: train/chimpanzee/11950809213.jpg  
  inflating: train/chimpanzee/12184235744.jpg  
  inflating: train/chimpanzee/14974695853.jpg  
  inflating: train/chimpanzee/14993392243.jpg  
  inflating: train/chimpanzee/15039218068.jpg  
  inflating: train/chimpanzee/15165394638.jpg  
  inflating: train/chimpanzee/15276624803.jpg  
  inflating: train/chimpanzee/15610867861.jpg  
  inflating: train/chimpanzee/15618200812.jpg  
  inflating: train/chimpanzee/15657269754.jpg  
  inflating: train/chimpanzee/15896275615.jpg  
  inflating: train/chimpanzee/16115811169.jpg  
  inflating: train/chimpanzee/16277952091.jpg  
  inflating: train/chimpanzee/16501300762.jpg  
  inflating: train/chimpanzee/16624083417.jpg  
  inflating: train/chimpanzee/1669927

### 転移学習

In [0]:
# Model design (neural-network model): transfer learning on top of VGG16.
# Builds the model, trains it on the images under 'train' / 'validation',
# and saves the result to monkey.hdf5.

import math

from keras.models import Model
from keras.layers import Dense, Input, GlobalMaxPooling2D, Dropout
from keras.applications.vgg16 import VGG16  # pretrained VGG16 model from keras.applications.vgg16
from keras.preprocessing.image import ImageDataGenerator  # tool that generates the training / validation data
from keras.optimizers import SGD

N_CATEGORIES = 3  # enter the number of classes to classify
IMAGE_SIZE = 224  # size of the images to use
BATCH_SIZE = 8  # enter the batch size: number of samples in one batch (one chunk of training data)

NUM_TRAINING = 288  # enter the total number of training samples
NUM_VALIDATION = 72  # enter the total number of validation samples

# Number of batches needed to see every sample once. Ceiling division is used
# so that a final partial batch is not dropped and so that a dataset smaller
# than BATCH_SIZE still yields one step instead of zero. For totals that are
# exact multiples of BATCH_SIZE this equals plain floor division.
STEPS_PER_EPOCH = math.ceil(NUM_TRAINING / BATCH_SIZE)
VALIDATION_STEPS = math.ceil(NUM_VALIDATION / BATCH_SIZE)

# Specify the size of the input data (images are stretched, not cropped)
input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))  # arguments are (height, width, RGB)

# Specify the weights, whether to include VGG16's fully-connected layers,
# and the input data.
# - weights='imagenet': use the weights trained on ImageNet.
# - include_top=False: VGG16's 1000-class classifier is not used.
# - input_tensor: the input data is input_tensor.
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)

# Add our own layers on top of the pretrained model
x = base_model.output
x = GlobalMaxPooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(.25)(x)
x = Dense(1024, activation='relu')(x)

predictions = Dense(N_CATEGORIES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the first 15 layers of the base model so their weights are not
# updated during training.
# NOTE(review): presumably this is everything up to block4_pool, leaving
# block5 trainable - confirm against model.summary().
for layer in base_model.layers[:15]:
    layer.trainable = False

model.compile(
    optimizer=SGD(lr=1e-4, momentum=0.9),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()  # show the model structure (the VGG16 layers and our own layers)

# Training images are rescaled to [0, 1] and lightly augmented
# (zoom and horizontal flip; shear and rotation are disabled).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0,
    zoom_range=0.1,
    horizontal_flip=True,
    rotation_range=0,
)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
)

train_generator = train_datagen.flow_from_directory(
    'train',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
)

validation_generator = test_datagen.flow_from_directory(
    'validation',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
)

hist = model.fit_generator(
    train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=50,  # enter the number of epochs (how many times the training data is learned)
    verbose=1,  # 0: no log output, 1: standard output, 2: one log line per epoch
    validation_data=validation_generator,
    validation_steps=VALIDATION_STEPS,
)

# Save the model as monkey.hdf5
model.save('monkey.hdf5')


# The log of training progress / accuracy follows below.

Using TensorFlow backend.
W0830 03:30:10.501474 140300576204672 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0830 03:30:10.516591 140300576204672 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0830 03:30:10.519755 140300576204672 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0830 03:30:10.549531 140300576204672 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4267: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0830 03:30:10.940183 1403005

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

W0830 03:30:12.203651 140300576204672 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [0]:
# Upload the saved model file monkey.hdf5 to Google Drive via the PyDrive client.
upload_file_2 = drive.CreateFile()
upload_file_2.SetContentFile("monkey.hdf5")
upload_file_2.Upload()
# monkey.hdf5 is uploaded to My Drive.