<a href="https://colab.research.google.com/github/dev-kentayamada/machine-learning/blob/main/transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##パッケージのインポート

In [None]:
!pip install -U tensorflow-addons
!pip install -U plotly
!pip install -U kaleido

import tensorflow as tf
import tensorflow_addons as tfa
import pytz
import numpy as np
import os
import sys
import random
import datetime
import plotly.graph_objects as go
from PIL import Image
import plotly.express as px

# Seed Python's `random` module and TensorFlow's global generator with the same
# fixed value so shuffles and TF random ops start from a known state.
random.seed(0) # fix the random seed
tf.random.set_seed(0) # fix the random seed

##TPUの初期化

In [None]:
# Locate the TPU attached to this runtime, initialise it, and build the
# distribution strategy (`tpu_strategy`) used by the TPU training cells.
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError as err:
  # Raise a regular RuntimeError (not a bare BaseException) so that generic
  # `except Exception` handlers see it, and chain the resolver's own error
  # so the original cause stays visible in the traceback.
  raise RuntimeError('ERROR: Not connected to a TPU runtime') from err
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

##データセットの用意

###ダウンロード

In [None]:
from google.colab import files

files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! mkdir /content/downloads
! kaggle competitions download -c dogs-vs-cats -p /content/downloads
! unzip /content/downloads/train.zip -d /content/downloads
! unzip /content/downloads/test1.zip -d /content/downloads
! rm kaggle.json /content/downloads/test1.zip /content/downloads/train.zip

###整理

In [None]:
import re
import shutil

# Folder holding the extracted Kaggle training images, and the destination
# roots of the two dataset layouts (v1: train/val/test, v2: train/test).
train_downloads_dir = '/content/downloads/train'
train_path_v1 = '/content/datasets/v1/train'
val_path_v1 = '/content/datasets/v1/val'
test_path_v1 = '/content/datasets/v1/test'
train_path_v2 = '/content/datasets/v2/train'
test_path_v2 = '/content/datasets/v2/test'

all_imgs = os.listdir(train_downloads_dir)

# Separate the file names by class using their name prefix.
all_dogs = [file_name for file_name in all_imgs if file_name.startswith('dog')]
all_cats = [file_name for file_name in all_imgs if file_name.startswith('cat')]

# Shuffle each class in place (dogs first, then cats).
random.shuffle(all_dogs)
random.shuffle(all_cats)

half_of_dogs = len(all_dogs) // 2
half_of_cats = len(all_cats) // 2

# (destination root, dog file names, cat file names) for every split.
# v1: the first 5000 per class go to train, the next 6250 to test, the rest to
#     val (1250 per class if each class has 12500 images — not checked here).
# v2: the first half of each class goes to train, the second half to test.
dataset_splits = [
  (train_path_v1, all_dogs[:5000], all_cats[:5000]),
  (test_path_v1, all_dogs[5000:11250], all_cats[5000:11250]),
  (val_path_v1, all_dogs[11250:], all_cats[11250:]),
  (train_path_v2, all_dogs[:half_of_dogs], all_cats[:half_of_cats]),
  (test_path_v2, all_dogs[half_of_dogs:], all_cats[half_of_cats:]),
]

for split_root, dog_names, cat_names in dataset_splits:
  os.makedirs(f'{split_root}/dogs', exist_ok=True)
  os.makedirs(f'{split_root}/cats', exist_ok=True)
  # Dogs and cats are copied pairwise, so the shorter list bounds the loop.
  for dog, cat in zip(dog_names, cat_names):
    shutil.copy(f"{train_downloads_dir}/{dog}", f"{split_root}/dogs/{dog}")
    shutil.copy(f"{train_downloads_dir}/{cat}", f"{split_root}/cats/{cat}")

##変数の作成

In [None]:
import pathlib
import glob

def _shuffled_image_paths(split_root):
  """Return every `<split_root>/<class>/<file>` path as a string, shuffled in place."""
  found = [str(entry) for entry in pathlib.Path(split_root).glob('*/*')]
  random.shuffle(found)
  return found

# The call order fixes the order in which the shared `random` state is consumed:
# v1 train, v1 val, v1 test, v2 train, v2 test.
v1_all_imgs_train = _shuffled_image_paths(train_path_v1)
v1_all_imgs_val = _shuffled_image_paths(val_path_v1)
v1_all_imgs_test = _shuffled_image_paths(test_path_v1)

v2_all_imgs_train = _shuffled_image_paths(train_path_v2)
v2_all_imgs_test = _shuffled_image_paths(test_path_v2)

# Class names are the sub-directory names of the v1 training folder; their
# sorted position becomes the integer label.
label_names = sorted(item.name for item in pathlib.Path(train_path_v1).glob('*/') if item.is_dir())
label_to_index = {name: index for index, name in enumerate(label_names)}

def _labels_for(image_paths):
  """Map each image path to the integer label of its parent directory."""
  return [label_to_index[pathlib.Path(image_path).parent.name] for image_path in image_paths]

v1_all_labels_train = _labels_for(v1_all_imgs_train)
v1_all_labels_val = _labels_for(v1_all_imgs_val)
v1_all_labels_test = _labels_for(v1_all_imgs_test)

v2_all_labels_train = _labels_for(v2_all_imgs_train)
v2_all_labels_test = _labels_for(v2_all_imgs_test)

##NPZデータセットの作成

In [None]:
IMG_SIZE = 96
IMG_MODE = "RGB"

def _load_image_array(image_paths):
  """Open each file, convert it to IMG_MODE, resize it to IMG_SIZE x IMG_SIZE and stack the results."""
  target_size = (IMG_SIZE, IMG_SIZE)
  decoded = [np.asarray(Image.open(image_path).convert(IMG_MODE).resize(target_size)) for image_path in image_paths]
  return np.array(decoded)

# Images and labels of the v1 splits as NumPy arrays.
x_train = _load_image_array(v1_all_imgs_train)
y_train = np.array(list(v1_all_labels_train))

x_val = _load_image_array(v1_all_imgs_val)
y_val = np.array(list(v1_all_labels_val))

x_test = _load_image_array(v1_all_imgs_test)
y_test = np.array(list(v1_all_labels_test))

# Save all six arrays into one .npz archive (written under the v2 directory,
# although the arrays are built from the v1 splits).
np.savez(
  f'/content/datasets/v2/{IMG_SIZE}_{IMG_MODE}_catDog',
  x_train=x_train, y_train=y_train,
  x_val=x_val, y_val=y_val,
  x_test=x_test, y_test=y_test,
)

##NPZデータセットを読み込む

In [None]:
# Load the arrays from the npz file. The context manager closes the archive's
# file handle once the six arrays have been read into memory.
with np.load('/content/datasets/v2/96_RGB_catDog.npz') as data:
  x_train = data['x_train']
  y_train = data['y_train']
  x_val = data['x_val']
  y_val = data['y_val']
  x_test = data['x_test']
  y_test = data['y_test']

# Wrap the NumPy arrays in tf.data datasets of (image, label) pairs.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val))
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# Global batch size: 16 examples per replica of the TPU strategy.
BATCH_SIZE = 16 * tpu_strategy.num_replicas_in_sync

AUTOTUNE = tf.data.AUTOTUNE

# Random augmentation pipeline applied by prepare() when augment=True.
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
  tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
  tf.keras.layers.experimental.preprocessing.RandomZoom(0.2),
])

def prepare(ds, shuffle=False, augment=False, one_hot=False):
  """Turn a dataset of (image, label) pairs into a batched, prefetched pipeline.

  Args:
    ds: tf.data dataset yielding (image, label) elements.
    shuffle: if True, shuffle the elements with a buffer of 1000 before batching.
    augment: if True, run `data_augmentation` (training mode) on every batch.
    one_hot: if True, labels become float32 one-hot vectors of depth 2;
      otherwise they are cast to float32 scalars.

  Returns:
    The dataset with pixel values divided by 255, batched by BATCH_SIZE and
    prefetched with AUTOTUNE.
  """
  def _scale_and_encode(image, label):
    scaled = tf.cast(image, tf.float32) / 255.0
    if one_hot:
      encoded = tf.cast(tf.one_hot(tf.cast(label, tf.int64), 2), tf.float32)
    else:
      encoded = tf.cast(label, tf.float32)
    return scaled, encoded

  def _augment_batch(image, label):
    return data_augmentation(image, training=True), label

  pipeline = ds.map(_scale_and_encode, num_parallel_calls=AUTOTUNE)

  # Caching is currently disabled:
  # pipeline = pipeline.cache(filename='./cache.tf-data')

  if shuffle:
    pipeline = pipeline.shuffle(1000)

  # Every dataset is batched.
  pipeline = pipeline.batch(BATCH_SIZE)

  # Augmentation runs on whole batches and only when requested (training set).
  if augment:
    pipeline = pipeline.map(_augment_batch, num_parallel_calls=AUTOTUNE)

  # Buffered prefetching on every dataset.
  return pipeline.prefetch(buffer_size=AUTOTUNE)

def _prepare_splits(one_hot):
  """Return the (train, val, test) pipelines; only the training split is shuffled and augmented."""
  return (
    prepare(train_ds, shuffle=True, augment=True, one_hot=one_hot),
    prepare(val_ds, one_hot=one_hot),
    prepare(test_ds, one_hot=one_hot),
  )

# Scalar float labels.
train_ds_normal, val_ds_normal, test_ds_normal = _prepare_splits(one_hot=False)

# One-hot labels.
train_ds_categorical, val_ds_categorical, test_ds_categorical = _prepare_splits(one_hot=True)

###可視化

In [None]:
def convert_ds(dataset, n_examples):
  """Re-batch `dataset` into groups of `n_examples` and return a NumPy iterator over them."""
  rebatched = dataset.unbatch().batch(n_examples)
  return rebatched.as_numpy_iterator()

# Iterator yielding 9 test examples (one-hot labels) at a time for the preview grid.
converted_ds = convert_ds(test_ds_categorical, 9)

In [None]:
CLASSES = ['Cat', 'Dog']

# Re-run this cell to show a new batch of images
images, classes = next(converted_ds)

if isinstance(classes[0], np.ndarray):
  # One-hot rows: the position of the maximum is the class number.
  class_idxs = np.argmax(classes, axis=-1)
  labels = [CLASSES[idx] for idx in class_idxs]
else:
  # Scalar labels: use them directly as class numbers.
  labels = [CLASSES[int(idx)] for idx in classes]

def _show_class_name(annotation):
  """Replace a facet title with the class name of the image at that facet index."""
  facet_index = int(annotation.text.split("=")[-1])
  annotation.update(text=(labels[facet_index]))

fig = px.imshow(np.squeeze(np.array(images)), facet_col=0, facet_col_wrap=3)
fig.for_each_annotation(_show_class_name)
fig.show()

##直接データセットを読み込む

In [None]:
# Build the tf.data datasets straight from the v2 image folders.
BATCH_SIZE = 32
IMG_SIZE = 160

# Options shared by all three loaders, and the extra ones shared by the two
# loaders that split the training folder 80/20 with the same seed.
_loader_options = dict(batch_size=BATCH_SIZE, image_size=(IMG_SIZE, IMG_SIZE))
_split_options = dict(validation_split=0.2, seed=123, **_loader_options)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path_v2, subset="training", **_split_options
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path_v2, subset="validation", **_split_options
)

test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_path_v2, **_loader_options
)

# Use buffered prefetching so images are loaded from disk without I/O blocking;
# only the training set is additionally shuffled.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

##モデルを構築する

###TPU

####モデル

In [None]:
# Timestamp in the Asia/Tokyo time zone; it makes each run's output directory unique.
_now_in_tokyo = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
DATE_TIME = _now_in_tokyo.strftime("%Y-%m-%d_%H:%M:%S")

# One output directory per model number (1, 2, 3).
DIR_MODEL_1, DIR_MODEL_2, DIR_MODEL_3 = (
  f"saved_models/TPU/model_{number}/{DATE_TIME}" for number in (1, 2, 3)
)

def create_model(model_number):
  """Build and compile one of three frozen-backbone classifiers.

  The input shape and, for models 2 and 3, the number of output units are
  taken from the module-level `x_train` / `y_train` arrays.

  Args:
    model_number: 1 = MobileNetV2 + dropout + single sigmoid unit
      (binary cross-entropy); 2 = EfficientNetB7 + softmax (sparse categorical
      cross-entropy, Adam lr 0.0005); 3 = Xception + global average pooling +
      softmax (categorical cross-entropy).

  Returns:
    The compiled tf.keras.Sequential model (its summary is printed first).

  Raises:
    ValueError: if `model_number` is not 1, 2 or 3. The check runs before any
      backbone is built; an exception is used instead of `sys.exit` so a bad
      argument surfaces as a normal error rather than a SystemExit.
  """
  if model_number not in (1, 2, 3):
    raise ValueError(f'Error: Not Defined (model_number={model_number!r})')

  input_shape = x_train.shape[1:]

  if model_number == 1:
    base_model = tf.keras.applications.MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet', pooling='avg')
    base_model.trainable = False  # keep the pre-trained backbone frozen
    model = tf.keras.Sequential([
      base_model,
      tf.keras.layers.Dropout(0.5),
      tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy']
    )
  elif model_number == 2:
    base_model = tf.keras.applications.EfficientNetB7(input_shape=input_shape, include_top=False, weights='imagenet', pooling='avg')
    base_model.trainable = False
    model = tf.keras.Sequential([
      base_model,
      tf.keras.layers.Dense(len(set(y_train)), activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )
  else:
    base_model = tf.keras.applications.Xception(input_shape=input_shape, include_top=False)
    base_model.trainable = False
    model = tf.keras.Sequential([
      base_model,
      tf.keras.layers.GlobalAveragePooling2D(),
      tf.keras.layers.Dense(len(set(y_train)), activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy']
    )

  model.summary()
  return model

def plot_history(hist, test_acc):
  """Plot the loss/accuracy curves of a Keras History object and return the figure.

  Args:
    hist: History returned by `model.fit`; its 'loss', 'val_loss', 'accuracy'
      and 'val_accuracy' series are plotted against epochs 1..len(hist.epoch).
    test_acc: sequence whose first two items are the test loss and test
      accuracy; both are shown in the title.

  Returns:
    The plotly figure (it is also displayed).
  """
  epoch_axis = np.arange(1, len(hist.epoch)+1, 1)
  # (history key, legend name, line style) for each curve, in drawing order.
  curves = [
    ('loss', "train_loss", dict(color='royalblue', dash='dash')),
    ('val_loss', "val_loss", dict(color='firebrick', dash='dot')),
    ('accuracy', "train_acc", dict(color='royalblue')),
    ('val_accuracy', "val_acc", dict(color='firebrick')),
  ]

  fig = go.Figure()
  for history_key, legend_name, line_style in curves:
    fig.add_trace(go.Scatter(x=epoch_axis, y=hist.history[history_key], name=legend_name, line=line_style))

  fig.update_layout(
      xaxis=dict(dtick=1),
      title=f'Loss: {round(test_acc[0], 4)} / Accurecy: {round(test_acc[1]*100)}%',
      title_x=0.5,
      xaxis_title='Epoch',
      yaxis_title='Loss/Accuracy'
  )
  fig.show()

  return fig


def _make_lr_callback(num_epochs):
  """Plot the model-3 learning-rate schedule and return the matching Keras callback."""
  def lrfn(epoch):
    # Linear ramp-up for `rampup_epochs`, optional plateau, then exponential decay.
    start_lr = 0.00001
    min_lr = 0.00001
    max_lr = 0.00005 * tpu_strategy.num_replicas_in_sync
    rampup_epochs = 5
    sustain_epochs = 0
    exp_decay = .8
    if epoch < rampup_epochs:
      return (max_lr - start_lr)/rampup_epochs * epoch + start_lr
    if epoch < rampup_epochs + sustain_epochs:
      return max_lr
    return (max_lr - min_lr) * exp_decay**(epoch-rampup_epochs-sustain_epochs) + min_lr

  fig = go.Figure(data=go.Scatter(x=np.arange(1, num_epochs+1, 1), y=[lrfn(x) for x in np.arange(num_epochs)]))
  fig.update_layout(
      xaxis=dict(dtick=1),
      title='Learning Rate Per Epoch',
      yaxis=dict(tickformat=".5f"),
      title_x=0.5,
      xaxis_title='Epoch',
      yaxis_title='Learning Rate'
  )
  fig.show()

  return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)


def train(model_number, num_epochs, plot):
  """Fit, save, evaluate and optionally plot the module-level `model`.

  Models 1 and 2 use the scalar-label datasets (`*_ds_normal`); model 3 uses
  the one-hot datasets (`*_ds_categorical`) plus a learning-rate schedule.
  The model is saved to `<DIR_MODEL_n>/model.h5`; when `plot` is true the
  history figure is written next to it as `model.png`.

  Args:
    model_number: 1, 2 or 3; must match the model built by `create_model`.
    num_epochs: number of training epochs.
    plot: whether to draw and save the training-history figure.

  Raises:
    ValueError: if `model_number` is not 1, 2 or 3 (raised before any
      training work starts, instead of terminating via `sys.exit`).
  """
  # Pick the datasets and the output directory for this model number.
  if model_number in (1, 2):
    train_data, val_data, test_data = train_ds_normal, val_ds_normal, test_ds_normal
    save_dir = DIR_MODEL_1 if model_number == 1 else DIR_MODEL_2
  elif model_number == 3:
    train_data, val_data, test_data = train_ds_categorical, val_ds_categorical, test_ds_categorical
    save_dir = DIR_MODEL_3
  else:
    raise ValueError(f'Error: Not Defined (model_number={model_number!r})')

  default_callback = [
    tfa.callbacks.TQDMProgressBar(),
    # tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=3, verbose=1
  ]

  # Only model 3 trains with the ramp-up / decay learning-rate schedule.
  fit_callbacks = list(default_callback)
  if model_number == 3:
    fit_callbacks.append(_make_lr_callback(num_epochs))

  # Train the model
  hist = model.fit(
      train_data,
      epochs=num_epochs,
      validation_data=val_data,
      callbacks=fit_callbacks,
      verbose=0
  )

  # Save
  os.makedirs(f'{save_dir}', exist_ok=True)
  model.save(f'{save_dir}/model.h5')

  # Evaluate on the test split
  test_acc = model.evaluate(test_data, callbacks=default_callback, verbose=0)

  if plot:
    plot_history(hist, test_acc).write_image(f"{save_dir}/model.png")

# Choose which model variant to build and for how many epochs to train it.
MODEL = 1
EPOCHS = 10

with tpu_strategy.scope(): # creating the model in the TPUStrategy scope means we will train the model on the TPU
  model = create_model(MODEL)
# train() reads the module-level `model` created above.
train(MODEL, EPOCHS, True)

tf.keras.backend.clear_session() # discard the computation graph
del model                        # delete the variable

####評価と予測

In [None]:
# Load a saved model and evaluate it on the one-hot test dataset.
# NOTE(review): this hard-coded file name does not follow the
# `saved_models/TPU/model_N/<DATE_TIME>/model.h5` layout that train() writes —
# confirm the path before running.
model = tf.keras.models.load_model('/content/saved_models/TPU/model_3/2021_04_14__00_33_36.h5')
test_acc = model.evaluate(test_ds_categorical, verbose=0)
# test_acc = model.evaluate(test_ds_normal, verbose=0)

#####テストデータ

In [None]:
def convert_ds(dataset, n_examples):
  """Undo the existing batching, group the elements `n_examples` at a time, and iterate as NumPy."""
  flat = dataset.unbatch()
  grouped = flat.batch(n_examples)
  return grouped.as_numpy_iterator()

def img_title(label, prediction):
  """Return the (title text, colour) pair for one image.

  Args:
    label: a one-hot NumPy array, or a scalar class number.
    prediction: the model output for the image — class scores when `label` is
      one-hot, otherwise a sequence whose first item is the score that is
      thresholded at 0.5.

  Returns:
    (text, 'black') when the predicted class equals the label, otherwise
    (text, 'red').
  """
  if isinstance(label, np.ndarray):
    # One-hot label: compare the positions of the maxima.
    expected_idx = np.argmax(label, axis=-1)
    predicted_idx = np.argmax(prediction, axis=-1)
    text = f'Cat: {round(prediction[0]*100)}%, Dog: {round(prediction[1]*100)}%'
  else:
    # Scalar label: a score below 0.5 means class 0, anything else class 1.
    expected_idx = int(label)
    predicted_idx = 0 if float(prediction[0]) < 0.5 else 1
    text = f'{round(float(prediction[0]), 3)}'

  if expected_idx == predicted_idx:
    return text, 'black'
  return text, 'red'

def get_titles(images, labels, model):
  """Predict `images` with `model` and build one title and colour per image.

  Args:
    images: batch of images passed to `model.predict`.
    labels: the labels of those images, in the same order.
    model: object exposing `predict(images)`.

  Returns:
    (titles, colors): two lists aligned with `images`, as produced by
    `img_title` for each (label, prediction) pair.
  """
  predictions = model.predict(images)
  titles, colors = [], []
  # Pair predictions with the `labels` argument (not the notebook-global
  # `classes`), so the function is correct for whatever batch it is given.
  for label, prediction in zip(labels, predictions):
    title, color = img_title(label, prediction)
    titles.append(title)
    colors.append(color)
  return titles, colors

# NumPy iterators yielding 9 test examples at a time: one-hot labels first,
# then scalar labels.
converted_ds_categorical, converted_ds_normal = (
  convert_ds(source_ds, 9) for source_ds in (test_ds_categorical, test_ds_normal)
)

In [None]:
# Re-run this cell to show a new batch of images
images, classes = next(converted_ds_categorical)
titles, colors = get_titles(images, classes, model)

def _apply_prediction_title(annotation):
  """Set a facet's title to its prediction text; misclassified images get a red border."""
  facet_index = int(annotation.text.split("=")[-1])
  if colors[facet_index] == 'red':
    annotation.update(text=(titles[facet_index]), bordercolor="red")
  else:
    annotation.update(text=(titles[facet_index]))

fig = px.imshow(np.squeeze(np.array(images)), facet_col=0, facet_col_wrap=3)
fig.for_each_annotation(_apply_prediction_title)
fig.update_layout(title_x=0.5, title_text=f"Loss: {round(test_acc[0], 3)}, Acc: {round(test_acc[1]*100)}%")
fig.show()

#####オリジナルデータ

In [None]:
CLASSES = ['Cat', 'Dog']

# Upload an image through the Colab file picker and take the first file name.
file_name = list(files.upload())[0]

# Resize to the resolution the loaded model expects rather than the global
# IMG_SIZE, which other cells reassign (96 for the NPZ data, 160 for the
# directory-based datasets).
_, input_height, input_width, _ = model.input_shape
with Image.open(file_name) as uploaded_image:
  # PIL's resize takes (width, height); pixel values are scaled to [0, 1].
  img = np.asarray(uploaded_image.convert('RGB').resize((input_width, input_height))) / 255.0

pred = model.predict(np.array([img]))
scores = pred[0]  # model output for the single uploaded image
if len(scores) == 1:
  # Single-unit output: extract the scalar explicitly instead of calling
  # float() on a one-element array; below 0.5 means Cat.
  score = float(scores[0])
  title = f'This is a {CLASSES[0] if score < 0.5 else CLASSES[1]} ({round(score,3)})'
else:
  # Two-unit output: the position of the largest score is the class.
  title = f"This is a {CLASSES[int(np.argmax(scores))]} (Cat: {round(scores[0]*100)}%, Dog: {round(scores[1]*100)}%)"
fig = px.imshow(img)
fig.update_layout(
    title_x=0.5,
    title_text=title
)
fig.show()

# Delete the uploaded file from the working directory.
os.remove(f'./{file_name}')

###GPU

####モデル

#####[MobileNetV2](https://www.tensorflow.org/tutorials/images/transfer_learning)

In [None]:
# Timestamp in the Asia/Tokyo time zone; it names this run's output directory.
_now_in_tokyo = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
DATE_TIME = _now_in_tokyo.strftime("%Y-%m-%d_%H:%M:%S")
DIR_MODEL = f"saved_models/MobileNetV2/{DATE_TIME}"

# Folders for the model diagram and for the training-history plots.
for _subdir in ('model_summary', 'model_history'):
  os.makedirs(f'{DIR_MODEL}/{_subdir}', exist_ok=True)

def plot_history(acc, val_acc, loss, val_loss, epochs, test_acc):
  """Plot training/validation loss and accuracy over epochs 1..`epochs` and return the figure.

  Args:
    acc, val_acc, loss, val_loss: per-epoch metric sequences.
    epochs: number of epochs drawn on the x axis.
    test_acc: sequence whose first two items are the test loss and test
      accuracy; both are shown in the title.

  Returns:
    The plotly figure (it is also displayed).
  """
  epoch_axis = np.arange(1, epochs+1, 1)
  # (values, legend name, line style) for each curve, in drawing order.
  curves = [
    (loss, "loss", dict(color='royalblue', dash='dash')),
    (val_loss, "val_loss", dict(color='firebrick', dash='dash')),
    (acc, "acc", dict(color='royalblue')),
    (val_acc, "val_acc", dict(color='firebrick')),
  ]

  fig = go.Figure()
  for values, legend_name, line_style in curves:
    fig.add_trace(go.Scatter(x=epoch_axis, y=values, name=legend_name, line=line_style))

  fig.update_layout(
      xaxis=dict(dtick=1),
      title=f'Loss: {round(test_acc[0], 4)} / Accurecy: {round(test_acc[1]*100)}%',
      title_x=0.5,
      xaxis_title='Epoch',
      yaxis_title='Loss/Accuracy'
  )
  fig.show()
  return fig

def transfer_learning():
  """Train a MobileNetV2 binary classifier in two phases and save the results.

  Phase 1 trains only the new classification head on top of the frozen
  ImageNet backbone. Phase 2 unfreezes the backbone layers from index 100
  onwards and continues training with a 10x smaller learning rate. After each
  phase the model is saved under `DIR_MODEL/saved_model/`, evaluated on
  `test_ds`, and a history plot is written under `DIR_MODEL/model_history/`.

  Uses the module-level `train_ds`, `val_ds`, `test_ds`, `IMG_SIZE` and
  `DIR_MODEL`.
  """
  INIT_EPOCS = 1
  FINE_EPOCS = 1
  BASE_LR = 0.0001

  callbacks = [
               tfa.callbacks.TQDMProgressBar(),
              #  tf.keras.callbacks.TensorBoard(log_dir=f'{DIR_MODEL}/tensorboard',histogram_freq=1),
               tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=3, verbose=1),
               tf.keras.callbacks.ModelCheckpoint(
                   filepath='%s/checkpoints/cp_epoch_{epoch}.ckpt'%DIR_MODEL,
                   monitor='val_loss',
                   verbose=1,
                   save_best_only=True,
                   save_weights_only=True
               )
  ]

  # Random data augmentation
  data_augmentation = tf.keras.Sequential([
                                           tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
                                           tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
                                           ])

  # Rescale pixel values the way MobileNetV2 expects
  preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

  # Create the base model from the pre-trained convnet.
  # Supported input sizes: 96, 128, 160, 192, 224
  base_model = tf.keras.applications.MobileNetV2(
      input_shape=(IMG_SIZE, IMG_SIZE, 3),
      include_top=False,
      weights='imagenet'
  )

  base_model.trainable = False

  # Add the classification head
  global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

  prediction_layer = tf.keras.layers.Dense(1)

  inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
  x = data_augmentation(inputs)
  x = preprocess_input(x)
  x = base_model(x, training=False)
  x = global_average_layer(x)
  x = tf.keras.layers.Dropout(0.2)(x)
  outputs = prediction_layer(x)
  model = tf.keras.Model(inputs, outputs)

  # Compile the model (`learning_rate` replaces the deprecated `lr` alias)
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=BASE_LR),
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      metrics=['accuracy']
  )
  model.summary()
  tf.keras.utils.plot_model(model, f'{DIR_MODEL}/model_summary/transfer_learning.png', show_shapes=True)

  # Phase 1: train the head
  history = model.fit(
      train_ds,
      epochs=INIT_EPOCS,
      validation_data=val_ds,
      callbacks=callbacks,
      verbose=0,
  )
  # SavedModel
  model.save(f'{DIR_MODEL}/saved_model/transfer_learning')

  # Test
  test_acc = model.evaluate(test_ds, callbacks=[tfa.callbacks.TQDMProgressBar()], verbose=0)

  # Plot. Copy the series so that extending them later cannot modify
  # `history.history`, and size the epoch axis from the recorded values so it
  # still matches if early stopping ends a phase before its epoch budget.
  acc = list(history.history['accuracy'])
  val_acc = list(history.history['val_accuracy'])
  loss = list(history.history['loss'])
  val_loss = list(history.history['val_loss'])
  plot_history(acc, val_acc, loss, val_loss, len(loss), test_acc).write_image(f"{DIR_MODEL}/model_history/transfer_learning.png")

  # Phase 2: fine-tuning. The original note says MobileNetV2 has 154 layers;
  # the first 100 stay frozen and the remaining ones are retrained.
  base_model.trainable = True
  fine_tune_at = 100
  for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False

  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      optimizer = tf.keras.optimizers.RMSprop(learning_rate=BASE_LR/10),
      metrics=['accuracy']
  )
  history_fine = model.fit(
      train_ds,
      epochs=INIT_EPOCS+FINE_EPOCS,
      initial_epoch=INIT_EPOCS,
      validation_data=val_ds,
      callbacks=callbacks,
      verbose=0,
  )

  # SavedModel
  model.save(f'{DIR_MODEL}/saved_model/fine_tuning')

  # Test
  test_acc = model.evaluate(test_ds, callbacks=[tfa.callbacks.TQDMProgressBar()], verbose=0)

  # Plot both phases: append the fine-tuning metrics (`history_fine`) to the
  # phase-1 series.
  acc += history_fine.history['accuracy']
  val_acc += history_fine.history['val_accuracy']
  loss += history_fine.history['loss']
  val_loss += history_fine.history['val_loss']
  plot_history(acc, val_acc, loss, val_loss, len(loss), test_acc).write_image(f"{DIR_MODEL}/model_history/fine_tuning.png")

# Run both training phases, then release the Keras global state.
transfer_learning()
tf.keras.backend.clear_session() # discard the computation graph

#####[Xception](https://www.tensorflow.org/guide/keras/transfer_learning)

In [None]:
# Timestamp in the Asia/Tokyo time zone; it names this run's output directory.
_now_in_tokyo = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
DATE_TIME = _now_in_tokyo.strftime("%Y-%m-%d_%H:%M:%S")
DIR_MODEL = f"saved_models/Xception/{DATE_TIME}"

# Folders for the model diagram and for the training-history plots.
for _subdir in ('model_summary', 'model_history'):
  os.makedirs(f'{DIR_MODEL}/{_subdir}', exist_ok=True)

def plot_history(acc, val_acc, loss, val_loss, epochs, test_acc):
  """Draw the four metric curves against epochs 1..`epochs`, show the figure and return it.

  Args:
    acc, val_acc, loss, val_loss: per-epoch metric sequences.
    epochs: number of epochs on the x axis.
    test_acc: sequence holding the test loss and test accuracy (first two
      items), displayed in the title.

  Returns:
    The plotly figure.
  """
  x_values = np.arange(1, epochs+1, 1)
  blue_dashed = dict(color='royalblue', dash='dash')
  red_dashed = dict(color='firebrick', dash='dash')
  blue_solid = dict(color='royalblue')
  red_solid = dict(color='firebrick')

  traces = [
    go.Scatter(x=x_values, y=loss, name="loss", line=blue_dashed),
    go.Scatter(x=x_values, y=val_loss, name="val_loss", line=red_dashed),
    go.Scatter(x=x_values, y=acc, name="acc", line=blue_solid),
    go.Scatter(x=x_values, y=val_acc, name="val_acc", line=red_solid),
  ]

  fig = go.Figure()
  for trace in traces:
    fig.add_trace(trace)

  fig.update_layout(
      xaxis=dict(dtick=1),
      title=f'Loss: {round(test_acc[0], 4)} / Accurecy: {round(test_acc[1]*100)}%',
      title_x=0.5,
      xaxis_title='Epoch',
      yaxis_title='Loss/Accuracy'
  )
  fig.show()
  return fig

def transfer_learning():
  """Train an Xception binary classifier in two phases and save the results.

  Phase 1 trains only the new head on top of the frozen ImageNet backbone.
  Phase 2 unfreezes the whole backbone and continues with a learning rate of
  1e-5. After each phase the model is saved under `DIR_MODEL/saved_model/`,
  evaluated on `test_ds`, and a history plot is written under
  `DIR_MODEL/model_history/`.

  Uses the module-level `train_ds`, `val_ds`, `test_ds`, `IMG_SIZE` and
  `DIR_MODEL`.
  """
  INIT_EPOCS = 1
  FINE_EPOCS = 1

  callbacks = [
               tfa.callbacks.TQDMProgressBar(),
              #  tf.keras.callbacks.TensorBoard(log_dir=f'{DIR_MODEL}/tensorboard',histogram_freq=1),
               tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=3, verbose=1),
               tf.keras.callbacks.ModelCheckpoint(
                   filepath='%s/checkpoints/cp_epoch_{epoch}.ckpt'%DIR_MODEL,
                   monitor='val_loss',
                   verbose=1,
                   save_best_only=True,
                   save_weights_only=True
               )
  ]

  # Random data augmentation
  data_augmentation = tf.keras.Sequential([
                                           tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
                                           tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
                                           ])

  base_model = tf.keras.applications.Xception(
      weights="imagenet",  # Load weights pre-trained on ImageNet.
      input_shape=(IMG_SIZE, IMG_SIZE, 3),
      include_top=False,
  )  # Do not include the ImageNet classifier at the top.

  base_model.trainable = False

  # Create new model on top
  inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
  x = data_augmentation(inputs)  # Apply random data augmentation

  # Add a Normalization layer that scales the inputs (initially in [0, 255])
  # to [-1, 1], using mean 127.5 and variance 127.5**2 per channel.
  norm_layer = tf.keras.layers.experimental.preprocessing.Normalization()
  mean = np.array([127.5] * 3)
  var = mean ** 2
  # Scale inputs to [-1, +1]
  # NOTE(review): the layer is called first and its weights are set afterwards —
  # confirm this is still accepted by the installed TensorFlow version.
  x = norm_layer(x)
  norm_layer.set_weights([mean, var])

  # Call the base model with training=False so it runs in inference mode; this
  # keeps the batch-norm statistics from being updated even after the base
  # model is unfrozen for fine-tuning.
  x = base_model(x, training=False)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  x = tf.keras.layers.Dropout(0.2)(x)  # Dropout before the classifier for regularisation
  outputs = tf.keras.layers.Dense(1)(x)
  model = tf.keras.Model(inputs, outputs)

  # Phase 1: train the top layers
  model.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      metrics=[tf.keras.metrics.BinaryAccuracy()],
  )
  model.summary()
  tf.keras.utils.plot_model(model, f'{DIR_MODEL}/model_summary/transfer_learning.png', show_shapes=True)

  # Train the model
  history = model.fit(
      train_ds,
      epochs=INIT_EPOCS,
      validation_data=val_ds,
      callbacks=callbacks,
      verbose=0,
  )

  # Save
  model.save(f'{DIR_MODEL}/saved_model/transfer_learning')

  # Test
  test_acc = model.evaluate(test_ds, callbacks=[tfa.callbacks.TQDMProgressBar()], verbose=0)

  # Plot. Copy the series so that extending them later cannot modify
  # `history.history`, and size the epoch axis from the recorded values so it
  # still matches if early stopping ends a phase before its epoch budget.
  acc = list(history.history['binary_accuracy'])
  val_acc = list(history.history['val_binary_accuracy'])
  loss = list(history.history['loss'])
  val_loss = list(history.history['val_loss'])
  plot_history(acc, val_acc, loss, val_loss, len(loss), test_acc).write_image(f"{DIR_MODEL}/model_history/transfer_learning.png")

  # Phase 2: unfreeze the backbone and fine-tune the whole model
  base_model.trainable = True

  model.compile(
      optimizer=tf.keras.optimizers.Adam(1e-5),  # Low learning rate
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      metrics=[tf.keras.metrics.BinaryAccuracy()],
  )

  history_fine = model.fit(
      train_ds,
      epochs=INIT_EPOCS+FINE_EPOCS,
      initial_epoch=INIT_EPOCS,
      validation_data=val_ds,
      callbacks=callbacks,
      verbose=0,
  )

  # SavedModel
  model.save(f'{DIR_MODEL}/saved_model/fine_tuning')

  # Test
  test_acc = model.evaluate(test_ds, callbacks=[tfa.callbacks.TQDMProgressBar()], verbose=0)

  # Plot both phases: append the fine-tuning metrics (`history_fine`) to the
  # phase-1 series.
  acc += history_fine.history['binary_accuracy']
  val_acc += history_fine.history['val_binary_accuracy']
  loss += history_fine.history['loss']
  val_loss += history_fine.history['val_loss']
  plot_history(acc, val_acc, loss, val_loss, len(loss), test_acc).write_image(f"{DIR_MODEL}/model_history/fine_tuning.png")

# Run both training phases, then release the Keras global state.
transfer_learning()
tf.keras.backend.clear_session() # discard the computation graph

####評価と予測

In [None]:
# Load a saved model (SavedModel directory of a specific earlier run) and
# evaluate it on the test dataset.
model = tf.keras.models.load_model('/content/saved_models/Xception/2021-04-16_01:37:16/saved_model/fine_tuning')
test_acc = model.evaluate(test_ds, verbose=0)

#####テストデータ

In [None]:
# Retrieve a batch of images from the test set
image_batch, label_batch = next(test_ds.as_numpy_iterator())
pred = model.predict_on_batch(image_batch).flatten()

# Apply a sigmoid since our model returns logits, threshold at 0.5 for the
# class, and keep the probabilities rounded to three decimals for display.
pred = tf.nn.sigmoid(pred)
pred_label = tf.where(pred < 0.5, 0, 1)
pred = [round(float(probability), 3) for probability in pred]

def _apply_probability_title(annotation):
  """Title a facet with its predicted probability; wrong predictions get a red border."""
  facet_index = int(annotation.text.split("=")[-1])
  if pred_label[facet_index] != label_batch[facet_index]:
    annotation.update(text=(f'{pred[facet_index]}'), bordercolor="red")
  else:
    annotation.update(text=(f'{pred[facet_index]}'))

# Show the first nine images of the batch in a 3-column grid.
first_nine = np.squeeze(np.array(image_batch[:9]))
fig = px.imshow(first_nine, facet_col=0, facet_col_wrap=3)
fig.for_each_annotation(_apply_probability_title)
fig.update_layout(
    title_x=0.5,
    title_text=f"Loss: {round(test_acc[0], 3)}, Acc: {round(test_acc[1]*100)}%"
)
fig.show()

#####オリジナルデータ

In [None]:
CLASSES = ['Cat', 'Dog']

# Upload an image through the Colab file picker and take the first file name.
file_name = list(files.upload())[0]

# Load the image at the model's input resolution and add a batch dimension.
img = tf.keras.preprocessing.image.load_img(file_name, target_size=(IMG_SIZE, IMG_SIZE))
img_array = tf.expand_dims(tf.keras.preprocessing.image.img_to_array(img), 0)

# The model returns logits, so apply a sigmoid to get a probability.
pred = tf.nn.sigmoid(model.predict(img_array))

# Below 0.5 is reported as Cat, otherwise Dog.
predicted_class = CLASSES[0] if pred[0] < 0.5 else CLASSES[1]
title = f'This is a {predicted_class} ({round(float(pred[0]),3)})'

fig = px.imshow(img)
fig.update_layout(title_x=0.5, title_text=title)
fig.show()

# Delete the uploaded file from the working directory.
os.remove(f'./{file_name}')

## メモ

###TFRecords読み込む

In [None]:
AUTO = tf.data.experimental.AUTOTUNE
IMG_SIZE = 160
# Use `tpu_strategy`, the strategy object this notebook actually defines
# (there is no plain `strategy` variable).
batch_size = 16 * tpu_strategy.num_replicas_in_sync

_TFRECORD_TRAIN_DIR = '/content/datasets/v1/tfrecords/train'
_TFRECORD_VAL_DIR = '/content/datasets/v1/tfrecords/val'

# os.listdir returns bare file names; join them with their directory so
# TFRecordDataset can open the files regardless of the working directory.
train_fns = [os.path.join(_TFRECORD_TRAIN_DIR, name) for name in os.listdir(_TFRECORD_TRAIN_DIR)]
validation_fns = [os.path.join(_TFRECORD_VAL_DIR, name) for name in os.listdir(_TFRECORD_VAL_DIR)]
        
def parse_tfrecord(example):
  """Decode one serialized tf.Example into an (image, one-hot label) pair.

  Args:
    example: a serialized tf.Example with the fields listed in `features`.

  Returns:
    image_tensor: float32 image of shape (IMG_SIZE, IMG_SIZE, 3) with values
      scaled from 0-255 to [0, 1].
    one_hot_class: float32 vector of length 2.
  """
  features = {
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'depth': tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64),
    'image_raw': tf.io.FixedLenFeature([], tf.string),
    "one_hot_class": tf.io.VarLenFeature(tf.float32),
  }
  example = tf.io.parse_single_example(example, features)
  decoded = tf.image.decode_jpeg(example['image_raw'], channels=3)
  normalized = tf.cast(decoded, tf.float32) / 255.0 # convert each 0-255 value to floats in [0, 1] range
  # The records hold the JPEG bytes at their original resolution, so the image
  # has to be resized; a plain reshape fails whenever the stored image is not
  # already IMG_SIZE x IMG_SIZE.
  image_tensor = tf.image.resize(normalized, [IMG_SIZE, IMG_SIZE])
  one_hot_class = tf.reshape(tf.sparse.to_dense(example['one_hot_class']), [2])
  return image_tensor, one_hot_class

def load_dataset(filenames):
  """Return a dataset of parsed (image, one-hot label) pairs read from the given TFRecord files."""
  # Reads from several files in parallel, then parses each record.
  return tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO).map(
      parse_tfrecord, num_parallel_calls=AUTO
  )

def get_training_dataset():
  """Build the training pipeline: load, augment, repeat, shuffle, batch and prefetch."""
  def data_augment(image, one_hot_class):
    # Extra training variety: random horizontal flip, then a random
    # saturation change with factor bounds 0 and 2.
    flipped = tf.image.random_flip_left_right(image)
    return tf.image.random_saturation(flipped, 0, 2), one_hot_class

  pipeline = load_dataset(train_fns).map(data_augment, num_parallel_calls=AUTO)
  pipeline = pipeline.repeat()
  pipeline = pipeline.shuffle(2048)
  pipeline = pipeline.batch(batch_size)
  # Prefetch the next batch while training (autotuned buffer size).
  return pipeline.prefetch(AUTO)

# Training pipeline from get_training_dataset(); the validation records are
# only batched and prefetched.
train_ds = get_training_dataset()
val_ds = load_dataset(validation_fns).batch(batch_size).prefetch(AUTO)

###TFRecords作成

In [None]:
# Output folders for the TFRecord files of each split.
for _split_name in ('train', 'test', 'val'):
  os.makedirs(f'/content/datasets/v1/tfrecords/{_split_name}', exist_ok=True)

# Class names; their count sets the length of the one-hot label vector.
CLASSES = ['cat', 'dog']

# The helpers below convert a value into a type compatible with tf.Example.
def _bytes_feature(value):
  """Return a bytes_list Feature built from a string / byte value."""
  eager_tensor_type = type(tf.constant(0))
  if isinstance(value, eager_tensor_type):
    # BytesList won't unpack a string from an EagerTensor.
    value = value.numpy()
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Return a float_list Feature built from a sequence of float / double values."""
  float_list = tf.train.FloatList(value=value)
  return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
  """Return an int64_list Feature built from a single bool / enum / int / uint value."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

# Build the dictionary of features describing one image.
def image_example(image_string, label):
  """Create the tf.train.Example for one encoded image and its class label.

  Args:
    image_string: the raw bytes of a JPEG file.
    label: integer class index into CLASSES.

  Returns:
    A tf.train.Example holding the decoded image's height, width and depth,
    the integer label, the raw image bytes and the one-hot label.
  """
  image_shape = tf.image.decode_jpeg(image_string).shape
  one_hot_class = np.eye(len(CLASSES))[label]

  feature = {
      'height': _int64_feature(image_shape[0]),
      'width': _int64_feature(image_shape[1]),
      'depth': _int64_feature(image_shape[2]),
      # Store this example's own `label` argument, not the notebook-global
      # loop variable `label_train`.
      'label': _int64_feature(label),
      'image_raw': _bytes_feature(image_string),
      "one_hot_class": _float_feature(one_hot_class.tolist())
  }

  return tf.train.Example(features=tf.train.Features(feature=feature))

# Write one TFRecord file per image for each v1 split. The image files are
# read inside `with` blocks so their handles are closed right after reading.

# train
for img_train, label_train in zip(v1_all_imgs_train, v1_all_labels_train):
  file_name = os.path.splitext(os.path.basename(img_train))[0]
  with open(img_train, 'rb') as image_file:
    image_string = image_file.read()
  with tf.io.TFRecordWriter(f'/content/datasets/v1/tfrecords/train/{file_name}.tfrec') as writer:
    tf_example = image_example(image_string, label_train)
    writer.write(tf_example.SerializeToString())

# val
for img_val, label_val in zip(v1_all_imgs_val, v1_all_labels_val):
  file_name = os.path.splitext(os.path.basename(img_val))[0]
  with open(img_val, 'rb') as image_file:
    image_string = image_file.read()
  with tf.io.TFRecordWriter(f'/content/datasets/v1/tfrecords/val/{file_name}.tfrec') as writer:
    tf_example = image_example(image_string, label_val)
    writer.write(tf_example.SerializeToString())

# test
for img_test, label_test in zip(v1_all_imgs_test, v1_all_labels_test):
  file_name = os.path.splitext(os.path.basename(img_test))[0]
  with open(img_test, 'rb') as image_file:
    image_string = image_file.read()
  with tf.io.TFRecordWriter(f'/content/datasets/v1/tfrecords/test/{file_name}.tfrec') as writer:
    tf_example = image_example(image_string, label_test)
    writer.write(tf_example.SerializeToString())

###colabで35GBメモリを無料で利用する方法

1. ipynbファイルをダウンロード
2. ダウンロードしたファイルを編集


```
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Untitled2.ipynb",
      "provenance": [],
      "machine_shape": "hm" <---追加
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "TPU"
  },
  "cells": [
    {
      "cell_type": "code",
```
3. アップロードして開く
