In [1]:
import tensorflow as tf
import os
import numpy as np
from utils import *
import datetime
from PIL import Image

In [2]:
# Restrict this kernel to GPUs 0 and 1.
# `!export ...` would run in a throwaway subshell and never reach the kernel
# process, so the variable is set on the kernel's own environment instead.
# This has to run before TensorFlow first initialises the GPUs (i.e. before
# the MirroredStrategy is created) to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

In [2]:
# Distribution strategy used by every later cell that builds, compiles or fits
# a model; created with default arguments (no explicit device list).
mirrored_strategy = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [3]:
# Hyperparameters read by the data-loading, model-building and training cells.
batch_size = 16  # batch size handed to flow_from_directory
dropout = 0.7    # rate used by both Dropout layers in the classifier head
ls = 0.1         # label_smoothing for CategoricalCrossentropy
lr = 1e-4        # learning rate for the Adam optimizer
epochs = 1000    # number of epochs passed to fit()

In [5]:
# --- Data pipeline ----------------------------------------------------------
# The directory iterators are built outside the strategy scope; the scope
# below is kept for model construction, compilation and training.
ROOT_DIR = get_git_root(os.getcwd())
train_path = os.path.join(ROOT_DIR, 'veri-wild', 'images', 'train')
val_path = os.path.join(ROOT_DIR, 'veri-wild', 'images', 'val')

# NOTE(review): with subset='training', validation_split=0.99 / 0.95 keeps only
# the small non-validation remainder of each directory. This looks like a
# deliberate small-subset run -- confirm before using for real training.
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.99,
).flow_from_directory(
    train_path,
    target_size=(299, 299),
    batch_size=batch_size,
    subset='training',
)
# NOTE(review): val_gen is built but never passed to fit(); it indexes a
# different directory (and therefore its own class list) than train_gen.
val_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.95,
).flow_from_directory(
    val_path,
    target_size=(299, 299),
    batch_size=batch_size,
    subset='training',
)

# --- Model ------------------------------------------------------------------
with mirrored_strategy.scope():
    # Take the input shape from the iterator's metadata instead of loading
    # (and throwing away) a whole batch with train_gen.next().
    input_layer = tf.keras.layers.Input(shape=train_gen.image_shape)

    # Frozen EfficientNetB7 feature extractor; kept under its own name so that
    # `model` only ever refers to the full classifier.
    backbone = tf.keras.applications.EfficientNetB7(include_top=False,
                                                    input_tensor=input_layer)
    backbone.trainable = False

    # Classifier head. Layers are referenced through tf.keras.layers explicitly
    # rather than depending on whatever `from utils import *` happens to export.
    output = backbone(input_layer)
    output = tf.keras.layers.AveragePooling2D((5, 5), name='avg_pool')(output)
    output = tf.keras.layers.Dropout(dropout)(output)
    output = tf.keras.layers.Flatten(name='Flatten_1')(output)
    output = tf.keras.layers.Dense(1024, activation='relu',
                                   name='Dense_preoutput')(output)
    output = tf.keras.layers.Dropout(dropout)(output)
    # One output unit per class found on disk, so the head always matches the
    # one-hot labels the generator yields (previously hard-coded as 30671).
    output = tf.keras.layers.Dense(train_gen.num_classes, activation='softmax',
                                   name='Dense_output')(output)

    model = tf.keras.models.Model(input_layer, output)

    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=ls)
    metrics = [tf.keras.metrics.CategoricalAccuracy()]
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    print('Model compiled')

    model.fit(train_gen,
              epochs=epochs)

Found 30680 images belonging to 30671 classes.
Found 11475 images belonging to 10001 classes.
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:C

KeyboardInterrupt: 

In [18]:
# Write the current weights of `model` to <ROOT_DIR>/trainings/models/tmp.
model.save_weights(
    os.path.join(ROOT_DIR, 'trainings', 'models', 'tmp')
)

In [6]:
# Rebuild the same architecture as a fresh model and restore the weights that
# were saved to <ROOT_DIR>/trainings/models/tmp.
with mirrored_strategy.scope():
    # Input shape and class count come from the iterator's metadata; calling
    # train_gen.next() here would load and discard a batch of images.
    input_layer = tf.keras.layers.Input(shape=train_gen.image_shape)

    # Frozen EfficientNetB7 feature extractor, named separately from model2.
    backbone2 = tf.keras.applications.EfficientNetB7(include_top=False,
                                                     input_tensor=input_layer)
    backbone2.trainable = False

    # Classifier head; layer names must match the saved model's.
    output = backbone2(input_layer)
    output = tf.keras.layers.AveragePooling2D((5, 5), name='avg_pool')(output)
    output = tf.keras.layers.Dropout(dropout)(output)
    output = tf.keras.layers.Flatten(name='Flatten_1')(output)
    output = tf.keras.layers.Dense(1024, activation='relu',
                                   name='Dense_preoutput')(output)
    output = tf.keras.layers.Dropout(dropout)(output)
    output = tf.keras.layers.Dense(train_gen.num_classes, activation='softmax',
                                   name='Dense_output')(output)

    model2 = tf.keras.models.Model(input_layer, output)

    model2.load_weights(os.path.join(ROOT_DIR, 'trainings', 'models', 'tmp'))

    # Give model2 its own optimizer / loss / metric objects. Re-using the
    # instances compiled into the first model would share optimizer and metric
    # state between the two models.
    model2.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=ls),
        metrics=[tf.keras.metrics.CategoricalAccuracy()],
    )

In [None]:
# Train the restored model on the training iterator for `epochs` epochs.
with mirrored_strategy.scope():
    model2.fit(x=train_gen, epochs=epochs)

Epoch 1/1000
INFO:tensorflow:batch_all_reduce: 4 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 4 all-reduces with algorithm = nccl, num_packs = 1