In [1]:
# Stretch the notebook container to the full browser width.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Print the current GPU status (memory in use, utilisation) via the NVIDIA CLI.
!nvidia-smi

Wed May  8 14:25:56 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.40.04    Driver Version: 418.40.04    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00005E6A:00:00.0 Off |                    0 |
| N/A   74C    P0   133W / 149W |   6265MiB / 11441MiB |     83%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [4]:
import os

# These variables are set before TensorFlow is imported in the next cell.
# CUDA_VISIBLE_DEVICES='0' exposes only GPU 0 to this process.
# TF_CUDNN_USE_AUTOTUNE='0' presumably turns off cuDNN autotuning -- confirm
# against the TensorFlow version in use.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '0',
    'TF_CUDNN_USE_AUTOTUNE': '0',
})

In [5]:
import tensorflow as tf

# Session configuration shared by every Keras call in this notebook.
config = tf.ConfigProto(
    allow_soft_placement=True,
    # Request only a 0.15 fraction of GPU memory for this process.
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.15),
    # NOTE(review): 0 is expected to leave the thread-pool sizes to
    # TensorFlow -- confirm against the ConfigProto documentation.
    intra_op_parallelism_threads=0,
    inter_op_parallelism_threads=0,
)

# Register the session with Keras so that models are built and run in it.
session = tf.Session(config=config)
tf.keras.backend.set_session(session)

In [7]:
import random
import numpy as np

# Single seed reused for every RNG and for the class splits further down.
seed = 11

# Seed NumPy, Python's `random` and TensorFlow, in that order.
for _set_seed in (np.random.seed, random.seed, tf.random.set_random_seed):
    _set_seed(seed)

In [8]:
from fewshot.data_provider.dataset import Dataset
from fewshot.backbones import ConvNet

from fewshot.algorithms.backbone_train import (
    simple_one_layer_cross_entropy_train,
    simple_cosine_layer_cross_entropy_train
)
from fewshot.algorithms.fewshot_test import baseline_fewshot_test
from fewshot.algorithms.fewshot_models import BaselineFewShotModel

import tqdm
from scipy import stats

## Initialize

Data parameters:

In [9]:
# Input image dimensions: width, height and number of channels.
img_width, img_height, img_depth = 84, 84, 3

In [10]:
# Dataset location: the data directory is <dataset_root>/<dataset_name>,
# and csv_name is handed to Dataset alongside it.
dataset_root, dataset_name, csv_name = "../../../data/", 'mini_imagenet', "data.csv"

# Class budget per split.
#   base  -- classes used to train the backbone
#   val   -- classes split off before few-shot testing; the resulting
#            validation set itself is not used in the baseline algo
#   novel -- classes for few-shot evaluation
base_num_classes, val_num_classes, novel_num_classes = 64, 16, 20

Backbone training parameters:

In [11]:
# backbone training
backbone_training_batch_size, backbone_training_epochs = 16, 21

# Augmentation settings passed as `generator_args` to the backbone batch
# generator: random flip, colour jitter and random crop.
backbone_generator_args = dict(
    flip_prob=0.5,
    color_jitter_prob=1.0,
    hue_range=(0.6, 1.4),
    saturation_range=(0.6, 1.4),
    value_range=(0.6, 1.4),
    contrast_range=(0.6, 1.4),
    crop_scale=(0.08, 1.0),
    crop_ratio=(3 / 4, 4 / 3),
    # TODO: think about order (w, h) or (h, w)
    crop_size=(img_width, img_height),
)

Few-shot training parameters:

In [22]:
# fewshot training and testing
n_way, k_shot = 5, 5
fewshot_batch_size = 4
support_epochs = 100
query_size = 16
n_episodes = 50

# Support images: no random augmentation, only `size` (1.15x the network
# input) followed by a center crop back to the network input size.
support_generator_args = dict(
    size=(int(img_width * 1.15), int(img_height * 1.15)),
    center=True,  # for center cropping
    crop_size=(img_width, img_height),
)

# Query images use exactly the same settings; keep it a separate dict object.
query_generator_args = dict(support_generator_args)

In [13]:
# Where backbone checkpoints and TensorBoard logs for this run are written.
checkpoint_dir, log_dir = (
    '../../../data/few-models/mini_imagenet3/checkpoints',
    '../../../data/few-models/mini_imagenet3/logs',
)

## Data

Create dataset:

In [14]:
# Full dataset, located at <dataset_root>/<dataset_name>.
dataset_dir = os.path.join(dataset_root, dataset_name)
dataset = Dataset(
    dataset_dir=dataset_dir,
    csv_name=csv_name,
    image_size=(img_width, img_height),
)

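Create backbone dataset (the 64 base classes; the remaining 36 classes are held out for validation and few-shot evaluation):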

In [15]:
# Class-level split: `base_num_classes` classes go to the backbone training
# set, all remaining classes are kept aside for the next split.
backbone_dataset, val_fewshot_dataset = dataset.split_by_classes(
    train_size=base_num_classes,
    random_state=seed,
)

Split by classes with train size = 64 (seed = 11)
Train classes: 64
Test classes: 36
Train data: 38400 samples
Test data:  21600 samples




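Create few-shot dataset (split the 36 held-out classes into 16 validation classes and 20 novel classes for few-shot evaluation):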

In [16]:
# Second class-level split of the held-out classes: `val_num_classes` classes
# form the validation set, the rest become the few-shot evaluation set.
val_dataset, fewshot_dataset = val_fewshot_dataset.split_by_classes(
    train_size=val_num_classes,
    random_state=seed,
)

Split by classes with train size = 16 (seed = 11)
Train classes: 16
Test classes: 20
Train data: 9600 samples
Test data:  12000 samples


## Backbone

In [17]:
# Name passed as `model_name` to the backbone training call below.
model_name = 'baseline'

Create model:

In [18]:
# Build the ConvNet backbone for inputs of size (img_width, img_height, img_depth).
backbone = ConvNet(input_size=(img_width, img_height, img_depth))

Instructions for updating:
Colocations handled automatically by placer.


Train model:

In [19]:
# Presumably marks the backbone weights as trainable for the training step
# below -- confirm against ConvNet.set_trainable.
backbone.set_trainable(True)

In [20]:
# Adam optimizer (learning rate 1e-3) used for backbone training.
backbone_optimizer = tf.keras.optimizers.Adam(lr=1e-3)

In [21]:
# Train the backbone with a single-layer cross-entropy head on the base
# classes, using the augmenting batch generator configured above.
simple_one_layer_cross_entropy_train(
    backbone,
    backbone_dataset.get_batch_generator(
        batch_size=backbone_training_batch_size,
        shuffle=True,
        generator_args=backbone_generator_args,
    ),
    epochs=backbone_training_epochs,
    optimizer=backbone_optimizer,
    model_name=model_name,
    checkpoint_dir=checkpoint_dir,
    period=10,  # NOTE(review): presumably the checkpoint interval in epochs -- confirm
    tensorboard=True,
    log_dir=log_dir,
    use_multiprocessing=True,
    workers=4,
    verbose=2,
    # Resume from the "<model_name>.80.hdf5" file in checkpoint_dir. The file
    # name is derived from `model_name` (instead of a hard-coded "baseline")
    # so that it stays in sync with the name used for this run.
    resume=os.path.join(checkpoint_dir, "{}.80.hdf5".format(model_name)),
)

Epoch 1/21
 - 129s - loss: 2.3377 - acc: 0.3917
Epoch 2/21
 - 129s - loss: 2.3469 - acc: 0.3898
Epoch 3/21
Epoch 3/21
 - 129s - loss: 2.3362 - acc: 0.3916
Epoch 4/21
 - 128s - loss: 2.3328 - acc: 0.3952
Epoch 5/21
 - 128s - loss: 2.3263 - acc: 0.3917
Epoch 6/21
 - 129s - loss: 2.3266 - acc: 0.3953
Epoch 7/21
 - 129s - loss: 2.3184 - acc: 0.3959
Epoch 8/21
 - 129s - loss: 2.3179 - acc: 0.3933
Epoch 9/21
 - 128s - loss: 2.3167 - acc: 0.3935
Epoch 10/21
 - 129s - loss: 2.3021 - acc: 0.3967
Epoch 11/21
 - 129s - loss: 2.3105 - acc: 0.3980
Epoch 12/21
 - 129s - loss: 2.3083 - acc: 0.3996
Epoch 13/21

 - 129s - loss: 2.3136 - acc: 0.3990
Epoch 14/21
 - 102s - loss: 2.3139 - acc: 0.3950
Epoch 15/21
 - 95s - loss: 2.3077 - acc: 0.3976
Epoch 15/21
Epoch 16/21
 - 95s - loss: 2.2954 - acc: 0.3991
Epoch 17/21Epoch 16/21

 - 95s - loss: 2.3041 - acc: 0.3976

Epoch 18/21
 - 99s - loss: 2.2959 - acc: 0.3983
Epoch 19/21
Epoch 18/21
 - 98s - loss: 2.3030 - acc: 0.3995
Epoch 20/21
 - 110s - loss: 2.3014

<fewshot.backbones.convnet.ConvNet at 0x7f0ae896b3c8>

## Few-shot

In [23]:
# Generator of n_way / k_shot episodes over the few-shot dataset, with
# `query_size` as the query setting and the support/query image options
# defined in the few-shot parameters cell.
episode_generator = fewshot_dataset.get_fewshot_generator(
    n_way=n_way,
    k_shot=k_shot,
    query_size=query_size,
    support_generator_args=support_generator_args,
    query_generator_args=query_generator_args,
)

Train few-shot model:

In [24]:
# Adam optimizer (learning rate 1e-3) passed to the few-shot test below.
fewshot_optimizer = tf.keras.optimizers.Adam(lr=1e-3) 

In [25]:
# Wrap the trained backbone in the baseline few-shot model for n_way classes
# and run it over `n_episodes` episodes; the result is stored in `accuracies`.
fewshot_model = BaselineFewShotModel(backbone, n_way)
accuracies = baseline_fewshot_test(
    model=fewshot_model,
    generator=episode_generator,
    optimizer=fewshot_optimizer,
    batch_size=fewshot_batch_size,
    support_epochs=support_epochs,
    n_episodes=n_episodes,
    model_name='baseline-fewshot',
    tensorboard=True,
    log_dir=log_dir,
    period=10,
)

  0%|          | 0/50 [00:00<?, ?it/s]

Instructions for updating:
Use tf.cast instead.


Average acc: 54.65%: 100%|██████████| 50/50 [08:55<00:00, 13.37s/it]


## Report

Calculate 95% confidence interval:

In [27]:
# Mean accuracy over all episodes and its 95% confidence interval from
# Student's t distribution (len(accuracies) - 1 degrees of freedom, scaled by
# the standard error of the mean).
mean_accuracy = np.mean(accuracies)
confidence_interval = stats.t.interval(
    0.95,
    len(accuracies) - 1,
    loc=mean_accuracy,
    scale=stats.sem(accuracies),
)
# Half of the interval's width, i.e. the "+-" part of the report.
half_width = (confidence_interval[1] - confidence_interval[0]) / 2

# Fixed two-decimal formatting keeps trailing zeros (e.g. "54.60%"), which
# round() combined with "{}" would drop.
print("{}-way {}-shot metric: {:.2f}% +- {:.2f}%".format(
    n_way, k_shot, mean_accuracy * 100, half_width * 100))

5-way 5-shot metric: 54.65% +- 2.67%
