In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

import random
import numpy as np
import os
import argparse

SEED = 1000
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

from dataloader import DataLoader

# Notebook-wide configuration constants.
NUM_CONV_LAYERS = 4     # number of Conv/BN/ReLU blocks when the Sequential model is assembled by hand
# NOTE(review): SAVE_INTERVAL, LOG_INTERVAL, VAL_INTERVAL and NUM_ITERATIONS are not
# referenced anywhere in the visible cells; presumably checkpoint / logging / validation
# periods and the number of outer iterations of a training script -- confirm or remove.
SAVE_INTERVAL = 100
LOG_INTERVAL = 1
VAL_INTERVAL = 50
NUM_TRAIN_TASKS = 20    # argument passed to train_data.generate_task(...)
NUM_TEST_TASKS = 100    # argument passed to val_data.generate_task(...)
NUM_ITERATIONS = 1500

2022-05-19 14:25:36.201452: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Episode layout handed to DataLoader: presumably N-way (num_classes) with n_support
# and n_query examples per class -- TODO confirm against dataloader.py.
num_classes, n_support, n_query = 5, 5, 15
# Number of adaptation passes over the support set (used as `epochs` in fit and as the
# outer range of the hand-written inner loop).
num_inner_steps = 5
# Learning rate of the Adam optimizers (pre-training and meta/outer update).
_outer_lr = 0.001
# Learning rate of the SGD optimizers used for per-task adaptation.
_inner_lr = 0.4

In [3]:
with tf.device("/cpu:0"):
    train_data = DataLoader('train', num_classes, n_support, n_query)
    val_data = DataLoader('test', num_classes, n_support, n_query)

2022-05-19 14:25:38.743872: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-19 14:25:38.748973: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-19 14:25:38.757212: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-19 14:25:38.758530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compi

	-Preprocessing train Omniglot dataset
	-Preprocessing test Omniglot dataset


In [4]:
dataset = train_data.generate_entire_dataset()

In [5]:
class ConvLayer(layers.Layer):
    """Conv2D -> BatchNormalization -> ReLU block.

    Args:
        filters: number of output channels of the convolution.
        kernel_size: size of the convolution window.
        padding: padding mode forwarded to ``Conv2D`` (default ``'same'``).
        strides: stride of the convolution. Defaults to 2, so with ``'same'``
            padding every block halves the spatial resolution (rounded up).
        **kwargs: standard ``layers.Layer`` arguments such as ``name``.
    """

    def __init__(self, filters, kernel_size, padding: str = 'same', strides=2, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.padding = padding
        self.strides = strides

        self.conv = layers.Conv2D(
            filters=self.filters, kernel_size=self.kernel_size,
            strides=self.strides, padding=self.padding)
        self.bn = layers.BatchNormalization()
        self.relu = layers.ReLU()

    def call(self, x, training=None):
        """Apply the block to ``x``.

        ``training`` is forwarded to the batch-normalisation layer so the choice
        between batch statistics and moving averages is explicit; ``None`` lets
        Keras resolve it from the calling context, as before.
        """
        x = self.conv(x)
        x = self.bn(x, training=training)
        x = self.relu(x)
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'padding': self.padding,
            'strides': self.strides,
        })
        return config

class ConvNet(keras.Model):
    """Convolutional encoder followed by a softmax classification head.

    The encoder is a ``Sequential`` of four ``ConvLayer(64, 3, 'same')`` blocks
    and a ``Flatten``; it is exposed as ``self.encoder`` so it can be reused as
    a feature extractor.

    Args:
        classes: number of units of the softmax output layer.
        shape: shape of a single input image (without the batch dimension).
    """

    def __init__(self, classes=964, shape=(28,28,1)):
        super().__init__()

        # Built left to right: input spec, four identical conv blocks, flatten.
        encoder_layers = (
            [layers.Input(shape=shape)]
            + [ConvLayer(64, 3, 'same') for _ in range(4)]
            + [layers.Flatten()]
        )
        self.encoder = tf.keras.Sequential(encoder_layers)

        self.classification = layers.Dense(classes, activation='softmax')

    def call(self, inputs):
        """Return class probabilities for a batch of images."""
        features = self.encoder(inputs)
        return self.classification(features)

In [6]:
model = ConvNet()
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=_outer_lr), 
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['Accuracy']
)

In [7]:
model.build((1,28,28,1))
model.summary()

Model: "conv_net"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 256)               112448    
                                                                 
 dense (Dense)               multiple                  247748    
                                                                 
Total params: 360,196
Trainable params: 359,684
Non-trainable params: 512
_________________________________________________________________


In [8]:
metrics = model.fit(dataset, epochs=1, verbose=0)
metrics.history['loss'][-1], metrics.history['Accuracy'][-1]

2022-05-19 14:25:55.974302: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8200


(5.228789329528809, 0.10129667818546295)

In [9]:
val_dataset = val_data.generate_task()

In [10]:
# Reuse the pre-trained convolutional encoder and freeze it, so that only a head
# stacked on top of it receives gradient updates.
feature_extractor = model.encoder
feature_extractor.trainable = False

# Objects used for per-task adaptation: plain SGD with the inner learning rate,
# and a loss / metric that take integer class labels (the "Sparse" variants).
opt_fn = tf.keras.optimizers.SGD(learning_rate=_inner_lr)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
metrics_fn = tf.keras.metrics.SparseCategoricalAccuracy(name='Accuracy')

In [18]:
# Task-specific head on top of the frozen encoder. Its width follows `num_classes`
# (the N of the N-way episodes) instead of a hard-coded 5, so that changing the
# episode configuration cannot silently desynchronise the head from the labels.
prediction_layer = layers.Dense(num_classes, activation='softmax')
model = keras.Sequential([keras.Input(shape=(28, 28, 1)), feature_extractor, prediction_layer])
model.compile(
    optimizer=opt_fn,
    loss=loss_fn,
    metrics=[metrics_fn]  # compile() documents `metrics` as a list
)

In [19]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 256)               112448    
                                                                 
 dense_2 (Dense)             (None, 5)                 1285      
                                                                 
Total params: 113,733
Trainable params: 1,285
Non-trainable params: 112,448
_________________________________________________________________


In [20]:
task_batch = val_dataset
for task in task_batch:
    support, query = task
    break

In [21]:
history = model.fit(
    support, 
    epochs=num_inner_steps,
    validation_data=query
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
history.history

{'loss': [2.6747443675994873,
  3.3626832962036133,
  2.6089394092559814,
  6.547420501708984,
  3.7291290760040283],
 'Accuracy': [0.36000001430511475,
  0.7200000286102295,
  0.47999998927116394,
  0.7200000286102295,
  0.6800000071525574],
 'val_loss': [3.6072559356689453,
  2.9572553634643555,
  6.212284564971924,
  4.1100358963012695,
  2.8104536533355713],
 'val_Accuracy': [0.7333333492279053,
  0.4266666769981384,
  0.6933333277702332,
  0.6133333444595337,
  0.6266666650772095]}

In [15]:
logits = model.predict(query)



In [17]:
logits.shape

(75, 5)

In [18]:
acc = history.history['Accuracy']
val_acc = history.history['val_Accuracy']
val_loss = history.history['val_loss']

In [19]:
val_acc

[0.7733333110809326,
 0.9066666960716248,
 0.9066666960716248,
 0.9066666960716248,
 0.9200000166893005]

In [None]:
outer_loss_batch = []
accuracies_support_batch = []
accuracy_query_batch = []

In [None]:
model.fit(support, epochs=num_inner_steps)

In [None]:
# Hand-assembled network: NUM_CONV_LAYERS blocks of (stride-2 conv, batch norm, ReLU),
# then flatten and a softmax layer with one unit per class of an episode.
model_layers = [layers.Input(shape=(28,28,1))]

for i in range(NUM_CONV_LAYERS):
    # Explicit, 1-based names make the individual weights easy to identify later.
    model_layers += [
        layers.Conv2D(filters=64, kernel_size=3, strides=2, padding="same", name=f"Conv{i+1}"),
        layers.BatchNormalization(name=f"BN{i+1}"),
        layers.ReLU(name=f"ReLU{i+1}"),
    ]

model_layers += [
    layers.Flatten(),
    layers.Dense(num_classes, activation='softmax', name='Classification'),
]
model = keras.Sequential(model_layers)

In [None]:
val_batches = val_data.generate_task(NUM_TEST_TASKS)

In [None]:
train_task = train_data.generate_task(NUM_TRAIN_TASKS)

## Outer 0

In [None]:
# Optimizer of the outer (meta) update.
_optimizer = keras.optimizers.Adam(learning_rate=_outer_lr)

# theta: the meta-parameters, kept as fresh variables holding a copy of the model's
# current trainable weights (allocate with the right shapes, then copy the values).
theta = [tf.Variable(tf.zeros_like(weight)) for weight in model.trainable_weights]
for meta_var, model_var in zip(theta, model.trainable_weights):
    meta_var.assign(model_var)

# Pull the first (support, query) pair out of the training tasks for experimentation.
task_batch = train_task
for task in task_batch:
    support, query = task
    break

In [None]:
# Visual sanity check of the support set: one row of five image panels.
fig, axes = plt.subplots(1, 5, figsize=(12, 10))
for i in range(5):
    # take(1) is re-evaluated on every iteration and only element 0 of the fetched
    # batch is drawn.
    # NOTE(review): unless `support` reshuffles each time it is iterated, this shows
    # the same image in all five panels -- confirm whether imgs[i] was intended.
    imgs, label = support.take(1).get_single_element()
    axes[i].imshow(imgs[0].numpy().squeeze())
    # Prints the labels of the whole fetched batch, not only of the displayed image.
    print(label)

## Inner

In [None]:
def inner_run(theta, support_data):
    """Adapt the global ``model`` to one task, starting from ``theta`` (inner loop).

    The model's trainable weights are first overwritten with ``theta``; then
    ``num_inner_steps`` passes of plain SGD (learning rate ``_inner_lr``) are run
    over ``support_data``.

    Args:
        theta: list of variables/tensors with the meta-parameters, in the same
            order as ``model.trainable_weights``.
        support_data: iterable yielding ``(images, integer_labels)`` batches.

    Returns:
        ``(phi, accuracies)`` where ``phi`` is a list of new ``tf.Variable``
        objects holding the adapted weights and ``accuracies`` is the running
        (cumulative) support accuracy recorded after every gradient step.

    Side effects:
        On return the trainable weights of the global ``model`` equal ``phi``.
    """
    accuracies = []

    # The forward pass reads ``model.trainable_weights``, so adaptation only starts
    # from the meta-parameters if they are loaded into the model first.
    for weight, meta_weight in zip(model.trainable_weights, theta):
        weight.assign(meta_weight)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    opt_fn = tf.keras.optimizers.SGD(learning_rate=_inner_lr)
    metrics_fn = tf.keras.metrics.SparseCategoricalAccuracy(name='Inner Accuracy')
    for _ in range(num_inner_steps):
        for imgs, label in support_data:
            with tf.GradientTape() as tape:
                logits = model(imgs, training=True)
                loss = loss_fn(label, logits)
            grads = tape.gradient(loss, model.trainable_weights)
            # One optimizer, one variable list: the step is applied to the weights
            # that actually produced ``logits``.
            opt_fn.apply_gradients(zip(grads, model.trainable_weights))
            metrics_fn.update_state(label, logits)
            accuracies.append(metrics_fn.result().numpy())

    # Snapshot the adapted weights so later changes to the model do not alter phi.
    phi = [tf.Variable(tf.identity(weight)) for weight in model.trainable_weights]
    return phi, accuracies

In [None]:
phi, accuracies = inner_run(theta, support_data=support)

In [None]:
accuracies

## Outer 1

In [None]:
train = True

In [None]:
loss_fn = keras.losses.SparseCategoricalCrossentropy()
metrics_fn = tf.keras.metrics.SparseCategoricalAccuracy(name='Outer Accuracy')
all_grads = tf.nest.map_structure(lambda x: tf.Variable(tf.zeros_like(x)), model.trainable_weights)

outer_loss_batch = []
accuracies_support_batch = []
accuracy_query_batch = []

$$ \theta_{t+1} := \theta_t - \beta \sum_{\tau_i \sim p(\tau)} \triangledown_\theta  L(\phi_i^L, D_i^{query})$$


$$\triangledown_\theta L(\phi_i^L, D_i^{query}) = \triangledown_{\phi_i^L} L(\phi_i^L, D_i^{query}) \cdot \prod_{k=1}^L \left( I - \alpha \triangledown_{\phi_i^{k-1}} \left( \triangledown_{\phi_i^{k-1}} L(\phi_i^{k-1}, D_i^{support}) \right) \right), \qquad \phi_i^0 = \theta $$

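**FO-MAML** (first-order approximation): the second-order factors in the product are dropped, i.e. each factor is treated as the identity, so only the gradient at the adapted parameters remains: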

$$\triangledown_\theta L(\phi_i^L, D_i^{query}) \approx \triangledown_{\phi_i^L} L(\phi_i^L, D_i^{query}) $$

In [None]:
# Query-set gradient for a single task, accumulated one mini-batch at a time.
# Load the adapted weights phi into the model so the query loss is evaluated at phi.
tf.nest.map_structure(lambda x, y: x.assign(y), model.trainable_weights, phi)
# Zero-initialised accumulator with one entry per trainable weight.
single_task_grads = tf.nest.map_structure(lambda x: tf.Variable(tf.zeros_like(x)), model.trainable_weights)
# Number of elements yielded by `query` -- presumably the number of mini-batches
# (TODO confirm how the DataLoader batches the query set).
B = len(query)
query_loss = 0
metrics_fn.reset_states()
for imgs, label in (query):
    # persistent=True keeps the tape usable after the first gradient() call; the
    # scratch cells further down call tape.gradient(...) on it again.
    with tf.GradientTape(persistent=True) as tape:
        logits = model(imgs, training=train)
        loss = loss_fn(label, logits)

    query_loss += loss
    metrics_fn.update_state(label, logits)

    # Gradient w.r.t. the model weights (which currently hold phi): this is the
    # first-order (FO-MAML) gradient, added to the running sum.
    grads = tape.gradient(loss, model.trainable_weights)
    single_task_grads = tf.nest.map_structure(lambda x, y: x + y, single_task_grads, grads)

# Average of the per-batch gradients.
single_task_grads = [x/B for x in single_task_grads]

In [None]:
len(query)

In [None]:
task_grads

In [None]:
phi[-1]

In [None]:
tape.gradient(loss, phi)

In [None]:
tape.gradient(loss, model.trainable_weights)

In [None]:
# Query-set evaluation of one task (body of the loop over tasks).
metrics_fn.reset_states()
task_loss = 0
num_batch_data = 0
# Running sum of gradients, one tensor per trainable weight.
task_grads = [tf.zeros_like(weight) for weight in model.trainable_weights]

for imgs, label in query:
    batch_size = len(label)
    with tf.GradientTape() as tape:
        logits = model(imgs, training=train)
        loss = loss_fn(label, logits)
    grads = tape.gradient(loss, model.trainable_weights)
    # `loss` is already the mean over the batch, so its gradient is a per-batch mean.
    # Weight it by the batch size: the division by the total sample count below then
    # yields the true per-sample mean, also when the last batch is smaller.
    task_grads = [acc + grad * batch_size for acc, grad in zip(task_grads, grads)]
    task_loss += loss
    num_batch_data += batch_size
    metrics_fn.update_state(label, logits)

# max(..., 1) keeps an empty query set from producing NaN gradients.
task_grads = [x / max(num_batch_data, 1) for x in task_grads]
accuracies_support_batch.append(accuracies)
accuracy_query_batch.append(metrics_fn.result().numpy())
outer_loss_batch.append(task_loss)

In [None]:
# One outer (meta) update with first-order MAML over every task in `task_batch`.
# This cell uses the notebook-level `model`, `inner_run` and `_optimizer`; there is
# no `self` at notebook top level.
# NOTE: the two scratch cells that follow repeat the `all_grads` accumulation and the
# optimizer step; running them after this cell would apply the update twice.
loss_fn = keras.losses.SparseCategoricalCrossentropy()
# Sum over tasks of the per-task query gradients, one tensor per trainable weight.
all_grads = [tf.zeros_like(weight) for weight in model.trainable_weights]

for task in task_batch:
    support, query = task
    # support: adapt from theta. The model is reset to theta first so that every
    # task starts from the meta-parameters, not from the previous task's phi.
    for weight, meta_weight in zip(model.trainable_weights, theta):
        weight.assign(meta_weight)
    phi, accuracies = inner_run(theta, support_data=support)
    # Evaluate the query loss at the adapted parameters phi.
    for weight, adapted in zip(model.trainable_weights, phi):
        weight.assign(adapted)
    # query
    metrics_fn.reset_states()
    query_loss = 0
    num_batch_data = 0
    task_grads = [tf.zeros_like(weight) for weight in model.trainable_weights]

    for imgs, label in query:
        batch_size = len(label)
        with tf.GradientTape() as tape:
            logits = model(imgs, training=train)
            loss = loss_fn(label, logits)
        grads = tape.gradient(loss, model.trainable_weights)
        # `loss` is a batch mean; weight its gradient by the batch size so that the
        # division by the sample count below gives the per-sample mean gradient.
        task_grads = [acc + grad * batch_size for acc, grad in zip(task_grads, grads)]
        query_loss += loss
        num_batch_data += batch_size
        metrics_fn.update_state(label, logits)

    # max(..., 1) keeps an empty query set from producing NaN gradients.
    task_grads = [x / max(num_batch_data, 1) for x in task_grads]
    all_grads = [total + grad for total, grad in zip(all_grads, task_grads)]
    accuracies_support_batch.append(accuracies)
    accuracy_query_batch.append(metrics_fn.result().numpy())
    outer_loss_batch.append(query_loss)

# FO-MAML: the gradients taken at phi are applied directly to theta.
_optimizer.apply_gradients(zip(all_grads, theta))
# Leave the model holding the updated meta-parameters, not the last task's phi.
for weight, meta_weight in zip(model.trainable_weights, theta):
    weight.assign(meta_weight)

In [None]:
all_grads = tf.nest.map_structure(lambda x, y: x + y, task_grads, all_grads)

In [None]:
_optimizer.apply_gradients(zip(all_grads, theta))

In [None]:
dataset = train_data.generate_entire_dataset()