# MODEL-BASED META-LEARNING USING 

# Conditional Neural Processes

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
%cd drive/MyDrive/'Colab Notebooks/MetaLearning'
!ls

[Errno 2] No such file or directory: 'drive/MyDrive/Colab Notebooks/MetaLearning'
/content/drive/MyDrive/Colab Notebooks/MetaLearning
l2lutils.ipynb	models.ipynb  nb1.ipynb  nb2-CNP.ipynb	nb3.ipynb  utils.ipynb


In [3]:
!pip install import_ipynb --quiet
!pip install learn2learn --quiet

  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 1.4MB 17.2MB/s 
[K     |████████████████████████████████| 2.9MB 38.1MB/s 
[K     |████████████████████████████████| 174kB 40.8MB/s 
[K     |████████████████████████████████| 102kB 9.9MB/s 
[K     |████████████████████████████████| 61kB 6.8MB/s 
[K     |████████████████████████████████| 61kB 6.9MB/s 
[K     |████████████████████████████████| 1.4MB 32.4MB/s 
[K     |████████████████████████████████| 112kB 37.2MB/s 
[K     |████████████████████████████████| 3.2MB 39.8MB/s 
[?25h  Building wheel for learn2learn (setup.py) ... [?25l[?25hdone
  Building wheel for gsutil (setup.py) ... [?25l[?25hdone
  Building wheel for gcs-oauth2-boto-plugin (setup.py) ... [?25l[?25hdone
  Building wheel for google-apitools (setup.py) ... [?25l[?25hdone
  Building wheel for retry-decorator (setup.py) ... [?25l[?25hdone
  Building wheel for pyu2f (setup.py) ... [?25l[?25hdone


In [10]:
import import_ipynb
import utils
import models
utils.hide_toggle('Imports 1')

In [11]:
from IPython import display
import torch
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
# from l2lutils import KShotLoader
from IPython import display
import torch.nn as nn
utils.hide_toggle('Imports 2')

l2lutils

In [8]:
import torch
import numpy as np
import learn2learn as l2l
from learn2learn.data import *
import import_ipynb
import utils
class KShotLoader():
    def __init__(self,myds,num_tasks=1000,shots=2,ways=2,classes=None):
        self.shots = shots
        self.ways = ways
        self.myMds = l2l.data.MetaDataset(myds)
        if classes == None:
            n_classes = len(set(myds.labels))
            classes = [i for i in range(n_classes)]
        self.my_tasks = l2l.data.TaskDataset(self.myMds, task_transforms=[
                                l2l.data.transforms.FilterLabels(self.myMds,classes),
                                l2l.data.transforms.NWays(self.myMds,ways),
                                l2l.data.transforms.KShots(self.myMds,2*shots),
                                l2l.data.transforms.LoadData(self.myMds),
                                l2l.data.transforms.RemapLabels(self.myMds),
                                l2l.data.transforms.ConsecutiveLabels(self.myMds)
                                ],num_tasks=num_tasks)
    def get_task(self):
        data,labels = self.my_tasks.sample()
        adaptation_indices = np.zeros(data.size(0), dtype=bool)
        adaptation_indices[np.arange(self.shots*self.ways) * 2] = True
        evaluation_indices = torch.from_numpy(~adaptation_indices)
        adaptation_indices = torch.from_numpy(adaptation_indices)
        adaptation_data, adaptation_labels = data[adaptation_indices], labels[adaptation_indices]
        evaluation_data, evaluation_labels = data[evaluation_indices], labels[evaluation_indices]
        d_train = (adaptation_data,adaptation_labels)
        d_test = (evaluation_data,evaluation_labels)
        return d_train, d_test

# Data Generation/Loading

In [12]:
#Generate data - euclidean
meta_train_ds, meta_test_ds, full_loader = utils.euclideanDataset(n_samples=10000,n_features=20,n_classes=10,batch_size=32)

In [13]:
# Define an MLP network. Note that input dimension has to be data dimension. For classification
# final dimension has to be number of classes; for regression one.
#torch.manual_seed(10)
net = models.MLP(dims=[20,64,10])

In [14]:
# Train the network; note that network is trained in place so repeated calls further train it.
net,losses,accs=models.Train(net,full_loader,lr=1e-2,epochs=50,verbose=True)

Epoch   49 Loss: 1.93893e-02 Accuracy: 0.99641


In [15]:
#Training accuracy.
models.accuracy(net,meta_train_ds.samples,meta_train_ds.labels,verbose=True)

7475.0 7500


0.9966666666666667

In [None]:
# Test accuracy.
models.accuracy(net,meta_test_ds.samples,meta_test_ds.labels)

# Meta-Learning: Tasks

Generate a k-shot n-way loader using the meta-training dataset

In [16]:
meta_train_kloader=KShotLoader(meta_train_ds,shots=5,ways=2)

Sample a task - each task has a k-shot n-way training set and a similar test set

In [17]:
d_train,d_test=meta_train_kloader.get_task()

Let's try directly learning using the task training set albeit its small size: create a dataset and loader and train it with the earlier network and Train function.

In [None]:
taskds = utils.MyDS(d_train[0],d_train[1])

In [None]:
d_train_loader = torch.utils.data.DataLoader(dataset=taskds,batch_size=1,shuffle=True)

In [None]:
net,losses,accs=models.Train(net,d_train_loader,lr=1e-1,epochs=10,verbose=True)

How does it do on the test set of the sampled task?

In [None]:
models.accuracy(net,d_test[0],d_test[1])

# CNP-based  Meta-learning

In [18]:
# optimisers from torch
import torch.optim as optim
import torch.nn.functional as F

In [19]:
lossfn = torch.nn.NLLLoss()

Conditional Neural Process Network

In [21]:
[1]+[1,2]

[1, 1, 2]

In [22]:
class CNP(nn.Module):
    def __init__(self,n_features=1,dims=[32,32],n_classes=2,lr=1e-4):
        super(CNP,self).__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        dimL1 = [n_features]+dims
        dimL2=[n_features+n_classes*dims[-1]]+dims+[n_classes]
        self.mlp1 = models.MLP(dims=dimL1,task='embedding')
        self.mlp2 = models.MLP(dims=dimL2)
        self.optimizer=torch.optim.Adam(self.parameters(),lr=lr)
    def adapt(self,X,y):
        R = self.mlp1(X)
        m = torch.eye(self.n_classes)[y].transpose(0,1)/self.n_classes
        r = (m@R).flatten().unsqueeze(0)
        #r = (R.sum(dim=0)/X.shape[0]).unsqueeze(0)
        return r
    def forward(self,Y,r):
        rr = r.repeat(Y.shape[0],1)
        p = self.mlp2(torch.cat((Y,rr),dim=1))
        return p
utils.hide_toggle('Class CNP')

Get a task dataset.

In [23]:
meta_train_kloader=KShotLoader(meta_train_ds,shots=5,ways=2,num_tasks=1000)

In [24]:
d_train,d_test = meta_train_kloader.get_task()

In [25]:
net = CNP(n_features=20,dims=[32,64,32])

In [26]:
print(net.mlp1,net.mlp2)

MLP(
  (layers): ModuleList(
    (0): Linear(in_features=20, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
  )
) MLP(
  (layers): ModuleList(
    (0): Linear(in_features=84, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=2, bias=True)
    (7): LogSoftmax(dim=1)
  )
)


In [27]:
r = net.adapt(d_train[0],d_train[1])
r.shape

torch.Size([1, 64])

In [28]:
net(d_train[0],r)

tensor([[-0.6715, -0.7152],
        [-0.6989, -0.6874],
        [-0.6138, -0.7794],
        [-0.6765, -0.7101],
        [-0.6534, -0.7345],
        [-0.6736, -0.7131],
        [-0.6311, -0.7593],
        [-0.6537, -0.7342],
        [-0.6740, -0.7127],
        [-0.6844, -0.7020]], grad_fn=<LogSoftmaxBackward>)

# Putting it all together: CNP-based Meta-learning
Now let's put it together in a loop - CNP model-based meta-learning algorithm:

In [29]:
# Redifning accuracy function so that it takes h - dataset context - as input since net requires it.
def accuracy(Net,X_test,y_test,h,verbose=True):
    #Net.eval()
    m = X_test.shape[0]
    y_pred = Net(X_test,h)
    _, predicted = torch.max(y_pred, 1)
    correct = (predicted == y_test).float().sum().item()
    if verbose: print(correct,m)
    accuracy = correct/m
    #Net.train()
    return accuracy

In [30]:
classes_train = [i for i in range(5)]
classes_test = [i+5 for i in range(5)]
classes_train, classes_test

([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

In [31]:
import learn2learn as l2l
import torch.optim as optim
shots,ways = 5,2
net = CNP(n_features=20,n_classes=ways,dims=[32,64,32],lr=1e-4)
lossfn = torch.nn.NLLLoss()
meta_train_kloader=KShotLoader(meta_train_ds,shots=shots,ways=ways,num_tasks=1000,classes=classes_train)

In [32]:
#Meta-testing task loader for later.
meta_test_kloader=KShotLoader(meta_test_ds,shots=shots,ways=ways,classes=classes_test)

In [34]:
epoch=0
n_epochs=100
task_count=100
while epoch<n_epochs:
    # OUTER LOOP - GRADIENT DESCENT OVER SUM OF LOSSES ON DTEST
    test_loss = 0.0
    test_acc = 0.0
    # Sample and train on a task
    for task in range(task_count):
        # INNER LOOP - NO GRADIENT DESCENT
        d_train,d_test=meta_train_kloader.get_task()
        rp = torch.randperm(d_train[1].shape[0])
        d_train0=d_train[0][rp]
        d_train1=d_train[1][rp]
        x_tr = d_train0
        d_tr = x_tr 
        h = net.adapt(d_tr,d_train1)
        rp1 = torch.randperm(d_test[1].shape[0])
        d_test0=d_test[0][rp1]
        d_test1=d_test[1][rp1]
        x_ts = d_test0
        # y_ts_sh = torch.zeros(x_ts.shape[0],ways)
        d_ts = x_ts 
        test_preds = net(d_ts,h)
        train_preds = net(d_tr,h)
        # Accumulate losses over tasks - note train and test loss both included
        test_loss += lossfn(test_preds,d_test1)+lossfn(train_preds,d_train1)
        net.eval()
        test_acc += accuracy(net,d_ts,d_test1,h,verbose=False)
        net.train()
    #Update the network weights
    print('Epoch  % 2d Loss: %2.5e Avg Acc: %2.5f'%(epoch,test_loss/task_count,test_acc/task_count))
    display.clear_output(wait=True)
    net.optimizer.zero_grad()
    test_loss.backward()
    net.optimizer.step()
    epoch+=1
    

Epoch   99 Loss: 8.89653e-01 Avg Acc: 0.85900


Now test the trained CNP network and to tasks sampled from the meta_test_ds dataset:

In [35]:
meta_test_kloader=KShotLoader(meta_test_ds,shots=shots,ways=ways)
test_acc = 0.0
task_count = 50
adapt_steps = 1
# Sample and train on a task
for task in range(task_count):
    d_train,d_test=meta_test_kloader.get_task()
    x_tr = d_train[0]
    y_tr_sh = torch.cat((torch.zeros(1,ways),torch.eye(ways)[d_train[1][1:]]))
    d_tr = x_tr #torch.cat((x_tr,y_tr_sh),1)
    h=net.adapt(d_tr,d_train[1])
    x_ts = d_test[0]
    y_ts_sh = torch.zeros(x_ts.shape[0],ways)
    d_ts = x_ts #torch.cat((x_ts,y_ts_sh),1)
    test_preds = net(d_ts,h)
    test_acc += accuracy(net,d_ts,d_test[1],h,verbose=False)
    # Done with a task
net.train()
print('Avg Acc: %2.5f'%(test_acc/task_count))

Avg Acc: 0.76600
