In [9]:
from loaders import *
from collections import Counter
from random import random
from torch import nn
from torch.autograd import Variable

import numpy as np
import torch
import torch.nn.functional as F
import json
import numpy as np
import matplotlib.pyplot as plt
import random

In [24]:
class EncoderLSTM(nn.Module):
    # Your code goes here
    def __init__(self, input_size, hidden_size):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size).double()
        if use_cuda:
            self.lstm = self.lstm.cuda()
        
    def forward(self, input, hidden_in):
        _, hidden_out = self.lstm(input, hidden_in) # encoder only outputs hidden
        return hidden_out
    
    def initHidden(self, hidden):
        
        if hidden == None:
            result = Variable(torch.zeros(1, 1, self.hidden_size)).double()
            
            if use_cuda:
                result = result.cuda()
            return result
        
        else:
            return hidden

In [8]:

class DecoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size).double()
        self.out = nn.Linear(hidden_size, output_size).double()
        self.project = nn.Linear(4096, self.hidden_size).double()
        if use_cuda:
            self.lstm = self.lstm.cuda()
            self.out = self.out.cuda()
            self.project = self.project.cuda()

    def forward(self, input, hidden):
        output = F.relu(input)
        output, hidden = self.lstm(output, hidden)
        output = self.out(output)
        output = output.squeeze()
        return output.unsqueeze(0), hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size)).double()
        if use_cuda:
            return result.cuda()
        else:
            return result

In [None]:
class MetaLearner(nn.Module):
    
    def __init__(self,
                 input_size,
                 hidden_size):
        super(MetaLearner,self).__init__()
        
    

In [None]:
class Learner(nn.Module):
    
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 criterion,
                 learning_rate,
                 embeddings=one_hot_embeddings):
        
        super(Learner,self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.encoder = EncoderLSTM(input_size, hidden_size)
        self.decoder = DecoderLSTM(input_size, hidden_size, output_size)
        self.encoder_optimizer = torch.optim.Adam(self.encoder.parameters(), lr=learning_rate)
        self.decoder_optimizer = torch.optim.Adam(self.decoder.parameters(), lr=learning_rate)
        
        self.embeddings = embeddings
        self.criterion = criterion
        
    
    def forward(self, sequence, hidden):
        
        output = []
        sequence_length = sequence.size()[1]
        loss = 0
        
        encoder_hidden = encoder.initHidden(hidden)
        encoder_hidden = (encoder_hidden, encoder_hidden) # Need a tuple

        # Encoder is fed the flipped control sequence
        for index_control in np.arange(sequence_length-1, 0, -1):
            encoder_input = sequence[0][index_control].view(1, 1, vocabulary_size)
            encoder_hidden = encoder(encoder_input, encoder_hidden) # Gets hidden for next input  
        
        # feed encoder_hidden
        decoder_input = sequence[0][1] # One after SOS
        decoder_hidden = encoder_hidden
        predicted_note_index = 0

        for index_control in range(2, sequence_length):
            decoder_input = decoder_input.view(1, 1, vocabulary_size)
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)

            topv, topi = decoder_output.data.topk(1)
            predicted_control_index = int(topi)
            
            output.append(predicted_control_index)

            if random.random() <= 0.9:
                decoder_input = sequence[0][index_control].view(1, 1, vocabulary_size)
            else:
                # This is the next input, without teacher forcing it's the predicted output
                decoder_input = torch.from_numpy(self.embeddings[predicted_control_index])
                decoder_input = Variable(decoder_input)
                if use_cuda:
                    decoder_input = decoder_input.cuda()
                    
            # CrossEntropyLoss takes input1: (N, C) and input2: (N).
            _, actual_control_index = sequence[0][index_control].topk(1)
            if use_cuda:
                actual_control_index = actual_control_index.cuda()
            loss += self.criterion(decoder_output, actual_control_index)
            
        return output, loss
    
    def train(self, sequence, hidden):
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()
        
        output, loss = self.forward(sequence, hidden)
        
        loss.backward()
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        return loss

In [36]:
use_cuda = False
encoder1 = EncoderLSTM(5,5)
encoder2 = EncoderLSTM(5,5)

In [37]:
encoder1.lstm.weight_hh_l0

Parameter containing:
 0.2610  0.4331  0.2729 -0.1771  0.0005
 0.0057  0.1114 -0.1566  0.0569  0.3910
-0.2347 -0.2272  0.3031 -0.3302  0.2815
-0.3309  0.1743 -0.3510 -0.0948  0.0784
 0.2381 -0.2602 -0.4434 -0.3815  0.0747
-0.0825  0.2122 -0.3795 -0.4332 -0.3532
 0.1605 -0.3901  0.2161  0.2075 -0.0334
-0.0916  0.3068  0.2008 -0.2449  0.3019
 0.4311 -0.0324  0.1797 -0.2204 -0.3622
 0.1890  0.2445 -0.2052 -0.0807  0.1944
-0.4039  0.2079 -0.3889  0.0714 -0.3463
-0.0469  0.4437 -0.2979 -0.0398 -0.2388
-0.0332 -0.1388  0.3420 -0.3059 -0.2158
 0.4398  0.2785  0.4244 -0.4295 -0.3180
-0.3789 -0.1014 -0.2710  0.3379 -0.1847
 0.2796  0.0084  0.4023  0.3928  0.2777
 0.2643 -0.3386 -0.4349  0.2461  0.2022
 0.4121  0.0849  0.3388  0.4083  0.3810
-0.3487  0.0788 -0.4308 -0.0355 -0.0311
 0.4008  0.1055 -0.3978  0.0637  0.2238
[torch.DoubleTensor of size 20x5]

In [38]:
encoder2.lstm.weight_hh_l0

Parameter containing:
-0.2651  0.1933  0.4374  0.3999  0.3224
-0.1832  0.4113  0.3490  0.1361 -0.2020
 0.1873  0.3269 -0.0275  0.3359  0.1357
-0.3396  0.1453  0.2452  0.2933 -0.3389
-0.0287  0.3502  0.2520  0.0321 -0.3406
 0.3427  0.1971 -0.3090  0.4287 -0.0327
-0.2465 -0.1550  0.2877  0.1485 -0.1522
 0.1656 -0.1220 -0.4063  0.2780 -0.0665
 0.0831 -0.3152  0.3622 -0.0480  0.3943
 0.4343  0.2587 -0.3178  0.2025 -0.3326
-0.3127  0.3550  0.3487  0.1612  0.0728
 0.0390  0.2703 -0.3676 -0.1714  0.3557
-0.3950  0.2204  0.0851  0.2939 -0.2697
 0.0419 -0.4395  0.4177  0.1232 -0.1082
-0.3512 -0.0014 -0.0535  0.3175  0.2017
 0.4034  0.0054  0.1865 -0.2414 -0.3439
 0.1934 -0.1225  0.3401 -0.1114  0.3615
 0.3848  0.1728 -0.4038 -0.2881  0.1532
-0.0879  0.4038  0.1651  0.1497  0.0924
 0.0973 -0.4003  0.3037  0.3747  0.2102
[torch.DoubleTensor of size 20x5]

In [39]:
encoder1.load_state_dict(encoder2.state_dict())
encoder1.lstm.weight_hh_l0

Parameter containing:
-0.2651  0.1933  0.4374  0.3999  0.3224
-0.1832  0.4113  0.3490  0.1361 -0.2020
 0.1873  0.3269 -0.0275  0.3359  0.1357
-0.3396  0.1453  0.2452  0.2933 -0.3389
-0.0287  0.3502  0.2520  0.0321 -0.3406
 0.3427  0.1971 -0.3090  0.4287 -0.0327
-0.2465 -0.1550  0.2877  0.1485 -0.1522
 0.1656 -0.1220 -0.4063  0.2780 -0.0665
 0.0831 -0.3152  0.3622 -0.0480  0.3943
 0.4343  0.2587 -0.3178  0.2025 -0.3326
-0.3127  0.3550  0.3487  0.1612  0.0728
 0.0390  0.2703 -0.3676 -0.1714  0.3557
-0.3950  0.2204  0.0851  0.2939 -0.2697
 0.0419 -0.4395  0.4177  0.1232 -0.1082
-0.3512 -0.0014 -0.0535  0.3175  0.2017
 0.4034  0.0054  0.1865 -0.2414 -0.3439
 0.1934 -0.1225  0.3401 -0.1114  0.3615
 0.3848  0.1728 -0.4038 -0.2881  0.1532
-0.0879  0.4038  0.1651  0.1497  0.0924
 0.0973 -0.4003  0.3037  0.3747  0.2102
[torch.DoubleTensor of size 20x5]

In [26]:
for module in encoder.modules():
     print(module.weight.data)

AttributeError: 'EncoderLSTM' object has no attribute 'weight'

Things that we need in the learner module
+ The init function should take as argument a network class and then create two networks, the theta network and theta' network and define the optimizer
+ A function to copy parameters from the theta function to the theta' function
+ A forward function that updates the theta' paramters mutiple times based on the examples from the support set and use loss from the meta test set to update the theta paramets
+ 