In [1]:
import torch
import torch.nn as nn
from pp import fix, get_char_idx, get_char_idx_flat, get_loaders

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# One call to the project helper `get_loaders` yields every loader, both
# character lookup tables and the training tensor used by the cells below.
# NOTE(review): `char_to_idk` looks like a typo for `char_to_idx`; the name is
# kept because later cells read it.
(
    train_loader,
    dev_loader,
    test_loader,
    train_flat_loader,
    dev_flat_loader,
    test_flat_loader,
    char_to_idk,
    idx_to_char,
    train_tensor,
) = get_loaders(
    '/scratch/sanika/content/sample_data/fulldataset_dedup_final.txt'
)

In [54]:
# make the character embedding and convolutional layer with max pooling
class CharCNN(nn.Module):
    """Character-level CNN that turns each word (a row of character ids) into a word vector.

    Pipeline: character embedding -> bank of 1-D convolutions -> ReLU ->
    max-pooling over the character axis -> concatenation -> linear projection.

    Args:
        character_embedding_size: Dimension of each character embedding.
        num_filters: Number of output channels of every convolution.
        kernel_size: Width, in characters, of every convolution.
        max_word_length: Padded word length. It fixes the number of parallel
            convolutions at ``max_word_length - kernel_size + 1``.
        char_vocab_size: Number of rows in the character embedding table.
        word_embedding_dim: Size of the word vector returned by ``forward``.
        device: Accepted for backward compatibility and ignored; move the
            module with ``.to(...)`` instead. The default is ``None`` so that
            defining the class does not require a pre-existing global.

    Raises:
        ValueError: If ``kernel_size`` is larger than ``max_word_length``, which
            would leave the convolution bank empty.
    """

    def __init__(self, character_embedding_size, num_filters, kernel_size, max_word_length,
                 char_vocab_size, word_embedding_dim, device=None):
        super().__init__()
        num_convs = max_word_length - kernel_size + 1
        if num_convs < 1:
            raise ValueError(
                f"kernel_size ({kernel_size}) must not exceed max_word_length ({max_word_length})"
            )

        # Creation order (embedding, convolutions, projection) is kept so that
        # seeded initialisation and state_dict keys stay the same.
        self.char_embedding = nn.Embedding(char_vocab_size, character_embedding_size)
        # NOTE(review): every convolution in the bank has the same kernel_size,
        # so the members differ only by their random initialisation. The layout
        # is preserved to stay compatible with existing checkpoints.
        self.conv_layers = nn.ModuleList(
            [nn.Conv1d(character_embedding_size, num_filters, kernel_size) for _ in range(num_convs)]
        )
        self.fc = nn.Linear(num_filters * num_convs, word_embedding_dim)

    def forward(self, x):
        """Encode a batch of words.

        Args:
            x: Integer tensor of character ids, shape ``(batch_size, word_length)``.
                ``word_length`` must be at least ``kernel_size``.

        Returns:
            Tensor of shape ``(batch_size, word_embedding_dim)``.
        """
        embedded = self.char_embedding(x)       # (batch_size, word_length, character_embedding_size)
        embedded = embedded.permute(0, 2, 1)    # Conv1d expects channels before the length axis
        # ReLU, then keep the strongest activation along the character axis for
        # every filter: each entry is (batch_size, num_filters).
        pooled = [torch.relu(conv(embedded)).max(dim=2).values for conv in self.conv_layers]
        features = torch.cat(pooled, dim=1)     # (batch_size, num_filters * number_of_convolutions)
        return self.fc(features)

# ELMo part
class ELMo(nn.Module):
    """ELMo-style encoder: character CNN word vectors, two stacked BiLSTMs, learned mixing.

    The output for every word is a softmax-weighted sum of three representations:
    the first BiLSTM output, the second BiLSTM output, and the character-CNN
    word vector.

    Args:
        cnn_config: Mapping with keys ``'character_embedding_size'``,
            ``'num_filters'``, ``'kernel_size'``, ``'max_word_length'`` and,
            optionally, ``'char_vocab_size'``.
        elmo_config: Mapping with keys ``'lstm_hidden_size'``,
            ``'lstm_num_layers'`` and ``'word_embedding_dim'``.
        char_vocab_size: Character vocabulary size, used when ``cnn_config``
            has no ``'char_vocab_size'`` entry (that entry wins when present).

    Raises:
        ValueError: If ``2 * lstm_hidden_size != word_embedding_dim``. The
            residual connection and the final mix add BiLSTM outputs
            (``2 * lstm_hidden_size`` wide) to the word vectors
            (``word_embedding_dim`` wide), so the two widths must match.
    """

    def __init__(self, cnn_config, elmo_config, char_vocab_size):
        super().__init__()
        word_dim = elmo_config['word_embedding_dim']
        hidden_size = elmo_config['lstm_hidden_size']
        num_layers = elmo_config['lstm_num_layers']
        if 2 * hidden_size != word_dim:
            raise ValueError(
                f"2 * lstm_hidden_size ({2 * hidden_size}) must equal "
                f"word_embedding_dim ({word_dim})"
            )

        # Token representation built from characters.
        self.char_cnn = CharCNN(cnn_config['character_embedding_size'],
                                cnn_config['num_filters'],
                                cnn_config['kernel_size'],
                                cnn_config['max_word_length'],
                                cnn_config.get('char_vocab_size', char_vocab_size),
                                word_dim)
        self.lstm1 = nn.LSTM(word_dim,
                             hidden_size=hidden_size,
                             num_layers=num_layers,
                             batch_first=True,
                             bidirectional=True)
        self.lstm2 = nn.LSTM(word_dim,
                             hidden_size=hidden_size,
                             num_layers=num_layers,
                             batch_first=True,
                             bidirectional=True)

        # Not used by forward(); kept so existing state_dicts still load.
        self.interpolation_linear = nn.Linear(3, 1)
        # Created without an explicit device so it always lives on the same
        # device as the other parameters and follows the module through .to().
        self.lambdas = nn.Parameter(torch.tensor([0.33, 0.33, 0.33], dtype=torch.float32))

    def forward(self, x):
        """Encode a batch of sentences.

        Args:
            x: Integer tensor of character ids with shape
                ``(batch_size, sentence_length, word_length)``, or an iterable
                of equally shaped ``(sentence_length, word_length)`` tensors.

        Returns:
            Tensor of shape ``(batch_size, sentence_length, 2 * lstm_hidden_size)``.
        """
        if not torch.is_tensor(x):
            x = torch.stack(list(x), dim=0)
        batch_size, sentence_length, word_length = x.shape

        # Run the character CNN once over all words of all sentences instead of
        # once per sentence, then restore the sentence structure.
        words = self.char_cnn(x.reshape(batch_size * sentence_length, word_length))
        words = words.reshape(batch_size, sentence_length, words.shape[-1])
        # words: (batch_size, sentence_length, word_embedding_dim)

        # nn.LSTM already returns both directions concatenated on the last axis.
        lstm1, _ = self.lstm1(words)
        # Residual connection around the first BiLSTM.
        lstm2, _ = self.lstm2(lstm1 + words)

        # Learned convex combination of the three layers.
        alpha = torch.softmax(self.lambdas, dim=0)
        return alpha[0] * lstm1 + alpha[1] * lstm2 + alpha[2] * words
 

In [55]:
# Instantiate the network and move it to `device`.
# 2 * lstm_hidden_size (2 * 150) equals word_embedding_dim (300); ELMo.forward
# adds the BiLSTM outputs to the word embeddings, so the two must agree.
model = ELMo(
    cnn_config={
        'character_embedding_size': 16,
        'num_filters': 32,
        'kernel_size': 5,
        'max_word_length': 16,
        'char_vocab_size': len(char_to_idk),
    },
    elmo_config={
        'lstm_hidden_size': 150,
        'lstm_num_layers': 1,
        'word_embedding_dim': 300,
    },
    char_vocab_size=len(char_to_idk),
).to(device)


In [56]:
# Smoke test: run a single forward pass and print the shape of the result.
# The loop is only a way to fetch the first batch from `train_loader`; the
# `break` at the bottom ends it after one iteration.
for i, x in enumerate(train_loader):
    # Convert every element of the batch into a tensor on `device`.
    # NOTE(review): presumably each element is a nested list of character ids
    # -- confirm against `get_loaders`.
    x = [torch.tensor(sentence, device = device) for sentence in x]
    # Only the first converted element is passed to the model.
    output = model(x[0])
    # The recorded run printed torch.Size([32, 30, 300]).
    print(output.shape)
    break

torch.Size([32, 30, 300])


  """
