In [1]:
import pandas as pd
import numpy as np
import csv

import torch
import torch.nn as nn
import torch.nn.functional as f
import torch.nn.utils.rnn as rnn

from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import ExponentialLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import CosineAnnealingLR

if torch.cuda.is_available():
  device = torch.device("cuda:0")
  print("GPU")
else:
  device = torch.device("cpu")
  print("CPU")

GPU


In [2]:
train = pd.read_csv("train", sep=' ', header=None, quoting=csv.QUOTE_NONE, names=['index', 'word', 'tag'])

In [3]:
vocabulary = {}
def create_vocab(word, vocabulary):
    if word in vocabulary:
        vocabulary[word] += 1
    else:
        vocabulary[word] = 1 
    return word

train['word_d'] = train['word'].apply(lambda x: create_vocab(x, vocabulary))
print(len(vocabulary))

23624


In [4]:
tag2idx = {}

def create_tagset(tag, tagset, ):
  if tag not in tagset:
    l = len(tagset)
    tagset[tag] = l


train['tag_d'] = train['tag'].apply(lambda x: create_tagset(x, tag2idx))
tag2idx['PAD'] = -1

idx2tag = {value:key for key, value in tag2idx.items()}
print(idx2tag)

{0: 'B-ORG', 1: 'O', 2: 'B-MISC', 3: 'B-PER', 4: 'I-PER', 5: 'B-LOC', 6: 'I-ORG', 7: 'I-MISC', 8: 'I-LOC', -1: 'PAD'}


In [5]:
def filter_vocab(vocabulary, threshold):
    filtered_vocabulary = {}

    i = 2
    for word, value in vocabulary.items():
        if value > threshold:
            filtered_vocabulary[word] = i
            i += 1

    filtered_vocabulary['_pad'] = 0
    filtered_vocabulary['_unk'] = 1
    return filtered_vocabulary
            
threshold = 2
word2idx = filter_vocab(vocabulary, threshold) 
idx2word = {value:key for key, value in word2idx.items()}  

In [6]:
print(len(word2idx))
print(len(idx2word))

8129
8129


In [7]:
from pandas.core.frame import validate_axis_style_args

# prepare sequences

train_indices = train['index'].values
train_words = train['word'].values
train_tags = train['tag'].values

def pad_sentence(seq, id):
  padsize = 113 - len(seq)
  if padsize == 113:
    return np.array(seq)
  if id == "word":
    return np.concatenate((np.array(seq), np.zeros(padsize)))
  else:
    return np.concatenate((np.array(seq), -1*np.ones(padsize)))
  

def prepare_sequences(indices, words, tags):
  word_sequences = []
  tag_sequences = []
  word_seq = []
  tag_seq = []
  lengths = []
  
  count = 0
  for i in range(len(indices)):
    if indices[i] == 1:
      count += 1
      if i != 0:
        lengths.append(len(word_seq))
        word_sequences.append(pad_sentence(word_seq,"word"))
        tag_sequences.append(pad_sentence(tag_seq,"tag"))
     
      word_seq = []
      tag_seq = []

    word = "_unk"
    if words[i] in word2idx:
      word = words[i]
    word_seq.append(word2idx[word])
    tag_seq.append(tag2idx[tags[i]])

  lengths.append(len(word_seq))
  word_sequences.append(pad_sentence(word_seq,"word"))
  tag_sequences.append(pad_sentence(tag_seq,"tag"))
  return np.array(word_sequences), np.array(tag_sequences), lengths

train_x, train_y, train_lengths = prepare_sequences(train_indices, train_words, train_tags)

dev = pd.read_csv("dev", sep=' ', header=None, quoting=csv.QUOTE_NONE, names=['index', 'word', 'tag'])
dev_indices = dev['index'].values
dev_words = dev['word'].values
dev_tags = dev['tag'].values

dev_x, dev_y, dev_lengths = prepare_sequences(dev_indices, dev_words, dev_tags)

In [8]:
class TrainData(Dataset):
    def __init__(self, train_tensors, train_labels, train_lengths):
        self.data = torch.from_numpy(train_tensors)
        self.data = (self.data).to(torch.int64)
        self.labels = torch.from_numpy(train_labels)
        self.labels = (self.labels).to(torch.int64)
        self.lengths = train_lengths
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.lengths[index], self.labels[index]

class ValData(Dataset):
    def __init__(self, val_tensors, val_labels, val_lengths):
        self.data = torch.from_numpy(val_tensors)
        self.data = (self.data).to(torch.int64)
        self.labels = torch.from_numpy(val_labels)
        self.labels = (self.labels).to(torch.int64)
        self.lengths = val_lengths
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.lengths[index], self.labels[index]
    
class TestData(Dataset):
    def __init__(self, test_tensors, test_labels, test_lengths):
        self.data = torch.from_numpy(test_tensors)
        self.data = (self.data).to(torch.int64)
        self.labels = torch.from_numpy(test_labels)
        self.labels = (self.labels).to(torch.int64)
        self.lengths = test_lengths
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.lengths[index], self.labels[index]

In [9]:
class BLSTM_NER(nn.Module):
    def __init__(self, vocab_size, linear_output_dim, output_dim, embedding_dim, hidden_dim, dropout_prob):
        super(BLSTM_NER, self).__init__()

        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.blstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True, dropout=dropout_prob)
        self.linear1 = nn.Linear(2*hidden_dim, linear_output_dim)
        self.linear2 = nn.Linear(linear_output_dim, output_dim)
        self.elu = nn.ELU()

    def forward(self, sentence, lengths):
        embeddings = self.embeddings(sentence)
        lstm_out = rnn.pack_padded_sequence(embeddings, lengths, batch_first=True, enforce_sorted=False)
        lstm_out, _ = self.blstm(lstm_out)
        lstm_out, _ = rnn.pad_packed_sequence(lstm_out, batch_first=True, padding_value=0.0, total_length=sentence.shape[1])
        lstm_out = self.linear1(lstm_out)
        lstm_out = self.elu(lstm_out)

        output = self.linear2(lstm_out)
        tag_scores = f.log_softmax(output, dim=1)
        return tag_scores

In [40]:
# LOADING DATA

batch_size = 20

train_loader = torch.utils.data.DataLoader(TrainData(train_x, train_y, train_lengths), batch_size=batch_size)
val_loader = torch.utils.data.DataLoader(ValData(dev_x, dev_y, dev_lengths), batch_size=10)

In [None]:
# vocab_size = len(word2idx)
# embedding_dim = 100
# hidden_dim = 256
# dropout_prob = 0.33
# linear_output_dim = 128
# output_dim = 9

# model = BLSTM_NER(23626, linear_output_dim, output_dim, embedding_dim, hidden_dim, dropout_prob).to(device)

# criterion = nn.CrossEntropyLoss( ignore_index = -1)
# if torch.cuda.is_available():
#   criterion = nn.CrossEntropyLoss( ignore_index = -1).cuda()
# optimizer = torch.optim.SGD(model.parameters(), lr=1)
# # scheduler = ExponentialLR(optimizer, gamma=0.9)
# # scheduler = ReduceLROnPlateau(optimizer, 'min')

# # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000, eta_min=0.05, last_epoch=-1, verbose=False)
# batch_size = 20

# train_loader = torch.utils.data.DataLoader(TrainData(train_x, train_y, train_lengths), batch_size=batch_size)
# val_loader = torch.utils.data.DataLoader(ValData(dev_x, dev_y, dev_lengths), batch_size=10)

# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_loader), epochs=50)

In [41]:
def task1(train_loader, val_loader):

  # HYPERPARAMS -------------------------------------------------------------------------------
  vocab_size = len(word2idx)
  embedding_dim = 100
  hidden_dim = 256
  dropout_prob = 0.33
  linear_output_dim = 128
  output_dim = 9

  model = BLSTM_NER(11985, linear_output_dim, output_dim, embedding_dim, hidden_dim, dropout_prob).to(device)

  criterion = nn.CrossEntropyLoss( ignore_index = -1)
  if torch.cuda.is_available():
    criterion = nn.CrossEntropyLoss( ignore_index = -1).cuda()
  optimizer = torch.optim.SGD(model.parameters(), lr=1)
  # scheduler = ExponentialLR(optimizer, gamma=0.9)
  # scheduler = ReduceLROnPlateau(optimizer, 'min')

  # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000, eta_min=0.05, last_epoch=-1, verbose=False)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.93, steps_per_epoch=len(train_loader), epochs=50)

  epochs = 50
  counter = 0
  print_every = 1000
  clip = 5
  valid_loss_min = np.Inf

  model.train()
  for i in range(epochs):

      for data, lengths, targets in train_loader:
          counter += 1
          data = data.to(device)
          
          targets = targets.to(device)
          # h = tuple([e.data for e in h])
          # inputs, labels = inputs.to(device), labels.to(device)
          model.zero_grad()
          output = model(data.to(device), lengths)

          y_pred_for_loss = output.permute(0,2,1)
          y_pred_for_loss = y_pred_for_loss.to(device)
          
          loss = criterion(y_pred_for_loss, targets)
          loss.backward()
          nn.utils.clip_grad_norm_(model.parameters(), clip)
          optimizer.step()
        
          # if counter%print_every == 0:

          #   val_losses = []
          #   model.eval()
          #   for val_data, val_lengths, val_targets in val_loader:
                
          #       val_data = val_data.to(device)
                
          #       val_targets = val_targets.to(device)
          #       val_output = model(val_data, val_lengths)
          #       val_pred_for_loss = val_output.permute(0,2,1)
          #       val_pred_for_loss = val_pred_for_loss.to(device)
          #       val_loss = criterion(val_pred_for_loss, val_targets)
          #       val_losses.append(val_loss.item())
                
          #   model.train()
          #   print("Epoch: {}/{}...".format(i+1, epochs),
          #         "Step: {}...".format(counter),
          #         "Loss: {:.6f}...".format(loss.item()),
          #         "Val Loss: {:.6f}".format(np.mean(val_losses)))
          #   # if np.mean(val_losses) <= valid_loss_min:
          #   #     torch.save(model.state_dict(), './state_dict.pt')
          #   #     print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
          #   #     valid_loss_min = np.mean(val_losses)
          scheduler.step()

  return model

task1model = task1(train_loader, val_loader)


In [42]:
# GET DEV RESULTS
task1model.eval()

dev_x_tensor = torch.from_numpy(dev_x)
dev_x_tensor = dev_x_tensor.to(torch.int64)
dev_y_tensor = torch.from_numpy(dev_y)
dev_y_tensor = dev_y_tensor.to(torch.int64)

dev_output = task1model(dev_x_tensor.to(device), dev_lengths)
dev_output = dev_output.cpu().detach().numpy()

# FUNCTIONS FOR GETTING PREDICTIONS
def get_preds(output, lengths):
  preds = []
  for i in range(output.shape[0]):
      tmp = []
      
      for j in range(lengths[i]):
          tmp.append(np.argmax(output[i][j]))
          
      preds.append(tmp)
  return preds

def write_predictions(filename, indices, words, tags, preds, idx2word, idx2tag):
    
    count = 0
    file = open(filename, "w")
    
    for i in range(len(preds)):
        if i != 0:
            file.write("\n")
        for j in range(len(preds[i])):
            s = str(indices[count]) + " " + str(words[count]) + " " + str(tags[count]) + " " + str(idx2tag[preds[i][j]]) + "\n"
            file.write(s)
            count += 1
            
    file.close()
    return


# GETTING PREDICTIONS ON DEV SET
dev_preds = get_preds(dev_output, dev_lengths)
write_predictions("dev1pred.out", dev_indices, dev_words, dev_tags, dev_preds, idx2word, idx2tag)
        

In [43]:
# SAVE MODEL

task1model.train()
torch.save(task1model.state_dict(), './blstm1.pt')

## Task 2: 

In [None]:
# READ GLOVE EMBEDDINGS

glove = pd.read_csv("glove.6B.100d", sep=' ', header=None, quoting=csv.QUOTE_NONE)
glove.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,the,-0.038194,-0.24487,0.72812,-0.39961,0.083172,0.043953,-0.39141,0.3344,-0.57545,...,0.016215,-0.017099,-0.38984,0.87424,-0.72569,-0.51058,-0.52028,-0.1459,0.8278,0.27062
1,",",-0.10767,0.11053,0.59812,-0.54361,0.67396,0.10663,0.038867,0.35481,0.06351,...,0.34951,-0.7226,0.37549,0.4441,-0.99059,0.61214,-0.35111,-0.83155,0.45293,0.082577
2,.,-0.33979,0.20941,0.46348,-0.64792,-0.38377,0.038034,0.17127,0.15978,0.46619,...,-0.063351,-0.67412,-0.068895,0.53604,-0.87773,0.31802,-0.39242,-0.23394,0.47298,-0.028803
3,of,-0.1529,-0.24279,0.89837,0.16996,0.53516,0.48784,-0.58826,-0.17982,-1.3581,...,0.18712,-0.018488,-0.26757,0.727,-0.59363,-0.34839,-0.56094,-0.591,1.0039,0.20664
4,to,-0.1897,0.050024,0.19084,-0.049184,-0.089737,0.21006,-0.54952,0.098377,-0.20135,...,-0.13134,0.058617,-0.31869,-0.61419,-0.62393,-0.41548,-0.038175,-0.39804,0.47647,-0.15983


In [None]:
glove_vocab = glove[0].values
print(glove_vocab.shape)
glove_embeddings = glove.iloc[:, 1:]
print(glove_embeddings.shape)

(400000,)
(400000, 100)


In [None]:
glove_embeddings = np.array(glove_embeddings)
_unk_embedding = np.mean(glove_embeddings,axis=0,keepdims=True)
print(_unk_embedding.shape)

(1, 100)


In [None]:
def create_glove_vocab_dict(glove_vocab, glove_embeddings):
  glove_vocab_dict = {}
  for i in range(glove_vocab.shape[0]):
      glove_vocab_dict[glove_vocab[i]] = glove_embeddings[i]
  
  return glove_vocab_dict

glove_vocab_dict = create_glove_vocab_dict(glove_vocab, glove_embeddings) 

In [None]:
print(len(glove_vocab_dict))

399998


In [None]:
def create_glove_embeddings(word2idx, glove_vocab_dict, _unk_embedding):
  embeddings = np.zeros((1,101))

  _pad_embedding = np.zeros((1,101))   #embedding for '<pad>' token.
  unk_embed_zero = np.concatenate((_unk_embedding.flatten(), np.zeros(1)))
  # unk_embed_one = np.concatenate((_unk_embedding.flatten(), np.ones(1)))

  padcount = 0
  unkcount = 0
  inglove = 0
  lowercase = 0
  notinglove = 0

  embeddings = np.vstack((embeddings,_pad_embedding))
  embeddings = np.vstack((embeddings,unk_embed_zero))

  for word, idx in word2idx.items():
    word = str(word)
    if word == "_pad":
      padcount += 1
    elif word == "_unk":
      unkcount += 1
    else:
      if word.lower() in glove_vocab_dict:
        inglove += 1
        if word == word.lower():
          embedding = np.concatenate((glove_vocab_dict[word.lower()], np.ones(1)))
          embeddings = np.vstack((embeddings,embedding))
          lowercase += 1
        else:
          embedding = np.concatenate((glove_vocab_dict[word.lower()], np.zeros(1)))
          embeddings = np.vstack((embeddings,embedding))
        
      else:
        notinglove += 1
        if word == word.lower():
          embedding = np.concatenate((np.random.rand(100), np.ones(1)))
          embeddings = np.vstack((embeddings,embedding))
        else:
          embedding = np.concatenate((np.random.rand(100), np.zeros(1)))
          embeddings = np.vstack((embeddings,embedding))
        
        

  print("padcount ", padcount)
  print("unkcount ", unkcount)
  print("inglove ", inglove)
  print("lowercase ", lowercase)
  print("notinglove ", notinglove)

  return embeddings

train_embeddings = create_glove_embeddings(word2idx, glove_vocab_dict, _unk_embedding)
train_embeddings = train_embeddings[1:]

padcount  1
unkcount  1
inglove  11510
lowercase  6544
notinglove  473


In [None]:
print(train_embeddings.shape)


(11985, 101)


In [None]:
class BLSTM_NER_GLOVE(nn.Module):
    def __init__(self, train_embeddings, embedding_dim, linear_output_dim, output_dim, hidden_dim, dropout_prob):
        super(BLSTM_NER_GLOVE, self).__init__()

        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding.from_pretrained(torch.from_numpy(train_embeddings).float())
        self.blstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True, dropout=dropout_prob)
        self.linear1 = nn.Linear(2*hidden_dim, linear_output_dim)
        self.linear2 = nn.Linear(linear_output_dim, output_dim)
        self.elu = nn.ELU()

    def forward(self, sentence, lengths):
        embeddings = self.embeddings(sentence)
        lstm_out = rnn.pack_padded_sequence(embeddings, lengths, batch_first=True, enforce_sorted=False)
        lstm_out, _ = self.blstm(lstm_out)
        lstm_out, _ = rnn.pad_packed_sequence(lstm_out, batch_first=True, padding_value=0.0, total_length=sentence.shape[1])
        lstm_out = self.linear1(lstm_out)
        lstm_out = self.elu(lstm_out)
        output = self.linear2(lstm_out)
        tag_scores = f.log_softmax(output, dim=1)
        return tag_scores

In [None]:
def task2(train_loader, val_loader, train_embeddings):

  # HYPERPARAMS -------------------------------------------------------------------------------
  embedding_dim = 101
  hidden_dim = 256
  dropout_prob = 0.33
  linear_output_dim = 128
  output_dim = 9

  model = BLSTM_NER_GLOVE(train_embeddings, embedding_dim, linear_output_dim, output_dim, hidden_dim, dropout_prob).to(device)

  criterion = nn.CrossEntropyLoss( ignore_index = -1)
  if torch.cuda.is_available():
    criterion = nn.CrossEntropyLoss( ignore_index = -1).cuda()
  optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
  # scheduler = ExponentialLR(optimizer, gamma=0.9)
  # scheduler = ReduceLROnPlateau(optimizer, 'min')

  # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000, eta_min=0.05, last_epoch=-1, verbose=False)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1, steps_per_epoch=len(train_loader), epochs=100)

  epochs = 100
  counter = 0
  print_every = 1000
  clip = 5
  valid_loss_min = np.Inf

  model.train()
  for i in range(epochs):

      for data, lengths, targets in train_loader:
          counter += 1
          data = data.to(device)
          
          targets = targets.to(device)
          # h = tuple([e.data for e in h])
          # inputs, labels = inputs.to(device), labels.to(device)
          model.zero_grad()
          output = model(data.to(device), lengths)

          y_pred_for_loss = output.permute(0,2,1)
          y_pred_for_loss = y_pred_for_loss.to(device)
          
          loss = criterion(y_pred_for_loss, targets)
          loss.backward()
          nn.utils.clip_grad_norm_(model.parameters(), clip)
          optimizer.step()
        
          if counter%print_every == 0:

            val_losses = []
            model.eval()
            for val_data, val_lengths, val_targets in val_loader:
                
                val_data = val_data.to(device)
                
                val_targets = val_targets.to(device)
                val_output = model(val_data, val_lengths)
                val_pred_for_loss = val_output.permute(0,2,1)
                val_pred_for_loss = val_pred_for_loss.to(device)
                val_loss = criterion(val_pred_for_loss, val_targets)
                val_losses.append(val_loss.item())
                
            model.train()
            print("Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))
            # if np.mean(val_losses) <= valid_loss_min:
            #     torch.save(model.state_dict(), './state_dict.pt')
            #     print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
            #     valid_loss_min = np.mean(val_losses)
          scheduler.step()

  return model

task2model = task2(train_loader, val_loader, train_embeddings)

Epoch: 2/100... Step: 1000... Loss: 0.071471... Val Loss: 0.239023
Epoch: 3/100... Step: 2000... Loss: 0.185015... Val Loss: 0.224442
Epoch: 4/100... Step: 3000... Loss: 0.004551... Val Loss: 0.149174
Epoch: 6/100... Step: 4000... Loss: 0.011556... Val Loss: 0.157440
Epoch: 7/100... Step: 5000... Loss: 0.169870... Val Loss: 0.215603
Epoch: 8/100... Step: 6000... Loss: 0.002674... Val Loss: 0.136303
Epoch: 10/100... Step: 7000... Loss: 0.004220... Val Loss: 0.142497
Epoch: 11/100... Step: 8000... Loss: 0.146224... Val Loss: 0.180068
Epoch: 12/100... Step: 9000... Loss: 0.075276... Val Loss: 0.132916
Epoch: 14/100... Step: 10000... Loss: 0.000979... Val Loss: 0.135869
Epoch: 15/100... Step: 11000... Loss: 0.114062... Val Loss: 0.160993
Epoch: 16/100... Step: 12000... Loss: 0.000360... Val Loss: 0.136253
Epoch: 18/100... Step: 13000... Loss: 0.003181... Val Loss: 0.151762
Epoch: 19/100... Step: 14000... Loss: 0.131702... Val Loss: 0.195173
Epoch: 20/100... Step: 15000... Loss: 0.000027...

In [None]:
# embedding_dim = 101
# vocab_size = len(word2idx)
# hidden_dim = 256
# dropout_prob = 0.33
# linear_output_dim = 128
# output_dim = 9

# glove_model = BLSTM_NER_GLOVE(train_embeddings, embedding_dim, linear_output_dim, output_dim, hidden_dim, dropout_prob).to(device)

# criterion = nn.CrossEntropyLoss( ignore_index = -1)
# if torch.cuda.is_available():
#   criterion = nn.CrossEntropyLoss( ignore_index = -1).cuda()
# glove_optimizer = torch.optim.Adam(glove_model.parameters(), lr=0.1)
# # scheduler = ExponentialLR(optimizer, gamma=0.9)
# # scheduler = ReduceLROnPlateau(optimizer, 'min')

# # glove_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(glove_optimizer, T_max=1000, eta_min=0.05, last_epoch=-1, verbose=False)
# batch_size = 20
# # glove_scheduler = ReduceLROnPlateau(glove_optimizer, 'min')

# # glove_scheduler = torch.optim.lr_scheduler.OneCycleLR(glove_optimizer, max_lr=0.1, steps_per_epoch=len(train_loader), epochs=100)

In [None]:
# epochs = 100
# counter = 0
# print_every = 1000
# clip = 5
# valid_loss_min = np.Inf

# glove_model.train()
# for i in range(epochs):

    
#     for data, lengths, targets in train_loader:
#         counter += 1
#         data = data.to(device)
        
#         targets = targets.to(device)
#         # h = tuple([e.data for e in h])
#         # inputs, labels = inputs.to(device), labels.to(device)
#         glove_model.zero_grad()
#         output = glove_model(data.to(device), lengths)

#         y_pred_for_loss = output.permute(0,2,1)
#         y_pred_for_loss = y_pred_for_loss.to(device)
        
#         loss = criterion(y_pred_for_loss, targets)
#         loss.backward()
#         nn.utils.clip_grad_norm_(glove_model.parameters(), clip)
#         glove_optimizer.step()
       
#         if counter%print_every == 0:

#           val_losses = []
#           glove_model.eval()
#           for val_data, val_lengths, val_targets in val_loader:
              
#               val_data = val_data.to(device)
              
#               val_targets = val_targets.to(device)
#               val_output = glove_model(val_data, val_lengths)
#               val_pred_for_loss = val_output.permute(0,2,1)
#               val_pred_for_loss = val_pred_for_loss.to(device)
#               val_loss = criterion(val_pred_for_loss, val_targets)
#               val_losses.append(val_loss.item())
              
#           glove_model.train()
#           print("Epoch: {}/{}...".format(i+1, epochs),
#                 "Step: {}...".format(counter),
#                 "Loss: {:.6f}...".format(loss.item()),
#                 "Val Loss: {:.6f}".format(np.mean(val_losses)))
#                 # if np.mean(val_losses) <= valid_loss_min:
#                 #     torch.save(model.state_dict(), './state_dict.pt')
#                 #     print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
#                 #     valid_loss_min = np.mean(val_losses)
#         # glove_scheduler.step()

In [None]:
task2model.eval()

dev_output = task2model(dev_x_tensor.to(device), dev_lengths)
dev_output = dev_output.cpu().detach().numpy()

dev_preds = get_preds(dev_output, dev_lengths)
write_predictions("dev2pred.out", dev_indices, dev_words, dev_tags, dev_preds, idx2word, idx2tag)

In [None]:
task2model.train()
torch.save(task2model.state_dict(), './blstm2.pt')