In [85]:
!pip install torch torchvision torchaudio



In [86]:
!pip show torch

Name: torch
Version: 2.1.0
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /Users/basisushil/opt/anaconda3/lib/python3.11/site-packages
Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions
Required-by: torchaudio, torchtext, torchvision


In [119]:
!pip list | grep torch

torch                         2.1.0
torchaudio                    2.1.0
torchtext                     0.3.1
torchvision                   0.16.0


In [87]:
import torch

In [88]:
torch.backends.mps.is_available()

True

In [89]:
device = torch.device('mps')

In [90]:
import os
print(os.getcwd())
DIR = os.getcwd()
main_dir = os.path.join(os.getcwd(), 'main')
print(main_dir)

/Users/basisushil/Desktop/automatic-question-generation-master
/Users/basisushil/Desktop/automatic-question-generation-master/main


In [91]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import data
from torchtext.vocab import Vectors

from tqdm import tqdm
import argparse
from main.models import Seq2seq
import random
import pandas as pd
from sklearn.model_selection import train_test_split

In [92]:
train_set = os.path.join(DIR, 'result_squad/resultssquad_train.csv')
dev_set = os.path.join(DIR, 'result_squad/resultssquad_dev.csv')
test_size = 0.7
save = os.path.join(DIR, 'dataset/')
word_vector = 'glove'
batch_size = 128
resume = ''
epochs = 100
numberbatch_loc  = os.path.join(DIR, 'dataset/')

print(train_set, dev_set, save)

/Users/basisushil/Desktop/automatic-question-generation-master/result_squad/resultssquad_train.csv /Users/basisushil/Desktop/automatic-question-generation-master/result_squad/resultssquad_dev.csv /Users/basisushil/Desktop/automatic-question-generation-master/dataset/


In [93]:
# Split dev dataset into test set and validation set
dev_set = pd.read_csv(dev_set)
validation_set, test_set = train_test_split(dev_set, test_size = test_size)


In [94]:
# Saving file names to variables
trainloc = train_set
valloc = save+'validation_set.csv'
testloc = save+'test_set.csv'

In [95]:
valloc

'/Users/basisushil/Desktop/automatic-question-generation-master/dataset/validation_set.csv'

In [96]:
# Saving validation and test set to csv file
validation_set.to_csv(valloc, index=False)
test_set.to_csv(testloc, index=False)

In [97]:
# Create Field object
tokenize = lambda x: x.split()
TEXT = data.Field(tokenize=tokenize, lower=False, include_lengths = True, init_token = '<SOS>', eos_token = '<EOS>')
LEX = data.Field(tokenize=tokenize, lower=False, init_token = '<SOS>', eos_token = '<SOS>')
BIO = data.Field(tokenize=tokenize, lower=False, init_token = '<SOS>', eos_token = '<SOS>')


In [98]:
# Specify Fields in the dataset
fields = [('context', TEXT), ('question', TEXT), ('bio', BIO), ('lex', LEX)]


In [99]:
# Build the dataset
train_data, valid_data, test_data = data.TabularDataset.splits(path = '',train=trainloc, validation=valloc,
                                                               test=testloc, fields = fields, format='csv', skip_header=True)

In [100]:
# Build vocabulary
MAX_VOCAB_SIZE = 50000
MIN_COUNT = 5
BATCH_SIZE = batch_size

In [101]:
if word_vector == 'glove':
	TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE,
                 min_freq=MIN_COUNT, vectors='glove.6B.300d',
                 unk_init=torch.Tensor.normal_)
else:
	cache_ = numberbatch_loc 
	vectors = Vectors(name='numberbatch-en-19.08.txt', cache=cache_)
	TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE,
                 min_freq=MIN_COUNT, vectors=vectors,
                 unk_init=torch.Tensor.normal_)

In [102]:
BIO.build_vocab(train_data)
LEX.build_vocab(train_data)

In [103]:
# Create a set of iterators for each split
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
     batch_size = BATCH_SIZE,
     sort_within_batch = True,
     sort_key = lambda x:len(x.context),
     device = device)

In [104]:
pad_idx = TEXT.vocab.stoi['<pad>']
eos_idx = TEXT.vocab.stoi['<EOS>']
sos_idx = TEXT.vocab.stoi['<SOS>']

In [105]:
# Size of embedding_dim should match the dim of pre-trained word embeddings
embedding_dim = 300
hidden_dim = 512
vocab_size = len(TEXT.vocab)
# optimizer = optim.Adam([param for param in model.parameters() if param.requires_grad == True], 
#                        lr=1.0e-3)

In [106]:
# Initializing weights
model = Seq2seq(embedding_dim, hidden_dim, vocab_size, device, pad_idx, eos_idx, sos_idx).to(device)

In [107]:
pretrained_embeddings = TEXT.vocab.vectors

In [108]:
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0391, -0.5870, -1.5240,  ..., -0.9038,  0.2345,  0.4160],
        [-0.7859, -0.3455,  0.1419,  ..., -0.2658, -1.1860, -0.5025],
        [-1.0757,  2.1462,  1.9535,  ..., -0.6773, -0.2611, -1.7673],
        ...,
        [-0.5509, -0.8127, -0.9347,  ...,  0.3362,  0.4070,  0.6546],
        [-0.5043, -0.1691,  0.2737,  ..., -0.2468,  0.7967, -0.3363],
        [-0.0176, -0.0626, -0.2060,  ...,  0.4832,  1.1497, -0.2633]],
       device='mps:0')

In [109]:
# 

# Initializing weights for special tokens
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model.embedding.weight.data[UNK_IDX] = torch.zeros(embedding_dim)
model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)

In [110]:
model.embedding.weight.requires_grad = False

In [111]:
optimizer = optim.Adam([param for param in model.parameters() if param.requires_grad == True], 
                       lr=1.0e-3)

In [112]:
criterion = nn.CrossEntropyLoss(ignore_index = pad_idx)

In [113]:
# If continuing training
if (resume):
	model.load_state_dict(torch.load(resume))

In [114]:
def train(model, iterator, criterion, optimizer, clip):
    # Put the model in training mode
    model.train()
    epoch_loss = 0
    
    for idx, batch in tqdm(enumerate(iterator), total=len(iterator)):
        
        input_sequence = batch.context
        answer_sequence = batch.bio
        output_sequence = batch.question
        lexical_sequence = batch.lex
        
        target_tokens = output_sequence[0]
        
        # zero out the gradient for the current batch
        optimizer.zero_grad()
        print('----------------------', model, type(model))

        # Run the batch through the model
        output = model(input_sequence, answer_sequence, lexical_sequence, output_sequence, 0.5)

        # Throw it through the loss function
        output = output[1:].view(-1, output.shape[-1])
        target_tokens = target_tokens[1:].view(-1)
        
        loss = criterion(output, target_tokens)
        
        # Perform back-prop and calculate the gradient of the loss function
        loss.backward()
          
        # Clip the gradient if necessary.          
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        # Update model parameters
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [115]:

def evaluate(model, iterator, criterion):
    # Put model in evaluation mode
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for idx, batch in tqdm(enumerate(iterator), total=len(iterator)):

            input_sequence = batch.context
            answer_sequence = batch.bio
            output_sequence = batch.question
            lexical_sequence = batch.lex
            
            target_tokens = output_sequence[0]
            
            # Run the batch through the model
            output = model(input_sequence, answer_sequence, lexical_sequence, output_sequence, 0)
            
            # Throw it through the loss function
            output = output[1:].view(-1, output.shape[-1])
            target_tokens = target_tokens[1:].view(-1)

            loss = criterion(output, target_tokens)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [116]:
N_EPOCHS = epochs
CLIP = 1

best_valid_loss = float('inf')

In [117]:
train_loss = train(model, train_iterator, criterion, optimizer, CLIP)

  0%|          | 0/1019 [00:00<?, ?it/s]

---------------------- Seq2seq(
  (embedding): Embedding(50004, 300)
  (answer_embedding): Embedding(6, 300, padding_idx=1)
  (lexical_embedding): Embedding(452, 300, padding_idx=1)
  (encoder): Encoder(
    (embedding): Embedding(50004, 300)
    (answer_embedding): Embedding(6, 300, padding_idx=1)
    (lexical_embedding): Embedding(452, 300, padding_idx=1)
    (gru): GRU(300, 512, num_layers=2, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(50004, 300)
    (gru): GRU(300, 512, num_layers=2, dropout=0.5)
    (concat): Linear(in_features=1024, out_features=512, bias=True)
    (out): Linear(in_features=512, out_features=50004, bias=True)
    (attn): Attention()
  )
) <class 'main.models.Seq2seq'>





RuntimeError: 'lengths' argument should be a 1D CPU int64 tensor, but got 1D mps:0 Long tensor

In [None]:
# for epoch in range(N_EPOCHS):
for epoch in range(1):
    try:
        train_loss = train(model, train_iterator, criterion, optimizer, CLIP)
        valid_loss = evaluate(model, valid_iterator, criterion)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), save+'/model.pth')

        print('Epoch: ', epoch)
        print('Train loss: ', train_loss)
        print('Valid loss: ', valid_loss)
    except:
        raise

  0%|          | 0/1019 [00:00<?, ?it/s]


RuntimeError: 'lengths' argument should be a 1D CPU int64 tensor, but got 1D mps:0 Long tensor