In [1]:
import time
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import utils
from dataset_wrapper import NYT10Dataset
from model_v1 import RelationClassifier

%load_ext autoreload
%autoreload 2

In [2]:
def load_saved_model(filepath, model, optimizer=None):
    state = torch.load(filepath)
    model.load_state_dict(state['state_dict'])
    # Only need to load optimizer if you are going to resume training on the model
    if optimizer is not None:
        optimizer.load_state_dict(state['optimizer'])

In [3]:
def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, scheduler, use_gpu, num_epochs=5):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        # TODO: create val dataset and add 'val' to list
        for phase in ['train']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
#                 if use_gpu:
#                     inputs = Variable(inputs.cuda())
#                     labels = Variable(labels.cuda())
#                 else:
#                     inputs = inputs
#                     labels = Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                if type(outputs) == tuple:
                    outputs, _ = outputs
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.item() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            # TODO: uncomment
            # TODO: use a better metric than accuracy?
#             if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = model.state_dict()
            state = {'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
            torch.save(state, best_model_filepath)
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [4]:
def evaluate_model(model, testset_loader, test_size, use_gpu):
    model.train(False)  # Set model to evaluate mode

    predictions = []
    # Iterate over data
    for inputs, labels in tqdm(testset_loader):
        # TODO: wrap them in Variable?
        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # forward
        outputs = model(inputs)
        if type(outputs) == tuple:
            outputs, _ = outputs
        _, preds = torch.max(outputs.data, 1)
        predictions.extend(preds.tolist())
    return predictions

In [5]:
# TODO: split sentences dataset into train, val, test
train_dataset = NYT10Dataset('data/train.txt', 'data/relation2id.txt')
print(train_dataset.sentences_frame.head())
print(train_dataset.num_relations())

Cleaned file found! Loading now...
Number of trainable samples: 521793
  fb_mid_e1 fb_mid_e2 e1_name       e2_name                     relation  \
0   m.0ccvx  m.05gf08  queens  belle_harbor  /location/location/contains   
1   m.0ccvx  m.05gf08  queens  belle_harbor  /location/location/contains   
2   m.0ccvx  m.05gf08  queens  belle_harbor  /location/location/contains   
3   m.0ccvx  m.05gf08  queens  belle_harbor  /location/location/contains   
4   m.0ccvx  m.05gf08  queens  belle_harbor  /location/location/contains   

                                            sentence  
0  sen. charles e. schumer called on federal safe...  
1  but instead there was a funeral , at st. franc...  
2  rosemary antonelle , the daughter of teresa l....  
3  one was for st. francis de sales roman catholi...  
4  the firefighter , whom a fire department offic...  
58


In [6]:
x, y = train_dataset[3]
print(x)
print(y)

(['one', 'was', 'for', 'st.', 'francis', 'de', 'sales', 'roman', 'catholic', 'church', 'in'], [';', 'another', 'board', 'studded', 'with', 'electromechanical', 'magnets', 'will', 'go', 'under', 'the', 'pipes', 'of', 'an', 'organ', 'at', 'the', 'evangelical', 'lutheran', 'church', 'of', 'christ', 'in', 'rosedale', ','], ['.'])
48


In [7]:
trainset_loader = DataLoader(train_dataset,
                             batch_size=4, 
                             shuffle=True, 
                             num_workers=1, 
                             collate_fn=lambda batch: zip(*batch))

In [8]:
# Use GPU if available, otherwise stick with cpu
use_cuda = torch.cuda.is_available()
# torch.manual_seed(123)
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

# if use_parallel:
#     print("[Using all the available GPUs]")
#     inception = nn.DataParallel(inception, device_ids=[0])


vocab = utils.glove2dict("data/glove.6B.50d.txt")  # dict[word] -> numpy array(embed_dim,)
rc_model = RelationClassifier(vocab, 50)

# He initialization
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv1d:
        nn.init.kaiming_normal_(m.weight)

rc_model.apply(init_weights)

cpu


RelationClassifier(
  (pcnn): PiecewiseCNN(
    (conv1): Conv1d(50, 230, kernel_size=(3,), stride=(1,), padding=(2,))
  )
  (drop1): Dropout(p=0.5)
  (lin1): Linear(in_features=690, out_features=50, bias=True)
)

In [14]:
from torch.nn.utils.rnn import pad_sequence

C1 = [['this', 'a'], ['this', 'is', 'a'], ['this', 'is', 'a', 'full', 'sentence']]
C1 = [rc_model._assemble_vec_seq(c) for c in C1]
len(C1[0])

pad_sequence(C1, batch_first=True)

ValueError: lengths array has to be sorted in decreasing order

In [9]:
criterion = nn.CrossEntropyLoss()
num_epochs = 5
best_model_filepath = 'model_best.pth.tar'

dataloaders = {'train': trainset_loader}
dataset_sizes = {'train': len(train_dataset)}

optimizable_params = [param for param in rc_model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(optimizable_params, lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
best_model = train_model(rc_model,
                         dataloaders,
                         dataset_sizes,
                         criterion,
                         optimizer,
                         exp_lr_scheduler,
                         use_cuda,
                         num_epochs)

  0%|          | 0/130449 [00:00<?, ?it/s]

Epoch 0/4
----------





ValueError: lengths array has to be sorted in decreasing order

In [None]:
predictions = evaluate_model(best_model, testset_loader, len(test_dataset), use_cuda)
true_y = [y for img, y in test_dataset]
print(classification_report(true_y, predictions))