In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
import numpy as np

In [7]:
import matplotlib.pyplot as plt

**CLASSIFIER UTILS**

In [4]:
def train_classifier(model, lines, output, batches, iterations=1, batch_size=20):
    """Train ``model`` as a binary classifier with BCE loss and Adagrad.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs)``. Its result is fed to ``nn.BCELoss``
        together with the labels, so it must hold probabilities with the
        same shape as the label tensor.
    lines : indexable
        ``lines[n]`` is the integer-encoded input of document ``n``.
    output : indexable
        ``output[n]`` holds the labels of document ``n``. A label of ``2``
        is treated as ``0`` (negative).
    batches : iterable
        Each element is a sequence of document ids forming one batch.
    iterations : int
        Number of passes over ``batches``.
    batch_size : int or None
        Only batches of exactly this length are trained on (20 by default,
        as before). Pass ``None`` to train on every batch.

    Side effects: updates ``model`` in place, prints the running batch index
    and plots the per-batch loss history once training is finished.
    """
    errors = []
    lossfunc = nn.BCELoss()
    optimizer = optim.Adagrad(model.parameters())
    # Build the tensors on whatever device the model lives on instead of
    # hard-coding CUDA; a CUDA model behaves exactly as before.
    device = next(model.parameters()).device
    for _ in range(iterations):
        # enumerate() replaces batches.index(batch), which was a linear
        # search per step and wrong when two batches compare equal.
        for step, batch in enumerate(batches):
            try:
                if batch_size is None or len(batch) == batch_size:
                    inputs = torch.tensor([lines[n] for n in batch],
                                          dtype=torch.long, device=device)
                    truth = torch.tensor([output[n] for n in batch],
                                         dtype=torch.float, device=device)
                    truth[truth == 2] = 0
                    optimizer.zero_grad()
                    pred = model(inputs)
                    loss = lossfunc(pred, truth)
                    loss.backward()
                    optimizer.step()
                    # Store a plain float: it neither pins device memory nor
                    # breaks matplotlib's conversion to numpy.
                    errors.append(loss.item())
            except Exception as exc:
                # Still best-effort (a bad batch is skipped), but say which
                # batch failed and why, and let KeyboardInterrupt through.
                print('skipping batch {}: {!r}'.format(step, exc))
            print(step, end='\r')
    plt.plot(errors)

In [5]:
def get_summaries(model, lines_, batches, doc_folder, write_folder, top_k=3):
    """Write the ``top_k`` highest-scoring lines of every document to disk.

    For each document id in each batch, the source file at
    ``doc_folder + str(docid)`` is read, the model's scores are ranked in
    descending order and the best lines are written, in ranked order, to
    ``write_folder + str(docid)``.

    Parameters
    ----------
    model : callable
        Called as ``model(inputs)``. Assumed to return one row of per-line
        scores per document, i.e. shape (batch, n_lines) -- confirm against
        the model in use.
    lines_ : indexable
        ``lines_[n]`` is the integer-encoded input of document ``n``.
    batches : iterable
        Each element is a sequence of document ids.
    doc_folder, write_folder : str
        Path prefixes; the document id is appended by plain concatenation,
        so include the trailing separator.
    top_k : int
        Number of lines to keep per document (3 by default, as before).
    """
    # Follow the model's device when it exposes parameters; otherwise keep
    # the previous default of CUDA whenever it is available.
    parameters = getattr(model, 'parameters', None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with torch.no_grad():  # inference only: no autograd graph needed
        for batch in batches:
            inputs = torch.tensor([lines_[n] for n in batch],
                                  dtype=torch.long, device=device)
            pred = model(inputs)
            ranking = torch.sort(pred, descending=True)[1].tolist()
            for order, docid in zip(ranking, batch):
                with open(doc_folder + str(docid)) as f:
                    content = f.readlines()
                # The score row may be longer than the file (e.g. padded
                # inputs); skip indices with no matching line instead of
                # raising IndexError.
                selected_lines = [content[i] for i in order
                                  if i < len(content)][:top_k]
                with open(write_folder + str(docid), 'w') as f2:
                    f2.writelines(selected_lines)

In [None]:
def get_file(doc_id, write_folder):
    """Print the contents of the file at ``write_folder + str(doc_id)``.

    ``write_folder`` is used as a plain string prefix, so it must include the
    trailing path separator.
    """
    # Context manager so the handle is closed right away instead of being
    # left for the garbage collector.
    with open(write_folder + str(doc_id)) as f:
        print(f.read())

In [None]:
def compute_accuracies(model, lines_, output, batches):
    """Compute precision, recall and F1 of ``model`` at a 0.5 threshold.

    Parameters
    ----------
    model : callable
        Called as ``model(inputs)``; must return scores with the same shape
        as the label tensor built from ``output``.
    lines_ : indexable
        ``lines_[n]`` is the integer-encoded input of document ``n``.
    output : indexable
        ``output[n]`` holds the labels of document ``n``. As in
        ``train_classifier``, a label of ``2`` is treated as ``0``; any
        remaining label greater than zero counts as positive.
    batches : iterable
        Each element is a sequence of document ids.

    Returns
    -------
    dict
        ``{'precision', 'recall', 'f1'}`` as 0-dim float tensors. A metric
        whose denominator is zero (including empty ``batches``) is reported
        as 0 rather than NaN.
    """
    # Follow the model's device when it exposes parameters; otherwise keep
    # the previous default of CUDA whenever it is available.
    parameters = getattr(model, 'parameters', None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Tensor accumulators from the start, so an empty `batches` no longer
    # ends in `int has no attribute float`.
    tp = torch.zeros((), dtype=torch.long, device=device)
    fn = torch.zeros_like(tp)
    fp = torch.zeros_like(tp)
    with torch.no_grad():  # evaluation only: no autograd graph needed
        for batch in batches:
            inputs = torch.tensor([lines_[n] for n in batch],
                                  dtype=torch.long, device=device)
            truth = torch.tensor([output[n] for n in batch],
                                 dtype=torch.long, device=device)
            # Same label convention as training. Previously a raw 2 was
            # multiplied into the counts as a double-weight positive.
            truth[truth == 2] = 0
            positive = truth.gt(0)
            negative = truth.le(0)
            pred = model(inputs)
            tp += (pred.gt(0.5) & positive).sum()
            fn += (pred.le(0.5) & positive).sum()
            fp += (pred.gt(0.5) & negative).sum()

    tp = tp.float()
    fn = fn.float()
    fp = fp.float()
    zero = torch.zeros_like(tp)
    precision = tp / (tp + fp) if (tp + fp) > 0 else zero
    recall = tp / (tp + fn) if (tp + fn) > 0 else zero
    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = zero

    return {'precision': precision, 'recall': recall, 'f1': f1}

**REFRESH UTILS**

In [79]:
def compute_refresh_loss(bceloss, pred_, scores_, max_=5, batch_size=20.):
    """Score-weighted sum of BCE losses over candidate sentence sets.

    For every document, each of its first ``max_`` ``(sentences, score)``
    candidates is turned into a 0/1 target vector (1 at the indices listed
    in ``sentences``); the BCE between the document's predictions and that
    target is weighted by ``score`` and added to the total.

    Parameters
    ----------
    bceloss : callable
        Loss called as ``bceloss(pred, truth)``, e.g. ``nn.BCELoss()``.
    pred_ : iterable of 1-D tensors
        One prediction vector per document (iterating a 2-D tensor works).
    scores_ : iterable
        One sequence of ``(sentences, score)`` pairs per document, where
        ``sentences`` is an iterable of indices into the prediction vector.
    max_ : int
        Maximum number of candidates used per document.
    batch_size : float
        Divisor applied to the summed loss (20 by default, as before).

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(1,)`` holding the normalised loss.
    """
    preds = list(pred_)
    device = preds[0].device if preds else torch.device('cpu')
    # Plain tensor instead of the deprecated `Variable` wrapper, created on
    # the predictions' device rather than unconditionally on CUDA.
    total = torch.zeros(1, device=device, requires_grad=True)
    for pred, scores in zip(preds, scores_):
        for sentences, score in scores[0:max_]:
            truth = np.zeros(pred.size(0))
            truth[list(sentences)] = 1
            truth = torch.as_tensor(truth, dtype=pred.dtype, device=pred.device)
            # The score is a plain weight; no need to round-trip it through
            # a one-element device tensor.
            total = total + bceloss(pred, truth) * float(score)
    return total / batch_size

In [None]:
def train_reinforce(model, lines, scores, batches, iterations, max_=5):
    """Train ``model`` with the score-weighted loss of ``compute_refresh_loss``.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs)``; its result is handed to
        ``compute_refresh_loss`` together with the batch's candidate scores.
    lines : indexable
        ``lines[n]`` is the integer-encoded input of document ``n``.
    scores : indexable
        ``scores[n]`` is the sequence of ``(sentences, score)`` candidates
        of document ``n``.
    batches : iterable
        Each element is a sequence of document ids; only batches of exactly
        20 documents are used.
    iterations : int
        Number of passes over ``batches``.
    max_ : int
        Maximum number of candidates per document used in the loss.

    Side effects: updates ``model`` in place, prints the running loss and
    batch index, and plots the per-batch loss history after training.
    """
    errors = []
    lossfunc = nn.BCELoss()
    optimizer = optim.Adagrad(model.parameters())
    # Build the inputs on the model's own device instead of hard-coding CUDA.
    device = next(model.parameters()).device
    for _ in range(iterations):
        # enumerate() replaces batches.index(batch), which was a linear
        # search per step and wrong when two batches compare equal.
        for step, batch in enumerate(batches):
            # Kept at 20: compute_refresh_loss normalises by the same value
            # by default.
            if len(batch) != 20:
                continue
            inputs = torch.tensor([lines[n] for n in batch],
                                  dtype=torch.long, device=device)
            scores_ = [scores[i] for i in batch]
            optimizer.zero_grad()
            pred = model(inputs)

            loss = compute_refresh_loss(lossfunc, pred, scores_, max_)
            loss.backward()
            optimizer.step()
            # Plain float: it neither pins device memory nor breaks
            # matplotlib's conversion to numpy.
            errors.append(loss.item())

            print(errors[-1], step, end='\r')
    plt.plot(errors)