In [3]:
%run "../Get Features.ipynb"

In [4]:
import torch
import numpy as np

In [5]:
from model import LogisticRegression

In [6]:
l = LogisticRegression()

In [7]:
l.load_state_dict(torch.load('log_reg_weights'))

In [8]:
l.weights.weight

Parameter containing:
tensor([[-0.0144, -0.0044,  0.2282,  0.0741, -0.0292, -0.0797]],
       requires_grad=True)

In [9]:
a = 'pronoun, verbs, entity_counts, sen_len, sen_pos, stop_count'.split(',')

## Features and corresponding weights

In [10]:
list(zip(a, l.weights.weight.data.numpy()[0]))

[('pronoun', -0.014409942),
 (' verbs', -0.0043992093),
 (' entity_counts', 0.22817455),
 (' sen_len', 0.074055605),
 (' sen_pos', -0.029169073),
 (' stop_count', -0.07974424)]

## Testing on Test Set

In [12]:
a = get_all(type_=1)

loaded tags
loaded doc metadata
loaded stop counts
loaded outputs


In [13]:
input = torch.t(torch.Tensor([np.hstack(i) for i in a[:-1]]))

In [14]:
output = torch.Tensor(np.hstack(a[-1])).unsqueeze(1)

In [15]:
prediction = l(input)

In [16]:
true_positives = torch.sum(prediction.gt(0) * output.gt(0))

In [17]:
true_negatives = torch.sum(prediction.lt(0) * output.le(0))

In [18]:
false_positives = torch.sum(prediction.gt(0) * output.le(0))

In [19]:
false_negatives = torch.sum(prediction.lt(0) * output.gt(0))

## Precision

In [20]:
precision = true_positives.float() / (true_positives + true_negatives).float()
precision

tensor(0.3743)

## Recall

In [21]:
recall = true_positives.float() / (true_positives + false_positives).float()
recall

tensor(0.8241)

## F1 score

In [22]:
f1 = 2 * precision * recall / (precision + recall)

In [23]:
f1

tensor(0.5148)

## Sample output for a document

In [24]:
%run ../paths.py

In [25]:
text = open(TEXT_FOLDER + 'test/doc.1.txt').readlines()

In [27]:
features = get_all(start=1, end=2, type_=1)

loaded tags
loaded doc metadata
loaded stop counts
loaded outputs


In [28]:
input = torch.t(torch.Tensor(features[:-1]).squeeze(1))

In [29]:
pred = l(input)

In [30]:
ranks = list(zip(*sorted(enumerate(pred.data.numpy()), key=lambda x: x[1], reverse=True)))[0]

## Sample Document

In [31]:
for i in text:
    print(i)

a trip to a former heavyweight champ 's gaudy , abandoned mansion

the tallest and fastest " giga - coaster " in the world

a dramatic interview with a famed spiritual leader -- and the tearful reaction by one of his former students

these are some of the best videos of the week : in the 1980s and ' 90s -- before he moved to @entity15 and started keeping tigers as pets -- former heavyweight boxer @entity18 lived in a @entity21 , @entity20 , mansion

the home featured an indoor swimming pool , a marble - and - gold @entity24 ( with mirrored ceiling , naturally ) and an entertainment room large enough for small concerts

@entity18 sold the house in 1999 ; it 's due to become , of all things , a church

the video can be seen at the top of this story

not a fan of roller coasters ? you may want to skip the next video -- but for the rest of us , the thrill of watching is the next best thing to being there

the @entity46 325 can be found at @entity48 amusement part in @entity49 , @entity50



## Summary for the corresponding document

In [32]:
for r in ranks[0:3]:
    print(text[r])

" i saw the plane heading down along the valley and i said , ' my @entity71 , it 's going to hit the mountain , ' " @entity67 told @entity63

watch the video : a professor of physics at a @entity98 university asked 100 people to create a composite with facial features they thought were beautiful -- and then asked another 100 to rate their attractiveness

a trip to a former heavyweight champ 's gaudy , abandoned mansion

