In [1]:
%run '../Get Features.ipynb'

In [2]:
import torch
import numpy as np

In [3]:
from model import LogisticRegression

In [4]:
l = LogisticRegression()

In [5]:
l.load_state_dict(torch.load('log_reg_weights'))

In [6]:
l.weights.weight

Parameter containing:
tensor([[-0.0314,  0.0351,  0.2298,  0.0790, -0.0399, -0.0883]], requires_grad=True)

In [7]:
a = 'pronoun, verbs, entity_counts, sen_len, sen_pos, stop_count'.split(',')

## Features and corresponding weights

In [8]:
list(zip(a, l.weights.weight.data.numpy()[0]))

[('pronoun', -0.03144964),
 (' verbs', 0.03505763),
 (' entity_counts', 0.22977456),
 (' sen_len', 0.07900084),
 (' sen_pos', -0.03990756),
 (' stop_count', -0.088265166)]

## Testing on Test Set

In [9]:
a = get_all(type_=2)

loaded tags
loaded doc metadata
loaded stop counts
loaded outputs


In [10]:
input = torch.t(torch.Tensor([np.hstack(i) for i in a[:-1]]))

In [11]:
output = torch.Tensor(np.hstack(a[-1])).unsqueeze(1)

In [12]:
prediction = l(input)

In [13]:
true_positives = torch.sum(prediction.gt(0) * output.gt(0))

In [14]:
true_negatives = torch.sum(prediction.lt(0) * output.le(0))

In [15]:
false_positives = torch.sum(prediction.gt(0) * output.le(0))

In [16]:
false_negatives = torch.sum(prediction.lt(0) * output.gt(0))

## Precision

In [17]:
precision = true_positives.float() / (true_positives + true_negatives).float()
precision

tensor(0.3823)

## Recall

In [18]:
recall = true_positives.float() / (true_positives + false_positives).float()
recall

tensor(0.8561)

## F1 score

In [19]:
f1 = 2 * precision * recall / (precision + recall)

In [20]:
f1

tensor(0.5286)

## Sample output for a document

In [22]:
%run ../paths.py

In [24]:
text = open(TEXT_FOLDER + 'test/doc.1.txt').readlines()

In [25]:
features = get_all(start=1, end=2, type_=2)

loaded tags
loaded doc metadata
loaded stop counts
loaded outputs


In [32]:
input = torch.t(torch.Tensor(features[:-1]).squeeze(1))

In [33]:
pred = l(input)

In [38]:
ranks = list(zip(*sorted(enumerate(pred.data.numpy()), key=lambda x: x[1], reverse=True)))[0]

## Sample Document

In [46]:
for i in text:
    print(i)

@entity1 's agent expects to go ' around the world ' discussing his client as interest in the @entity6 midfielder increases ahead of the summer transfer window

@entity8 , who has confirmed that he held talks with @entity10 earlier this season , admits that he could receive 20 phone calls a day about the @entity13 international as clubs prepare to strengthen their squads before the start of next season

city are keen to sign the @entity21 midfielder as they look to reshape their squad but @entity23 insists no decision has been made and that the 23 - year - old could even remain at the @entity26

@entity6 midfielder has attracted interest from @entity10 , @entity29 and @entity30 @entity31 tussles with @entity33 's @entity32 during @entity6 's 1 - 1 draw at the @entity26 speaking to @entity35 , @entity23 said : ' for the moment , there are no formal discussions

of course in the next few weeks i will be going around the world to talk about the situation with @entity1 but this is just inf

## Summary for the corresponding document

In [40]:
for r in ranks[0:3]:
    print(text[r])

@entity1 's agent expects to go ' around the world ' discussing his client as interest in the @entity6 midfielder increases ahead of the summer transfer window

@entity6 midfielder has attracted interest from @entity10 , @entity29 and @entity30 @entity31 tussles with @entity33 's @entity32 during @entity6 's 1 - 1 draw at the @entity26 speaking to @entity35 , @entity23 said : ' for the moment , there are no formal discussions

@entity77 celebrates with @entity31 after the @entity79 's equaliser against @entity33 on sunday @entity31 has been in superb form this season , scoring 10 goals and providing 17 assists in the @entity68

