### this script sets a baseline for relation extraction using frequency-based BOW model

#### add additional features

In [114]:
import gzip
import numpy as np
import random
import os
import json

from collections import Counter, defaultdict, namedtuple
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, fbeta_score, make_scorer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import cross_val_score, StratifiedKFold, KFold
from sklearn.preprocessing import FunctionTransformer,LabelEncoder
import numpy as np
from sklearn.pipeline import FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction import DictVectorizer

##### additional imports
import networkx as nx
import spacy
nlp = spacy.load('en')

In [3]:
##################################################################################################
# 1. LOAD DATA
##################################################################################################

PairExample = namedtuple('PairExample',
    'entity_1, entity_2, snippet')
Snippet = namedtuple('Snippet',
    'left, mention_1, middle, mention_2, right, direction')
def load_data(file, verbose=True):
    f = open(file,'r', encoding='utf-8')
    data = []
    labels = []
    for i,line in enumerate(f):
        instance = json.loads(line)
        if i==0:
            if verbose:
                print('json example:')
                print(instance)
        #'relation, entity_1, entity_2, snippet' fileds for each example
        #'left, mention_1, middle, mention_2, right, direction' for each snippet
        instance_tuple = PairExample(instance['entity_1'],instance['entity_2'],[])
        for snippet in instance['snippet']:
            try:
                snippet_tuple = Snippet(snippet['left'],snippet['mention_1'],snippet['middle'],
                                   snippet['mention_2'],snippet['right'],
                                    snippet['direction'])
                instance_tuple.snippet.append(snippet_tuple)
            except:
                print(instance)
        if i==0:
            if verbose:
                print('\nexample transformed as a named tuple:')
                print(instance_tuple)
        data.append(instance_tuple)
        labels.append(instance['relation'])
    return data,labels
    
train_data, train_labels = load_data('../data/train.json.txt')

json example:
{'relation': 'has_spouse', 'entity_1': 'Judy_Garland', 'entity_2': 'David_Rose', 'snippet': [{'left': 'thirty and his life and career were riding high . In 1941 , shortly after the death of his father , Mercer began an intense affair with nineteen-year-old', 'mention_1': 'Judy Garland', 'middle': 'while she was engaged to composer', 'mention_2': 'David Rose', 'right': '. Garland married Rose to temporarily stop the affair , but the effect on Mercer lingered , adding to the emotional depth of his lyrics . Their affair', 'direction': 'fwd'}]}

example transformed as a named tuple:
PairExample(entity_1='Judy_Garland', entity_2='David_Rose', snippet=[Snippet(left='thirty and his life and career were riding high . In 1941 , shortly after the death of his father , Mercer began an intense affair with nineteen-year-old', mention_1='Judy Garland', middle='while she was engaged to composer', mention_2='David Rose', right='. Garland married Rose to temporarily stop the affair , but 

In [4]:
# Statistics over relations
def print_stats(labels):
    labels_counts = Counter(labels)
    print('{:20s} {:>10s} {:>10s}'.format('', '', 'rel_examples'))
    print('{:20s} {:>10s} {:>10s}'.format('relation', 'examples', '/all_examples'))
    print('{:20s} {:>10s} {:>10s}'.format('--------', '--------', '-------'))
    for k,v in labels_counts.items():
        print('{:20s} {:10d} {:10.2f}'.format(k, v, v /len(labels)))
    print('{:20s} {:>10s} {:>10s}'.format('--------', '--------', '-------'))
    print('{:20s} {:10d} {:10.2f}'.format('Total', len(labels), len(labels) /len(labels)))

print('Train set statistics:')
print_stats(train_labels)

Train set statistics:
                                rel_examples
relation               examples /all_examples
--------               --------    -------
has_spouse                 3019       0.31
author                     2653       0.27
NO_REL                     2300       0.24
capital                     510       0.05
worked_at                  1178       0.12
--------               --------    -------
Total                      9660       1.00


In [5]:
# check that each entity pair is assigned only one relation
pair_dict={}
rel_dict={}
for example, label in zip(train_data,train_labels):
    if (example.entity_1,example.entity_2) not in pair_dict.keys():
        pair_dict[(example.entity_1,example.entity_2)] = [label]
        
    else:
        pair_dict[(example.entity_1,example.entity_2)].append(label)
        print(example.entity_1,example.entity_2,label)
    if label not in rel_dict.keys():
        rel_dict[label] = [example]
    else:
        rel_dict[label].append(example)
print("Done building dictionary")  
    
# example for each relation
for rel in rel_dict.keys():
    ex = rel_dict[rel][0]
    print(rel,ex.entity_1,ex.entity_2)

Done building dictionary
has_spouse Judy_Garland David_Rose
author Charlie_and_the_Chocolate_Factory Roald_Dahl
NO_REL Sichuan Tibet
capital Andalusia Seville
worked_at Carl-Henric_Svanberg Ericsson


In [6]:
# how to reconstruct full context

# ex = train_data[0]
# print(ex)
# print("\n full context:")
# s = ex.snippet[0]
# print(' '.join((s.left, s.mention_1, s.middle, s.mention_2, s.right)))

In [7]:
# def rebuild_text(ex):
#     rebuilt_ex = []
#     for s in ex.snippet:
#         text = ' '.join((s.left, s.mention_1, s.middle, s.mention_2, s.right))
#         rebuilt_ex.append(text)
#     return rebuilt_ex

In [8]:
# def build_text_from_snippet(s):
#     text = ' '.join((s.left, s.mention_1, s.middle, s.mention_2, s.right))
#     return text

In [9]:
# def rebuild_corpus(data):
#     corpus = []
#     for ex in data:
#         corpus.append(rebuild_text(ex)) 
#     return corpus

In [10]:
# def extract_key_sents(data):
#     key_sents = []
#     for ex in data:
#         m1 = ex.snippet[0].mention_1
#         m2 = ex.snippet[0].mention_2
#         text = build_text_from_snippet(ex.snippet[0])
#         doc = nlp(text)
#         for sent in doc.sents:
# #             print(sent)
#             if m1 in sent.string and m2 in sent.string:
#                 key_sents.append(sent)
#                 continue
                
#     return key_sents

In [11]:
# key_sents = extract_key_sents(train_data[:100])
# print(type(key_sents[0]))
# for sent in key_sents:
#     for chunk in sent.noun_chunks:
#         print(chunk.label_, chunk.text, chunk.root.text, chunk.root.dep_,
#           chunk.root.head.text)
#     for token in sent: 
#         print(token.text, token.dep_, token.head.text, token.head.pos_,
#               [child for child in token.children])

In [170]:
def tag_tokens(doc):
    tagged_ex = []
    
    for w in doc:
        if w.orth_ == "m_1" or w.orth_ == "m_2":
            tagged_ex.append(w.orth_)
        else:
            tagged_ex.append(w.pos_)
            
    tagged_ex = " ".join(tagged_ex)
    
    return tagged_ex

In [171]:
def lemmatize(doc):
    lemmas = []
    
    for w in doc:
        if w.lemma_ == "-PRON-" or w.orth_ == "m_1" or w.orth_ == "m_2":
            lemmas.append(w.orth_)
        else:
            lemmas.append(w.lemma_)
    
    lemmas = " ".join(lemmas)
    
    return lemmas

In [172]:
##################################################################################################
# 2.1 PERFORM NLP ON CORPUS DATA
##################################################################################################

def perform_nlp(data, verbose=True):
    
    if verbose:
        print("{} instances in data".format(len(data)))
        print("first instance looks like {}".format(data[0]))
        
    c = 0
    docs = []
    for instance in data:
        instance_context = []
        for s in instance.snippet:
            context = nlp(s.left + " m_1 " + s.middle + " m_2 " + s.right)
            instance_context.append(context)
        docs.append(instance_context)
        c += 1
    
        if verbose:
            if c % 1000 == 0:
                print("{} instances processed.".format(c))
        
    if verbose:
        print(len(docs))
        print(docs[0])
        print("Structure of context data is: {}-{}-{}".format(type(docs),
                                                              type(docs[0]),
                                                              type(docs[0][0])
                                                             )
             )
    
    return docs

In [173]:
def SelectContext(data, verbose=True):
    """BOW feature extraction"""
    only_context_data = []
    for instance in data:
        instance_context = []
        for s in instance.snippet:
            context = s.left + " m_1 " + s.middle + " m_2 " + s.right
            instance_context.append(context)
        only_context_data.append(' '.join(instance_context))
    if verbose:
        print(len(data))
        print(len(only_context_data))
        print(data[0])
        print(only_context_data[0])
    return only_context_data

In [174]:
def ExractSimpleFeatures(data, verbose=True):
    featurized_data = []
    for instance in data:
        featurized_instance = {'mid_words':'', 'distance':np.inf, 'mid_pos': ''}
        for s in instance.snippet:
            if len(s.middle.split()) < featurized_instance['distance']:
                featurized_instance['mid_words'] = s.middle
                featurized_instance['distance'] = len(s.middle.split())
                featurized_instance['mid_pos'] = ' '.join([w.pos_ for w in nlp(s.middle)])
        featurized_data.append(featurized_instance)
    if verbose:
        print(len(data))
        print(len(featurized_data))
        print(data[0])
        print(featurized_data[0])
        print(featurized_data[1])
    return featurized_data

In [175]:
def LengthOfEntities(data, verbose=True):
    featurized_data = []
    for instance in data:
        featurized_instance = {
            'entity1_len': len(instance.entity_1.split("_")),
            'entity2_len': len(instance.entity_2.split("_")),
            'combined_len': len(instance.entity_1.split("_")) + len(instance.entity_2.split("_"))
        }
        featurized_data.append(featurized_instance)
    if verbose:
        print(len(data))
        print(len(featurized_data))
        print(data[0])
        print(featurized_data[0])
        print(featurized_data[1])
    return featurized_data 

In [260]:
def FindDepPath(data, verbose=True):
    only_context_data = []
    for instance in data:
        instance_context = []
        for s in instance.snippet:
            context = s.left + " m_1 " + s.middle + " m_2 " + s.right
#             print(context)
            document = nlp(context)
            edges = []
            for token in document:
                # FYI https://spacy.io/docs/api/token
                for child in token.children:
                    edges.append(('{0}-{1}'.format(token.lower_,token.i),
                                  '{0}-{1}'.format(child.lower_,child.i)))

            graph = nx.Graph(edges)
#             print(graph)
            for w in graph:
#                 print(w)
                if "m_1" in w:
                    s = w
                if "m_2" in w:
                    t = w
            
#             print("s: ", len(s), "\t", "t: ", len(t))
#             if len(s) > 7 or len(t) > 7:
#                 pass
#             else:
            try:
                instance_context.append(nx.shortest_path_length(graph, source=s, target=t))
            except nx.NetworkXNoPath:
                instance_context.append(0)
            except nx.NodeNotFound:
                pass
        try:                                
            only_context_data.append(sum(instance_context)/len(instance_context))
        except ZeroDivisionError:
            only_context_data.append(0)
            
    if verbose:
        print(len(data))
        print(len(only_context_data))
        print(data[0])
        print(only_context_data[0])

    return only_context_data

In [263]:
# test_feat = LengthOfEntities(train_data)
# test_feat = ExractSimpleFeatures(train_data[:10])
# 
# test_feat = FindDepPath(train_data)

In [264]:
# print(len(test_feat))
# print(test_feat)

In [184]:
# def SelectTaggedContext(data, verbose=True):
    
#     processed_data = perform_nlp(data)
    
#     tagged_data = []
#     for processed_instance in processed_data:
#         instance_tags = []
#         for doc in processed_instance:  
#             tags = tag_tokens(doc)
#             instance_tags.append(tags)
#         tagged_data.append(' '.join(instance_tags))
#     if verbose:
#         print(len(processed_data))
#         print(len(tagged_data))
#         print(processed_data[0])
#         print(tagged_data[0])
#     return tagged_data

In [185]:
class SimpleFeaturizer(BaseEstimator, TransformerMixin):
    def __init__(self, featurizer):
        self.featurizers = featurizer

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return ExractSimpleFeatures(X, verbose=False)

In [186]:
class EntityLengthFeaturizer(BaseEstimator, TransformerMixin):
    """Extract features from each isntance for DictVectorizer"""
    def __init__(self, featurizer):
        self.featurizers = featurizer
        
    def fit(self, x, y=None):
        return self

    def transform(self, X):
        return LengthOfEntities(X, verbose=False)

In [187]:
class BowFeaturizer(BaseEstimator, TransformerMixin):
    """BOW featurizer"""
    def __init__(self, featurizer):
        self.featurizers = featurizer
        
    def fit(self, x, y=None):
        return self

    def transform(self, X):
        return SelectContext(X, verbose=False)

In [192]:
class DependencyPath(BaseEstimator, TransformerMixin):
    """BOW featurizer"""
    def __init__(self, featurizer):
        self.featurizers = featurizer
        
    def fit(self, x, y=None):
        return self

    def transform(self, X):
        return FindDepPath(X, verbose=False)

In [193]:
# Transform labels to numeric values
le = LabelEncoder()
train_labels_featurized = le.fit_transform(train_labels)

# Fit model one vs rest logistic regression    
# clf = make_pipeline(DictVectorizer(), LogisticRegression())

# clf = make_pipeline(union, LogisticRegression())

# length_pipe = Pipeline([
#   ('ent_length', EntityLengthFeaturizer(LengthOfEntities)),
#   ('ent_length_vectorize', DictVectorizer())
#   ])

# bow_pipe = Pipeline([
#   ('context_data', BowFeaturizer(SelectContext)),
#   ('context_data_vectorize', CountVectorizer())
#   ])

# clf = Pipeline([
#     ('feat_union', FeatureUnion(transformer_list=[
#           ('length_pipeline', length_pipe),
#           ('bow_pipeline', bow_pipe)
#           ])),
#     ('classify', LogisticRegression())
#     ])


length_pipe = make_pipeline(EntityLengthFeaturizer(LengthOfEntities), DictVectorizer())

# bow_pipe = make_pipeline(BowFeaturizer(SelectContext), CountVectorizer(ngram_range=(1,3)))
bow_pipe = make_pipeline(BowFeaturizer(SelectContext), CountVectorizer())

simple_pipe = make_pipeline(SimpleFeaturizer(ExractSimpleFeatures), DictVectorizer())

# dep_pipe = make_pipeline(DependencyPath(FindDepPath))

clf = make_pipeline(FeatureUnion(transformer_list=[
    ('length_pipeline', length_pipe),
    ('bow_pipeline', bow_pipe),
    ('simple_pipeline', simple_pipe),
    ('dependency_pipeline', DependencyPath(FindDepPath))]),
    LogisticRegression())


In [190]:
##################################################################################################
# 3. TRAIN CLASSIFIER AND EVALUATE (CV)
##################################################################################################

def print_statistics_header():
    print('{:20s} {:>10s} {:>10s} {:>10s} {:>10s}'.format(
        'relation', 'precision', 'recall', 'f-score', 'support'))
    print('{:20s} {:>10s} {:>10s} {:>10s} {:>10s}'.format(
        '-' * 18, '-' * 9, '-' * 9, '-' * 9, '-' * 9))

def print_statistics_row(rel, result):
    print('{:20s} {:10.3f} {:10.3f} {:10.3f} {:10d}'.format(rel, *result))

def print_statistics_footer(avg_result):
    print('{:20s} {:>10s} {:>10s} {:>10s} {:>10s}'.format(
        '-' * 18, '-' * 9, '-' * 9, '-' * 9, '-' * 9))
    print('{:20s} {:10.3f} {:10.3f} {:10.3f} {:10d}'.format('macro-average', *avg_result))

def macro_average_results(results):
    avg_result = [np.average([r[i] for r in results.values()]) for i in range(3)]
    avg_result.append(np.sum([r[3] for r in results.values()]))
    return avg_result

def average_results(results):
    avg_result = [np.average([r[i] for r in results]) for i in range(3)]
    avg_result.append(np.sum([r[3] for r in results]))
    return avg_result
    
def evaluateCV(classifier, label_encoder, X, y, verbose=True):
    """
    classifier: clf - pipeline with CountVevtorizer and Logistic regression
    label_encoder: le - label encoder
    X: train data featurized
    y: train labels featurized
    """
    results = {}
    for rel in le.classes_:
#         print(rel)
        results[rel] = []
    if verbose:
        print_statistics_header()
        kfold = StratifiedKFold(n_splits = 5, shuffle=True, random_state=0) 
        for train_index, test_index in kfold.split(X, y):
            #print("TRAIN:", train_index, "TEST:", test_index)
            X_train, X_test = [X[i] for i in train_index], [X[i] for i in test_index]
            y_train, y_test = [y[i] for i in train_index], [y[i] for i in test_index]
            clf.fit(X_train, y_train)
            pred_labels = classifier.predict(X_test)
            stats = precision_recall_fscore_support(y_test, pred_labels, beta=0.5)
            #print(stats)
            for rel in label_encoder.classes_:
                rel_id = label_encoder.transform([rel])[0]
#             print(rel_id,rel)
                stats_rel = [stat[rel_id] for stat in stats]
                results[rel].append(stats_rel)
        for rel in label_encoder.classes_:
            results[rel] = average_results(results[rel])
            if verbose:
                print_statistics_row(rel, results[rel])
    avg_result = macro_average_results(results)
    if verbose:
        print_statistics_footer(avg_result)
    return avg_result[2]  # return f_0.5 score as summary statistic

In [191]:
evaluateCV(clf, le, train_data, train_labels_featurized)

relation              precision     recall    f-score    support
------------------    ---------  ---------  ---------  ---------
thirty-0
and-1
life-3
his-2
and-4
career-5
riding-7
were-6
high-8
.-9
in-10
1941-11
after-14
shortly-13
death-16
the-15
of-17
father-19
his-18
began-22
,-12
,-20
mercer-21
affair-25
engaged-36
.-40
an-23
intense-24
with-26
m_1-32
year-29
nineteen-27
--28
old-31
--30
while-33
she-34
was-35
to-37
m_2-39
composer-38
married-42
garland-41
rose-43
stop-46
,-49
but-50
lingered-55
to-44
temporarily-45
affair-48
the-47
effect-52
the-51
on-53
mercer-54
,-56
adding-57
.-65
to-58
depth-61
the-59
emotional-60
of-62
lyrics-64
his-63
affair-67
their-66
activity-1
latest-0
summary-8
wikia-7
sweet-4
--5
treat-6
4-2
other-3
of-9
and-15
factory-18
charlie-14
adventure-12
sweet-11
a-10
edit-13
the-16
chocolate-17
is-19
adaptation-23
.-30
a-20
2005-21
film-22
of-24
by-28
m_1-27
the-25
1964-26
m_2-29
directed-31
by-32
burton-34
tim-33
film-37
the-36
stars-38
,-35
highmore-40
as-

places-1
various-0
claim-2
title-4
.-25
the-3
,-5
as-7
such-6
parts-8
of-9
in-12
,-16
including-17
kham-11
southern-10
province-15
northwestern-13
yunnan-14
destinations-20
the-18
tourist-19
of-21
lijiang-22
and-23
zhongdian-24
places-26
like-27
m_1-28
and-29
m_2-30
claim-32
also-31
was-38
.-42
la-37
the-33
real-34
shangri-35
--36
in-39
territory-41
its-40
in-43
2001-44
region-48
tibet-46
autonomous-47
put-49
,-45
forward-50
proposal-52
la-61
a-51
optimise-57
regions-56
the-54
three-55
that-53
all-58
shangri-59
--60
research-1
more-0
.-2
taxonomists-4
some-3
group-5
nosed-8
snub-6
--7
monkeys-9
with-11
.-15
together-10
genus-14
the-12
pygathrix-13
nosed-18
snub-16
--17
monkeys-19
live-20
in-21
,-23
with-24
.-49
asia-22
range-26
a-25
covering-27
china-29
southern-28
(-30
m_1-32
especially-31
,-33
m_2-34
)-40
as-43
part-46
,-35
yunnan-36
,-37
and-38
guizhou-39
as-41
well-42
the-44
northern-45
of-47
vietnam-48
09/03/16-50
08:11:15-51
from-52
archive-61
radarchive-53
(-54
from-56
)-58
orig

themes-1
but-2
plot-6
a-3
simple-4
enough-5
and-7
humor-8
appeal-10
and-0
to-9
to-11
.-13
children-12
made-15
it-14
use-17
notable-16
of-18
music-20
popular-19
soundtrack-23
the-22
includes-24
;-21
music-25
.-51
by-26
covering-47
m_1-27
,-28
eels-29
,-30
m_2-31
,-32
proclaimers-34
the-33
,-35
wade-37
jason-36
,-38
men-41
the-39
baha-40
,-42
and-43
cale-45
john-44
(-46
cohen-49
)-50
leonard-48
during-52
june-53
2008-54
institute-59
,-55
the-56
american-57
film-58
gazes-0
at-1
.-10
enormity-3
the-2
,-4
detail-6
the-5
,-7
and-8
craftsmanship-9
inside-11
cathedral-13
the-12
of-14
málaga-15
tower-18
the-16
north-17
rises-19
meters-21
and-22
is-23
.-37
84-20
cathedral-28
highest-27
second-25
--26
the-24
in-29
behind-32
m_1-30
,-31
giralda-34
the-33
of-35
m_2-36
tower-40
the-38
south-39
is-41
unfinished-42
.-43
are-45
there-44
disputes-46
,-53
but-54
nicknamed-64
on-47
disappeared-51
funds-50
the-49
where-48
to-52
is-57
because-55
it-56
unfinished-58
cathedral-61
the-60
,-59
has-62
been-63
s-

alabama-0
,-1
and-2
treasoning-9
in-3
meantime-5
the-4
husband-7
her-6
is-8
her-10
with-11
.-18
star-14
a-12
porn-13
,-15
suxxx-17
candy-16
believed-21
this-19
is-20
be-23
and-36
habits-39
.-40
to-22
reference-25
m_2-35
a-24
to-26
m_1-27
,-28
wife-30
the-29
of-31
president-34
former-32
us-33
his-37
womanizing-38
•-42
v-41
auto-50
g-43
•-44
d-45
•-46
e-47
grand-48
theft-49
:-51
city-53
vice-52
characters-54
major-58
protagonist-55
tommy-56
vercetti-57
sang-1
and-0
``-2
clothes-6
''-7
,-8
mimicking-9
.-18
hand-5
second-3
--4
song-11
the-10
``-12
rose-16
''-17
hand-15
second-13
--14
skit-20
the-19
helped-21
restore-23
.-26
to-22
reputation-25
her-24
reagan-27
and-28
husband-30
in-44
.-46
her-29
with-31
as-34
predecessor-33
her-32
lady-36
first-35
,-37
m_1-38
,-43
,-39
widow-40
of-41
m_2-42
1985-45
nancy-47
and-48
jackie-49
compared-52
were-50
often-51
due-53
,-57
in-58
.-65
to-54
glamour-56
their-55
contrast-59
to-60
ladies-64
the-61
intervening-62
first-63
reagan-67
nancy-66
reflected-68

are-0
architects-2
.-6
the-1
of-3
process-5
that-4
in-7
couple-10
the-8
last-9
of-11
months-12
read-14
i-13
couple-16
book-31
and-39
enchantress-44
a-15
of-17
mention-20
books-18
that-19
palace-25
the-21
term-22
``-23
memory-24
''-26
m_1-28
's-29
history-30
--27
m_2-32
(-33
favorite-36
)-37
,-38
a-34
personal-35
rushdie-41
salman-40
's-42
the-43
of-45
.-47
florence-46
term-53
what-48
an-49
evocative-50
,-51
romantic-52
thought-56
,-54
i-55
.-57
called-59
also-58
method-61
,-65
a-66
``-60
of-62
''-64
loci-63
in-0
skies-2
,-6
and-7
in-8
by-15
.-22
the-1
(-3
1959-4
)-5
experience-11
the-9
mathematical-10
(-12
1981-13
)-14
davis-18
philip-16
j.-17
and-19
hersh-21
reuben-20
served-24
it-23
as-25
:-55
illustration-28
the-26
cover-27
for-29
,-36
account-39
,-45
for-46
1983-34
m_1-30
's-31
m_2-32
(-33
)-35
a-37
bestselling-38
of-40
history-42
the-41
of-43
science-44
matter-50
sorabji-48
richard-47
's-49
,-51
space-52
&-53
motion-54
theories-56
in-57
and-59
sequel-61
stephan-66
antiquity-58
the

likely-4
,-0
with-1
matisse-2
more-3
place-6
.-13
to-5
figures-8
in-9
his-7
interiors-12
realized-11
fully-10
matisse-14
and-15
picasso-16
brought-19
were-17
first-18
together-20
at-21
m_2-30
.-31
salon-24
the-22
paris-23
of-25
m_1-26
and-27
companion-29
her-28
during-32
decade-35
,-40
americans-41
the-33
first-34
of-36
century-39
the-37
20th-38
in-42
stein-45
paris-43
gertrude-44
,-46
stein-50
brothers-48
her-47
leo-49
,-51
stein-53
michael-52
and-54
wife-57
michael-55
's-56
sarah-58
things-1
better-0
than-2
.-10
movies-4
the-3
hacking-8
he-5
has-6
been-7
out-9
meets-12
he-11
all-13
.-47
of-14
:-18
hemingway-19
idols-17
his-15
expatriate-16
,-20
fitzgerald-23
f.-21
scott-22
and-24
zelda-25
,-26
m_1-27
and-28
m_2-29
,-30
plus-31
luminaries-33
other-32
like-34
dali-35
,-36
ray-38
man-37
,-39
baker-41
josephine-40
,-42
picasso-43
,-44
porter-46
cole-45
s-50
’-49
in-51
!-54
heaven-53
absolute-52
imagine-55
,-56
stein-58
gertrude-57
toklas-0
(-1
left-2
and-4
stein-6
)-3
gertrude-5
right-8


mist-0
--1
reminder-3
.-20
a-2
of-4
history-10
delhi-5
's-6
long-7
and-8
rich-9
as-11
capital-13
the-12
of-14
kings-17
many-15
powerful-16
and-18
empires-19
at-21
airport-23
the-22
ran-25
i-24
into-26
--30
m_1-27
and-28
m_2-29
he-31
in-32
kurta-35
his-33
simple-34
in-38
pantsuit-40
a-39
are-43
and-36
she-37
;-41
they-42
couple-46
!-47
quite-44
a-45
few-50
just-48
a-49
days-51
earlier-52
was-54
i-53
in-55
.-57
bangalore-56
many-1
so-0
of-2
artists-9
america-3
’-4
s-5
garde-8
avant-6
--7
in-10
including-14
,-20
century-13
the-11
mid-20th-12
besides-15
some-16
of-17
poets-19
these-18
alumni-22
notable-21
include-23
dawson-25
,-61
fielding-24
,-26
rumaker-28
michael-27
,-29
m_1-30
,-31
rockburne-33
dorothea-32
,-34
m_2-35
,-36
chamberlain-38
john-37
,-39
johnson-41
ray-40
,-42
noland-44
kenneth-43
,-45
sivhonen-47
oli-46
,-48
oppenheimer-50
joel-49
,-51
williams-53
jonathan-52
,-54
asawa-56
ruth-55
,-57
niro-60
robert-58
de-59
overlooked-2
much-0
--1
kind-3
of-4
literature-7
children-5
's-

redemption-1
shawshank-0
is-2
film-7
.-21
drama-6
american-5
a-3
1994-4
written-8
and-9
directed-10
by-11
and-14
starring-15
darabont-13
frank-12
robbins-17
tim-16
and-18
freeman-20
morgan-19
adapted-22
from-23
m_2-27
novella-26
the-24
m_1-25
film-30
the-29
tells-31
,-28
story-33
the-32
of-34
dufresne-36
andy-35
,-37
banker-39
a-38
spends-41
who-40
decades-44
for-49
two-43
nearly-42
in-45
prison-48
shawshank-46
state-47
murder-51
the-50
of-52
his-53
redemption-2
the-0
shawshank-1
is-3
film-8
.-21
drama-7
american-6
a-4
1994-5
written-9
and-10
directed-11
by-12
darabont-14
frank-13
starring-15
robbins-17
tim-16
and-18
freeman-20
morgan-19
adapted-22
from-23
m_2-27
novella-26
the-24
m_1-25
film-30
the-29
portrays-31
,-28
story-33
the-32
of-34
dufresne-36
andy-35
,-37
banker-39
a-38
spends-41
who-40
decades-44
for-49
two-43
nearly-42
in-45
prison-48
shawshank-46
state-47
murder-51
the-50
of-52
his-53
show-1
90s-0
:-2
film-3
,-4
moment-5
by-6
friday-9
film-15
moment-7
1978-8
,-10
may-11
6-

written-0
by-1
,-4
and-5
turned-10
einstein-3
albert-2
as-6
result-8
a-7
he-9
down-11
scholarship-13
study-21
.-26
a-12
to-14
institute-17
the-15
art-16
of-18
chicago-19
to-20
physics-22
at-23
m_1-25
the-24
worked-30
whilst-27
there-28
he-29
under-31
,-33
and-34
received-36
.-43
m_2-32
eventually-35
ph.d.-38
in-39
in-41
his-37
physics-40
1956-42
in-44
1960-45
joined-47
he-46
faculty-50
.-57
the-48
physics-49
of-51
institute-54
the-52
massachusetts-53
of-55
technology-56
march-0
2001-1
,-2
christened-3
by-4
wife-7
reagan-5
's-6
nancy-8
day-11
,-12
and-13
commissioned-14
.-27
the-9
same-10
on-15
,-19
with-20
july-17
12-16
2003-18
goodwin-24
captain-21
j.-22
w.-23
in-25
command-26
president-29
vice-28
m_1-30
and-31
m_2-32
were-33
both-34
present-35
at-36
,-39
as-42
reagan-44
ceremony-38
the-37
as-40
well-41
nancy-43
,-45
gave-47
who-46
crew-51
order-55
ship-49
the-48
's-50
the-52
traditional-53
first-54
as-56
unit-59
an-57
active-58
of-60
world-1
the-0
.-2
refers-5
it-3
also-4
to-6
.-18
r

years-1
22,000-0
in-2
,-5
future-4
the-3
weapon-8
an-6
atomic-7
with-9
yield-13
an-10
adjustable-11
radiation-12
called-14
:-17
wikipedia-16
a-15
burner-19
stone-18
used-21
is-20
in-22
.-26
attempt-25
an-23
assassination-24
1-28
[-27
]-29
in-30
book-33
m_1-31
's-32
m_2-34
(-35
1982-36
)-37
town-41
an-39
american-40
,-42
city-44
,-47
midland-43
,-45
ohio-46
depopulated-49
,-38
is-48
detonates-54
.-58
bomb-53
a-51
neutron-52
because-50
on-55
freeway-57
the-56
structures-60
all-59
are-61
intact-62
townspeople-65
the-64
buried-67
,-63
are-66
16th-0
2011-1
posted-2
in-3
:-4
comments-7
lifestyle-5
no-6
like-10
?-13
do-8
you-9
story-12
this-11
lambert-15
miranda-14
and-16
shelton-18
blake-17
married-20
got-19
on-21
news-27
.-54
saturday-22
countrychorus.wordpress.com-24
(-23
)-25
happy-26
for-28
opted-32
m_1-29
and-30
m_2-31
for-33
ceremony-36
a-34
wedding-35
held-39
that-37
was-38
at-40
ranch-44
strange-43
don-42
the-41
in-45
north-50
boerne-46
,-47
texas-48
,-49
of-51
antonio-53
san-52
even

states-49
the-47
united-48
trees-55
christmas-54
decorated-58
[-51
31-52
]-53
may-56
be-57
with-59
.-63
lights-60
and-61
ornaments-62
36-1
[-0
]-2
[-3
37-4
]-5
from-6
germany-7
custom-9
the-8
introduced-11
was-10
to-12
,-14
via-16
,-24
and-25
by-29
during-31
.-36
england-13
first-15
charlotte-18
queen-17
,-19
wife-20
of-21
iii-23
george-22
successfully-28
then-26
more-27
m_1-30
reign-33
the-32
of-34
m_2-35
around-37
time-40
the-38
same-39
immigrants-43
german-42
introduced-44
,-41
custom-46
into-47
.-51
the-45
states-50
the-48
united-49
trees-56
christmas-55
decorated-59
[-52
38-53
]-54
may-57
be-58
with-60
.-64
lights-61
and-62
ornaments-63
rooms-0
and-1
libraries-2
of-3
after-9
disrobe-13
in-14
.-21
homes-8
estate-7
country-6
the-4
great-5
dinner-10
and-11
partially-12
order-15
show-17
to-16
off-18
tattoos-20
their-19
aside-22
from-23
consort-25
her-24
m_1-26
are-29
,-27
there-28
rumours-31
persistent-30
had-34
that-32
m_2-33
tattoo-37
in-38
a-35
small-36
location-44
an-39
undisclose

real-38
lived-41
who-40
and-42
ruled-43
in-44
time-47
the-45
same-46
set-50
kuroshitsuji-48
is-49
in-51
17-54
[-53
]-55
18-57
[-56
]-58
vanel-60
azzurro-59
and-61
undertaker-62
are-63
two-66
the-64
only-65
works-1
the-0
of-2
.-7
industry-3
of-4
nations-6
all-5
considered-9
often-8
model-11
the-10
of-12
fairs-15
all-13
worlds-14
followed-18
that-16
have-17
exhibition-21
the-20
sponsored-24
,-19
was-22
primarily-23
by-25
,-30
in-31
.-39
husband-27
m_1-26
,-28
m_2-29
order-32
promote-34
to-33
technology-36
modern-35
and-37
design-38
palace-42
the-40
crystal-41
,-43
feet-46
glass-52
,-57
a-44
1848-45
by-47
,-51
wide-50
feet-49
454-48
and-53
cast-54
structure-56
iron-55
collection-1
the-0
at-2
time-4
that-3
housed-6
were-5
at-7
.-13
locations-9
various-8
around-10
city-12
the-11
on-14
wednesday-15
,-16
october-17
,-21
28-18
,-19
1861-20
during-22
visit-25
a-23
royal-24
to-26
by-28
edinburgh-27
m_1-29
,-30
consort-33
prince-31
--32
m_2-34
laid-35
stone-39
for-40
.-50
the-36
foundation-37
--3

”-46
by-47
sagmeister-49
stefan-48
figure-53
(-50
an-51
important-52
in-54
world-56
the-55
of-57
graphic-58
dispute-2
moca-0
mural-1
,-3
censorship-5
.-36
moca-4
of-6
,-8
mural-10
mural-7
protest-9
in-11
monday-14
angeles-13
los-12
,-15
january-16
,-18
tendancies-21
3-17
2011-19
violent-20
finished-24
i-22
just-23
watching-25
m_1-27
'-26
,-28
'-29
second-31
the-30
of-32
trilogy-35
the-33
m_2-34
trilogy-39
the-37
millennium-38
is-40
sensation-42
heard-47
.-52
a-41
so-43
you-44
have-45
probably-46
something-48
about-49
plot-51
the-50
stories-54
the-53
character-56
central-55
is-57
salander-59
lisbeth-58
,-60
a-61
read-0
.-1
months-4
a-2
few-3
ago-5
read-7
i-6
``-8
girl-10
and-16
looking-19
.-36
the-9
with-11
``-15
tattoo-14
the-12
dragon-13
have-17
been-18
forward-20
to-21
,-25
``-26
m_1-27
reading-22
sequel-24
the-23
``-28
,-29
(-30
tgwpwf-31
)-32
by-34
both-33
m_2-35
read-40
(-37
btw-38
...-39
about-41
.-44
author-43
the-42
interesting-47
very-46
life-48
a-45
..-49
death-51
!-52
)-53
a

understanding-0
of-1
,-4
and-5
by-6
.-12
genre-3
the-2
culture-11
american-10
african-8
--9
extension-7
godin-14
dave-13
spent-15
childhood-18
in-19
forced-23
.-42
his-16
early-17
peckham-20
before-21
bombing-22
family-25
move-27
the-24
to-26
to-28
m_1-29
,-30
m_2-31
,-32
won-35
where-33
he-34
scholarship-37
to-38
a-36
school-41
dartford-39
grammar-40
began-44
godin-43
collecting-45
at-52
records-50
r-47
&-48
b-49
american-46
when-51
school-53
,-54
encouraged-57
where-55
he-56
younger-59
the-58
wikipedia-4
coat-3
:-5
photograph-12
publicity-11
kate-6
bush-7
kate-8
bush-9
--10
issued-13
october-14
bush-17
2005-15
kate-16
(-18
born-19
bush-21
on-22
in-26
catherine-20
july-24
30-23
1958-25
m_1-27
,-28
m_2-29
,-30
part-32
)-36
now-31
of-33
london-35
greater-34
is-37
inside-0
...-1
~-2
songwriter-42
a-38
british-39
singer-40
--41
known-43
,-54
and-55
eclectic-56
for-44
lyrics-53
voice-48
,-49
idiosyncratic-50
and-51
literary-52
her-45
expressive-46
3-octave-47
and-57
meticulous-58
to-0
room

entries-1
the-0
were-2
usually-3
longer-4
.-14
and-5
contain-7
frequently-6
references-8
to-9
quran-11
the-10
,-12
etc-13
posted-15
by-16
rationalpointofview-17
at-18
am-20
7:26-19
comments-22
no-21
:-23
labels-24
.-32
:-25
islam-26
,-27
religion-28
m_1-29
by-30
m_2-31
isbn-35
1996-33
,-34
978-36
.-45
0-38
31682-42
--37
393-40
--39
--41
7-44
--43
book-47
this-46
is-48
about-49
.-61
theory-51
the-50
of-52
or-54
idea-57
evolution-53
darwin-55
's-56
of-58
selection-60
natural-59
discusses-63
it-62
generations-65
how-64
of-66
follows-0
crew-3
.-17
a-1
spaceship-2
in-4
tasked-10
year-6
the-5
2057-7
who-8
are-9
with-11
reigniting-12
sun-16
earth-13
's-14
dying-15
religious-18
and-19
supernatural-20
apocalypse-21
(-22
fiction-24
)-25
eschatological-23
novel-28
the-26
1908-27
m_1-29
by-30
.-32
m_2-31
story-36
the-33
1953-34
short-35
names-40
,-47
taken-48
billion-39
nine-38
the-37
of-41
by-43
god-42
clarke-46
arthur-44
c.-45
from-49
collection-53
story-52
short-51
the-50
of-54
same-56
the-55
w

have-3
they-0
do-1
n't-2
right-5
.-17
free-4
do-7
to-6
want-10
what-8
they-9
with-11
capital-13
the-12
entrusted-14
to-15
them-16
see-20
we-18
may-19
it-21
as-22
made-33
.-48
point-25
a-23
turning-24
m_1-27
,-28
ceo-29
,-32
of-30
m_2-31
when-26
points-36
think-39
week-43
these-34
very-35
,-37
i-38
,-40
late-41
last-42
or-44
week-47
early-45
this-46
's-50
that-49
actually-51
sign-55
.-56
hopeful-54
pretty-53
a-52
coming-57
back-58
got-62
,-59
we-60
clearly-61
ephron-0
director-4
``-6
before-7
''-21
(-1
writer-2
/-3
)-5
audience-9
an-8
of-10
leaders-15
400-13
more-11
than-12
women-14
and-16
men-20
a-17
few-18
good-19
buffett-24
(-22
warren-23
brka-26
and-28
ceo-33
.-36
(-25
)-27
gs-31
(-30
)-32
m_1-29
m_2-34
)-35
listen-39
you-37
can-38
live-40
via-41
.-53
question-52
webcast-42
here-43
and-44
in-49
even-45
'-46
chat-47
--48
'-50
a-51
listen-57
you-54
can-55
also-56
to-58
sessions-61
the-59
speaker-60
on-62
demand-63
for-64
a-65
reporter-1
post-0
mcgee-3
suzanne-2
called-4
make-9
,-18
an

section-2
special-0
thanks-1
of-3
.-9
trespasser-8
game-7
video-6
the-4
1998-5
46-11
[-10
]-12
upcoming-13
and-14
announced-15
projects-16
directing-20
.-35
spielberg-17
is-18
currently-19
lincoln-21
,-22
starring-23
lewis-27
as-28
as-33
daniel-24
day-25
--26
m_1-29
and-30
field-32
sally-31
m_2-34
47-37
[-36
]-38
based-39
:-50
on-40
team-47
goodwin-43
doris-41
kearns-42
s-45
‘-44
bestseller-46
of-48
rivals-49
genius-53
the-51
political-52
of-54
lincoln-56
abraham-55
film-59
the-58
follows-60
,-57
lincoln-61
s-63
’-62
leadership-64
during-65
's-1
that-0
cross-3
,-9
``-10
repulsion-11
.-19
a-2
between-4
city-7
``-5
dark-6
''-8
''-12
and-13
ladder-17
jacob-15
's-16
``-14
''-18
sounds-20
made-24
,-27
right-28
like-21
it-22
's-23
of-25
win-26
?-29
add-30
in-31
actresses-34
the-32
gorgeous-33
m_1-35
and-36
m_2-37
oozing-42
and-38
it-39
's-40
even-41
with-43
.-46
appeal-45
sex-44
miss-50
how-47
could-48
this-49
?-51
make-54
and-52
then-53
flick-58
!-59
it-55
a-56
chick-57
right-61
all-60
!-62

rights-1
nfl-0
.-2
in-3
2006-4
corporation-6
cbs-5
's-7
radio-9
cbs-8
unit-10
sued-11
stern-13
howard-12
and-14
sumner-15
become-17
has-16
butt-19
m_2-29
.-30
the-18
of-20
on-22
along-25
criticism-21
show-24
his-23
with-26
ceo-28
m_1-27
autobiography-32
his-31
,-33
passion-35
(-38
knobler-46
)-47
,-48
a-34
to-36
win-37
was-40
which-39
co-41
--42
written-43
by-44
peter-45
released-50
was-49
in-51
and-53
published-54
2001-52
by-55
book-61
viacom-56
's-57
simon-58
&-59
schuster-60
pays-0
jr.-4
?-24
ceo-1
edgar-2
bronfman-3
,-5
bought-7
,-18
package-23
who-6
company-9
run-13
the-8
so-10
he-11
could-12
it-14
,-15
poorly-17
albeit-16
million-22
$-20
6-21
his-19
for-26
just-25
record-28
the-27
ceo-31
,-29
m_1-30
m_2-32
,-33
includes-36
,-39
watch-35
whose-34
radio-38
cbs-37
collected-40
million-43
year-45
.-46
$-41
33.7-42
last-44
in-47
period-51
the-48
same-49
time-50
radio-54
cbs-53
purged-55
,-52
jobs-57
.-58
700-56
like-61
still-59
,-60
a-62
watch-1
to-0
and-2
find-3
.-8
out-4
more-5
abou

because-50
m_2-37
about-38
woman-40
a-39
confined-41
to-42
by-46
bedroom-45
her-43
upstairs-44
husband-49
her-47
physician-48
of-51
depression-55
her-52
``-53
nervous-54
artist-59
the-58
earned-60
degree-63
her-61
undergraduate-62
in-64
painting-65
victim-3
the-2
receives-4
even-0
if-1
spell-7
.-11
restoration-6
a-5
or-8
something-9
similar-10
be-14
it-12
might-13
immobile-15
...-20
but-21
has-23
,-16
slow-17
and-18
weak-19
it-22
effect-28
.-29
a-24
devastating-25
,-26
lingering-27
m_1-31
inspiration-30
by-32
,-34
is-36
,-43
or-44
here-45
sites-54
.-55
m_2-33
which-35
available-37
via-38
gutenberg-40
project-39
,-41
here-42
,-46
as-49
as-47
well-48
dozen-52
a-50
few-51
other-53
was-57
lovecraft-56
fan-59
a-58
april-0
2010-1
march-2
2010-3
february-4
2010-5
january-6
2010-7
pages-8
about-9
:-15
published-10
by-11
under-14
filled-13
jacob-12
comments-18
no-17
in-19
head-21
–-22
our-20
m_1-26
the-23
yellow-24
wallpaper-25
is-27
story-30
by-31
.-33
a-28
short-29
m_2-32
set-36
the-34
full-3

at-0
age-1
,-5
source-9
!-13
89-2
from-3
wikinews-4
the-6
free-7
news-8
write-12
you-10
can-11
jump-15
unchecked-14
to-16
:-17
navigation-18
,-19
search-20
thursday-21
,-22
november-23
8-24
,-25
m_1-29
arthur-27
kornberg-28
m_2-31
,-32
winner-33
,-43
biochemist-30
of-34
prize-38
the-35
1959-36
nobel-37
in-39
physiology-40
or-41
medicine-42
died-44
on-45
.-53
october-46
26-47
,-48
2007-49
of-50
failure-52
respiratory-51
was-55
he-54
89-56
.-57
is-59
he-58
outrage-1
worldwide-0
rescued-5
after-2
had-3
been-4
from-6
.-25
husband-8
her-7
and-9
relatives-11
his-10
starved-13
who-12
and-14
tortured-15
girl-17
force-19
the-16
to-18
her-20
enter-22
to-21
into-23
prostitution-24
m_1-26
17-28
,-30
nominee-35
,-44
[-27
]-29
prize-34
nobel-32
peace-33
a-31
and-36
head-37
of-38
18-42
the-39
m_2-40
[-41
]-43
joined-47
has-45
also-46
protest-49
.-52
the-48
in-50
kabul-51
people-54
ome-53
used-56
have-55
execution-58
as-59
the-57
opportunity-61
an-60
criticize-63
to-62
nations-65
for-66
western-64
ina

giants-1
catalonian-0
are-3
,-2
ready-4
.-11
go-6
to-5
out-7
onto-8
streets-10
the-9
are-14
but-12
who-13
city-16
these-15
s-18
giants-19
exactly-20
?-21
giants-24
the-22
oldest-23
of-25
capital-28
the-26
catalan-27
are-29
m_1-30
m_2-34
.-35
and-31
wife-33
his-32
since-36
year-38
the-37
1424-39
parade-41
they-40
through-42
.-45
city-44
the-43
pair-49
the-46
current-47
giant-48
dates-50
from-51
got-59
year-53
and-55
in-56
the-52
1992-54
2006-57
they-58
new-60
in-0
1690-1
,-2
traveled-4
.-20
who-3
up-5
river-8
in-9
the-6
saskatchewan-7
hopes-10
of-11
fur-13
trading-12
with-14
peoples-19
province-16
the-15
's-17
indigenous-18
settlement-25
the-21
first-22
permanent-23
european-24
was-26
post-29
.-39
a-27
m_1-28
at-30
,-33
founded-34
house-32
cumberland-31
in-35
by-37
1774-36
m_2-38
kelsey-41
henry-40
sees-42
buffalo-44
on-45
.-49
the-43
plains-48
the-46
western-47
in-50
1803-51
purchase-54
the-52
louisiana-53
transferred-55
from-56
to-58
france-57
states-61
part-62
the-59
united-60
of-63


brings-1
who-0
news-3
”-4
.-5
bad-2
used-10
later-6
,-7
it-8
was-9
by-11
1-23
and-25
threatens-39
shakespeare-12
in-13
,-16
part-17
iv-15
henry-14
2-18
1598-20
(-19
)-21
[-22
]-24
in-26
antony-27
and-28
cleopatra-29
told-32
when-31
married-35
m_1-33
has-34
another-36
:-30
,-37
m_2-38
treat-41
s-45
.-68
to-40
messenger-43
the-42
’-44
eyes-46
as-47
,-49
eliciting-50
,-56
made-63
balls-48
madam-55
response-52
the-51
‘-53
gracious-54
i-57
bring-60
that-58
do-59
news-62
the-61
match-66
’-67
not-64
the-65
2-70
[-69
]-71
city-1
the-0
.-2
made-5
this-3
was-4
of-6
.-15
skin-9
fine-7
calf-8
,-10
predecessor-12
a-11
of-13
vellum-14
library-17
the-16
at-18
pergamom-19
believed-21
was-20
contain-23
.-37
to-22
volumes-25
200,000-24
,-26
gave-30
which-27
m_1-28
later-29
to-31
as-33
m_2-32
present-36
a-34
wedding-35
altar-40
the-38
great-39
of-41
,-43
on-44
,-48
germany-49
pergamon-42
display-45
in-46
berlin-47
believe-52
--50
i-51
was-54
this-53
temple-57
the-55
rebuilt-56
sat-60
where-58
satan-59
me

show-0
,-1
wizards-2
be-8
of-3
place-5
waverly-4
,-6
would-7
last-10
.-11
its-9
in-12
february-13
2011-14
reported-17
,-15
deadline.com-16
acquired-25
.-37
pictures-20
universal-19
and-21
entertainment-23
strike-22
that-18
had-24
novel-27
by-29
with-31
the-26
m_1-28
m_2-30
gomez-32
attached-33
to-34
lead-36
the-35
month-40
that-38
same-39
guide-42
tv-41
reported-43
make-47
.-54
that-44
gomez-45
would-46
cameo-49
a-48
in-50
film-53
the-51
muppets-52
production-55
for-56
final-58
the-57
embarked-0
on-1
without-8
.-11
year-7
tour-5
mini-3
--4
a-6
the-2
tour-10
rain-9
in-12
february-13
2011-14
reported-17
,-15
deadline.com-16
acquired-25
.-37
pictures-20
universal-19
and-21
entertainment-23
strike-22
that-18
had-24
novel-27
by-29
with-31
the-26
m_1-28
m_2-30
gomez-32
attached-33
to-34
lead-36
the-35
39-39
[-38
]-40
month-43
that-41
same-42
guide-45
tv-44
reported-46
make-50
.-57
that-47
gomez-48
would-49
cameo-52
a-51
in-53
film-56
the-54
muppets-55
show-0
,-1
wizards-2
be-8
of-3
place-5
w

part-1
a-0
of-2
.-6
empire-5
the-3
russian-4
after-8
shortly-7
revolution-11
the-9
october-10
in-12
1917-13
declared-16
,-14
finland-15
independence-18
.-23
its-17
from-19
union-22
the-20
soviet-21
m_1-26
the-25
ceded-28
when-24
was-27
by-29
to-31
finland-30
m_2-33
the-32
after-34
war-37
the-35
winter-36
and-38
war-41
the-39
continuation-40
(-42
1939–1944-43
)-44
became-47
,-45
kuokkala-46
russian-48
.-49
railway-51
a-50
to-52
,-54
built-55
,-58
vyborg-53
in-56
1870-57
turned-59
repino-60
into-61
cottage-64
a-62
summer-63
wars-2
“-0
physics-1
”-3
.-4
known-8
well-6
--7
contribution-9
another-5
was-10
reality-12
.-29
quantum-11
by-13
herbert-16
physicist-14
nick-15
(-17
1985-18
)-19
dealt-21
which-20
mainly-22
with-23
interpretations-25
possible-24
of-26
theory-28
quantum-27
book-32
the-30
1979-31
,-33
m_1-34
”-45
)-46
by-35
physicist-44
m_2-36
“-41
confessedly-40
self-38
--39
(-37
not-42
a-43
included-48
again-47
parallels-49
.-56
between-50
mysticism-52
eastern-51
and-53
physics-55
mo

one-0
of-1
required-14
.-42
biggest-3
the-2
and-4
highlights-8
expensive-7
most-6
the-5
of-9
film-11
the-10
as-12
it-13
show-16
to-15
trains-20
the-17
five-18
running-19
shot-23
that-21
was-22
in-24
was-28
m_1-25
as-26
it-27
impossible-30
set-32
technically-29
to-31
it-33
up-34
in-35
m_2-36
or-37
city-41
any-38
other-39
big-40
trains-44
some-43
kept-46
were-45
on-47
for-49
on-54
.-58
standby-48
couple-51
a-50
of-52
days-53
tracks-57
two-55
railway-56
was-60
it-59
a-61
expensive-63
very-62
beach-1
the-0
see-5
,-2
one-3
can-4
ships-6
.-33
in-7
,-13
beyond-17
transit-8
along-9
coast-12
the-10
malabar-11
is-15
that-14
,-16
calicut-18
and-22
moving-23
(-19
kozhikode-20
)-21
towards-24
mangalore-25
,-26
m_1-27
m_2-31
and-28
bombay-29
(-30
)-32
laid-36
well-35
out-37
garden-38
the-34
and-39
sculpture-43
erected-48
the-40
massive-41
landscaped-42
of-44
mother-45
and-46
child-47
by-49
kunhiraman-53
sculptor-51
noted-50
kanayi-52
makes-54
captivating-57
.-58
it-55
extremely-56
interests-1
wide-0

is-1
name-0
“-2
reference-4
!-12
a-3
to-5
term-8
the-6
nautical-7
,-9
ahoy-11
ships-10
words-15
the-14
ahoy-18
!-19
“-16
chips-17
feature-22
”-20
also-21
prominently-23
in-24
.-36
story-26
a-25
appearing-27
,-33
by-34
in-28
chapter-29
15-30
of-31
m_1-32
m_2-35
1-38
[-37
]-39
.-40
relays-42
dickens-41
tale-45
a-43
childhood-44
of-46
shipwright-48
a-47
named-49
taunted-53
chips-50
who-51
is-52
by-54
rat-58
a-55
diabolical-56
talking-57
predicts-60
who-59
sinking-62
the-61
seat-0
in-1
at-23
.-29
centuries-8
11th-3
,-4
12th-5
and-6
13th-7
the-2
in-9
duchy-12
the-10
former-11
of-13
swabia-14
,-15
incorporated-17
,-22
which-16
aargau-21
day-20
present-18
--19
time-25
the-24
of-26
m_1-28
the-27
from-30
germany-32
southwestern-31
alsace-35
(-33
mainly-34
,-36
m_2-37
)-42
,-38
aargau-39
and-40
thurgau-41
family-44
the-43
extended-45
influence-47
to-50
,-59
austria-63
its-46
and-48
holdings-49
reaches-53
the-51
southeastern-52
of-54
empire-58
the-55
holy-56
roman-57
today-61
's-62
roughly-60
com

NodeNotFound: Source Snippet(left='RAFAEL of Israel , as well as a single universal vertical launcher capable of launching supersonic multi-role cruise missiles like the BrahMos and Sagarika . Prime Minister', mention_1='Manmohan Singh', middle='’ s wife', mention_2='Gursharan Kaur', right='will crack the auspicious coconut ( no , no Champagne bottles here in the Indian Navy ; - ) ) marking the launch of India ’ s first nuclear-powered ballistic missile', direction='fwd') not in G

In [37]:
# A check for the average F1 score

f_scorer = make_scorer(fbeta_score, beta=0.5, average='macro')

def evaluateCV_check(classifier, X, y, verbose=True):
    kfold = StratifiedKFold(n_splits = 5, shuffle=True, random_state=0) 
    scores = cross_val_score(classifier, X, y, cv=kfold, scoring = f_scorer)
    print("\nCross-validation scores (StratifiedKFold): ", scores)
    print("Mean cv score (StratifiedKFold): ", scores.mean())

In [38]:
evaluateCV_check(clf, train_data, train_labels_featurized)


Cross-validation scores (StratifiedKFold):  [0.78319178 0.77791582 0.78375181 0.78223485 0.78230726]
Mean cv score (StratifiedKFold):  0.7818803012927733


In [39]:
##################################################################################################
# 4. TEST PREDICTIONS and ANALYSIS
##################################################################################################

# Fit final model on the full train data
clf.fit(train_data, train_labels_featurized)

# Predict on test set
test_data, test_labels = load_data('../data/test-covered.json.txt', verbose=False)
print(len(test_labels))
# test_data_featurized = SelectContext(test_data, verbose=False)
test_label_predicted = clf.predict(test_data)
print(len(test_label_predicted))
# Deprecation warning explained: https://stackoverflow.com/questions/49545947/sklearn-deprecationwarning-truth-value-of-an-array
test_label_predicted_decoded = le.inverse_transform(test_label_predicted)
print(len(test_label_predicted_decoded))
print(test_label_predicted_decoded[:2])
f = open("outputs/test_labels.txt", 'w', encoding="utf-8")
for label in test_label_predicted_decoded:
    f.write(label+'\n')

1840
1840
1840
['capital' 'NO_REL']


  if diff:


In [None]:
# Feature analisys - print N most informative
# !! Make changes in this function when you change the pipleine!!
def printNMostInformative(classifier,label_encoder,N):
    """Prints features with the highest coefficient values, per class"""
    feature_names = classifier.named_steps['countvectorizer'].get_feature_names()

    coef = classifier.named_steps['logisticregression'].coef_    
    print(coef.shape)
    for rel in label_encoder.classes_:
        rel_id = label_encoder.transform([rel])[0]
        coef_rel = coef[rel_id]
        coefs_with_fns = sorted(zip(coef_rel, feature_names))
        top_features = coefs_with_fns[-N:]
        print("\nClass {} best: ".format(rel))
        for feat in top_features:
            print(feat)        
        
print("Top features used to predict: ")
# show the top features
printNMostInformative(clf,le,2)