# Classification 2.1 Reuters data set.

# Loading the needed libraries.

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

from tqdm import tqdm_notebook

import os
import itertools

import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.multiclass import OneVsRestClassifier

from sklearn.decomposition import IncrementalPCA as iPCA

from sklearn.metrics import f1_score, precision_score, recall_score, brier_score_loss

from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.model_selection import train_test_split

In [36]:
import nltk
nltk.download("stopwords")
nltk.download("reuters")
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /home/dlab-
[nltk_data]     user/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package reuters to /home/dlab-
[nltk_data]     user/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /home/dlab-user/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [4]:
from nltk import word_tokenize
from nltk.stem.porter import PorterStemmer
import re
from nltk.corpus import stopwords
 
cachedStopWords = stopwords.words("english")

# Loading the data and taking the first look.

The Reuters Corpus contains 10,788 news documents totaling 1.3 million words. The documents have been classified into 90 topics, and grouped into two sets, called "training" and "test".
This split is for training and testing algorithms that automatically detect the topic of a document.

In [28]:
from nltk.corpus import reuters 

 
def collection_stats():
    
    # List of documents
    documents = reuters.fileids()
    print(str(len(documents)) + " documents")
 
    train_docs = list(filter(lambda doc: doc.startswith("train"),
                        documents))
    print(str(len(train_docs)) + " total train documents")
 
    test_docs = list(filter(lambda doc: doc.startswith("test"),
                       documents));
    print(str(len(test_docs)) + " total test documents")
 
    # List of categories
    categories = reuters.categories()
    print(str(len(categories)) + " categories\n")
 
    # Documents in a category
    category_docs = reuters.fileids("acq")
 
    # Words for a document
    document_id = category_docs[0]
    document_words = reuters.words(category_docs[0])
    print(document_words, "\n")  
 
    # Raw document
    print(reuters.raw(document_id))
    
    document_id = category_docs[1]
    document_words = reuters.words(category_docs[1])
    print(document_words, "\n")  
 
    # Raw document
    print(reuters.raw(document_id))

See readers API descriptions
https://www.nltk.org/api/nltk.corpus.reader.html#module-nltk.corpus.reader.api

https://www.nltk.org/api/nltk.corpus.reader.html?highlight=categorizedplaintextcorpusreader#nltk.corpus.reader.CategorizedPlaintextCorpusReader

In [29]:
collection_stats()

10788 documents
7769 total train documents
3019 total test documents
90 categories

['SUMITOMO', 'BANK', 'AIMS', 'AT', 'QUICK', 'RECOVERY', ...] 

SUMITOMO BANK AIMS AT QUICK RECOVERY FROM MERGER
  Sumitomo Bank Ltd &lt;SUMI.T> is certain to
  lose its status as Japan's most profitable bank as a result of
  its merger with the Heiwa Sogo Bank, financial analysts said.
      Osaka-based Sumitomo, with desposits of around 23.9
  trillion yen, merged with Heiwa Sogo, a small, struggling bank
  with an estimated 1.29 billion dlrs in unrecoverable loans, in
  October.
      But despite the link-up, Sumitomo President Koh Komatsu
  told Reuters he is confident his bank can quickly regain its
  position.
      "We'll be back in position in first place within three
  years," Komatsu said in an interview.
      He said that while the merger will initially reduce
  Sumitomo's profitability and efficiency, it will vastly expand
  Sumitomo's branch network in the Tokyo metropolitan area where
  it

In [7]:
reuters.categories()[:20]

['acq',
 'alum',
 'barley',
 'bop',
 'carcass',
 'castor-oil',
 'cocoa',
 'coconut',
 'coconut-oil',
 'coffee',
 'copper',
 'copra-cake',
 'corn',
 'cotton',
 'cotton-oil',
 'cpi',
 'cpu',
 'crude',
 'dfl',
 'dlr']

#### print ids of documents in category 'barley'

In [8]:
reuters.fileids('barley')

['test/15618',
 'test/15649',
 'test/15676',
 'test/15728',
 'test/15871',
 'test/15875',
 'test/15952',
 'test/17767',
 'test/17769',
 'test/18024',
 'test/18263',
 'test/18908',
 'test/19275',
 'test/19668',
 'training/10175',
 'training/1067',
 'training/11208',
 'training/11316',
 'training/11885',
 'training/12428',
 'training/13099',
 'training/13744',
 'training/13795',
 'training/13852',
 'training/13856',
 'training/1652',
 'training/1970',
 'training/2044',
 'training/2171',
 'training/2172',
 'training/2191',
 'training/2217',
 'training/2232',
 'training/3132',
 'training/3324',
 'training/395',
 'training/4280',
 'training/4296',
 'training/5',
 'training/501',
 'training/5467',
 'training/5610',
 'training/5640',
 'training/6626',
 'training/7205',
 'training/7579',
 'training/8213',
 'training/8257',
 'training/8759',
 'training/9865',
 'training/9958']

#### print categories of 'training/9865', 'training/9880' documents

In [26]:
reuters.categories(['training/9865', 'training/9880'])

['barley', 'corn', 'grain', 'money-fx', 'wheat']

#### calculate number of documents in each category

In [27]:
for i in reuters.categories():
    print(i, len(reuters.fileids(i)))

acq 2369
alum 58
barley 51
bop 105
carcass 68
castor-oil 2
cocoa 73
coconut 6
coconut-oil 7
coffee 139
copper 65
copra-cake 3
corn 237
cotton 59
cotton-oil 3
cpi 97
cpu 4
crude 578
dfl 3
dlr 175
dmk 14
earn 3964
fuel 23
gas 54
gnp 136
gold 124
grain 582
groundnut 9
groundnut-oil 2
heat 19
hog 22
housing 20
income 16
instal-debt 6
interest 478
ipi 53
iron-steel 54
jet 5
jobs 67
l-cattle 8
lead 29
lei 15
lin-oil 2
livestock 99
lumber 16
meal-feed 49
money-fx 717
money-supply 174
naphtha 6
nat-gas 105
nickel 9
nkr 3
nzdlr 4
oat 14
oilseed 171
orange 27
palladium 3
palm-oil 40
palmkernel 3
pet-chem 32
platinum 12
potato 6
propane 6
rand 3
rape-oil 8
rapeseed 27
reserves 73
retail 25
rice 59
rubber 49
rye 2
ship 286
silver 29
sorghum 34
soy-meal 26
soy-oil 25
soybean 111
strategic-metal 27
sugar 162
sun-meal 2
sun-oil 7
sunseed 16
tea 13
tin 30
trade 485
veg-oil 124
wheat 283
wpi 29
yen 59
zinc 34


# Preprocessing

In [30]:
def tokenize(text):
    min_length = 3
    words = map(lambda word: word.lower(), word_tokenize(text))
    words = [word for word in words
                  if word not in cachedStopWords]
    tokens = (list(map(lambda token: PorterStemmer().stem(token),
                  words)));
    p = re.compile('[a-zA-Z]+');
    filtered_tokens = list(filter(lambda token:
                  p.match(token) and len(token)>=min_length,
         tokens))
    return filtered_tokens

stemmers https://pythonspot.com/nltk-stemming/

In [31]:
# Return the representer, without transforming
def tf_idf(docs):
    tfidf = TfidfVectorizer(tokenizer=tokenize, min_df=3,
                        max_df=0.90, max_features=3000,
                        use_idf=True, sublinear_tf=True,
                        norm='l2')
    tfidf.fit(docs)
    return tfidf

TfIdf Sklearn API

https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html

In [33]:
def feature_values(doc, representer):
    doc_representation = representer.transform([doc])
    features = representer.get_feature_names()
    return [(features[index], doc_representation[0, index])
                 for index in doc_representation.nonzero()[1]]

In [34]:
def main():
    train_docs = []
    test_docs = []
 
    for doc_id in reuters.fileids():
        if doc_id.startswith("train"):
            train_docs.append(reuters.raw(doc_id))
        else:
            test_docs.append(reuters.raw(doc_id))
 
    representer = tf_idf(train_docs)
 
    for doc in test_docs:
        print(feature_values(doc, representer))

In [37]:
%%time
main()

[('yesterday', 0.04033607332946197), ('year', 0.04868306823970166), ('yasuhiro', 0.06784216189089363), ('would', 0.06372602853755906), ('worri', 0.06369058131200922), ('world', 0.06538908930353625), ('work', 0.04242957850539387), ('whose', 0.06074498801433533), ('whole', 0.05831497105507654), ('week', 0.03350388061036944), ('washington', 0.05100373242934736), ('warn', 0.05588698545333975), ('want', 0.04437615671705097), ('virtual', 0.06526893899506282), ('view', 0.0881170846232083), ('u.s.-japan', 0.0756981315596951), ('u.s.', 0.10359584116941481), ('two', 0.048942514648738625), ('trade', 0.11348195468326348), ('tough', 0.06585418728418185), ('told', 0.032587432290743255), ('tokyo', 0.09114601823204509), ('time', 0.03724208529585703), ('threat', 0.06130874824193495), ('third', 0.04526371139204408), ('textil', 0.06275911603898573), ('tax', 0.03718698709533435), ('tariff', 0.1358039419071181), ('talk', 0.040716751283530396), ('taiwan', 0.13419112859999516), ('surplu', 0.09585406131190968

[('u.k.', 0.11785846548405876), ('treasuri', 0.12388133819551933), ('transact', 0.11880758327727726), ('today', 0.15139648782248474), ('target', 0.12176023313927663), ('take-up', 0.1922749323286938), ('system', 0.1019280701890119), ('stg', 0.3087720409975042), ('shortag', 0.14122687741409493), ('said', 0.03969166052087025), ('rise', 0.09270330595729243), ('repurchas', 0.1446979597824553), ('remov', 0.15930284163496267), ('partli', 0.14924426072577804), ('outflow', 0.18107736073529634), ('offset', 0.14058210237761096), ('offici', 0.09029722002538582), ('note', 0.07709893133366005), ('money', 0.18750308215638772), ('mln', 0.13998630659140432), ('matur', 0.15264276326368714), ('market', 0.1601245831208498), ('main', 0.12868533876541136), ('liquid', 0.13366526395991726), ('hand', 0.1504322550552602), ('forecast', 0.19376431862453725), ('factor', 0.1315950389926404), ('exchequ', 0.16588694579163493), ('england', 0.13317356059249932), ('drain', 0.2919842996843851), ('deficit', 0.119991333468

[('yield', 0.05119680381166472), ('yen', 0.09370121798350785), ('year', 0.019567768089141537), ('would', 0.05375420165139874), ('work', 0.04069640844366121), ('within', 0.044346929782266814), ('weight', 0.06233374301717808), ('week', 0.05440980758063314), ('weak', 0.05307312116926693), ('watch', 0.06287948401882695), ('want', 0.07206622349876324), ('wall', 0.05880439890078563), ('volatil', 0.11077939356976965), ('vehicl', 0.06439336870027798), ('valu', 0.0658733520861444), ('unit', 0.07081417898561222), ('undervalu', 0.06887910661228186), ('u.s.', 0.07636356451196646), ('type', 0.06345734544463097), ('two', 0.027725471058470275), ('turn', 0.08968648787676195), ('trillion', 0.07260600243727912), ('trend', 0.05145639983161858), ('tradit', 0.05650239388667615), ('tough', 0.1325570931616725), ('today', 0.031797873566693856), ('three', 0.0536986634797611), ('think', 0.07960761425126044), ('term', 0.035667967227826614), ('target', 0.04329945805334975), ('take', 0.07412872829602751), ('succes

[('yesterday', 0.07749088851606001), ('year', 0.09352656077276085), ('world', 0.07419379740009992), ('virtual', 0.1253901943780356), ('use', 0.07570766516385052), ('unlik', 0.1757145796537999), ('unless', 0.10490420009754256), ('total', 0.06294447454409846), ('tonn', 0.1681053527242778), ('tight', 0.12189510384295231), ('term', 0.07144108073474571), ('taken', 0.0952061946337312), ('take', 0.07074960100160248), ('sterl', 0.10826050175475721), ('step', 0.10013287375092121), ('soviet', 0.16779295383728665), ('soon', 0.09853512282032029), ('slightli', 0.09758203668476337), ('significantli', 0.11175559228984051), ('shipment', 0.09165912703154751), ('shearson', 0.28018547203078126), ('see', 0.07226754599053446), ('said', 0.0478674868315883), ('rise', 0.11179865540828482), ('return', 0.09079328968177346), ('report', 0.05866967700884442), ('reduct', 0.09395141394986867), ('reduc', 0.07557696320246639), ('recov', 0.11287993103139106), ('rate', 0.06373699817376904), ('quarterli', 0.0862726029602

[('york', 0.15429283392445084), ('would', 0.09103081554547013), ('u.s.', 0.05441018629686205), ('trust', 0.2626523937529321), ('term', 0.07486724916235085), ('suspens', 0.1358608609363328), ('suspend', 0.1129785500732676), ('stop', 0.10710693666119472), ('statu', 0.1308388515633775), ('seven', 0.08579452459372906), ('second', 0.08749156552775837), ('said', 0.06217588658685179), ('resum', 0.11829342214648139), ('result', 0.11783059108354146), ('rest', 0.11321414044718095), ('requir', 0.0863662882118836), ('regul', 0.11393751057471223), ('reduc', 0.13409976274828433), ('receiv', 0.08331787775779448), ('reach', 0.0869543642259366), ('rate', 0.06679369452504759), ('quarter', 0.07269904249001233), ('put', 0.091787535407557), ('potenti', 0.10675753959590979), ('place', 0.09711226824792545), ('payment', 0.20428447360554494), ('new', 0.10140216325950006), ('net', 0.08380333110526106), ('negoti', 0.15002957453411483), ('mln', 0.07854712180872303), ('matur', 0.11393751057471223), ('long-term', 0

[('zambia', 0.16331268669184945), ('would', 0.056340864260920595), ('week', 0.07068475848049165), ('way', 0.09549504264551026), ('subsidi', 0.25111272396493484), ('spend', 0.10930929678997356), ('said', 0.05256713039042851), ('rule', 0.09850140388007483), ('rise', 0.07251293969447302), ('retail', 0.23712508265941934), ('refin', 0.11520372449117244), ('reduc', 0.08299713108303255), ('rais', 0.08026814660575132), ('produc', 0.13193144755899544), ('price', 0.18224323071466317), ('pressur', 0.10033662500551346), ('presid', 0.08152265967847064), ('practic', 0.12045180782509), ('plan', 0.12051098727920806), ('per', 0.06748233410156274), ('peopl', 0.18507672766893402), ('pct', 0.047087982311111526), ('parti', 0.1143727822286317), ('offici', 0.07063088853914054), ('monetari', 0.10481383229904849), ('ministri', 0.09662994547079122), ('meal', 0.29724643679905693), ('maiz', 0.3031293814420564), ('level', 0.07912232615984932), ('led', 0.10760504283961164), ('least', 0.09981100697488261), ('later',

[('year', 0.06477602435079952), ('would', 0.050079327674190335), ('whether', 0.14794940349070715), ('use', 0.07390068789776692), ('unavail', 0.13018277081111834), ('u.s.', 0.10635972172861333), ('told', 0.061110493774246606), ('thoma', 0.13185348313782758), ('textil', 0.36242190020230275), ('takeov', 0.09192596174289755), ('stock', 0.05612008516956608), ('start', 0.07883623123638848), ('stake', 0.12994413416095735), ('spokesman', 0.12350417658804835), ('sharpli', 0.09387640337337189), ('say', 0.0625671980510847), ('sale', 0.05382849864343546), ('said', 0.08498191378962162), ('rumor', 0.3388058471878903), ('rose', 0.0689287681034611), ('reuter', 0.0769460756178783), ('respons', 0.09005627272327361), ('report', 0.14944083778014502), ('publish', 0.10417403248016324), ('publicli', 0.12939894446788078), ('profit', 0.058415481055700706), ('product', 0.05876145580398898), ('probabl', 0.09225607438451161), ('presid', 0.07246250195954887), ('prepar', 0.09716102448178812), ('plan', 0.06326549606

[('six', 0.29662907741234024), ('shr', 0.23085796954214627), ('rev', 0.26490547703619016), ('net', 0.3530636813331234), ('mth', 0.36522911784943907), ('mln', 0.26698357214449453), ('inc', 0.19758307378529855), ('feb', 0.37514855582175), ('broadcast', 0.528619775633117)]
[('year', 0.06938956076867839), ('unit', 0.1052322981979595), ('televis', 0.32591231163260764), ('tax', 0.12648272241247135), ('shr', 0.2209105473681098), ('seven', 0.24541076678542717), ('rev', 0.179859430738667), ('reserv', 0.13186151685311923), ('quarter', 0.1228196963798791), ('qtr', 0.09868034163623028), ('profit', 0.2528287940490438), ('pretax', 0.17701318642662772), ('period', 0.12698472357127016), ('note', 0.09722505418285919), ('nine', 0.2227705985449913), ('net', 0.1995399750570714), ('mth', 0.24797486998206306), ('mln', 0.20216712525754188), ('loss', 0.2407991843663933), ('invest', 0.12265186122429982), ('increas', 0.10530005255275278), ('includ', 0.10090925423525117), ('entiti', 0.2229778969962717), ('dlr', 

[('urg', 0.14518872678699357), ('tradit', 0.1620718577028029), ('trade', 0.14012324460645273), ('total', 0.09014231709886915), ('third', 0.12453123314806684), ('stop', 0.14636756330736356), ('said', 0.04048713256506892), ('remaind', 0.18202148888996128), ('reli', 0.18664991932563313), ('product', 0.08620947958585652), ('privat', 0.12188669585357462), ('plan', 0.09281739903399845), ('one-third', 0.19099235662417813), ('oil', 0.09414329467668019), ('offici', 0.09210689271875379), ('nation', 0.09557434090405008), ('make', 0.09724992301644537), ('major', 0.09912136548714331), ('institut', 0.12870780098574686), ('heavili', 0.1837826924758185), ('govern', 0.09475322555720905), ('foreign', 0.10093342154428188), ('forecast', 0.11673385965778613), ('export', 0.19220252180717237), ('energi', 0.12660180353316436), ('dlr', 0.05373840321542384), ('diversifi', 0.29446785925896), ('director', 0.12178590833265034), ('depart', 0.10892569461905717), ('commun', 0.11919262285117659), ('colombia', 0.363656

[('year', 0.08646535889683696), ('would', 0.04743042819814404), ('worldwid', 0.10331810895460558), ('work', 0.07535861781034044), ('without', 0.08501376372818627), ('well', 0.07311442939215956), ('unit', 0.09303958444320798), ('union', 0.24435016070361382), ('u.s.', 0.04800025411064087), ('two', 0.05133998937521352), ('total', 0.05819220269685569), ('texa', 0.2829593329583535), ('take', 0.06540804656844854), ('substanti', 0.08567170727423105), ('state', 0.06261821191312049), ('spent', 0.11542504354462649), ('spend', 0.15580642146694323), ('signific', 0.09135557401712127), ('seek', 0.07630570087411105), ('sale', 0.12165636545756103), ('said', 0.08356535096734041), ('robert', 0.10117617076407368), ('revenu', 0.07804467953368929), ('reserv', 0.16431078757502018), ('report', 0.09183655693037655), ('replac', 0.10073260107105249), ('releas', 0.08801818136143014), ('receiv', 0.07350240049746155), ('prove', 0.10888966353052554), ('properti', 0.08758773627623445), ('profit', 0.0553256484970822)

[('year', 0.05523027177403373), ('use', 0.10668566865569151), ('two', 0.07825549110387067), ('total', 0.15018223218166618), ('three', 0.08951682135772852), ('surplu', 0.12365211823717569), ('share', 0.0703032234018911), ('sell', 0.09780239202318679), ('second', 0.19919721848660454), ('sale', 0.07770874039022883), ('said', 0.10395828671630408), ('revenu', 0.24965171858975743), ('report', 0.17350505797418658), ('quarter', 0.20515555122539025), ('qtr', 0.07854412143481421), ('provid', 0.11188773821614739), ('product', 0.1436299009844971), ('princip', 0.15748365853907526), ('plant', 0.12388026242620222), ('plan', 0.0913323264251724), ('phase', 0.18680493113939883), ('per', 0.08659297951730781), ('pct', 0.10230512277605934), ('oper', 0.08128667371399964), ('nevada', 0.19917572511110773), ('mln', 0.10562157670474673), ('machin', 0.19163789587876634), ('loss', 0.1916628992631518), ('liquid', 0.13416259123407004), ('light', 0.15068902027078168), ('last', 0.07396676049021621), ('largest', 0.123

[('year', 0.09113365669763858), ('william', 0.3384825271413283), ('volum', 0.15017548664861932), ('use', 0.0674622360361224), ('unit', 0.12638943146861709), ('transport', 0.08780352292157083), ('total', 0.09496710592468877), ('took', 0.09297083495828551), ('telecommun', 0.17452030602861754), ('tax', 0.06366033136502512), ('tariff', 0.08909294399238026), ('system', 0.06469360125235249), ('substanti', 0.08257548187547191), ('special', 0.08132929671655781), ('somewhat', 0.12036600814777798), ('show', 0.06935235171630329), ('seven', 0.07295189720538264), ('servic', 0.06842292701935705), ('see', 0.06439678511830105), ('sale', 0.11725963355347156), ('said', 0.07421407699178124), ('revenu', 0.07522409935969164), ('result', 0.0591752856743862), ('respect', 0.08098893218035719), ('reserv', 0.0663675456778176), ('report', 0.08851753027444402), ('remov', 0.10110928712756038), ('region', 0.0915293472457254), ('receiv', 0.07084598093339783), ('ratio', 0.10632979563256839), ('rate', 0.05679531135621

[('year', 0.023346648967666545), ('would', 0.08531815657445109), ('word', 0.07994220871063959), ('west', 0.08013335477779483), ('well', 0.04710961065492307), ('want', 0.05078322920297494), ('volum', 0.05929213122449229), ('univers', 0.07375464059096012), ('u.s.', 0.08634316305804185), ('two', 0.033079748150965614), ('trend', 0.06139353750187454), ('transact', 0.12028969598885499), ('time', 0.04261913364051089), ('test', 0.06624961664324497), ('term', 0.042556080308433136), ('tell', 0.07061128882150688), ('talk', 0.04659547526876184), ('take', 0.042144179106034525), ('system', 0.09075828148080009), ('suffici', 0.07375464059096012), ('substanti', 0.05520060550817022), ('strategi', 0.06724040791065407), ('stock', 0.05798531341455734), ('steep', 0.08046306702004606), ('start', 0.04810949856326935), ('stabil', 0.057970463263731606), ('specul', 0.10192900227016893), ('sometim', 0.0810081598419233), ('signal', 0.06972653793937611), ('sharp', 0.09788975102200245), ('set', 0.03795274624475739),

[('yesterday', 0.19844323122316082), ('trader', 0.10787535189918458), ('touch', 0.17101880209876924), ('today', 0.07771792071197439), ('three', 0.07751609020950465), ('suppli', 0.09760645437400114), ('spot', 0.13809881213946903), ('six', 0.13881175553521896), ('short', 0.28187350331649547), ('sever', 0.10049172179590608), ('seven', 0.09990084218936594), ('saudi', 0.2383062777658362), ('said', 0.07239883271468123), ('rose', 0.14589487561667508), ('riyal', 0.3072517957392661), ('rise', 0.13642376098740575), ('rate', 0.13168604522113644), ('quot', 0.20133908766972372), ('posit', 0.10301241183368784), ('period', 0.14818916075150546), ('pct', 0.1541378769157075), ('one', 0.11111271707333055), ('month', 0.1527048525892627), ('money', 0.09625289121216152), ('lend', 0.12182197015288332), ('interest', 0.1353341227388717), ('high', 0.09953879502534342), ('found', 0.1338422155524509), ('fix', 0.12391052789368064), ('five', 0.08143735215317226), ('firmer', 0.1617616407078977), ('edg', 0.1573853268

[('yesterday', 0.09685083045042024), ('west', 0.058649323316415154), ('well', 0.05837863115502455), ('week', 0.08044607202892783), ('volum', 0.17533372868840758), ('unchang', 0.06951644433966729), ('tuesday', 0.14108129705224143), ('tie', 0.0899796414554968), ('thu', 0.08931951503546076), ('term', 0.05273585752868027), ('tender', 0.1428680012125019), ('suppli', 0.09997186810741984), ('stood', 0.08078468783546333), ('still', 0.059004939794644204), ('state', 0.04999786673101261), ('split', 0.06430446094249952), ('specul', 0.07460148181789701), ('soon', 0.0727359405146311), ('smaller', 0.0846661732501698), ('sinc', 0.053926267470596125), ('seven', 0.10232185856180129), ('set', 0.07963102594528673), ('secur', 0.052478399003307696), ('said', 0.054456644717573936), ('reserv', 0.09308668975148306), ('requir', 0.06083568344478781), ('repurchas', 0.19852445787919898), ('remark', 0.0831129080774268), ('remain', 0.05403948660497745), ('rather', 0.07569557677594237), ('rate', 0.17121369490618765),

[('year', 0.037512893721002104), ('would', 0.049104294824241966), ('world', 0.16945767050906668), ('winter', 0.10570025932992799), ('wheat', 0.14246238634639738), ('upward', 0.10804044208667873), ('unusu', 0.11899641781047772), ('union', 0.13909060936731465), ('u.s.', 0.04969423045562284), ('u.n.', 0.1391912067093576), ('trade', 0.05531131522697622), ('total', 0.06024586296714614), ('tonn', 0.188237064841293), ('thu', 0.11581317742023096), ('suppli', 0.12962519661643856), ('substanti', 0.08869514638399598), ('stock', 0.11548126217074466), ('soviet', 0.16059918486811217), ('see', 0.06916922739052202), ('secur', 0.0680443700582884), ('season', 0.08551190599374922), ('said', 0.06457130250757044), ('rise', 0.06319918616867345), ('revis', 0.08477348827311756), ('remain', 0.0700685023580319), ('reduc', 0.12247678178675735), ('record', 0.12123712840364509), ('prospect', 0.09352605547605136), ('product', 0.05761738394079568), ('plant', 0.08414058032408218), ('pct', 0.0694865528367125), ('parti

[('zone', 0.09299580949729583), ('yet', 0.045074311887742004), ('yesterday', 0.08482727986085047), ('yen', 0.10705095218379569), ('year', 0.03773149787407745), ('would', 0.05616099087341937), ('worth', 0.043837887527960556), ('worri', 0.09503593076056226), ('word', 0.06156351730102776), ('within', 0.04074685560508012), ('wilson', 0.10715507863830025), ('willing', 0.060116655838187566), ('whose', 0.05353383532238126), ('whether', 0.041064899437622746), ('west', 0.08697409316801254), ('welcom', 0.05473876380508515), ('week', 0.04999283115817377), ('weak', 0.04876465666084946), ('washington', 0.07610524325316112), ('warn', 0.04925253545555653), ('want', 0.03910817902162004), ('volcker', 0.09967565582814954), ('view', 0.04586520258866016), ('unsuccess', 0.10423609574358858), ('unit', 0.07114980585264047), ('undermin', 0.062384406565550936), ('u.s.', 0.06215043971940367), ('treasuri', 0.04047747561221727), ('trade', 0.07400878107224226), ('tokyo', 0.09956188460468758), ('today', 0.029216529

[('short-term', 0.3906197905395215), ('rate', 0.32480173350088715), ('pct', 0.15503646873975083), ('old', 0.25314144351221224), ('note', 0.117272915632801), ('mortgag', 0.24631454136759603), ('matur', 0.39311561558913755), ('loan', 0.1736973091724905), ('home', 0.21448658041653249), ('follow', 0.1635846180620537), ('feder', 0.15958094124163252), ('discount', 0.3523525155349706), ('day', 0.27156036678900414), ('corp', 0.10448067353670067), ('adjust', 0.2960146064252061)]
[('weather', 0.22347245677280825), ('temperatur', 0.26818972189311463), ('santa', 0.4540846714784829), ('reuter', 0.1647352243571411), ('report', 0.12260899211156481), ('rain', 0.25379362415164375), ('hour', 0.23529847191550368), ('grain', 0.31280387500854817), ('gmt', 0.24243741906252433), ('bueno', 0.31469932323510985), ('argentin', 0.44973314337256004), ('air', 0.2282027189851774)]
[('year', 0.05889144203090108), ('would', 0.0770887672280823), ('worri', 0.10858723207633152), ('west', 0.07051013343688856), ('upper', 0

[('u.k.', 0.07164623638009433), ('trade', 0.12869962724578507), ('tonnag', 0.10701620066567188), ('tonn', 0.10179775298736839), ('take', 0.10223619072780214), ('suspens', 0.11064618763748446), ('suspend', 0.15578756921562326), ('suppli', 0.0682670367537127), ('substanti', 0.07908901931290618), ('strong', 0.07461657646111865), ('stg', 0.17165150471702864), ('step', 0.08545994974895779), ('statement', 0.06384816140817379), ('squeez', 0.12063022250056664), ('sought', 0.09990114157159359), ('situat', 0.08328289840382326), ('short', 0.08261573162283349), ('section', 0.10701620066567188), ('secretari', 0.07670430127644229), ('say', 0.09262297850316542), ('said', 0.05757790814134362), ('rule', 0.07655168063811076), ('richard', 0.09279163319821199), ('result', 0.056676815021268985), ('restor', 0.10253999744773712), ('respons', 0.07873925344115364), ('relationship', 0.11382321158311925), ('recent', 0.06356540685743473), ('qualiti', 0.16620421137528119), ('price', 0.10089657642614334), ('premium

[('would', 0.10578630170247326), ('way', 0.10589904531866455), ('want', 0.10382252110402825), ('trade', 0.07037675361958647), ('told', 0.07624160420886088), ('tax', 0.18258499696412023), ('strongli', 0.14389434247203634), ('sprinkel', 0.47724225391310526), ('spend', 0.12121833609188928), ('signific', 0.12034075392768465), ('say', 0.0780589920920995), ('said', 0.0722541818139601), ('rule', 0.10923294387301424), ('revenu', 0.1028065958336437), ('restrain', 0.16241634904244387), ('remain', 0.08915343464007164), ('reduc', 0.19315524702257805), ('reagan', 0.11788584522456924), ('promot', 0.13968082632355616), ('polici', 0.0986332215378581), ('plan', 0.07893019042841268), ('oppos', 0.13449040359757516), ('new', 0.06959747908763184), ('lower', 0.08859738691271292), ('increas', 0.12263797531461978), ('includ', 0.06941169803913545), ('hous', 0.10044066326607695), ('hike', 0.14844689665687588), ('growth', 0.1612908441343898), ('gradual', 0.1478961755286143), ('govern', 0.08057638131184194), ('ef

[('year', 0.09619847320352405), ('valu', 0.1912678335694758), ('trend', 0.14940721562503498), ('tradit', 0.16405860834379596), ('trade', 0.1418409391524756), ('total', 0.09124732267365057), ('surplu', 0.21537364566057382), ('statist', 0.13403751279053921), ('show', 0.1128243477265023), ('set', 0.09236174314893597), ('rose', 0.10236568727100155), ('regist', 0.15900185730062344), ('plan', 0.09395519698140185), ('period', 0.1039754495990467), ('pct', 0.10524310875302453), ('one-third', 0.19333362791165656), ('nation', 0.09674593470043764), ('mln', 0.1235494273854613), ('major', 0.10033644032622369), ('last', 0.12883314165248885), ('januari', 0.16829129227728368), ('jan', 0.11579739253056348), ('institut', 0.13028555982515094), ('govern', 0.09591475374773024), ('forecast', 0.11816483647133005), ('fob', 0.18793731588681759), ('figur', 0.10899900594023025), ('fall', 0.10366661549280416), ('export', 0.2212291231944182), ('drop', 0.11964989443294072), ('dlr', 0.14194599345823558), ('depart', 0

[('year', 0.10044392368487604), ('treatment', 0.20186586669202214), ('ton', 0.41744908060940933), ('throughout', 0.1818550466770966), ('said', 0.0898040937380409), ('recent', 0.11273347297476971), ('program', 0.12861323533414354), ('process', 0.14575581713585228), ('prior', 0.1061767791719205), ('plan', 0.09810164674268144), ('per', 0.09301103146609291), ('ounc', 0.16834017477833402), ('oper', 0.08731143573914879), ('one', 0.08140172839024672), ('mln', 0.0915307295248806), ('miner', 0.270863978212784), ('ltd', 0.10292033691800571), ('locat', 0.2947637543387593), ('invest', 0.10485978481528233), ('intent', 0.14632657936766907), ('hour', 0.170423111393375), ('gold', 0.14322803372025011), ('facil', 0.2562330528063893), ('explor', 0.14872577079595325), ('expans', 0.15736065797759982), ('expand', 0.145195790882909), ('estim', 0.10802859981708612), ('enabl', 0.17129887603795715), ('dlr', 0.05679781919793128), ('deposit', 0.3501010207351628), ('day', 0.11368075917337063), ('complet', 0.112609

[('wheat', 0.095705275697033), ('use', 0.13955151551575767), ('usag', 0.1552578340957362), ('unless', 0.11420703834104971), ('u.s.', 0.09570421316645501), ('ton', 0.11506375045341583), ('thousand', 0.13837306167148622), ('stock', 0.21812268502419652), ('soybean', 0.2659589591768791), ('short', 0.10538451307522441), ('sept', 0.3704908940223428), ('season', 0.2865341954952387), ('project', 0.23278906747799816), ('oil', 0.07156790112912663), ('note', 0.059785380504765125), ('month', 0.2857182802477983), ('mln', 0.09278509391459971), ('may', 0.1128808116114402), ('last', 0.25150186291340765), ('juli', 0.17358982225187858), ('follow', 0.08339494744199161), ('export', 0.23657331042486313), ('domest', 0.14754182209538427), ('depart', 0.08280550802572594), ('crush', 0.2543032244857461), ('cotton', 0.12743375811576899), ('corn', 0.10326773478533248), ('comparison', 0.13837306167148622), ('bushel', 0.2185019106934634), ('bale', 0.16619713765145347), ('agricultur', 0.08364541751126107)]
[('usda',

[('yield', 0.14929436925303266), ('year', 0.036259354398105056), ('wheat', 0.09247765465270343), ('weight', 0.055038902990353564), ('use', 0.12145227557110241), ('usda', 0.08651412416996078), ('usag', 0.22412164498483908), ('unless', 0.04624553230408268), ('u.s.', 0.04803363683518573), ('total', 0.11593339386945503), ('tonn', 0.052581036774923526), ('ton', 0.09777946359911821), ('thousand', 0.05603101163024805), ('tabl', 0.05657315651217491), ('suppli', 0.13302751164649423), ('stock', 0.10792004124293614), ('start', 0.13202000280299195), ('soybean', 0.14429147261440523), ('sorghum', 0.16925951977714623), ('sinc', 0.03220470097004058), ('short', 0.11135268013652436), ('sept', 0.13193579800432534), ('seed', 0.09894497048224965), ('season', 0.14332527147625812), ('round', 0.04835223856228008), ('rice', 0.18752724396138062), ('revis', 0.03904517223064018), ('retain', 0.047525756938192894), ('residu', 0.12763397061443602), ('rang', 0.038402545416703175), ('project', 0.08289869435371397), ('

[('year', 0.059605250695685695), ('whose', 0.17747657660395735), ('three', 0.09660775526034733), ('subject', 0.13120381735764222), ('stock', 0.08743458692694163), ('sharehold', 0.10918796080035258), ('share', 0.07587218242794587), ('sell', 0.10554965434590728), ('satisfactori', 0.18680349775875119), ('said', 0.10259908772529318), ('result', 0.10099341403794344), ('purchas', 0.10811753862335587), ('price', 0.08567060528462263), ('perform', 0.14922290548362468), ('payabl', 0.22176365432282777), ('mln', 0.11398822341699708), ('may', 0.09313201260100713), ('maximum', 0.17348197055947914), ('march', 0.08686078347800645), ('investig', 0.1802744187390782), ('intern', 0.23296642532702722), ('inc', 0.11523446631847174), ('home', 0.2586221326522896), ('halt', 0.1725614330641916), ('futur', 0.120590774814151), ('entitl', 0.1898744078738899), ('enter', 0.15028008645085544), ('end', 0.08804647796703574), ('eight', 0.12003750737541274), ('earn', 0.11018522341616786), ('earlier', 0.1050235557525222),

[('york', 0.14616656202093892), ('yesterday', 0.13025412593604144), ('week', 0.18318406630705913), ('wednesday', 0.1856296235675449), ('two-week', 0.22858935979587908), ('statement', 0.1257486384072477), ('spokesman', 0.2126737380884893), ('second', 0.14033408766609112), ('said', 0.08046026800571933), ('reserv', 0.1251917547167122), ('period', 0.20412859624519067), ('new', 0.09606152928906898), ('n.y.', 0.22027985241281317), ('mln', 0.12598744360480407), ('major', 0.11634213240986922), ('made', 0.11835654116814728), ('half', 0.23742180824054004), ('first', 0.10778122958650538), ('feder', 0.1256085357081334), ('fed', 0.26930572241907785), ('end', 0.16476842055438304), ('eight', 0.13267351869847638), ('dlr', 0.13236912945238022), ('discount', 0.16380260158656684), ('daili', 0.16682107822330308), ('citi', 0.17569100898083034), ('borrow', 0.40795968093267077), ('bank', 0.2518691099184253), ('averag', 0.22826836775736387), ('april', 0.09806603315311362)]
[('wheat', 0.282016531725758), ('u.s

[('set', 0.22094383568713225), ('regular', 0.34121664248675915), ('record', 0.2093081306449953), ('qtli', 0.2719504478413821), ('prior', 0.24325614804882179), ('pay', 0.23104829034543353), ('june', 0.29065238602327403), ('juli', 0.32657417654874105), ('five', 0.391847612620996), ('enterpris', 0.3693568454636461), ('dividend', 0.242800631587121), ('div', 0.25373320416403267)]
[('treasuri', 0.3173380642532719), ('system', 0.26110192994365455), ('stabil', 0.3499957741073799), ('say', 0.23051952272495638), ('rate', 0.2292246083041849), ('need', 0.28276325560020676), ('greater', 0.37389339078427614), ('float', 0.42631456263115336), ('exchang', 0.233910150966014), ('baker', 0.38305708507608405)]
[('west', 0.11869595948198929), ('weigh', 0.1236592581816761), ('week', 0.1191847147276909), ('valu', 0.21173511429030492), ('u.s.', 0.12789411615553678), ('tone', 0.1236592581816761), ('three', 0.05604987343388843), ('texa', 0.08769626346529033), ('sweet', 0.12083855638883387), ('support', 0.0754741

[('yen', 0.05084937575771191), ('would', 0.04939096569008499), ('window', 0.07844406786492987), ('william', 0.06539797543467145), ('weekli', 0.06104788908027473), ('week', 0.09549970882499698), ('wednesday', 0.1849817469774729), ('weak', 0.06044316954109951), ('washington', 0.055713690312146534), ('ward', 0.07968808065545409), ('warburg', 0.08268850254767413), ('view', 0.09625428826620201), ('via', 0.10981923984451927), ('unit', 0.057222064469529425), ('two-week', 0.16227451467746548), ('two-day', 0.07680436855595991), ('two', 0.08815146467381078), ('tuesday', 0.10867118217142915), ('tri', 0.05329532414793836), ('trade', 0.03285846813305595), ('total', 0.03578990592306419), ('took', 0.059323777393404414), ('told', 0.03559673036144005), ('today', 0.08641610634758849), ('time', 0.04068121896032138), ('tighten', 0.07011780861991997), ('thursday', 0.06558284885009735), ('temporari', 0.06044316954109951), ('system', 0.08663145413808676), ('sure', 0.06984163194658886), ('support', 0.04863677

[('year', 0.05051554570959307), ('would', 0.0819599077217881), ('without', 0.07000054920795383), ('widen', 0.09244671214494854), ('way', 0.0661952110932396), ('washington', 0.21973430569750646), ('wang', 0.1179735257867618), ('unit', 0.09495491853714291), ('u.s.', 0.11034019270579862), ('trade', 0.13546768841306628), ('tough', 0.09630743247926343), ('told', 0.08069028771508148), ('today', 0.10174646617156627), ('time', 0.054464108706406306), ('thursday', 0.08780246758415465), ('third', 0.0661952110932396), ('televis', 0.0827641489498772), ('tariff', 0.21248086052484158), ('talk', 0.15538049388065658), ('take', 0.05385715184955145), ('taiwan', 0.2422678011259129), ('surplu', 0.11309656878847282), ('state', 0.10820443890286342), ('start', 0.10409534906290822), ('soybean', 0.07793095613367873), ('show', 0.0592460909714767), ('shortli', 0.09145906079652957), ('set', 0.048500809861989055), ('seri', 0.08466212272287255), ('sentiment', 0.09314330055888063), ('sent', 0.08857053083811402), ('sa

[('year', 0.07622584372039838), ('whose', 0.1340491462527467), ('ventur', 0.2421175094940138), ('u.k.', 0.09642818839803986), ('tonn', 0.1698190150996034), ('suppli', 0.09188014631417339), ('state-own', 0.1340491462527467), ('sign', 0.09627603250638173), ('show', 0.0893998710297743), ('said', 0.08474019549864485), ('rate', 0.0732129968874233), ('project', 0.10292838338808273), ('product', 0.06914813917323524), ('plant', 0.1709728599992361), ('peke', 0.16098558911996524), ('paper', 0.11984569303144882), ('mln', 0.04102520716546884), ('metal', 0.12072328157141178), ('manag', 0.08564679436779933), ('major', 0.0795046903040414), ('ltd', 0.07810516783648817), ('jointli', 0.14165175815043568), ('joint', 0.11018756793514967), ('involv', 0.10909183496913671), ('invest', 0.07957699457222453), ('industri', 0.06820933677142482), ('import', 0.1359185364028995), ('gave', 0.11239731425497823), ('first', 0.07365442854936861), ('figur', 0.08636874282714083), ('equip', 0.11148732290523986), ('econom', 

[('work', 0.10594260041609992), ('u.s.', 0.11425519417725284), ('total', 0.08180926689979352), ('subject', 0.11212895894098696), ('statement', 0.09723157739486105), ('stake', 0.17301865828636842), ('site', 0.17129003171837842), ('said', 0.08768288245434976), ('requir', 0.10711376461659619), ('regulatori', 0.135603450557085), ('product', 0.13247182479301423), ('plu', 0.13843272067344053), ('petroleum', 0.19001573341214414), ('permit', 0.2477707765339196), ('pct', 0.1454213910877463), ('paid', 0.11776103235686546), ('oil', 0.08544038104304578), ('obtain', 0.1416145095840725), ('new', 0.1257616562187426), ('mln', 0.11077024272201025), ('ltd', 0.0883747258417436), ('juli', 0.12239803408701237), ('intern', 0.0834335340865423), ('interest', 0.17866367488431245), ('hold', 0.09317451838122676), ('guinea', 0.3272546909027914), ('explor', 0.21622595327262392), ('exchang', 0.08453261890438322), ('due', 0.09340714917337047), ('dlr', 0.10235068481105464), ('discoveri', 0.2824054704262388), ('corp',

[('would', 0.039799237638717434), ('worth', 0.0741333497904955), ('violat', 0.08802277602599665), ('tri', 0.07271283126213392), ('trade', 0.04483005380094916), ('total', 0.04882952551423937), ('toronto', 0.10005428530838853), ('today', 0.04940747127007069), ('time', 0.05550292932994947), ('third', 0.0674577847788595), ('term', 0.0938355965411145), ('tax', 0.05542081492884857), ('talk', 0.10274240922265047), ('takeov', 0.1236941490992591), ('surpris', 0.08947716705672074), ('substanti', 0.12171660805573371), ('subsidiari', 0.05953121226214741), ('stock', 0.09359804905870465), ('statement', 0.15143780375734228), ('spirit', 0.10784925754529513), ('specul', 0.07839969066586255), ('sharehold', 0.055696265657978763), ('share', 0.08122058446980152), ('seem', 0.08312133391918496), ('said', 0.07012032966723558), ('requir', 0.06393302984463628), ('receiv', 0.061676430417069765), ('purchas', 0.05515024833611293), ('prospect', 0.07580326162153984), ('propos', 0.13317396588033623), ('prevent', 0.08

[('would', 0.1077274697222311), ('without', 0.07399658712722539), ('white', 0.14240755509142333), ('underway', 0.11101631111378706), ('u.s.', 0.08767952615080876), ('tri', 0.07542508245117181), ('treasuri', 0.07100389360484254), ('trade', 0.04650225339235163), ('top', 0.08410819610160963), ('told', 0.08529655799545466), ('think', 0.1283080473225594), ('texa', 0.07997873023468573), ('televis', 0.14813142736776932), ('telephon', 0.0884602010229141), ('tax', 0.12064514712290253), ('take', 0.09639363031773905), ('taiwan', 0.0869333555244141), ('surplu', 0.11955277781157132), ('support', 0.11654299671292208), ('summit', 0.18796695415847428), ('steadi', 0.09179317824307966), ('south', 0.07113747035217721), ('so-cal', 0.09963349775116263), ('servic', 0.06178889086583457), ('seri', 0.08949512841432189), ('senat', 0.18440556842979383), ('secretari', 0.07232082893921031), ('say', 0.05157838125696302), ('sale', 0.04437447915206581), ('said', 0.06351167149132976), ('richard', 0.0874888072747346), 

[('year', 0.07747791360221801), ('writedown', 0.1292375517123258), ('work', 0.09516954569098657), ('unless', 0.12247998725488508), ('unit', 0.06939658862900057), ('told', 0.07309361147557862), ('tire', 0.15216997088552092), ('three', 0.07416699403687492), ('takeov', 0.10995166488060493), ('stock', 0.06712463683429329), ('share', 0.1389969523295792), ('set', 0.07438782464565524), ('said', 0.0558872683038899), ('rubber', 0.13708623651685506), ('robert', 0.1277742411704548), ('result', 0.13127633750211168), ('restructur', 0.23241766056659396), ('reserv', 0.08695751777498839), ('report', 0.11597934854491299), ('quarter', 0.16997666263818512), ('program', 0.1679711218268139), ('predict', 0.1152072876577734), ('per', 0.12147402448153143), ('part', 0.09025137879672879), ('outstand', 0.15888120373212564), ('oper', 0.06734810455937963), ('one', 0.0627896227857174), ('oil', 0.07675214895230893), ('normal', 0.11855695196388778), ('net', 0.055143580991916026), ('mln', 0.09950627636597056), ('meet'

[('work', 0.10659044680794082), ('ventur', 0.22237433464483064), ('use', 0.09899936185241112), ('u.s.', 0.06789360900201992), ('ton', 0.13820732943466912), ('three', 0.08306746633488556), ('test', 0.14543278071069518), ('technolog', 0.12848746556594873), ('substanti', 0.12117772090966311), ('spend', 0.1301595383513903), ('sign', 0.10960090706815961), ('show', 0.10177306544066676), ('said', 0.08821906939710131), ('right', 0.11648799382530019), ('respond', 0.14441056864270735), ('resourc', 0.11682869132547731), ('reserv', 0.0973929275972504), ('properti', 0.12388783414362016), ('product', 0.07871843675069976), ('process', 0.12592165725584603), ('principl', 0.13386996793197764), ('ounc', 0.14543278071069518), ('ore', 0.2583775213335264), ('oper', 0.07543027030896163), ('much', 0.11003615070497169), ('mln', 0.04670321159834354), ('mine', 0.1997881291247524), ('may', 0.08007887462643114), ('ltd', 0.08891514345507037), ('joint', 0.12543783825456614), ('interest', 0.08565479993136481), ('indi

[('year', 0.11600551348885631), ('worldwid', 0.12676226131914275), ('without', 0.17660275928070282), ('valu', 0.08839046576000242), ('u.s.', 0.12359169188676233), ('substanti', 0.10511167359755552), ('strong', 0.16790544934781956), ('strength', 0.1304614114579146), ('specif', 0.11813259821541892), ('share', 0.0958129985927817), ('said', 0.0987503383267008), ('rose', 0.08009632717274431), ('revenu', 0.22849718075361067), ('result', 0.07532518335440401), ('report', 0.11267540362525952), ('record', 0.06846277932036332), ('rate', 0.0722956753452375), ('quarter', 0.219676470240492), ('profit', 0.1424532146585214), ('product', 0.14329691592047528), ('prior', 0.18986996365007963), ('post', 0.10799059157650163), ('plan', 0.07351551479431953), ('particularli', 0.12081325328127085), ('pacif', 0.11290265907046661), ('order', 0.2159023266988131), ('optim', 0.14556407945343264), ('new', 0.0648230350863609), ('mln', 0.10571141477761856), ('market', 0.061644333364489164), ('line', 0.16760201452798124

[('york', 0.12116351914959549), ('week', 0.08968438413633442), ('wagner', 0.1858273259989734), ('transact', 0.11791132056636229), ('told', 0.08723120254539314), ('texa', 0.2344791773208062), ('tender', 0.11301020227289973), ('take', 0.09857997189969289), ('stock', 0.08010772314122878), ('stake', 0.18548668790755413), ('sold', 0.1913429053528693), ('share', 0.16588140956620995), ('sell', 0.0967047799409109), ('secur', 0.09905748005635207), ('sale', 0.07683663439582192), ('said', 0.06669684973431884), ('remain', 0.10200416682472838), ('recent', 0.1037766323587528), ('pct', 0.05974494055829833), ('partnership', 0.2503002995057199), ('partner', 0.2766985786708961), ('open', 0.1125335246071105), ('offer', 0.09181840124174376), ('new', 0.07962938159473289), ('nearli', 0.14194476849705726), ('midland', 0.18363349346077504), ('market', 0.0757246268721325), ('leav', 0.14360638479943574), ('last', 0.07313664981172052), ('industri', 0.08273931915469653), ('includ', 0.07941682174058701), ('inc', 0

[('year', 0.097193470285734), ('would', 0.06062388850807984), ('versu', 0.12960853660972219), ('trust', 0.11349702102124044), ('trade', 0.11562001140743233), ('total', 0.07437920639459475), ('three', 0.07506411168024882), ('tax', 0.14293436458349734), ('statu', 0.14753219055196112), ('six', 0.07939113759051145), ('share', 0.05895259609176706), ('secur', 0.14223655340804203), ('said', 0.05656327594160626), ('result', 0.0784717633681052), ('remaind', 0.15019154517127128), ('reduc', 0.1512091516728837), ('receiv', 0.09394815737735013), ('quarter', 0.0819744965767013), ('put', 0.10349843339524506), ('provis', 0.11063049573880886), ('profit', 0.07071527864164573), ('place', 0.1095025330235806), ('period', 0.08475439277662783), ('per', 0.07261233125377844), ('pct', 0.05066760382799666), ('payment', 0.09652980130746533), ('partli', 0.12561404861206235), ('offset', 0.11832339117210022), ('net', 0.13318050348893937), ('month', 0.07046299788188824), ('mln', 0.13938054166300423), ('loss', 0.14134

[('year-ago', 0.260635678681677), ('tone', 0.3395224241652993), ('tax', 0.17307169880205592), ('split', 0.21103823503422148), ('shr', 0.2658394930802543), ('reflect', 0.20209849946339556), ('qtr', 0.1350285164613569), ('oper', 0.13974335391591375), ('one', 0.13028477247574158), ('note', 0.13303718462570205), ('net', 0.19372936820753997), ('mln', 0.0865231345541359), ('loss', 0.13807843254978397), ('includ', 0.23378710876780331), ('gain', 0.17058052917852504), ('figur', 0.18215372628743834), ('feb', 0.20584754690399898), ('dlr', 0.153916934547314), ('carryforward', 0.27488320768379987), ('benefit', 0.22814323461037062), ('bank', 0.1391121721482957), ('bancorp', 0.28357147287275963), ('avg', 0.18784392581246412), ('april', 0.1413369777841976), ('acquir', 0.16922460514306464)]
[('use', 0.1544065535746989), ('two', 0.1132594549192954), ('takeov', 0.19206818421495822), ('share', 0.21353402943985444), ('said', 0.09762622653798296), ('resourc', 0.182214463292219), ('recent', 0.151901042708341

[('year', 0.05033825919677674), ('would', 0.0928680673914795), ('wood', 0.09940917278889289), ('without', 0.06975487922478368), ('withdraw', 0.14731235674043544), ('valu', 0.059112215987993), ('use', 0.057429149434546564), ('usda', 0.1201061209980398), ('u.s.', 0.13725242711544272), ('trade', 0.07422182205184928), ('total', 0.04774744574210021), ('time', 0.05427296453938655), ('third', 0.06596289611029697), ('submit', 0.19060422591784099), ('standard', 0.08032658211339916), ('so-cal', 0.09392220468797861), ('slaughter', 0.10631216286423359), ('signific', 0.07495841558609037), ('shipment', 0.06952936260737148), ('ship', 0.11408287525646674), ('seen', 0.06930720368244876), ('said', 0.05596099324322472), ('rule', 0.14278859491440812), ('robert', 0.08301634067912851), ('review', 0.14921445830947028), ('retali', 0.14250323647426702), ('requir', 0.14918217725801866), ('request', 0.07943102945378214), ('repres', 0.06294192992092977), ('remain', 0.05553231126251439), ('regul', 0.08247368455624

[('year-end', 0.11006072114348323), ('year', 0.07508300996159034), ('u.s.', 0.047395255185108), ('total', 0.05745874366089229), ('three', 0.05798784042258143), ('though', 0.10152389680534113), ('system', 0.06627335576166249), ('substanti', 0.08459189443136557), ('subsidiari', 0.07005164660458157), ('strength', 0.10499288582987143), ('signific', 0.0902041212770087), ('show', 0.07104586835835447), ('sharp', 0.08859871469068523), ('share', 0.10867547209619144), ('see', 0.0659692916677336), ('sector', 0.0839422436568028), ('said', 0.0825120868027282), ('rose', 0.06446001495969403), ('result', 0.1446578521660798), ('restructur', 0.08658908917658949), ('repurchas', 0.09408222238355668), ('report', 0.053556502875510996), ('remain', 0.06682696400063695), ('quarter', 0.16524575504868994), ('program', 0.0775650663845287), ('profit', 0.0546283197325721), ('produc', 0.06477068438591878), ('pre-tax', 0.09593850943537245), ('period', 0.11085658968079523), ('pct', 0.039141273500816406), ('order', 0.2

[('york', 0.05172867776782557), ('yesterday', 0.04609723055201936), ('would', 0.07282794747189163), ('worldwid', 0.06648037468552938), ('will', 0.06861380873131685), ('whether', 0.09016308936708425), ('week', 0.09136930974009302), ('wednesday', 0.0656947447413637), ('venezuelan', 0.14115924535207877), ('venezuela', 0.07278743735789878), ('usual', 0.07306941463948259), ('u.s.', 0.05229440886769037), ('u.k.', 0.04993805492430981), ('two', 0.03303488386062981), ('turn', 0.06311406551987793), ('trust', 0.05713664410922922), ('transact', 0.05034020759355356), ('trade', 0.03437704654823369), ('told', 0.06305594956600821), ('today', 0.06414851994869603), ('texaco', 0.29482874691475136), ('texa', 0.059124716149747354), ('talk', 0.04653228032177336), ('take', 0.04208702121357572), ('system', 0.04318815408398691), ('suprem', 0.1450059315659825), ('supplier', 0.17648690194337852), ('suppli', 0.15213265193467096), ('stay', 0.06942112819224734), ('state', 0.06822010501862075), ('start', 0.048044250

[('yesterday', 0.07927436646572807), ('year', 0.023680909445970354), ('would', 0.09131808294346912), ('week', 0.038890145033199625), ('want', 0.05151030684511139), ('valu', 0.04708386833856485), ('upper', 0.13333336538828852), ('u.s.', 0.09660414124757064), ('two', 0.07041549387853904), ('troubl', 0.07511932871729149), ('total', 0.03803163882877375), ('toronto', 0.13194500131447512), ('took', 0.06303957547791632), ('today', 0.0651553188509739), ('three', 0.038381845179695596), ('thought', 0.07162225031785752), ('tax', 0.04316536756753111), ('talk', 0.09918586406950444), ('takeov', 0.09634112054608587), ('support', 0.051683184709636336), ('subsidiari', 0.11064486857827503), ('stress', 0.06930097774350742), ('stock', 0.09064504913624988), ('still', 0.048296738384327915), ('standard', 0.06398146564294645), ('specul', 0.10338834819596078), ('solut', 0.07421622529868337), ('sold', 0.049004999019040235), ('six', 0.04059434373404711), ('sharehold', 0.07344856667360791), ('share', 0.0719317207

[('would', 0.04404566333147461), ('week', 0.09356213864042885), ('unspecifi', 0.11314633598166046), ('told', 0.053747770982713355), ('target', 0.0744569789451882), ('take', 0.06074034976635587), ('storag', 0.1024442042886725), ('step', 0.0859666441737749), ('state', 0.05814960532977353), ('start', 0.06933787374205977), ('spokesman', 0.064155160813202), ('send', 0.1096397863358139), ('schedul', 0.08056509298839519), ('say', 0.05502896841748237), ('said', 0.04109546699099056), ('return', 0.07794837134960067), ('resum', 0.23125655080841237), ('report', 0.0503694247281426), ('repair', 0.11130221196897391), ('pump', 0.23358018979403147), ('product', 0.08750480950798868), ('prepar', 0.08545485676461194), ('port', 0.15007205170966204), ('point', 0.07313053468834065), ('pipelin', 0.2892886229770878), ('petroleum', 0.07413157982146075), ('pacif', 0.1446876497961567), ('output', 0.17943913076816867), ('origin', 0.08332631988679061), ('one', 0.046170960737180945), ('oilfield', 0.19907527251358076

[('year', 0.0358454941822313), ('yasuhiro', 0.11920111678991446), ('work', 0.126224620799971), ('within', 0.08123755383181283), ('welcom', 0.10913340932123561), ('week', 0.058867522411429334), ('washington', 0.08961539102792437), ('visit', 0.17664408991609004), ('urg', 0.0927225601856375), ('unveil', 0.12714986126054031), ('u.s.', 0.11331410950698365), ('trade', 0.11091753327250253), ('top', 0.09559436910105566), ('tax', 0.11062832840079423), ('talk', 0.07154079542541056), ('super', 0.12926882425859332), ('stimul', 0.17524871396230446), ('specifi', 0.117370006206943), ('specif', 0.16127512123103457), ('size', 0.10738539538545919), ('sever', 0.07531829361811018), ('seven', 0.07487543083396236), ('senior', 0.08675375479704801), ('said', 0.0721850954862053), ('reagan', 0.08853203012285754), ('purchas', 0.11008823656140371), ('propos', 0.06579517540380524), ('promis', 0.10738539538545919), ('problem', 0.1688861072271047), ('probabl', 0.0864390460349259), ('prime', 0.08718211770398497), ('p

[('two', 0.1592399363619231), ('shr', 0.2538676509184406), ('rev', 0.29130868344715705), ('qtr', 0.15982726224644875), ('petroleum', 0.24760155449283797), ('net', 0.2293088552610035), ('mine', 0.2587532603625116), ('loss', 0.5032956497647316), ('half', 0.23921502449887252), ('feb', 0.243652605568166), ('eight', 0.3832141314092332), ('doubl', 0.3196811275460158)]
[('two', 0.1731325294296745), ('shr', 0.2760158635330193), ('seven', 0.2552382717132264), ('sale', 0.2910907693205957), ('qtr', 0.1737710954722804), ('profit', 0.3158959983887701), ('net', 0.24931448127263747), ('mln', 0.23367728550793923), ('loss', 0.5234767383572171), ('intern', 0.20013628544687043), ('inc', 0.13952248320482663), ('half', 0.26008489588278044), ('dec', 0.3098667366897208)]
[('shr', 0.2295783535587078), ('scienc', 0.5722402355930911), ('sale', 0.2421170243672078), ('qtr', 0.24471982003359097), ('net', 0.20736926999874483), ('mln', 0.3290850132469991), ('inc', 0.19648789626219199), ('backlog', 0.5580501725461613

[('yeutter', 0.264225488743014), ('yasuhiro', 0.09925044052682862), ('would', 0.12902670870474786), ('worth', 0.07277202739156928), ('world', 0.056499335658621724), ('well', 0.06022428496881923), ('week', 0.08298939304737916), ('way', 0.11211859244552327), ('washington', 0.0746164740484885), ('want', 0.10992011207751344), ('visit', 0.18230086534735024), ('unit', 0.04526281994013825), ('understand', 0.09390768441135713), ('u.s.', 0.08297443532544449), ('trade', 0.10501325612555898), ('told', 0.04767414422459031), ('today', 0.08211796686257285), ('time', 0.05448372028143881), ('think', 0.1214227339229476), ('termin', 0.07783043887498359), ('televis', 0.08279395087550645), ('tariff', 0.2125573711744809), ('take', 0.11306597913518698), ('state', 0.05157856075531484), ('semiconductor', 0.14141977192470204), ('select', 0.09634211110493297), ('seem', 0.08159496374958392), ('see', 0.13132368597927138), ('schedul', 0.07146104466046795), ('say', 0.08264346760395133), ('said', 0.07315291649201411

[('unit', 0.07702709425528402), ('tender', 0.10510674963511785), ('subsidiari', 0.09944832745108126), ('share', 0.06465269842429304), ('sell', 0.08994165915413047), ('secur', 0.0921298214352937), ('sale', 0.12099738908826989), ('said', 0.06203235588896225), ('review', 0.12146854025765928), ('recent', 0.09651893630777784), ('preliminari', 0.14120189104024555), ('possibl', 0.17733475927110076), ('plc', 0.11085096754130468), ('oper', 0.07475336900987331), ('offer', 0.08539701298744633), ('materi', 0.13333657313701086), ('lumber', 0.1787865364881993), ('inc.', 0.13015925562737551), ('group', 0.08018797478305076), ('follow', 0.09926985677936735), ('file', 0.1155546964695328), ('explor', 0.2672256121456327), ('exchang', 0.08428631839308216), ('dlr', 0.04862854792748844), ('dixon', 0.3988526355862129), ('determin', 0.1266311787907634), ('cyl', 0.1698288924300923), ('cyclop', 0.42219927167627025), ('corp', 0.06340315868980853), ('control', 0.1764727462599391), ('concern', 0.11371374914365734),

[('zone', 0.0891334146466139), ('year', 0.07613610927163285), ('venezuela', 0.09108880355576025), ('use', 0.09542613335133185), ('two', 0.06999645804022939), ('transport', 0.0733540039742996), ('stretch', 0.10349634912523807), ('speak', 0.08340072948579187), ('sinc', 0.05438449836998459), ('sign', 0.062395657884676804), ('side', 0.08042472330578199), ('septemb', 0.06562648861752617), ('segment', 0.09462029979749174), ('said', 0.06200092567432379), ('run', 0.12602302641625104), ('resum', 0.08403292828481411), ('repair', 0.09651243303823503), ('quota', 0.07105223103045193), ('quak', 0.1874449542010296), ('pump', 0.20254217798329816), ('possibl', 0.06016643390243727), ('port', 0.07685720327106153), ('plan', 0.048249303840875876), ('pipelin', 0.2031960874595024), ('period', 0.053395056589462836), ('per', 0.09600225320933796), ('pay', 0.049600177987455946), ('pacif', 0.07409966069304713), ('order', 0.06752073478872044), ('opec', 0.1356073720008068), ('oilfield', 0.10195348508607861), ('oil'

[('year', 0.059093970340517), ('would', 0.09587813588843584), ('welcom', 0.10626043971017791), ('volum', 0.08863820767830383), ('vice', 0.09116499773845242), ('trade', 0.05146143318061198), ('told', 0.05574997451267792), ('third', 0.13111108280809336), ('term', 0.06361880752897109), ('sustain', 0.10662162342754501), ('surg', 0.10662162342754501), ('suppli', 0.07122994480832685), ('still', 0.07118162259924357), ('state', 0.06031578530205117), ('set', 0.05673709705032748), ('sent', 0.10361135861979621), ('schedul', 0.08356629118925024), ('say', 0.05707889891322755), ('saudi', 0.10271286476980311), ('said', 0.07752742999119068), ('review', 0.08346870436479809), ('remark', 0.1002646842193713), ('remain', 0.15556602020761534), ('quarter', 0.10459647553180663), ('product', 0.11250028916583811), ('produc', 0.06318550795296134), ('price', 0.14777986691369016), ('presid', 0.06610620186270172), ('power', 0.08631551427250131), ('possibl', 0.07197124539610181), ('plan', 0.05771594328747621), ('pet

[('would', 0.11268662844878759), ('will', 0.25334355321826496), ('transact', 0.1097788356555697), ('toward', 0.13069610560467532), ('tax', 0.09267782386423376), ('sharehold', 0.09313844816559737), ('share', 0.1544403702003676), ('said', 0.09570192965247064), ('reynold', 0.17641803274602116), ('retain', 0.1398556282122925), ('request', 0.13583931149988174), ('rais', 0.09481944388612154), ('purchas', 0.1561511202682976), ('pct', 0.05562426049761574), ('outstand', 0.17653388445588084), ('offer', 0.20399374846249546), ('new', 0.07413724783552832), ('negoti', 0.18572092121278125), ('mln', 0.04633210328374237), ('merger', 0.1104111697941008), ('meet', 0.08863360107121561), ('market', 0.11936993865839894), ('make', 0.08809392762218421), ('made', 0.0913438323382733), ('includ', 0.07393934848894167), ('inc', 0.15149171894736466), ('free', 0.1300956568800192), ('form', 0.11624288625114569), ('financ', 0.09901920486454502), ('entertain', 0.4344184730394351), ('eight', 0.102393222445604), ('earli'

[('year', 0.06787978254224131), ('would', 0.11820208591142768), ('valu', 0.13496283795017658), ('upon', 0.09915271962499238), ('two', 0.07759622318463501), ('trade', 0.0476915732994071), ('three', 0.05242466970880138), ('third', 0.07176364100924838), ('surviv', 0.11473331694728361), ('subject', 0.12054935597840363), ('structur', 0.09051841913681932), ('store', 0.08020598587018787), ('stock', 0.047446805159873026), ('sharehold', 0.05925137961968061), ('share', 0.06971094982269645), ('set', 0.05258076302062073), ('second', 0.17979043222124305), ('satisfactori', 0.10136982941027856), ('said', 0.07705435905781968), ('respons', 0.07613820279165819), ('respect', 0.07500704827062375), ('regard', 0.0966010349816279), ('redeem', 0.17598296041101752), ('receiv', 0.1376967949632741), ('public', 0.06966800894746286), ('provid', 0.06552598306642109), ('propos', 0.18282672164108044), ('previous', 0.06746524717461871), ('prefer', 0.242110190248191), ('potenti', 0.08407211577427755), ('physic', 0.1087

[('york', 0.06923667740833367), ('would', 0.04084876173332412), ('without', 0.07321685910315438), ('william', 0.09157783964017605), ('week', 0.05124850133599966), ('weaker', 0.17387396139486766), ('volatil', 0.1043429144818804), ('vice', 0.0815116438310077), ('two', 0.04421581404701617), ('trader', 0.11917702337470813), ('trade', 0.14169201195985046), ('total', 0.05011718242919373), ('today', 0.05071036887887016), ('time', 0.056966566954916587), ('three', 0.05057867596766659), ('spot', 0.15256687531129465), ('sinc', 0.058166294727167), ('silver', 0.19938631472460963), ('shearson', 0.09348687278556272), ('sever', 0.0655701057730181), ('sell', 0.05526017814277969), ('said', 0.06931814202096853), ('result', 0.05287477335259188), ('resolv', 0.0848052815906354), ('research', 0.08085070367169432), ('remov', 0.09034397687793408), ('rel', 0.0848052815906354), ('realist', 0.1098464932577882), ('reaction', 0.09858362459688828), ('rang', 0.06936048799395385), ('price', 0.13812085708615188), ('pre

[('year', 0.035433380222601084), ('would', 0.0843584805758251), ('worth', 0.08639534991105303), ('world', 0.03961635576430038), ('work', 0.04352438703233738), ('without', 0.04910084689840928), ('wide', 0.06074702947126425), ('wang', 0.2548261886733024), ('walter', 0.07366549062394068), ('wall', 0.0628906951458612), ('valu', 0.041609417137674735), ('util', 0.1261681155863418), ('user', 0.07312674404741142), ('unit', 0.031737505525347785), ('u.s.', 0.027723194748874587), ('two', 0.06222826970543728), ('turn', 0.056651170275910885), ('track', 0.07312674404741142), ('total', 0.03360969224271833), ('top', 0.0558104612660853), ('took', 0.11691358751165666), ('togeth', 0.058435663269040886), ('time', 0.08017337165598615), ('three', 0.03391918002810988), ('therefor', 0.06508683972229509), ('target', 0.04630832161057669), ('system', 0.1011566269678624), ('swiss', 0.10427710576632053), ('swap', 0.06959269017660334), ('surpris', 0.061587738481089845), ('support', 0.04567397003413462), ('supplier'

[('year', 0.045265908371503336), ('would', 0.05925297383595297), ('world', 0.1320626922142766), ('willing', 0.08939217562448211), ('western', 0.06702453335506749), ('west', 0.09176251320966433), ('washington', 0.06683817148026944), ('want', 0.05815302196556355), ('vital', 0.08843335073022052), ('upcom', 0.09483264128118706), ('unit', 0.04054445294310716), ('u.s.', 0.0743248673631601), ('translat', 0.09483264128118706), ('trade', 0.08272602534006622), ('tokyo', 0.07054489489010062), ('tanker', 0.08629904622159135), ('take', 0.08171170742525001), ('system', 0.0838495505024964), ('summit', 0.19749447599608871), ('state', 0.046201817124579915), ('stand', 0.07498663595236507), ('stabil', 0.0663832859083367), ('speech', 0.13478071748508408), ('situat', 0.06656339383788704), ('ship', 0.06058969883818725), ('share', 0.03403099018925834), ('seven', 0.055844613613079075), ('seek', 0.056300905593407426), ('secur', 0.04849401689047732), ('seamen', 0.09042482650573541), ('say', 0.0437223661443672),

[('year', 0.11532313251946195), ('two', 0.16340075978387927), ('shr', 0.15385610330379648), ('share', 0.1467960900517128), ('oper', 0.47384532544058555), ('note', 0.1615847929218027), ('net', 0.3316287742928606), ('march', 0.16805663136827437), ('loss', 0.1677078103636711), ('extraordinari', 0.2644473665478925), ('exclud', 0.4060934018019381), ('dlr', 0.2317135043998445), ('discontinu', 0.2729744025582093), ('credit', 0.22341086900387588), ('corp', 0.1439589687581388), ('charg', 0.25319422827531524)]
[('undisclos', 0.20912920537002413), ('termin', 0.21661111057736607), ('technolog', 0.5434031845745863), ('share', 0.10573432524412273), ('said', 0.10144896428264474), ('privately-held', 0.2681312348158325), ('principl', 0.21696896034478905), ('market', 0.11518062693318937), ('link', 0.48784165680217867), ('inc', 0.09484645531186937), ('get', 0.1873595258727424), ('exchang', 0.13784354280339203), ('develop', 0.1593554827351826), ('comput', 0.21806131738668608), ('amount', 0.170338392690033

[('york', 0.07055991591600239), ('year', 0.03180254093754025), ('would', 0.07048479550245584), ('william', 0.09332805828724251), ('whose', 0.09469310215165971), ('way', 0.07055991591600239), ('warburg', 0.2815898967878566), ('ventur', 0.13798853062493252), ('valu', 0.06323180508549961), ('upturn', 0.1103483983168426), ('transact', 0.06866598893050642), ('took', 0.08465969959455003), ('today', 0.05167953602192734), ('time', 0.0580553013925885), ('take', 0.09720074141714377), ('support', 0.06940850820196669), ('strong', 0.07094147876987954), ('store', 0.1335226929284871), ('servic', 0.062306253885292755), ('sell', 0.056316300394688296), ('sale', 0.0939045684178357), ('said', 0.06404345948223153), ('royalti', 0.17719700822222595), ('rival', 0.10465540754916806), ('retail', 0.17520831899236167), ('relat', 0.07413729160753822), ('refer', 0.09090503910851523), ('recent', 0.060434613526194676), ('reach', 0.06732841558966117), ('radio', 0.10413203981118356), ('question', 0.08053060214118533), 

[('year-on-year', 0.11023725560327266), ('year', 0.046818584360144036), ('would', 0.08152728431835135), ('widen', 0.06912691876994975), ('whole', 0.063769381383227), ('west', 0.09491007958011337), ('well', 0.04501642764209545), ('weather', 0.060868205094958334), ('weak', 0.060508920141174295), ('wait', 0.06345834044680347), ('vice', 0.0582727795215775), ('use', 0.043093817643504374), ('underli', 0.06704317738708455), ('unchang', 0.05360492229155407), ('u.s.', 0.05003871876670888), ('two', 0.05352028520329497), ('trade', 0.03289421185751132), ('toward', 0.05734690074854154), ('told', 0.035635452791116295), ('time', 0.04072547234045883), ('thu', 0.06887529573866702), ('three', 0.03615876081787404), ('term', 0.04066522060579979), ('target', 0.1035999261839274), ('taken', 0.054192641936566185), ('suppli', 0.10864862414182394), ('strongli', 0.06725645533406839), ('strength', 0.06546911591524439), ('stood', 0.062293993240048834), ('stock', 0.03272538842643565), ('still', 0.07703714928414966)

[('yet', 0.05197765699145199), ('year', 0.06107717264315471), ('would', 0.0708182691333302), ('world', 0.03924786873826998), ('work', 0.07300774433694796), ('west', 0.08820347574588545), ('watch', 0.06662344792584865), ('volcker', 0.06788627568595919), ('usual', 0.06497703844066995), ('unlik', 0.0548986373377129), ('unit', 0.06598520288994061), ('two', 0.02937629826495637), ('turn', 0.056124235860733715), ('tri', 0.0495832103195534), ('toward', 0.05329461308697011), ('top', 0.09361638762228133), ('took', 0.05519177110702166), ('told', 0.03311735497283752), ('think', 0.04981705298447413), ('test', 0.05883262743021133), ('talk', 0.041378869423242266), ('take', 0.037425919021528996), ('suppli', 0.04231297659833987), ('success', 0.05212421660676493), ('substitut', 0.07244656395720737), ('strong', 0.07830555221040929), ('strike', 0.05559592134887534), ('stop', 0.05406574808709562), ('stoltenberg', 0.0819810827559828), ('state', 0.07519243831276376), ('start', 0.042723389933996096), ('stabl'

[('u.s.', 0.18393172631611884), ('transport', 0.16633327751675334), ('term', 0.12059688736186033), ('subsidiari', 0.12954120052024193), ('statement', 0.12628463863021389), ('state', 0.19358703699802166), ('stake', 0.22471710655527535), ('spa', 0.22654298679441268), ('sector', 0.1552280288148674), ('said', 0.10015351806339724), ('remain', 0.12357803939471307), ('product', 0.10161831782950571), ('primarili', 0.20500297582015636), ('petroleum', 0.24679295421238262), ('pct', 0.12255166979280736), ('own', 0.13633435544862552), ('oper', 0.16486782790031843), ('oil', 0.18788888220739908), ('italian', 0.20987096217459383), ('itali', 0.18819222473226346), ('invest', 0.11694429413491939), ('insur', 0.1605156042363813), ('independ', 0.18473923353115193), ('hotel', 0.20900844296038032), ('hold', 0.12101531928802517), ('firm', 0.26682090859885604), ('financi', 0.1241019969760082), ('energi', 0.14923025137171078), ('east', 0.1754956020958275), ('disclos', 0.1584096747052219), ('concern', 0.148123311

[('zealand', 0.07673625805031081), ('yesterday', 0.052267669418325356), ('year', 0.04475980195282639), ('would', 0.0966073911609919), ('without', 0.06202468319652507), ('within', 0.05991226108003957), ('withdrew', 0.09377234203658574), ('want', 0.05750282806156225), ('ventur', 0.06774541420845849), ('valu', 0.05256143384443227), ('unlik', 0.06999960368902512), ('ultim', 0.08744460029882853), ('u.s.', 0.035020218185894895), ('trade', 0.03897865626729607), ('top', 0.07050033548796122), ('told', 0.07149643237364368), ('today', 0.07273525099451804), ('time', 0.04825846548804098), ('thought', 0.07995452168036928), ('third', 0.058652925493315605), ('think', 0.06352022809631422), ('term', 0.04818706898628084), ('target', 0.0584971371919833), ('takeov', 0.06352022809631422), ('take', 0.04772066532523735), ('support', 0.0576958179062805), ('sugar', 0.06572981982768297), ('subject', 0.05819094319007126), ('strong', 0.058970099590661684), ('strategi', 0.07613760833171171), ('still', 0.09128671557

[('year', 0.17686588969235342), ('shr', 0.3995186827799403), ('sale', 0.42133882900702013), ('qtr', 0.2515246706463577), ('northwest', 0.5850374070056701), ('net', 0.4472888320697602), ('mln', 0.16117116215408972)]
[('would', 0.10550165017094781), ('win', 0.1032752730027123), ('whether', 0.07054598498733253), ('week', 0.050724124323500014), ('want', 0.06718450666930051), ('violat', 0.08941957316938484), ('use', 0.05966261381896376), ('u.s.', 0.040916528135828405), ('top', 0.08237038802174995), ('told', 0.04933664210789082), ('today', 0.05019149806231464), ('time', 0.05638368243315601), ('textil', 0.09501589506647222), ('takeov', 0.1557486133225535), ('suffer', 0.08873049511918417), ('succeed', 0.09718244327905812), ('stop', 0.13637377826298921), ('stanley', 0.18145640312419278), ('sinc', 0.05757113453590146), ('side', 0.08513717519191558), ('share', 0.08250944043974086), ('sever', 0.06489918944808766), ('sell', 0.054694753469485796), ('said', 0.0712330402700426), ('rule', 0.07068564091

[('undisclos', 0.18727474675912958), ('term', 0.1355874640686814), ('subsidiari', 0.14564358380369735), ('sold', 0.15393042890888842), ('servic', 0.14573111637631236), ('sell', 0.13172092390293647), ('said', 0.09084732599348128), ('provid', 0.15069116354277698), ('product', 0.11424979797434974), ('plant', 0.282489376396267), ('market', 0.10314399991290098), ('lee', 0.4503567292100783), ('label', 0.28973106934226167), ('inc', 0.08493479362723878), ('food', 0.1811408825396245), ('exclus', 0.24871698862002656), ('equip', 0.18420458266727935), ('distribut', 0.1734033037995411), ('corp', 0.09285488748516756), ('compani', 0.08744175772183947), ('coffe', 0.45487591764738344), ('becom', 0.17607105451316563), ('account', 0.1475262999565015)]
[('year', 0.12342287645862342), ('shr', 0.39293252319487765), ('sale', 0.2940241916076075), ('qtr', 0.17552224685879325), ('period', 0.22586711424384526), ('note', 0.17293373409722695), ('net', 0.3121329633311127), ('mln', 0.2683879005827305), ('industri', 

[('wall', 0.12279680862566418), ('tonnag', 0.13072877265980085), ('ton', 0.26294817200568105), ('time', 0.07459309400429023), ('target', 0.09041900544112402), ('stage', 0.11595176766616519), ('said', 0.07691313096663385), ('requir', 0.0859227172827202), ('report', 0.06116758097199885), ('receiv', 0.14034490755889445), ('rang', 0.090821927275537), ('princip', 0.19727478765669262), ('plu', 0.11104563044420224), ('period', 0.07477835327396859), ('per', 0.15287908530986613), ('ounc', 0.24333780451700388), ('ore', 0.12166763931843701), ('optimist', 0.12130465529935702), ('one', 0.0560690536905927), ('north', 0.0995067553490951), ('next', 0.08010318208187996), ('mountain', 0.24739603917698147), ('month', 0.062169131011788295), ('miner', 0.11019100421553993), ('mine', 0.15928861854614515), ('mill', 0.11286977097479277), ('mile', 0.12279680862566418), ('ltd', 0.07089095048258179), ('low', 0.09106753574036038), ('locat', 0.11991374530951794), ('let', 0.1261525309432371), ('late', 0.091822518972

[('year-ago', 0.07690258334670333), ('year', 0.0731043164693541), ('would', 0.07696037521441167), ('u.s.', 0.06283710491377043), ('told', 0.10678648289978342), ('time', 0.051141812604179535), ('three', 0.04540707482255389), ('third', 0.062157320874808825), ('telegraph', 0.323019263242937), ('technolog', 0.07023495744040968), ('talk', 0.05591331547229299), ('sinc', 0.05221886983588911), ('sharpli', 0.0687437022083181), ('sharehold', 0.05131995762061417), ('share', 0.0748387150138244), ('septemb', 0.06301319621615604), ('seek', 0.05899767607871788), ('sale', 0.03941746965260064), ('said', 0.056416873649747054), ('rose', 0.0504750772059787), ('risen', 0.08501008067597711), ('revenu', 0.10216824487045509), ('report', 0.1170784053580752), ('remain', 0.0523285042593065), ('reduct', 0.06715655721710309), ('reduc', 0.05402248290067831), ('realli', 0.08588008248559119), ('quebec', 0.23196597208136804), ('quarter', 0.11832975050642201), ('qualiti', 0.1392006215880472), ('publish', 0.076284437934

[('year', 0.05186391705252025), ('would', 0.06788975259088469), ('withdraw', 0.15177711690289108), ('weekend', 0.1526306304949058), ('statement', 0.09899580863537616), ('said', 0.07851135827688571), ('rio', 0.16819222741809645), ('reduc', 0.10001008626501741), ('quota', 0.12629882832345124), ('produc', 0.19704551247823998), ('price', 0.07454398923688238), ('plan', 0.08576550594045519), ('per', 0.08131502820392936), ('organ', 0.21775020694958963), ('obtain', 0.14418405281902216), ('mln', 0.04726161613049228), ('mexico', 0.32606799574384604), ('meet', 0.09041176491464964), ('leav', 0.28621713511938673), ('janeiro', 0.18256248008036405), ('intern', 0.08494740490182583), ('intent', 0.12792622273632268), ('instrument', 0.1613023245980209), ('institut', 0.11892909933818109), ('import', 0.09247871002592152), ('ico', 0.3755464876215764), ('group', 0.08188153222589943), ('five', 0.08831298644297167), ('export', 0.08462723072507214), ('event', 0.16666061906505278), ('ensur', 0.15954683936308678)

[('year', 0.0503982658789777), ('would', 0.06597121844978653), ('venezuelan', 0.30513342085408524), ('total', 0.08093982411777476), ('three', 0.08168514147252141), ('term', 0.09186554580581867), ('swiss', 0.1483173569240557), ('state', 0.14746631694889653), ('sign', 0.10777703948843602), ('said', 0.07629266229450532), ('repres', 0.10669698992801607), ('report', 0.07544289427367616), ('rate', 0.08195892426372603), ('prove', 0.15145517450709303), ('product', 0.1310639565129058), ('produc', 0.09123986130160286), ('primarili', 0.15616249040513544), ('plan', 0.083341810979201), ('pct', 0.05513673970317017), ('one', 0.06915447075699892), ('norway', 0.17487181067116675), ('negoti', 0.18409316364360512), ('mln', 0.1282144121882653), ('mitsubishi', 0.1833826204893649), ('minist', 0.09970656606777781), ('meet', 0.08785676874621484), ('market', 0.0698838931288975), ('mark', 0.11389248809363364), ('line', 0.11221953199424807), ('japan', 0.09556186454959426), ('interest', 0.08422941927693853), ('in

[('year', 0.10141960456603352), ('widen', 0.12043038588073722), ('whole', 0.11109668048297867), ('urg', 0.10053696886600476), ('unchang', 0.0933885321601393), ('tri', 0.09295016373155786), ('told', 0.10511528491266045), ('today', 0.06315848887192041), ('three', 0.06299446866361488), ('team', 0.1292470672024966), ('target', 0.08600349749447143), ('talk', 0.07757006178360265), ('surplu', 0.14733096618544655), ('strengthen', 0.11312140033334192), ('stabl', 0.11028939669602943), ('spokesman', 0.07410411072839002), ('south', 0.14843198483883333), ('slow', 0.10021978181023386), ('seven', 0.08118573132052921), ('see', 0.07166503264200914), ('said', 0.05883586215972577), ('rose', 0.1185633865869273), ('rise', 0.11086656090711694), ('revalu', 0.1292470672024966), ('reuter', 0.07817031142447568), ('report', 0.058180532634741454), ('remain', 0.07259675578439556), ('real', 0.09012987927400885), ('quot', 0.09663711193779415), ('quarter', 0.06879372499355606), ('provision', 0.10951419902929399), ('p

[('utah', 0.2069611755619235), ('union', 0.12567595304640242), ('u.s.', 0.12872110838774906), ('trade', 0.08461806205672326), ('three', 0.0930158862001251), ('temporari', 0.15565496998895337), ('talk', 0.11453780296059117), ('summer', 0.1688908183845428), ('specif', 0.15249926448224638), ('soviet', 0.14511012431812917), ('sensit', 0.20356868649455184), ('senat', 0.41723224433412454), ('secur', 0.1040977366850739), ('sale', 0.08074624683116656), ('said', 0.08687540311450027), ('richard', 0.15919945342820332), ('republican', 0.1991287050224531), ('propos', 0.2210656901555215), ('product', 0.08814600321545875), ('part', 0.11318797651940851), ('offer', 0.09649031804433221), ('major', 0.1013479288231756), ('john', 0.1572122609522945), ('import', 0.1732610000797782), ('illeg', 0.1965084570276345), ('heinz', 0.22049862741104093), ('hear', 0.1688908183845428), ('good', 0.11610721889427796), ('export', 0.09364281735868213), ('due', 0.1052334807140858), ('countri', 0.10102763947602629), ('contro

[('written', 0.10348347239541308), ('would', 0.0689697838048882), ('world', 0.14057429244293457), ('workforc', 0.09642427315842013), ('well', 0.06279285107474813), ('weak', 0.08440313481395839), ('wast', 0.09955839051290756), ('volatil', 0.1040514699521827), ('view', 0.07938468436850803), ('trade', 0.07768789811202549), ('today', 0.05056872764063098), ('target', 0.06885990337354592), ('support', 0.06791663039928461), ('sugar', 0.21600919385055684), ('subsidi', 0.14647781987020722), ('strateg', 0.09351768840289879), ('soft', 0.0947432746860551), ('small', 0.07675726711366297), ('sector', 0.07301235405196602), ('say', 0.050892334092190974), ('save', 0.12471693987227181), ('said', 0.06612713435753767), ('rice', 0.2260853421373662), ('report', 0.07887197220149754), ('repli', 0.09442774453121289), ('remark', 0.08939737633512344), ('reject', 0.13065273437593172), ('reform', 0.14915797820367435), ('recommend', 0.13502258262811015), ('provid', 0.0630420831420769), ('propos', 0.0571195421738458

[('wage', 0.13218476684020436), ('u.s.', 0.0986581577864504), ('subsidi', 0.20704250497924648), ('season', 0.1002672045411068), ('salari', 0.1440241903166155), ('said', 0.0757132462523466), ('rose', 0.18911145716510092), ('rise', 0.17683483483036752), ('revis', 0.16830115096813428), ('retir', 0.13391710047513783), ('restrain', 0.14967415673855622), ('report', 0.06584391336757556), ('rate', 0.07153087591449192), ('previous', 0.15533863471965406), ('person', 0.3241644396336963), ('pct', 0.1481870798336329), ('payment', 0.19239847861882103), ('newli', 0.12944618965424323), ('may', 0.20246355251342615), ('march', 0.06409912636602119), ('manufactur', 0.09759085462108948), ('lower', 0.13823967401891132), ('initi', 0.0989346320689531), ('increas', 0.14008110148851566), ('incom', 0.25879483692465155), ('govern', 0.07425485178676587), ('fund', 0.08922699691535527), ('follow', 0.08596896219202786), ('fell', 0.086020361346228), ('feder', 0.0838649016435396), ('farmer', 0.18886066917002267), ('far

[('unit', 0.08999108901528398), ('three', 0.09617718527995844), ('telegraph', 0.21399509418156096), ('takeov', 0.14258150518819948), ('societ', 0.21591716620657447), ('said', 0.08982800769792736), ('report', 0.08882747938995152), ('quebec', 0.34861295003246157), ('privat', 0.12886021169536058), ('print', 0.21218958504428143), ('previous', 0.12377030916004315), ('plc', 0.12950766719017942), ('plant', 0.1330973862921472), ('pct', 0.06491873962106699), ('owner', 0.17564050135518705), ('own', 0.1222787153914949), ('ontario', 0.19732874001373368), ('newspap', 0.16126031621459774), ('name', 0.13260914945935623), ('media', 0.20589642414735074), ('ltd', 0.10294774360634375), ('juli', 0.14258150518819948), ('inc', 0.11472087916628963), ('held', 0.1275038892708504), ('french', 0.14579463049746108), ('four', 0.1074270410399413), ('former', 0.1622262964547685), ('execut', 0.22331169001544948), ('effect', 0.11095432654602419), ('daili', 0.25441248997307936), ('complet', 0.19071508570876922), ('comp

[('year', 0.0776203375306406), ('trade', 0.11444819450912959), ('suspend', 0.12610200799843346), ('surplu', 0.2153958874089002), ('sold', 0.0948686723658863), ('sale', 0.06450204802387689), ('said', 0.0693981657810287), ('respect', 0.17999870115710231), ('reserv', 0.14750252533752), ('report', 0.06862519050121381), ('product', 0.1477699055763026), ('princip', 0.13071912445180414), ('post', 0.11136150012638582), ('period', 0.20019912252319993), ('part', 0.09041728357289222), ('oil', 0.07689323874242365), ('monthli', 0.12316076358041403), ('month', 0.16644128857705903), ('mln', 0.16013529379816643), ('may', 0.17093039627907863), ('march', 0.06680670289557472), ('latest', 0.10888227660227426), ('last', 0.10395250160615524), ('januari', 0.0801999839204806), ('intern', 0.07508714939377968), ('import', 0.08174425956255608), ('gold', 0.11068273633051781), ('four', 0.19804954693360485), ('first', 0.17897656397519515), ('export', 0.15698488614586006), ('end', 0.17670760966885102), ('ecuador', 0

[('year', 0.038252516184109894), ('world', 0.12260590000067138), ('worker', 0.17002602836876843), ('wage', 0.1149551757973559), ('union', 0.21858960581933914), ('turn', 0.10355021872614573), ('trade', 0.05640186002765642), ('talk', 0.07634475398559742), ('taken', 0.09292108345004013), ('subsidiari', 0.07489777095059501), ('strike', 0.17367536614029816), ('south', 0.1460873319622662), ('show', 0.07596077227601011), ('sent', 0.11355830930007463), ('said', 0.05790648246027693), ('refineri', 0.22725777716743586), ('recognis', 0.13678531673483427), ('propos', 0.11888147810711175), ('produc', 0.06925147542688231), ('process', 0.09398465389652166), ('price', 0.05498032768763641), ('plc', 0.083485470186962), ('platinum', 0.4005400510897136), ('plan', 0.10710310442774892), ('own', 0.1334630348686829), ('object', 0.10907326731336199), ('near', 0.09292108345004013), ('move', 0.19972550035708053), ('mine', 0.1491166696628883), ('metal', 0.10257546900492223), ('mass', 0.1405128103951749), ('manag',

[('yet', 0.15668916708543232), ('would', 0.13852090649413573), ('vote', 0.24992603967210422), ('two', 0.08855627540014405), ('store', 0.32524637395296063), ('split', 0.13891650779401324), ('specialti', 0.19744516295602607), ('sharehold', 0.19385037666590957), ('share', 0.1347021323075613), ('set', 0.21322227171193495), ('said', 0.07633283251923542), ('retail', 0.16407499215691165), ('restructur', 0.2561121143156099), ('reject', 0.15498163987183128), ('record', 0.16296685181480186), ('propos', 0.19423875685676215), ('partner', 0.15089705830269495), ('offer', 0.10508380338934704), ('new', 0.09113378327602521), ('meet', 0.18447437434605565), ('june', 0.13365719967060352), ('inc', 0.07136493348707976), ('hold', 0.11432021165541616), ('held', 0.13429521625121185), ('get', 0.14097416775475197), ('exist', 0.15780558624073068), ('end', 0.09232284312231302), ('depart', 0.12129139330947444), ('decemb', 0.1216482579904771), ('date', 0.3214352474441381), ('compani', 0.19171896531007895), ('common'

[('yen', 0.05473497721120469), ('year', 0.05724231966990845), ('would', 0.05316512427541827), ('worsen', 0.08267329342767524), ('world', 0.13377327412261641), ('work', 0.04988936778402438), ('western', 0.10182283675217269), ('west', 0.04862809643533795), ('warn', 0.06571279918012009), ('vital', 0.07934741123172202), ('unemploy', 0.11918959152615873), ('undermin', 0.1409263276481152), ('uncertainti', 0.07231711929557465), ('u.s.', 0.06668848083568656), ('two', 0.033988410169811745), ('trade', 0.03536931334275324), ('toward', 0.06166192735053397), ('time', 0.04378983142933699), ('three', 0.03887950095879919), ('though', 0.06806941618138251), ('third', 0.05322178594398599), ('tax', 0.043725046095922364), ('switch', 0.07278930708629498), ('support', 0.052353304539280054), ('subsidi', 0.06668760559440047), ('strengthen', 0.06981729802669459), ('statist', 0.05659083507542059), ('state', 0.04145488724294229), ('stabil', 0.10084866161334227), ('sluggish', 0.07976988424261267), ('slower', 0.078

[('yesterday', 0.08292510218554495), ('year', 0.04194172846303535), ('would', 0.05490162969576882), ('warburg', 0.1556243525136975), ('u.s.', 0.09407331302023142), ('total', 0.06735858993929356), ('thu', 0.12948627413089114), ('tender', 0.14695455220290118), ('take', 0.12818998050911315), ('supermarket', 0.2882047214272554), ('suffici', 0.1324985488426605), ('subsidiari', 0.08212118535097535), ('stock', 0.06152407145957831), ('stg', 0.09019303928919904), ('stake', 0.1424568792387951), ('share', 0.16440550138529322), ('septemb', 0.09433693898186978), ('secur', 0.0760778017726004), ('sale', 0.0999156976389537), ('said', 0.06349119498202672), ('rise', 0.07066058739924136), ('report', 0.06278401311843301), ('purchas', 0.0760778017726004), ('profit', 0.10842998949789978), ('produc', 0.07593034047850175), ('pretax', 0.18115591438432482), ('plc', 0.09153711364600188), ('penc', 0.14997739193443857), ('pct', 0.10949538831024062), ('payabl', 0.092163182503573), ('outstand', 0.08600841188142483),

[('zone', 0.09574956236021812), ('year', 0.0923335347082282), ('worth', 0.07642188546360601), ('well', 0.0632448149788372), ('virtual', 0.10027498162305598), ('upgrad', 0.11032810983370546), ('trillion', 0.19690915566264264), ('tonn', 0.11822756752760546), ('suppli', 0.06396672478894017), ('sulphur', 0.19690915566264264), ('state-own', 0.09332467557487019), ('smaller', 0.09172357001577786), ('slightli', 0.07803669963062956), ('sinc', 0.05842132200086021), ('sharp', 0.07761719139355663), ('share', 0.03989679621805106), ('septemb', 0.07049777672356547), ('saudi', 0.25751014712644116), ('said', 0.058995909143286035), ('rose', 0.09561270734135316), ('riyal', 0.11892558480217234), ('rise', 0.05280449384291617), ('restraint', 0.10601104628717746), ('respons', 0.0737792702666703), ('reserv', 0.14213070450801524), ('report', 0.11196098942296438), ('refineri', 0.15023157210809374), ('proven', 0.10314301395511671), ('product', 0.14181835076392102), ('produc', 0.09607351928730298), ('process', 0.

[('week', 0.07771130877730661), ('water', 0.4068273566258593), ('vessel', 0.23256530331706418), ('unabl', 0.2610608134426476), ('two', 0.11352078621689073), ('travel', 0.16004945273761934), ('tomorrow', 0.11797638621015537), ('spokesman', 0.18934010086890493), ('speed', 0.16308539381475712), ('sinc', 0.08820119168635052), ('say', 0.07738738402064765), ('said', 0.08145204974111808), ('river', 0.24311520684065788), ('rise', 0.13497976523290817), ('reduc', 0.0912476157502077), ('reach', 0.1001796734536214), ('rais', 0.08824735147936055), ('rain', 0.1466236986164774), ('oper', 0.1179179502780308), ('north', 0.11523304685510384), ('normal', 0.12259898014773817), ('near', 0.11494690838139798), ('navig', 0.17218115572040255), ('move', 0.09468247020289104), ('might', 0.10613728494945841), ('meanwhil', 0.1520585042676748), ('main', 0.1106643671427801), ('lift', 0.13302682235283408), ('level', 0.14728285813176153), ('last', 0.06337273574646475), ('high', 0.16674996102784181), ('heavi', 0.1216373

[('york', 0.04644389840613198), ('year-end', 0.0643954708911834), ('year', 0.03544272816899082), ('world', 0.039626807252911254), ('within', 0.04744109425880433), ('west', 0.07184907867310183), ('week', 0.034377446732144844), ('way', 0.04644389840613198), ('war', 0.10106143463307747), ('vice', 0.0546779343941838), ('vehicl', 0.06888620901493386), ('upham', 0.0736849248707667), ('uncertain', 0.06854171826921167), ('u.s.', 0.0723610405065652), ('two', 0.029659926680665156), ('turkey', 0.0678848777453825), ('trade', 0.052258935949842045), ('time', 0.03821311979771296), ('throughout', 0.0641695261303892), ('though', 0.059400656960668596), ('third', 0.04644389840613198), ('therefor', 0.0651040107707409), ('texa', 0.0898795872410684), ('tension', 0.11663449057309838), ('technic', 0.06160398640533443), ('target', 0.04632053855700108), ('tanker', 0.11440805741371117), ('take', 0.03778726659578019), ('surpris', 0.06160398640533443), ('suppli', 0.07233380209961868), ('strong', 0.0466950504404133

[('york', 0.06325250999532557), ('year', 0.028508970176823915), ('would', 0.09737947068892767), ('worth', 0.06951192459079682), ('work', 0.059292030104751835), ('week', 0.046819062732935235), ('visit', 0.08297578262270136), ('veget', 0.08129050936045598), ('use', 0.11556934303556896), ('urg', 0.07374496469800247), ('translat', 0.10112600440757544), ('toward', 0.07328336708158542), ('told', 0.07710320952122676), ('tin', 0.2632686097319476), ('though', 0.08089847702443359), ('third', 0.06325250999532557), ('tax', 0.10905631217653683), ('system', 0.052809375104038596), ('suffer', 0.081899464460244), ('substitut', 0.09961847610398737), ('stock', 0.041819638404834686), ('still', 0.05814347111026891), ('state', 0.049267900788767104), ('spent', 0.09081622454799332), ('sinc', 0.05313894710635466), ('sign', 0.10322547688769401), ('septemb', 0.0641234655452334), ('see', 0.052567084147025545), ('said', 0.060580937743305176), ('rubber', 0.08540674649111316), ('review', 0.06817996027106282), ('reut

[('usa', 0.271775246667678), ('told', 0.12332749228661943), ('today', 0.1254643876370331), ('takeov', 0.1855163923182854), ('spokesman', 0.14720787403224087), ('seek', 0.16259325968922328), ('rumor', 0.24491591486427378), ('reuter', 0.15528538490141686), ('report', 0.19568662601736186), ('miner', 0.4369416864438207), ('might', 0.17317636603233116), ('intern', 0.2653876441293674), ('interest', 0.21847744613182216), ('hostil', 0.247011010215615), ('divis', 0.18618259482701935), ('deni', 0.35738396296447345), ('corp', 0.09637979140319658), ('convers', 0.247011010215615), ('chemic', 0.19637156932621289), ('acquir', 0.13760626428937575)]
[('shr', 0.4087687455814968), ('rev', 0.3784306018651813), ('qtr', 0.2076269280086853), ('nine', 0.276831938530401), ('net', 0.36922503823018865), ('mth', 0.30815271140951067), ('mln', 0.3714226845709245), ('ltd', 0.2532907990008098), ('group', 0.23049834913928352), ('avg', 0.28883867114614215)]
[('year', 0.07268059759403855), ('withdrew', 0.152266756322885

[('year', 0.15007686174632634), ('shr', 0.33900550425485315), ('rev', 0.3890028791322755), ('qtr', 0.21342743526207295), ('net', 0.3062105936956654), ('loss', 0.6977883919759919), ('corp', 0.18734238118109442), ('april', 0.22339865286757177)]
[('year', 0.13499783204435176), ('shr', 0.30494379741804334), ('rev', 0.3499176670593838), ('qtr', 0.3250558690212929), ('nine', 0.2559739767889986), ('net', 0.34140569858947956), ('mln', 0.2935580968877359), ('method', 0.4056909469753222), ('inc', 0.15414518321185272), ('end', 0.19941336553343175), ('electron', 0.3520380304765329), ('april', 0.20095258834587887)]
[('zinc', 0.1710199460619761), ('year', 0.051428163147283164), ('worker', 0.22858957262178559), ('went', 0.15312903192050933), ('union', 0.11262208009548531), ('tuesday', 0.1481176451878872), ('ton', 0.29104544573976765), ('talk', 0.24493126208936924), ('strike', 0.28941212920150083), ('spokesman', 0.09805468931750863), ('smelter', 0.3719677208670925), ('sinc', 0.09585889719306984), ('sh

[('would', 0.15461336034664563), ('world', 0.09370024847187515), ('valu', 0.09841421931300841), ('two', 0.118745304349171), ('trade', 0.07298229416209868), ('total', 0.07949334191520574), ('togeth', 0.13821150538187707), ('throughout', 0.15173315640508028), ('three', 0.08022533964267797), ('suspens', 0.16372825933588697), ('suspend', 0.13615239310500832), ('stg', 0.10644145189276627), ('statement', 0.09447906477724453), ('signific', 0.12479609886970829), ('sharpli', 0.12145655450190139), ('share', 0.18561011782126666), ('servic', 0.09697368794308453), ('scope', 0.1694867226635031), ('saw', 0.14079483921332075), ('said', 0.060452430890644705), ('rose', 0.08917949963005961), ('receiv', 0.1004078069491742), ('provid', 0.10027424638292849), ('propos', 0.09085389885216912), ('price', 0.07114287448073703), ('plc', 0.18290673350539002), ('permit', 0.14219477044267329), ('pct', 0.09168626742337552), ('own', 0.10199769773671288), ('opportun', 0.13760556789134692), ('oper', 0.07284944782689322),

[('year', 0.055514202706054505), ('would', 0.058628059374052424), ('visit', 0.07699131244084279), ('view', 0.06748129009239316), ('valu', 0.08905132106274972), ('undertak', 0.15436137418562412), ('u.s.', 0.035042678829701204), ('thu', 0.08166750834804716), ('three', 0.04287452952874855), ('third', 0.05869054326202025), ('therefor', 0.08227108170934068), ('term', 0.10119083350631351), ('system', 0.049000599571667317), ('support', 0.057732821822367705), ('studi', 0.0655949561280008), ('structur', 0.12534162962387507), ('stress', 0.07741281855326164), ('stabil', 0.13784351034890016), ('spot', 0.07638313004379019), ('significantli', 0.07542758630075053), ('signific', 0.06669431441354165), ('sign', 0.09578053609995235), ('secur', 0.10069681588724687), ('secretari', 0.10270448825052346), ('second', 0.09540650457101102), ('result', 0.04482088524580596), ('report', 0.03959812690486782), ('relat', 0.06166614378624036), ('reiter', 0.0857852267118536), ('quarter', 0.09826027785156911), ('purpos',

[('yet', 0.10563029597606394), ('year', 0.08842263189696996), ('winter', 0.28330270883618053), ('wheat', 0.09450529940240972), ('week', 0.11715669814485605), ('weather', 0.1149570617500878), ('two', 0.10107961490566608), ('test', 0.202434604622337), ('temperatur', 0.13796018921360437), ('sugar', 0.10476114181924351), ('state', 0.07281380970826816), ('spring', 0.2143850318400518), ('season', 0.09604554957053223), ('said', 0.03039248361283056), ('repres', 0.08920060503034788), ('report', 0.10678953257031351), ('rate', 0.11601298301651895), ('rapese', 0.24427415935281113), ('plant', 0.09450529940240972), ('overal', 0.10951534414952366), ('offici', 0.06914189892515796), ('oat', 0.1533111731712203), ('normal', 0.18482884841376074), ('mean', 0.10172560890403093), ('may', 0.06583331059540913), ('level', 0.07745432616264031), ('last', 0.09554002614649601), ('june', 0.18909240870540214), ('harvest', 0.12104051370049267), ('growth', 0.14237873501994758), ('gave', 0.10519125375040558), ('four', 0

[('yield', 0.08345487409673745), ('wet', 0.11314398435625245), ('western', 0.13539484591450412), ('welcom', 0.09711208989591237), ('weekend', 0.1969966067968003), ('week', 0.10993185265659793), ('weather', 0.08702711342858448), ('virtual', 0.10204759547755922), ('u.s.', 0.042254751587963095), ('two', 0.045194729439930206), ('trade', 0.047030930220491096), ('tomorrow', 0.07952472962976433), ('today', 0.051833070377096335), ('three', 0.05169846185253828), ('stage', 0.09051258449811243), ('spark', 0.10393314937377561), ('soybean', 0.08331626972261864), ('southwest', 0.1055090474800051), ('southern', 0.0861798943048519), ('senior', 0.07719761058559295), ('seen', 0.07435754554434039), ('saw', 0.09073039337102332), ('said', 0.06778047558064262), ('relief', 0.09847535614571219), ('receiv', 0.0647043590015225), ('rainfal', 0.11145729892062183), ('rain', 0.31599806667966357), ('price', 0.045845579449135865), ('pretti', 0.11405901655351967), ('pattern', 0.2606308104592105), ('one', 0.07410534444

[('yield', 0.12005412812855518), ('year', 0.0776909180519365), ('unit', 0.0695873756737983), ('u.s.', 0.14505241361602395), ('treasuri', 0.2695653671288955), ('termin', 0.2511137778171102), ('tax', 0.19958900953841316), ('subsidiari', 0.08984303756200135), ('state', 0.0792972397376212), ('spokesman', 0.18360102789099333), ('sale', 0.06456070002564615), ('said', 0.09240351773431994), ('refin', 0.12281671266569955), ('reach', 0.09714313521256615), ('rate', 0.12634291989109941), ('pct', 0.050199689454957404), ('past', 0.10532541221339468), ('parent', 0.1284116041999293), ('netherland', 0.39269008829648633), ('negoti', 0.09899263800399213), ('mean', 0.18757256858255805), ('may', 0.071695188507801), ('matur', 0.12728799864240786), ('lower', 0.14420999298818993), ('longer', 0.12236952685362276), ('less', 0.10162500209720335), ('januari', 0.08027291012581687), ('issu', 0.08566227605363257), ('interest', 0.12984297765960295), ('incom', 0.0967030706305477), ('howev', 0.09187138065195773), ('hol

[('stake', 0.41451982616420724), ('pct', 0.2260624375125271), ('invest', 0.3652438581663408), ('group', 0.32622975058154435), ('bell', 0.7329666854434513)]
[('stake', 0.19805097406758337), ('share', 0.21277831330426153), ('said', 0.12057694317781692), ('result', 0.16727965619366178), ('restrict', 0.25830698423933096), ('resourc', 0.22505082666504786), ('reserv', 0.3937229523333919), ('properti', 0.23864907816256536), ('pct', 0.10800903388324863), ('outstand', 0.20245532166463426), ('oper', 0.14530373058195747), ('oklahoma', 0.35019981397381506), ('oil', 0.16559298358752944), ('interest', 0.16499956743227842), ('inc', 0.11272954567475141), ('hold', 0.1805826039715942), ('explor', 0.24750949457402466), ('exchang', 0.2773944645683532), ('compani', 0.11605690907128312), ('common', 0.18269141825644097), ('agre', 0.17678251093811076), ('acquir', 0.17595803839330815)]
[('york', 0.06930925870587318), ('would', 0.04089158377852426), ('west', 0.06332702400776606), ('weak', 0.0847282732762214), (

[('year', 0.08474127018423304), ('would', 0.11092613504720489), ('worth', 0.12203321161508122), ('way', 0.11104435653140535), ('use', 0.09667833465713538), ('two', 0.1200694746200888), ('trust', 0.12265347101950434), ('thursday', 0.14729114619815417), ('swap', 0.166435517132558), ('suffer', 0.14378043388201653), ('submit', 0.15289634029142635), ('sharehold', 0.09168335428545962), ('share', 0.06370863720654744), ('set', 0.08136149327767081), ('second', 0.10661336114665233), ('said', 0.03610232926983247), ('respons', 0.11781338875166257), ('request', 0.13371710574341295), ('relat', 0.11667428645097312), ('propos', 0.09186704234038026), ('proceed', 0.1403807634941106), ('plan', 0.08276516734074468), ('oppos', 0.1410248815955055), ('offer', 0.0841500424813789), ('mln', 0.045608260861306725), ('merger', 0.25935732748423146), ('may', 0.07820143579958182), ('liabil', 0.23989517033133237), ('latest', 0.11887119681984679), ('last', 0.06702852702039852), ('juli', 0.1202593527323231), ('independ'

[('york', 0.17331685631082913), ('worri', 0.14403603014528213), ('u.s.', 0.06111886210396629), ('told', 0.07369636582201833), ('tokyo', 0.2554883673872223), ('talk', 0.0920807927087756), ('support', 0.17048864481513704), ('stock', 0.17660198008790207), ('still', 0.15931768760759124), ('stabilis', 0.12970180995105593), ('stabil', 0.11455975457338016), ('special', 0.1074397638963914), ('sign', 0.09866440779969698), ('seven', 0.09637283153764692), ('say', 0.07545308229819252), ('said', 0.0794161514857938), ('recoveri', 0.13000059594661403), ('rate', 0.07502923409998084), ('plung', 0.26748298337347653), ('plan', 0.07629519667868988), ('oversea', 0.1266469494091463), ('news', 0.10426279051576748), ('new', 0.11390486331892398), ('nation', 0.07856138194563812), ('miyazawa', 0.3507161666269818), ('minist', 0.1545442085101809), ('meet', 0.1361771378117263), ('market', 0.17860324796784752), ('london', 0.11179813929597363), ('lead', 0.10018993776433906), ('kiichi', 0.15033281668649695), ('japanes

[('would', 0.033418046537209325), ('west', 0.10860965113928876), ('weekend', 0.07513088968845653), ('week', 0.041925990652772326), ('underway', 0.08986454100133369), ('u.s.', 0.08070335053025147), ('tuesday', 0.07352718401671977), ('treasuri', 0.05747562897822037), ('trade', 0.07899648690342415), ('told', 0.04077916816587197), ('today', 0.041485748776826654), ('tighten', 0.08032608276381233), ('three', 0.04137801185535147), ('tender', 0.11087059209293607), ('target', 0.05649152718361413), ('taken', 0.06201495100505755), ('surpris', 0.07513088968845653), ('suppli', 0.052102228264229826), ('substanti', 0.10220123584479938), ('stress', 0.07471075622428011), ('stoltenberg', 0.10094768627713413), ('still', 0.05206688224824366), ('stabil', 0.06339052739820634), ('specul', 0.06582951500120848), ('shortli', 0.07825976645961549), ('sharp', 0.06322081733211882), ('septemb', 0.05742190595331205), ('seen', 0.05951371260995246), ('secur', 0.046307760519562195), ('secretari', 0.05854165061201523), (

[('would', 0.09184645339785255), ('world', 0.05566165488767863), ('weinberg', 0.10800786379108224), ('water', 0.08581181318974941), ('washington', 0.12446355680472891), ('use', 0.056797388439057066), ('u.s.', 0.10874338928727023), ('tell', 0.08893015493573386), ('tehran', 0.18889893373585745), ('tanker', 0.09491370609868136), ('strike', 0.07884659949104869), ('strait', 0.17046913811876457), ('still', 0.05996790673774751), ('significantli', 0.08384125665195895), ('shell', 0.0940702049273968), ('see', 0.05421654296534437), ('sea', 0.08210310260150182), ('say', 0.08141816392827309), ('said', 0.06248185660924865), ('rig', 0.1847248867608514), ('retaliatori', 0.09887254560636427), ('respons', 0.11718946354871397), ('repli', 0.08922238731582775), ('remark', 0.08446931964741217), ('region', 0.07705982182839621), ('react', 0.09366587133471901), ('radio', 0.1630109322421777), ('quot', 0.15342660420064377), ('prompt', 0.08836249829949375), ('price', 0.042261682243594334), ('presenc', 0.098872545

[('would', 0.06706474822612044), ('without', 0.09698152230715812), ('whether', 0.09440974305341203), ('voluntari', 0.22779784732819716), ('view', 0.062277999892358785), ('valid', 0.08413858501647548), ('u.s.', 0.1034002814158194), ('true', 0.08470857744199367), ('trader', 0.055065680819380895), ('trade', 0.10604155568431176), ('track', 0.08530638515200455), ('tool', 0.07917057205948454), ('time', 0.07545672474309184), ('tie', 0.12855625100500423), ('threat', 0.07336546885106464), ('think', 0.09931994974580699), ('test', 0.06927587039228654), ('term', 0.044500023596005966), ('temporarili', 0.0859348627843046), ('take', 0.04406930692536539), ('system', 0.11800479145089991), ('suggest', 0.06275481599389003), ('submit', 0.1262742272350572), ('subject', 0.053738448913945365), ('stock', 0.07515435124871081), ('still', 0.0843018901617556), ('spokesman', 0.04654687507725223), ('sourc', 0.10225265160960238), ('situat', 0.10291455855182226), ('ship', 0.055328059743805655), ('share', 0.0310757223

[('stock', 0.14786603699713366), ('shr', 0.13448318559853437), ('share', 0.12831213971786923), ('save', 0.23860260629633956), ('qtr', 0.14335280511889123), ('per', 0.15804297368977413), ('note', 0.14123871091090684), ('nine', 0.3236185614067037), ('net', 0.2549257240285002), ('mth', 0.36023277404088083), ('mln', 0.25644305499623754), ('loan', 0.20919394647807343), ('form', 0.23046093155481132), ('figur', 0.19338320756590646), ('deposit', 0.24929300578177976), ('convert', 0.2579251706726167), ('compani', 0.11849665664383176), ('bank', 0.2500578789779517), ('avail', 0.22978184365016222), ('asset', 0.20273866708719054), ('april', 0.15005017283285887)]
[('shr', 0.37912787457767844), ('qtr', 0.23868724516828072), ('nine', 0.3182451015200311), ('net', 0.4244599102223884), ('mth', 0.3542513607598893), ('mln', 0.25895879903998326), ('dlr', 0.2720759292527576), ('bancorp', 0.501263699269637)]
[('wheat', 0.2742948928986195), ('u.s.', 0.09568050887032668), ('trade', 0.10649555770724944), ('tonn',

[('year', 0.11148646985101496), ('would', 0.06953906778446803), ('without', 0.12464103958349722), ('trust', 0.13018757510217013), ('trade', 0.13262276650945662), ('total', 0.14445458374411793), ('time', 0.09697728383513515), ('third', 0.2473536927099379), ('tax', 0.09683380975766485), ('shr', 0.1487374947908099), ('septemb', 0.11948830719218238), ('reflect', 0.11307433731109541), ('quarter', 0.19733133090744306), ('qtr', 0.07554860653348733), ('provis', 0.26631286405843085), ('profit', 0.13733873532254573), ('previous', 0.11080563259677248), ('pct', 0.09840341104818551), ('note', 0.07443445413611169), ('nine', 0.1705507704740851), ('net', 0.1527656553810898), ('month', 0.13684877160951442), ('mln', 0.18263023122446376), ('loss', 0.2156774839060265), ('loan', 0.26308336660249293), ('last', 0.07114581693385846), ('larg', 0.11263938147048515), ('jump', 0.1640088799520682), ('increas', 0.1364957451247544), ('incom', 0.26716446331999777), ('foreign', 0.16174751065092977), ('fee', 0.16119973

[('shr', 0.34937809744798093), ('rev', 0.4009052481662638), ('qtr', 0.21995770080183785), ('nine', 0.2932727334149815), ('net', 0.3911529745514787), ('mth', 0.3264536182640668), ('mln', 0.43402813630105147), ('ltd', 0.26833350720359367), ('dlr', 0.25072640894475795)]
[('system', 0.26770656366570666), ('shr', 0.19280922940178508), ('rev', 0.22124521407234954), ('record', 0.13144911660376257), ('qtr', 0.12138675865021901), ('qtli', 0.17078957238110543), ('prior', 0.1527690571356038), ('payabl', 0.187562471870616), ('novemb', 0.3664911113676848), ('note', 0.11959660851555237), ('nine', 0.16184669315945555), ('net', 0.21586328436835014), ('name', 0.19074963153385813), ('mth', 0.18015803232282154), ('mln', 0.21714811369429518), ('inc.', 0.21873650408961778), ('full', 0.1794833860634135), ('freight', 0.4399999946368212), ('dlr', 0.13836690408594757), ('dividend', 0.15248298494001097), ('div', 0.15934882910484646), ('billion', 0.20893326317065125)]
[('stock', 0.1927207651212833), ('shr', 0.29

[('would', 0.07677317355293957), ('undersecretari', 0.19300583293644683), ('u.s.', 0.07769552119636212), ('trade', 0.14641943003693353), ('takeov', 0.2386073940030031), ('serv', 0.17024632834180284), ('sent', 0.17411231571158184), ('senat', 0.16340743819738468), ('said', 0.042306348890626316), ('reagan', 0.14485598122606858), ('reach', 0.12416727654582724), ('presid', 0.11108727887111379), ('pass', 0.16806489689992776), ('oppos', 0.27980842007343815), ('negoti', 0.21423609771703309), ('michael', 0.175163707972998), ('limit', 0.13012681964757156), ('job', 0.16723566308102267), ('iron', 0.1902002964076015), ('invest', 0.10366940019635093), ('hous', 0.12341965911758498), ('growth', 0.11705494728473076), ('foreign', 0.22133787403164987), ('effect', 0.10966555286559299), ('econom', 0.11270915559203493), ('differ', 0.15219907464641916), ('cost', 0.11139231635639144), ('congression', 0.1876390422260365), ('congress', 0.15171252052895087), ('compani', 0.06894566877706823), ('commerc', 0.251457

[('year', 0.11390905394873589), ('would', 0.05714130951521401), ('weekend', 0.12846584006822084), ('use', 0.08432186046958151), ('union', 0.16185589445821236), ('tuesday', 0.2128686837128544), ('tonn', 0.2047410012121178), ('three', 0.07075200460679826), ('target', 0.230502935197018), ('suprem', 0.2714950409828585), ('strategi', 0.12572367373429166), ('spur', 0.14516336573861743), ('soviet', 0.3251612855158412), ('slightli', 0.10868514917032618), ('show', 0.08668433879849788), ('short', 0.10781448877661885), ('set', 0.07096266715895697), ('session', 0.11558817683616697), ('said', 0.053313961497917), ('rise', 0.07354314466585053), ('releas', 0.10603897825102333), ('prospect', 0.10883368354687754), ('product', 0.06704775582262425), ('produc', 0.07902787423470661), ('plan', 0.12222310953936949), ('pay', 0.07420801252245093), ('parliament', 0.12705188260655773), ('output', 0.204725710710565), ('oil', 0.23409479548093184), ('nomin', 0.13290289083047555), ('nine', 0.08277139953441306), ('new

[('wheat', 0.14530461216471255), ('went', 0.19289056587689837), ('tonn', 0.19714953303910465), ('support', 0.3373877951015579), ('statement', 0.12365336991857556), ('sorghum', 0.2154270405709588), ('set', 0.17830676808665663), ('said', 0.11150990439683778), ('price', 0.24296781467512737), ('previous', 0.3224410771388806), ('per', 0.1719707866671511), ('oilse', 0.20636515917289208), ('offici', 0.10630765545615171), ('new', 0.15993623691649392), ('monday', 0.17307772319459055), ('maiz', 0.19119383690114325), ('increas', 0.09830801174429551), ('hike', 0.201478994789661), ('grain', 0.2474041687135909), ('effect', 0.1211306685721005), ('argentina', 0.4497034581889064), ('agricultur', 0.12699472272881232)]
[('term', 0.18505492795563988), ('talk', 0.3430658528045118), ('scienc', 0.5716155732349761), ('satisfactori', 0.31817354976303114), ('said', 0.07323160518333072), ('reach', 0.21493154600037256), ('purchas', 0.1841514825373403), ('privately-held', 0.3277125369745397), ('inc', 0.11592223679

[('two', 0.10714294314191238), ('termin', 0.1971917401223747), ('shr', 0.10088444968137286), ('seven', 0.15795402356735272), ('sept', 0.27514936320041494), ('sale', 0.10639436333239112), ('salari', 0.24759877198733624), ('retir', 0.230223197595186), ('reorgan', 0.23888601652061706), ('record', 0.11645249398210802), ('qtr', 0.1820778634698455), ('qtli', 0.1513047189953362), ('prior', 0.13534010852551434), ('pre-tax', 0.20277267701596946), ('plan', 0.12504699822536558), ('payabl', 0.16616405032696135), ('novemb', 0.3246792754773048), ('note', 0.10595220183502731), ('net', 0.15428809005126884), ('mln', 0.11667126243543947), ('includ', 0.10996710427794587), ('inc', 0.08634338985444849), ('holder', 0.1795983360558106), ('gain', 0.13585211313286594), ('food', 0.1841450031460578), ('employe', 0.21022184617559306), ('dlr', 0.15193605500955035), ('dividend', 0.13508667342076472), ('div', 0.1411692146880273), ('cost', 0.1436185655339986), ('charg', 0.16602110565362782), ('bell', 0.26822993815780

[('york', 0.21626081043500045), ('would', 0.11141030392845021), ('worth', 0.12585803424928574), ('whether', 0.06963176090472452), ('week', 0.050066776977168784), ('want', 0.0663138448735053), ('use', 0.05888943022255595), ('unlik', 0.08072547067200278), ('uncertain', 0.09982308892672234), ('transact', 0.06582461201835711), ('time', 0.055652991376028256), ('term', 0.09408929777422013), ('tender', 0.06308853707174779), ('takeov', 0.07325327630493265), ('succeed', 0.09592303028669073), ('stock', 0.11669551387157033), ('still', 0.06217672953774199), ('stearn', 0.1119861622693626), ('stand', 0.08550978050763737), ('spokesman', 0.09841708915399522), ('sourc', 0.06084538473183415), ('sinc', 0.05682505497283012), ('sharehold', 0.13326702425124587), ('share', 0.13834385443938368), ('serv', 0.08849436600011519), ('septemb', 0.06857154033111278), ('say', 0.0498580831743723), ('said', 0.08002622151795939), ('retain', 0.14198561532415632), ('remain', 0.056944360161877434), ('reject', 0.075597424476

[('yet', 0.1061201583913363), ('year', 0.07166965774553154), ('would', 0.13222194632154624), ('wood', 0.2396394474589433), ('wholly-own', 0.13542325398770302), ('use', 0.17159385597073762), ('unit', 0.06419416223936023), ('tonn', 0.1288195887392214), ('three', 0.06860694656709927), ('texa', 0.18174783908178005), ('take', 0.07641060956555125), ('studi', 0.17771900831689094), ('southern', 0.11436586683363664), ('sever', 0.08894192378709746), ('second', 0.09016790859836554), ('said', 0.08524199039680151), ('river', 0.12844434673216693), ('recent', 0.0804386082120309), ('readi', 0.12931158991843505), ('reach', 0.08961427445558384), ('quebec', 0.1468739694212873), ('qualiti', 0.12421985808684266), ('project', 0.16385622828939853), ('produc', 0.1608206649329771), ('presid', 0.0801741503415017), ('plant', 0.19924974156540465), ('per', 0.0663660731996819), ('pct', 0.07840808001647899), ('partner', 0.10219736326211844), ('pacif', 0.18201527584312802), ('offici', 0.06946254573667188), ('need', 0

[('yen', 0.35901826956113364), ('tokyo', 0.41517756732199357), ('sharpli', 0.3860838914913995), ('open', 0.32422843070801205), ('monday', 0.42036981894928854), ('higher', 0.3042818562779163), ('dollar', 0.2993287290135247), ('close', 0.2926648294707675)]
[('yeutter', 0.24057308304886876), ('yet', 0.06517241558050389), ('year', 0.054555527459821666), ('would', 0.08120251387821528), ('worldwid', 0.07412502666114798), ('world', 0.08332165919495167), ('white', 0.06932729999493445), ('week', 0.04269212114221297), ('way', 0.09765567662270815), ('want', 0.05654605448312805), ('uruguay', 0.08644747238919073), ('unit', 0.06675075402730253), ('unfair', 0.08025719144069027), ('undermin', 0.09020087717720397), ('u.s.', 0.07227101631721347), ('two', 0.07729944931768785), ('trade', 0.14911834332675955), ('threat', 0.07812230879941826), ('tariff', 0.11228267242178304), ('talk', 0.05188307881493096), ('take', 0.04692665443451026), ('system', 0.08153249978836265), ('sustain', 0.07941515358902797), ('su

[('would', 0.0875473888524107), ('worri', 0.20879763326420953), ('unit', 0.05990519560158685), ('told', 0.06309657548269013), ('time', 0.12209119633412022), ('three', 0.10840061790585519), ('tension', 0.27279828665480393), ('target', 0.08740791118089579), ('take', 0.07130543264967219), ('suffer', 0.11347732929737923), ('statement', 0.12766027600053118), ('state', 0.06826405811617291), ('ship', 0.1878728618673907), ('sent', 0.11726502060608784), ('said', 0.0679936811113559), ('rise', 0.06654884049065284), ('retali', 0.11182427963050807), ('recogn', 0.13904513136325544), ('reaction', 0.1247884445447995), ('protect', 0.10441246815363968), ('prime', 0.16266612739337538), ('possibl', 0.08145544742241392), ('past', 0.09067074824086088), ('oil', 0.06625473364995885), ('monday', 0.10553494402158453), ('minist', 0.1323160268340105), ('militari', 0.12428651618689349), ('merchant', 0.12195145441089471), ('meet', 0.06886034701000629), ('limit', 0.14838859399185625), ('known', 0.11559806345974671),

[('would', 0.06043669446177073), ('william', 0.13549154684136283), ('whether', 0.10545343234290297), ('well', 0.09316370519491883), ('way', 0.10243727693370969), ('upon', 0.14153315601206554), ('u.s.', 0.12835695492900978), ('tri', 0.11041726995707088), ('told', 0.07374931757611294), ('thu', 0.14254080306918235), ('though', 0.13101487506313464), ('think', 0.1109380161631682), ('stronger', 0.14009936486646962), ('strike', 0.12380702694577239), ('stress', 0.13511475429621908), ('stock', 0.06772679662609811), ('staff', 0.1383160058464691), ('said', 0.06989224133062825), ('risk', 0.12498353499561725), ('retali', 0.13070383373583983), ('respons', 0.18401401157425198), ('report', 0.06911376290556588), ('probabl', 0.11133640243921797), ('presid', 0.08744913908102944), ('predict', 0.1968127589674452), ('order', 0.10684525480172884), ('option', 0.10965745660953355), ('oper', 0.06795226902912625), ('oil', 0.07744067495726578), ('move', 0.09238220699824769), ('mine', 0.10630000068117526), ('milit

[('south', 0.2514109446481385), ('shr', 0.25177891155510407), ('qtr', 0.1585122562077482), ('nine', 0.21134664750702722), ('net', 0.2274221775743723), ('nation', 0.1897953635748642), ('mth', 0.2352584128201972), ('mln', 0.24237823080315782), ('loan', 0.23131604863777094), ('dlr', 0.1806856892385195), ('deposit', 0.2756555532380975), ('corp', 0.13913892320302082), ('carolina', 0.40943244448360194), ('billion', 0.44986520750552006), ('asset', 0.22417812831698206)]
[('stock', 0.161525711144982), ('split', 0.244745702059184), ('shr', 0.2487344433825535), ('share', 0.23732073649136154), ('reflect', 0.234378093278953), ('qtr', 0.15659555271577189), ('public', 0.23717455055985526), ('per', 0.1726427801483805), ('offer', 0.18513861055095845), ('note', 0.15428616120635594), ('nine', 0.20879107945831246), ('net', 0.2784754342449249), ('mth', 0.23241370773456457), ('marin', 0.3399541352891087), ('march', 0.16046567285345162), ('june', 0.23547975462464196), ('januari', 0.19263552644937348), ('init

[('transact', 0.15439995851613894), ('told', 0.1142256230328736), ('subsidiari', 0.1400155290320432), ('stock', 0.22013965208053593), ('stake', 0.24288708480797505), ('share', 0.1910282467078522), ('secur', 0.1297116403916964), ('sec', 0.22979204873607792), ('said', 0.08733674411135203), ('repres', 0.15139233754460077), ('purchas', 0.1297116403916964), ('price', 0.10278142553290964), ('pct', 0.16418180310641084), ('outstand', 0.14664319857677918), ('opportun', 0.1988013069807535), ('open', 0.14735795890975328), ('octob', 0.17732332702357076), ('mln', 0.06516442611415456), ('market', 0.16788949770570602), ('invest', 0.1264000730641756), ('inc', 0.08165268769391097), ('file', 0.16269204694546016), ('favor', 0.20702678618196843), ('exchang', 0.11866859667173137), ('entir', 0.20243884732700354), ('dlr', 0.14368195724134547), ('current', 0.1142256230328736), ('cost', 0.13581632477440767), ('corp', 0.0892667280967892), ('compani', 0.08406277603973854), ('common', 0.27770460842962763), ('comm

[('year', 0.02165536205677324), ('would', 0.04799534002962468), ('world', 0.04099417015829664), ('widen', 0.0671006275797516), ('whose', 0.06447954631677326), ('weigh', 0.07743635853093793), ('week', 0.03556367516441259), ('water', 0.0631994158023364), ('war', 0.06174812352258407), ('view', 0.055242958716378285), ('vessel', 0.06285974604963884), ('upward', 0.062369352456832235), ('unchang', 0.0520336215056459), ('unabl', 0.07056175707407379), ('u.s.', 0.1146270811170994), ('trader', 0.04884535690696708), ('total', 0.03477860131717519), ('took', 0.09760567374886145), ('today', 0.03519024048043409), ('thursday', 0.06372969401778235), ('threat', 0.06507796611888904), ('thought', 0.1374507917129714), ('tension', 0.07126319705483519), ('tanker', 0.14669899155245833), ('take', 0.039091154297587964), ('suppli', 0.09274954983981266), ('struck', 0.12906437236427815), ('strong', 0.04830631019772394), ('strike', 0.1385711968139399), ('strait', 0.0741510045250858), ('stockbrok', 0.1311111521194866

[('shr', 0.16213614597063045), ('share', 0.1546961854190871), ('seven', 0.14993105523022407), ('sale', 0.1709913874556025), ('result', 0.12161730351895579), ('restructur', 0.1737162892691591), ('quarter', 0.12704591821412275), ('qtr', 0.10207592904383748), ('profit', 0.22999978324464895), ('product', 0.11024522913977677), ('prior', 0.1284657701521031), ('per', 0.1905403901443348), ('oper', 0.10564014959574071), ('octob', 0.17798581275183858), ('note', 0.10057056519562241), ('nine', 0.28561964073079643), ('net', 0.18152264335393115), ('mth', 0.15149756636185235), ('month', 0.18490037264098705), ('mln', 0.17067780845536035), ('medic', 0.21436821126669303), ('loss', 0.21905637931024333), ('includ', 0.10438153845428401), ('inc', 0.13876652452637345), ('group', 0.113320239128515), ('gould', 0.2886290787020019), ('figur', 0.23314742159495846), ('extraordinari', 0.16459235202341202), ('dlr', 0.06872101078153443), ('current', 0.1146523736828415), ('cost', 0.13632373898966696), ('compani', 0.08

[('use', 0.32197637616733443), ('unchang', 0.40051031836970963), ('u.s.', 0.22081090001349227), ('septemb', 0.3749130528277278), ('rate', 0.2710664455885648), ('pct', 0.18235622523043557), ('industri', 0.2525407135431666), ('capac', 0.43116743005878166), ('august', 0.43611061874193335)]
[('york', 0.12260430994713406), ('util', 0.2687842069233306), ('tax', 0.1007271554103675), ('stock', 0.08106030747621454), ('shr', 0.12482526972079043), ('share', 0.07034084168133281), ('save', 0.22146734965020598), ('respect', 0.2169691857845919), ('qtr', 0.18752968494735278), ('per', 0.08663931421923962), ('oper', 0.13770394648126782), ('note', 0.07742720077230594), ('nine', 0.25003599828727124), ('new', 0.0805762778306678), ('net', 0.18590850613326168), ('name', 0.12349187992312358), ('n.y.', 0.18477043536442075), ('mth', 0.2783250777754643), ('mln', 0.15506890550933086), ('loss', 0.13606332289329376), ('loan', 0.11468032800530194), ('includ', 0.1686469816324238), ('gain', 0.16809108556876418), ('ful

[('would', 0.05423389302443528), ('transact', 0.08945646388578088), ('total', 0.06653934648730453), ('third', 0.15564057055032235), ('talk', 0.08268963226645105), ('subject', 0.09119978619749622), ('stock', 0.127544819476829), ('stake', 0.08311401423181192), ('sharehold', 0.15927745802654275), ('share', 0.1376185233997137), ('seek', 0.208206716596174), ('secur', 0.07515251160545033), ('seat', 0.15075594563606265), ('said', 0.08343433932176912), ('restructur', 0.10027336206208523), ('recapit', 0.15075594563606265), ('reach', 0.08771390424128202), ('purchas', 0.12724426313676682), ('proxi', 0.14815330392429146), ('privat', 0.08997173967359055), ('possibl', 0.20387615896928762), ('pct', 0.04532703977650333), ('parti', 0.18640805416016984), ('open', 0.08537646030594523), ('one', 0.0568507942907132), ('octob', 0.10273783718881303), ('mln', 0.03775505633124361), ('media', 0.40134150929717927), ('may', 0.1358559141433885), ('market', 0.0574504408610928), ('led', 0.10358095245445417), ('join',

[('year', 0.19733197910059364), ('shr', 0.21240183775048874), ('rev', 0.2437273890273235), ('qtr', 0.13372166205879174), ('profit', 0.24309070405213684), ('pre-tax', 0.2521440729204622), ('oper', 0.13839086762575664), ('note', 0.13174960304671957), ('net', 0.19185438591207313), ('mln', 0.2638642045942474), ('loss', 0.4028298222555005), ('inventori', 0.2608417927469407), ('includ', 0.23152443329477737), ('dlr', 0.2148285406419011), ('discontinu', 0.22257211541165295), ('corp', 0.11737823000508811), ('charg', 0.34954037076647254), ('also', 0.13034934784528227), ('allow', 0.20124188933288692)]
[('worldwid', 0.16316782762150497), ('western', 0.14346087887062428), ('west', 0.27681797729788243), ('weekend', 0.1684040766690103), ('u.s.', 0.07580565757310405), ('treasuri', 0.12883023572839392), ('struck', 0.20142920063063238), ('still', 0.11670666042168215), ('stabilis', 0.16086901249955554), ('spirit', 0.20298200349638287), ('sourc', 0.19337045560374022), ('short-term', 0.15773282444480927), 

[('warrant', 0.1270807928305093), ('valu', 0.08861257562421541), ('use', 0.08608956308348857), ('unit', 0.06758907723180854), ('time', 0.13775151246033424), ('target', 0.09861949368745791), ('subsidiari', 0.08726306956859166), ('strateg', 0.13393377901047748), ('stock', 0.06537630095053691), ('step', 0.11386423465648048), ('spokesman', 0.08497456607114064), ('spin', 0.1621676343968994), ('specif', 0.11842944488460605), ('sharehold', 0.13823134968552356), ('servic', 0.14783801827841953), ('seriou', 0.1241717610822362), ('sell', 0.07892123941191644), ('second', 0.09493644788698342), ('sale', 0.13159715524700383), ('said', 0.08388871340534265), ('return', 0.10324390037257021), ('restructur', 0.10786369884135642), ('repurchas', 0.1171978663594621), ('reorgan', 0.14079472959357617), ('realiz', 0.1366211700961881), ('rather', 0.11660660657926844), ('railroad', 0.15573275433331407), ('program', 0.2027731826369881), ('proce', 0.12363274642304745), ('price', 0.06405734241510251), ('plan', 0.124

[('tender', 0.2604472248177535), ('share', 0.19856983908954345), ('said', 0.09078470605092427), ('restaur', 0.40626292792511876), ('respons', 0.1749755652699723), ('receiv', 0.1507878691526596), ('presid', 0.14079160571312216), ('offer', 0.12497901475110289), ('octob', 0.18432386372546647), ('novemb', 0.18850242436404618), ('martin', 0.24105915106091666), ('manag', 0.2394315134281837), ('major', 0.2222608384195711), ('led', 0.18583651249812158), ('industri', 0.11262098282353672), ('inc', 0.0848762491203987), ('group', 0.19870023160742523), ('give', 0.15642161875160843), ('extend', 0.1817005736992045), ('dlr', 0.07116815677906337), ('compani', 0.08738148519553508), ('class', 0.43462676405646167), ('chairman', 0.1445942965097454), ('associ', 0.14969781600686205)]
[('year', 0.13328375837118295), ('technolog', 0.3341446417907378), ('shr', 0.1778179212502252), ('share', 0.16965836921733823), ('sept', 0.4849755135915134), ('sale', 0.3175152810722934), ('qtr', 0.18954561269634057), ('prior', 

[('year', 0.05021177289858925), ('would', 0.04663534185261026), ('war', 0.05999842583583069), ('want', 0.04576961937192856), ('vulner', 0.07463849252829641), ('volum', 0.13944438026925787), ('visit', 0.06124228251274708), ('unsuccess', 0.07204985822805655), ('unlik', 0.09433620241831144), ('unilater', 0.07204985822805655), ('two', 0.02981392687214659), ('trader', 0.0996028099401362), ('trade', 0.031025226430140034), ('tonn', 0.09025102307073223), ('term', 0.10707710112359152), ('talk', 0.08813186182694825), ('supplier', 0.1592791308312254), ('suppli', 0.13729946134460352), ('summer', 0.061923846457740614), ('suggest', 0.0540885608841238), ('substitut', 0.07352582481554808), ('subject', 0.04631732752086004), ('strait', 0.07204985822805655), ('state', 0.036363365352460944), ('stabl', 0.059709076891862466), ('spot', 0.10287309555821242), ('southeast', 0.0696018997104449), ('sourc', 0.14269564621427122), ('sometim', 0.07301057258816412), ('sluggish', 0.0699724842535059), ('slightli', 0.052

[('year', 0.08667835535484808), ('worst', 0.10440202354430862), ('unlik', 0.0822119219417205), ('unemploy', 0.09111357231731235), ('two', 0.04399165547659668), ('tin', 0.1027004193613651), ('three', 0.050322259933200574), ('tax', 0.09582182599584677), ('take', 0.056046140348941446), ('surplu', 0.06951156152379158), ('studi', 0.07698944965371128), ('strongli', 0.09360101814754823), ('strateg', 0.0933041985651461), ('stood', 0.08669474421128923), ('state', 0.05365561696770534), ('spend', 0.1335057232700621), ('slash', 0.10928961030934632), ('sharpli', 0.07618501004119405), ('seven', 0.06485409847957616), ('semiconductor', 0.08688831883628789), ('sector', 0.12333845003930506), ('said', 0.07160457519126309), ('rubber', 0.09301292733848089), ('risen', 0.0942121771428011), ('ringgit', 0.28970707763035736), ('repay', 0.08650354479503268), ('recoveri', 0.08748390306070643), ('recov', 0.08942077930617418), ('recess', 0.09848860491258593), ('receiv', 0.06298194290138007), ('public', 0.0668740818

[('zealand', 0.19661932348723798), ('year', 0.0677359185338042), ('would', 0.10082070728480635), ('widespread', 0.1067643758064951), ('valu', 0.0641742160947401), ('urg', 0.08349061405990468), ('unsuccess', 0.11051937048652939), ('unlik', 0.08546512842441249), ('turn', 0.08737311631348144), ('thought', 0.09761945930269023), ('tax', 0.16424867585781772), ('tariff', 0.08233761714019604), ('take', 0.05826394115941628), ('speech', 0.09610441441526174), ('solv', 0.10196511429997898), ('sinc', 0.0601614407515815), ('short-term', 0.08896776418174233), ('servic', 0.06323487041816948), ('said', 0.0649979674423363), ('restor', 0.09894257593922237), ('reserv', 0.06133533500724791), ('relianc', 0.1163981397250464), ('reduc', 0.10538044118776003), ('ratio', 0.0982675126791015), ('rate', 0.08887143254938358), ('product', 0.04957466428483597), ('problem', 0.07246260698163014), ('probabl', 0.0778327196505323), ('primarili', 0.10001104053629296), ('price', 0.04639104219286648), ('prefer', 0.07556462097

In [43]:
# List of document ids
documents = reuters.fileids()
 
train_docs_id = list(filter(lambda doc: doc.startswith("train"),
                            documents))
test_docs_id = list(filter(lambda doc: doc.startswith("test"),
                           documents))
 
train_docs = [reuters.raw(doc_id) for doc_id in train_docs_id]
test_docs = [reuters.raw(doc_id) for doc_id in test_docs_id]

In [44]:
%%time
# Tokenisation
vectorizer = TfidfVectorizer(stop_words=cachedStopWords,
                             tokenizer=tokenize)
 
# Learn and transform train documents
vectorised_train_documents = vectorizer.fit_transform(train_docs)
vectorised_test_documents = vectorizer.transform(test_docs)
 
# Transform multilabel labels
mlb = MultiLabelBinarizer()
train_labels = mlb.fit_transform([reuters.categories(doc_id)
                                  for doc_id in train_docs_id])
test_labels = mlb.transform([reuters.categories(doc_id)
                             for doc_id in test_docs_id])

CPU times: user 49.1 s, sys: 12.3 ms, total: 49.1 s
Wall time: 49.1 s


MultiLabelBinarizer API
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MultiLabelBinarizer.html

# Incremental PCA

In [45]:
%%time
n = vectorised_train_documents.shape[0] #how many rows we have in the dataset

n_components = 2400
chunk_size = n_components + 100

ipca = iPCA(n_components=n_components) 

vectorised_train_documents_arr = vectorised_train_documents.toarray()
vectorised_test_documents_arr = vectorised_test_documents.toarray()
print(vectorised_train_documents_arr.shape)

for i in tqdm_notebook(range(0, n // chunk_size)):
    ipca.partial_fit(vectorised_train_documents_arr[i * chunk_size : (i + 1) * chunk_size])

ipca.partial_fit(vectorised_train_documents_arr[(i + 1) * chunk_size:])
    
print(np.sum(ipca.explained_variance_ratio_))

(7769, 20682)


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


0.8638125559454235
CPU times: user 23min 51s, sys: 1min 18s, total: 25min 10s
Wall time: 6min 36s


In [46]:
%%time
vectorised_train_ipca = ipca.transform(vectorised_train_documents_arr)
vectorised_test_ipca = ipca.transform(vectorised_test_documents_arr)

print(np.shape(vectorised_train_ipca), np.shape(vectorised_train_documents))

(7769, 2400) (7769, 20682)
CPU times: user 1min 27s, sys: 2.5 s, total: 1min 30s
Wall time: 23.7 s


# Modeling and scoring

## Helper functions

In [47]:
def classifier_f(clf, X_train, y_train, X_test):
    # Classifier
    classifier = OneVsRestClassifier(clf)
    classifier.fit(X_train, y_train)

    predictions = classifier.predict(X_test)
    return predictions



def eval_f(test_labels, predictions):
    precision = precision_score(test_labels, predictions,
                                average='micro')
    recall = recall_score(test_labels, predictions,
                          average='micro')
    f1 = f1_score(test_labels, predictions, average='micro')

    print("Micro-average quality numbers")
    print("Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}"
            .format(precision, recall, f1))

    precision = precision_score(test_labels, predictions,
                                average='macro')
    recall = recall_score(test_labels, predictions,
                          average='macro')
    f1 = f1_score(test_labels, predictions, average='macro')

    print("Macro-average quality numbers")
    print("Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}"
            .format(precision, recall, f1))

## Logistic regression

In [49]:
C_values = [50, 100, 200, 500]

for c in tqdm_notebook(C_values):
    
    predictions_LR = classifier_f( LogisticRegression( C=c, random_state=42), vectorised_train_ipca, train_labels, vectorised_test_ipca)
    print("C = {0:2.2f}".format(c))
    eval_f(test_labels, predictions_LR)
    print("\n")

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




Exception in thread Thread-5:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/usr/lib/python3.5/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration



C = 50.00
Micro-average quality numbers
Precision: 0.9416, Recall: 0.7925, F1-measure: 0.8606
Macro-average quality numbers
Precision: 0.6184, Recall: 0.3612, F1-measure: 0.4328


C = 100.00
Micro-average quality numbers
Precision: 0.9399, Recall: 0.7975, F1-measure: 0.8629
Macro-average quality numbers
Precision: 0.6297, Recall: 0.3726, F1-measure: 0.4455


C = 200.00
Micro-average quality numbers
Precision: 0.9391, Recall: 0.7994, F1-measure: 0.8637
Macro-average quality numbers
Precision: 0.6222, Recall: 0.3800, F1-measure: 0.4491


C = 500.00
Micro-average quality numbers
Precision: 0.9361, Recall: 0.8024, F1-measure: 0.8641
Macro-average quality numbers
Precision: 0.6190, Recall: 0.3866, F1-measure: 0.4542





## SVM classifiers

SV Classifier sclearn API
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

In [50]:
%%time

predictions_LinearSVC = classifier_f( LinearSVC(random_state=42), vectorised_train_documents, train_labels, vectorised_test_documents)
eval_f(test_labels, predictions_LinearSVC)

Micro-average quality numbers
Precision: 0.9455, Recall: 0.8013, F1-measure: 0.8674
Macro-average quality numbers
Precision: 0.6493, Recall: 0.3948, F1-measure: 0.4665
CPU times: user 1.98 s, sys: 7.77 ms, total: 1.99 s
Wall time: 1.99 s


In [51]:
%%time

predictions_LinearSVC_ipca = classifier_f( LinearSVC(random_state=42), vectorised_train_ipca, train_labels, vectorised_test_ipca)
eval_f(test_labels, predictions_LinearSVC_ipca)

Micro-average quality numbers
Precision: 0.9479, Recall: 0.8021, F1-measure: 0.8689
Macro-average quality numbers
Precision: 0.6189, Recall: 0.3745, F1-measure: 0.4439
CPU times: user 52.2 s, sys: 14.4 s, total: 1min 6s
Wall time: 1min 4s


In [52]:
C_values = [ 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50, 100, 200]

for c in tqdm_notebook(C_values):
    
    predictions_LinearSVC = classifier_f( LinearSVC( C=c, random_state=42), vectorised_train_documents, train_labels, vectorised_test_documents)
    print("C = {0:2.2f}".format(c))
    eval_f(test_labels, predictions_LinearSVC)
    print("\n")

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

C = 0.05
Micro-average quality numbers
Precision: 0.9766, Recall: 0.6140, F1-measure: 0.7540
Macro-average quality numbers
Precision: 0.2671, Recall: 0.1041, F1-measure: 0.1354


C = 0.10
Micro-average quality numbers
Precision: 0.9680, Recall: 0.6782, F1-measure: 0.7975
Macro-average quality numbers
Precision: 0.3845, Recall: 0.1623, F1-measure: 0.2102


C = 0.50
Micro-average quality numbers
Precision: 0.9542, Recall: 0.7839, F1-measure: 0.8607
Macro-average quality numbers
Precision: 0.5660, Recall: 0.3419, F1-measure: 0.4095


C = 1.00
Micro-average quality numbers
Precision: 0.9455, Recall: 0.8013, F1-measure: 0.8674
Macro-average quality numbers
Precision: 0.6493, Recall: 0.3948, F1-measure: 0.4665


C = 2.00
Micro-average quality numbers
Precision: 0.9397, Recall: 0.8120, F1-measure: 0.8712
Macro-average quality numbers
Precision: 0.6791, Recall: 0.4379, F1-measure: 0.5078


C = 5.00
Micro-average quality numbers
Precision: 0.9351, Recall: 0.8154, F1-measure: 0.8712
Macro-averag

In [53]:
%%time

kernels = [ "rbf", "sigmoid"]  
C_values = [5e03, 1e04, 2e04, 5e04, 1e05, 2e05, 5e05]

for kern in kernels:
    for c in tqdm_notebook(C_values):
    
        predictions_SVC = classifier_f( SVC( C=c, kernel=kern, random_state=42), vectorised_train_documents, train_labels, vectorised_test_documents)
        print("Kernel: {}, C = {:.2f}".format(kern, c))
        eval_f(test_labels, predictions_SVC)
        print("\n")

Kernel: rbf, C = 5000.00
Micro-average quality numbers
Precision: 0.9577, Recall: 0.7559, F1-measure: 0.8449
Macro-average quality numbers
Precision: 0.4732, Recall: 0.2641, F1-measure: 0.3236


Kernel: rbf, C = 10000.00
Micro-average quality numbers
Precision: 0.9472, Recall: 0.7957, F1-measure: 0.8649
Macro-average quality numbers
Precision: 0.5765, Recall: 0.3701, F1-measure: 0.4335


Kernel: rbf, C = 20000.00
Micro-average quality numbers
Precision: 0.9369, Recall: 0.8176, F1-measure: 0.8732
Macro-average quality numbers
Precision: 0.6714, Recall: 0.4362, F1-measure: 0.5057


Kernel: rbf, C = 50000.00
Micro-average quality numbers
Precision: 0.9298, Recall: 0.8170, F1-measure: 0.8698
Macro-average quality numbers
Precision: 0.6871, Recall: 0.4629, F1-measure: 0.5316


Kernel: rbf, C = 100000.00
Micro-average quality numbers
Precision: 0.9261, Recall: 0.8170, F1-measure: 0.8682
Macro-average quality numbers
Precision: 0.6905, Recall: 0.4625, F1-measure: 0.5299


Kernel: rbf, C = 200

In [54]:
%%time

predictions_SVC_poly = classifier_f( SVC( C=1e04, gamma=0.01, kernel="poly", random_state=42), vectorised_train_documents, train_labels, vectorised_test_documents)
eval_f(test_labels, predictions_SVC_poly)

Micro-average quality numbers
Precision: 1.0000, Recall: 0.0331, F1-measure: 0.0641
Macro-average quality numbers
Precision: 0.0111, Recall: 0.0013, F1-measure: 0.0023
CPU times: user 4min 51s, sys: 1.21 s, total: 4min 52s
Wall time: 4min 52s


understanding parameters

https://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html

## Naive Bayes classifiers.

In [55]:
%%time

predictions_GaussianNB_ipca = classifier_f( GaussianNB(), vectorised_train_ipca, train_labels, vectorised_test_ipca)

CPU times: user 33.1 s, sys: 14.5 s, total: 47.6 s
Wall time: 47.6 s


In [56]:
eval_f(test_labels, predictions_GaussianNB_ipca)

Micro-average quality numbers
Precision: 0.0806, Recall: 0.6854, F1-measure: 0.1442
Macro-average quality numbers
Precision: 0.1823, Recall: 0.4474, F1-measure: 0.1728


In [57]:
%%time

predictions_GaussianNB = classifier_f(  GaussianNB(), vectorised_train_documents_arr, train_labels, vectorised_test_documents_arr)

CPU times: user 4min 42s, sys: 2min 7s, total: 6min 50s
Wall time: 6min 50s


In [58]:
eval_f(test_labels, predictions_GaussianNB)

Micro-average quality numbers
Precision: 0.3563, Recall: 0.5200, F1-measure: 0.4228
Macro-average quality numbers
Precision: 0.0998, Recall: 0.1697, F1-measure: 0.1230


In [59]:
%%time

predictions_MultinomialNB = classifier_f( MultinomialNB(), vectorised_train_documents_arr, train_labels, vectorised_test_documents_arr)

CPU times: user 1min 23s, sys: 3.44 s, total: 1min 26s
Wall time: 1min 26s


In [60]:
eval_f(test_labels, predictions_MultinomialNB)

Micro-average quality numbers
Precision: 0.9938, Recall: 0.3397, F1-measure: 0.5064
Macro-average quality numbers
Precision: 0.0554, Recall: 0.0154, F1-measure: 0.0188


In [61]:
%%time

predictions_BernoulliNB = classifier_f( BernoulliNB(), vectorised_train_documents_arr, train_labels, vectorised_test_documents_arr)

CPU times: user 3min 37s, sys: 1min 27s, total: 5min 4s
Wall time: 5min 4s


In [62]:
eval_f(test_labels, predictions_BernoulliNB)

Micro-average quality numbers
Precision: 0.5976, Recall: 0.5296, F1-measure: 0.5616
Macro-average quality numbers
Precision: 0.0631, Recall: 0.0525, F1-measure: 0.0520


In [63]:
%%time

predictions_BernoulliNB_ipca = classifier_f( BernoulliNB(), vectorised_train_ipca, train_labels, vectorised_test_ipca)

CPU times: user 55 s, sys: 9.1 s, total: 1min 4s
Wall time: 1min 4s


In [64]:
eval_f(test_labels, predictions_BernoulliNB_ipca)

Micro-average quality numbers
Precision: 0.8668, Recall: 0.7316, F1-measure: 0.7935
Macro-average quality numbers
Precision: 0.6909, Recall: 0.5161, F1-measure: 0.5718
