In [1]:
import numpy as np
from collections import defaultdict, Counter
import os.path
import os
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import datetime
from operator import itemgetter

In [136]:
def weight_normalized_cnb(complement_probs, idf, vectorized_text, 
                          prior_probs):
    '''
    :param complement_probs: dictionary where key = label and values = dictionary where
                            keys = words and values = (# of times word w appears in docs
                            NOT labeled l)/(# of words in documents NOT labeled l)
    :param idf: dictionary where keys = words and values = (total # docs)/(# of docs in 
                which we see that word)
    :param vectorized_text: words from text that are in valid_words
    :param prior_probs: dictionary where keys = labels and values = the probability
                        of seeing that label in the dataset
     :param normalize_comp: dictionary where keys = labels and values = sum(|log(p)| for 
                            p in complement_probs)
    '''
    labels = []
    freq = Counter(vectorized_text)
    doc_len_discount = np.sqrt(sum([v**2 for v in freq.values()]))
    for label in prior_probs.keys():
        prob = prior_probs[label]
        conditional = 0.0
        for word in freq.keys():
            conditional -= ((freq[word] * idf[word]) * complement_probs[label][word])
        prob += conditional
        labels.append((label, prob))
    return sorted(labels, key=itemgetter(1), reverse=True)

In [138]:
def complement_naive_bayes(complement_probs, idf, vectorized_text, prior_probs):
    '''
    :param complement_probs: dictionary where key = label and values = dictionary where
                            keys = words and values = (# of times word w appears in docs
                            NOT labeled l)/(# of words in documents NOT labeled l)
    :param vectorized_text: words from text that are in valid_words
    :param prior_probs: dictionary where keys = labels and values = the probability
                        of seeing that label in the dataset
    '''
    labels = []
    for label in prior_probs.keys():
        prob = np.log(prior_probs[label])
        conditional = 0.0
        freq = Counter(vectorized_text)
        for word in freq.keys():
            conditional -= (freq[word]*idf[word] * complement_probs[label][word])
        prob += conditional
        labels.append((label, prob))
    return sorted(labels, key=itemgetter(1), reverse=True)

In [139]:
def multinomial_naive_bayes(conditional_probs, idf, vectorized_text, prior_probs):
    '''
    :param conditional_probs: dictionary where keys = labels and values = dictionary where
                    keys = words and values = P(x|Y)
    :param vectorized_text: words from text that are in valid_words
    :param prior_probs: dictionary where keys = labels and values = the probability
                        of seeing that label in the dataset
    '''
    labels = []
    freq = Counter(vectorized_text)
    for label in prior_probs.keys():
        prob = np.log(prior_probs[label])
        conditional = 0.0
        for word in vectorized_text:
            if conditional_probs[label][word] != 0.0:
                conditional += (freq[word]*idf[word] * conditional_probs[label][word])
        prob += conditional
        labels.append((label, prob))
    return sorted(labels, key=itemgetter(1), reverse=True)

In [5]:
def bayes_accuracy_model(num, number_labels, labels):
    '''
    :param num: the number of the document being checked, so we can check
                the correct labels for it
    :param number_labels: dictionary where keys = number of sample and
                            values = the set of labels associated with
                            that sample
    :param labels: the set of labels computed by Naive Bayes
    rye 0.00012871669455528383
    groundnut-oil 0.00012871669455528383
    cotton-oil 0.00012871669455528383
    castor-oil 0.00012871669455528383
    nkr 0.00012871669455528383
    sun-meal 0.00012871669455528383
    '''
    sample_labels = number_labels[num]
    successes = 0
    earned = 0
    bottom_5_times = 0
    bottom_5 = ['rye', 'groundnut-oil', 'cotton-oil', 'castor-oil', 'nkr', 'sun-meal']
    computed_labels = [x for x,y in labels]
    if "earn" in computed_labels[:3]:
        earned += 1
    computed_labels_trim = computed_labels[:len(sample_labels)]
    for label in bottom_5:
        if label in computed_labels[:5]:
            bottom_5_times += 1
            break
    if all(x in computed_labels_trim for x in sample_labels):
        successes += 1
    else:
        print(num)
        # print(sample_labels, computed_labels[:10])
        print(sample_labels, labels[:10])
        diff = set(sample_labels).difference(set(computed_labels_trim))
        if len(diff) < len(computed_labels_trim):
            successes += (len(diff)/len(computed_labels_trim))
    return [successes,earned, bottom_5_times]

In [6]:
def vectorize_text(stop_words, valid_words, filepath):
    '''
    This function removes non valid words from the text to put it into
    the Naive Bayes classifier
    :param stop_words: a set of words like "the", "and", etc
                        that should be stripped out of any computations
    :param valid_words: dictionary where keys = valid words in the corpus
    :param filepath: path to the text file
    :return: a vector of text stripped of stop words and non-valid words
    '''
    with open(filepath, "r") as f:
        content = f.read()
        words = nltk.word_tokenize(content)
        words = [word.lower() for word in words]
        new_words = [word.lower() for word in words if word in valid_words]
    return new_words

In [7]:
def cosine_similarity(avg_tf_idf, tf_idf_vector):
    '''
    This function takes the average TF-IDF vector for 
    every unique label and computes the cosine similarity between in
    and the tf-idf vector for a given sample. 
    :param avg_tf_idf: dictionary where keys = labels and values = dictionary
                        where keys = words and values = the average tf-idf score
                        for that term in documents with that specific label
    :param tf_idf_vector: numpy array 
    '''
    labels = []
    for label in avg_tf_idf.keys():
        # Cosine similarity = (a * b)/(|a| * |b|)
        # Higher cosine similarity = more similar documents
        vector = np.asarray(list(avg_tf_idf[label].values()))
        similarity = np.dot(vector, tf_idf_vector)
        mag_a = np.sqrt(np.dot(vector, vector))
        mag_b = np.sqrt(np.dot(tf_idf_vector, tf_idf_vector))
        denom = np.dot(mag_a, mag_b)
        similarity /= denom
        labels.append((label, similarity))
    return sorted(labels, key=itemgetter(1), reverse=True)

In [8]:
def compute_total_word_frequencies(dir_path, valid_words):
    '''
    :param dir_path: a path to the directory containing all the training samples
    :param valid_words: a dictionary where the keys are all the unique, valid
                        terms are present in the text file
    :param st: Lancaster Stemmer object 
    :return: a dictionary where keys = words and values = # of documents in which
            that word appears 
    '''
    frequencies = {word: 0 for word in valid_words}
    for file in os.listdir(dir_path):
        with open(dir_path + '\\' + file, "r") as f:
            content = f.read()
            num = int(file[0:len(file) - 4]) 
            words = nltk.word_tokenize(content)
            new_words = [word.lower() for word in words if word not in stop_words]
            new_words = [word.lower() for word in new_words if word in valid_words.keys()]
            new_words = set(new_words)
            for word in new_words:
                frequencies[word] += 1
    return frequencies

In [9]:
def compute_tf_idf_by_label(tf_idf, prior_probs, number_labels):
    '''
    This function will compute the total tf_idf score for
    each individual label
    :param tf_idf: a dictionary where keys = number of document and values = 
                    dictionary where keys = words and values = the tf_idf score 
                    of that word in that document
    :param prior_probs: a dictionary where keys = labels and values = the prob
                        of seeing that label (only used so I can grab the unique
                        labels for the document set)
    :param number_labels: a dictionary where the keys = numbers of a document and values
                            = the set of labels associated with it
    :return: a dictionary where keys = labels and values = sum of all tf-idf scores for
            all words that are in that label
    '''
    total_tf_by_label = {label: 0.0 for label in prior_probs.keys()}
    for num, vector in tf_idf.items():
        labels = number_labels[num]
        for l in labels:
            total_tf_by_label[l] += sum(list(vector.values()))
    return total_tf_by_label

In [10]:
def get_valid_words(dir_path, stop_words):
    '''
    Utility function that determines the set of valid words 
    to be used for classification and probability calculation
    :param dir_path: a path to the directory containing 
                    all the training samples
    :param stop_words: a set of words like "the", "and", etc
                        that should be stripped out of any computations
    :return: a Python dictionary where the keys = valid words and the 
            values = True, so we can use "key in dict" for future access
            in guaranteed constant time
    '''
    valid_words = defaultdict(bool)
    for file in os.listdir(dir_path):
        with open(dir_path + '\\' + file, "r") as f:
            content = f.read()
            words = nltk.word_tokenize(content)
            new_words = [word.lower() for word in words]
            new_words = [word.lower() for word in new_words if word.isalpha()]
            new_words = [word.lower() for word in new_words if word not in stop_words]
            new_words = set(new_words)
            for word in new_words:
                valid_words[word] = True
    return valid_words
            

In [11]:
def add_labels_to_samples(filename):
    '''
    This function iterates over the file containing all 
    labels for each numbered sample, and maps them together with
    a dictionary
    :param filename: path to the file with all the labels in it (assumes
                    the file is located in this directory)
    :return: a dictionary with keys = number of the training sample and
            values = the set of labels associated with it
            AND
            the same, but with the test samples. Keep them separate for easy
            access later
    '''
    number_labels_training = defaultdict(list)
    number_labels_test = defaultdict(list)
    with open(filename, "r") as f:
        for line in f:
            terms = line.split()
            if line[0:4] == "test":
                num = int(terms[0][5:len(terms[0])])  # Test number, so we can map this back to the proper label(s) later on
                number_labels_test[num] = terms[1:]
            else:
                num = int(terms[0][9:len(terms[0])])  
                number_labels_training[num] = terms[1:]
    return [number_labels_training, number_labels_test]

In [12]:
def compute_prior_probabilities(number_labels):
    '''
    This function will compute the prior probabilities
    P(y) = probability of seeing a label with a sample. 
    Note: since many samples have multiple labels, these prior
    probabilites will sum to > 1
    :param number_labels: dictionary where keys = number of training sample
                            and value = the list of labels associated with it
    :return: a dictionary where keys = the label and value = probability of seeing
            that label in the document list
    '''
    prior_probs = defaultdict(float)
    i = 0
    for num, labels in number_labels.items():
        for l in labels:
            if not prior_probs[l]:
                prior_probs[l] = 1
            else:
                prior_probs[l] += 1
        i += 1
    for label, freq in prior_probs.items():
        prior_probs[label] /= i
    return prior_probs

In [123]:
def word_vectors_for_mega_docs(dir_path, valid_words, number_labels, label_list):
    '''
    This function returns a list of all the words from documents
    of each label
    :param dir_path: a path to the directory containing all the training samples
    :param valid_words: a dictionary where the keys are all the unique, valid
                        terms are present in the text file
    :param number_labels: dictionary where keys = document # and values = the set of 
                            labels associated with those labels
    :param label_list: list of all the unique labels
    :return: a dictionary where keys = labels and values = a dictionary where 
            keys = words and values = a vector with all the valid words in 
            documents with that label
            AND
            the total # of words in the entire corpus
            AND
            a dictionary where keys = words and values = idf scores of those words
    '''
    mega_docs = {label: [] for label in label_list}
    total_words = 0
    idf = {word: 0 for word in valid_words}
    i = 0
    for file in os.listdir(dir_path):
        with open(dir_path + '\\' + file, "r") as f:
            i += 1
            content = f.read()
            num = int(file[0:len(file) - 4])
            labels = number_labels[num]
            words = nltk.word_tokenize(content)
            words = [word.lower() for word in words]
            new_words = [word.lower() for word in words if word in valid_words]
            unique_words = set(new_words)
            for word in unique_words:
                idf[word] += 1
            total_words += len(new_words)
            for l in labels:
                mega_docs[l] += new_words
    for word in idf.keys():
        idf[word] = 1 + np.log(i/(idf[word] + 1))
    return [mega_docs, total_words, idf]

In [14]:
def compute_tf_distributions(dir_path, valid_words, number_labels, label_list):
    '''
    This function creates one "mega document" for each class and computes
    the tf scores of that document
    :param dir_path: a path to the directory containing all the training samples
    :param valid_words: a dictionary where the keys are all the unique, valid
                        terms are present in the text file
    :param number_labels: dictionary where keys = document # and values = the set of 
                            labels associated with those labels
    :param label_list: list of all the unique labels
    :return: a dictionary where keys = labels and values = dictionary where keys 
            = words and values = the tf score of that word in the "mega-document"
            of that label 
            AND 
            a dictionary where keys = labels and values = dictionary where keys = words 
            and values =  the number of documents with that label in which that word appears
    '''
    tf = {label: {word: 0.0 for word in valid_words} for label in label_list}
    label_frequencies = {label: {word: 0 for word in valid_words} for label in label_list}
    for file in os.listdir(dir_path):
        with open(dir_path + '\\' + file, "r") as f:
            content = f.read()
            num = int(file[0:len(file) - 4])
            labels = number_labels[num]
            words = nltk.word_tokenize(content)
            new_words = [word.lower() for word in words]
            new_words = [word.lower() for word in new_words if word in valid_words]
            frequencies = Counter(new_words)
            unique_words = set(new_words)
            other_labels = set(label_list).difference(labels)
            for l in labels:
                for word in unique_words:
                    tf[l][word] += (frequencies[word]/len(new_words))
                    label_frequencies[l][word] += 1
    return [tf, label_frequencies]

In [15]:
def compute_idf_distributions(dir_path, valid_words, number_labels, label_list, frequencies):
    '''
    IDF is inverse document frequency, defined as 
    (# of total documents)/(# of occurrences of the word across all documents)
    This function will compute the idf score of each word across each label
    :param dir_path: a path to the directory containing all the training samples
    :param valid_words: a dictionary where the keys are all the unique, valid
                        terms are present in the text file
    :param number_labels: dictionary where keys = number of document and labels = 
                            the set of labels associated with that document
    :param label_list: list of all unique labels in the dataset
    :param frequencies: dictionary where keys = labels and values = Counter object
                        with frequencies of all terms in the label's "mega document"
    :return: a dictionary where keys = labels and values = dictionary where keys
            = words and values = idf score of that word
    '''
    idf_scores = {label: {word: 0.0 for word in valid_words} for label in label_list}
    label_counts = {label: 0 for label in label_list}
    for file in os.listdir(dir_path):
        num = int(file[0:len(file) - 4])
        labels = number_labels[num]
        for l in labels:
            label_counts[l] += 1
    for label, vector in idf_scores.items():
        for word in vector.keys():
            # Only one occurrence of lin-oil, and thus, the "mega document" is just
            # the single document itself. 
            idf_scores[label][word] = 1 + np.log(label_counts[label]/(frequencies[label][word] + 1))
    return idf_scores, label_counts

In [16]:
def compute_tf_idf_distributions(tf, idf):
    '''
    This function will compute the tf_idf score, grouped by label
    :param tf: Dictionary where keys = labels and values = dictionary
                where keys = words and values = tf score
    :param idf: Dictionary where keys = labels and values = dictionary
                where keys = words and values = idf score
    :return: Dictionary where keys = labels and values = dictionary
                where keys = words and values = tf_idf score
    '''
    tf_idf = {label: {word: 0.0 for word in valid_words} for label in tf.keys()}
    for label, vector in tf.items():
        for word, value in vector.items():
            # if label == "earn":
            #    print(word, tf[label][word], idf[label][word])
            tf_idf[label][word] = tf[label][word] * idf[label][word]
    return tf_idf

In [17]:
def rename_files(dir_path):
    '''
    Utility function designed to rename all files in any directory
    to a .txt file so they can be read from
    :param dir_path: directory of the files to be renamed
    '''
    for file in os.listdir(dir_path):
        filepath = dir_path + '\\' + file 
        os.rename(filepath, filepath+".txt")

In [160]:
def compute_frequencies_by_class(mega_docs, valid_words, label_list):
    '''
    This function computes the frequencies of all words by class. This is done because
    the outright frequencies are needed for Naive Bayes and conditional_probs can easily
    be obtained from this by dividing each entry by the number of elements in each "mega doc"
    :param mega_docs: a dictionary where keys = labels and values = vectors of all the
                        valid words present in documents with that label
    :param valid_words: a dictionary where the keys are all the unique, valid
                        terms are present in the text file
    :param label_list: list of all unique labels in the dataset
    :return: a dictionary where keys = labels and values = dictionary where keys = words
                and values = frequencies of that word in docs with that label
            AND
            a dictionary where keys = words and values = the total frequency of those words
            all documents throughout the corpus
    '''
    frequencies = {label: {word: 0 for word in valid_words} for label in label_list}
    total_frequencies = {word:0 for word in valid_words}
    for label, vector in mega_docs.items():
        freq = Counter(vector)
        for word in freq.keys():
            frequencies[label][word] += freq[word]
            total_frequencies[word] += freq[word]
    return [frequencies, total_frequencies]

In [161]:
if __name__ == '__main__':
    dir_path = "C:\\Users\\ksing\\OneDrive\\Documents\\Text Classifiers\\training"
    stop_words = set(stopwords.words('english'))
    valid_words = get_valid_words(dir_path, stop_words)
    number_labels_training, number_labels_test = add_labels_to_samples("cats.txt")
    prior_probs = compute_prior_probabilities(number_labels_training)
    
    mega_docs, total_num_words, idf = word_vectors_for_mega_docs(dir_path, valid_words, number_labels_training, prior_probs.keys())
    frequencies, total_frequencies = compute_frequencies_by_class(mega_docs, valid_words, prior_probs.keys())
    conditional_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}
    complement_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}

In [149]:
    '''
    tf_idf_by_label = {label: {word: 0 for word in valid_words} for label in prior_probs.keys()}
    tf_idf_total = {word: 0 for word in valid_words}
    for label, vector in tf_idf_by_label.items():
        for word in vector.keys():
            tf_idf_by_label[label][word] = (frequencies[label][word] * idf[word])
            tf_idf_total[word] += (frequencies[label][word] * idf[word])
    for label, vector in tf_idf_by_label.items():
        if label != "earn":
            continue
        print("Label:", label, len(mega_docs[label]))
        for word, score in sorted(vector.items(), key=itemgetter(1), reverse=True):
            if score == 0.0:
                continue
            print(word, score, tf_idf_total[word])
        print("\n")
    '''

'\ntf_idf_by_label = {label: {word: 0 for word in valid_words} for label in prior_probs.keys()}\ntf_idf_total = {word: 0 for word in valid_words}\nfor label, vector in tf_idf_by_label.items():\n    for word in vector.keys():\n        tf_idf_by_label[label][word] = (frequencies[label][word] * idf[word])\n        tf_idf_total[word] += (frequencies[label][word] * idf[word])\nfor label, vector in tf_idf_by_label.items():\n    if label != "earn":\n        continue\n    print("Label:", label, len(mega_docs[label]))\n    for word, score in sorted(vector.items(), key=itemgetter(1), reverse=True):\n        if score == 0.0:\n            continue\n        print(word, score, tf_idf_total[word])\n    print("\n")\n'

In [155]:
    '''
    conditional_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}
    complement_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}
    for label, vector in conditional_probs.items():
        denom = total_num_words - len(mega_docs[label]) + len(valid_words.keys())
        for word in vector.keys():
            conditional_probs[label][word] = np.log((tf_idf_by_label[label][word]+1)/(len(mega_docs[label]) + len(valid_words.keys())))
            # Odd, the values of complement_probs are the same regardless of the word, why is that
            # print(label, word, total_frequencies[word], frequencies[label][word])
            complement_probs[label][word] = np.log((tf_idf_total[word] - tf_idf_by_label[label][word] + 1)/denom)
    '''

'\nconditional_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}\ncomplement_probs = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}\nfor label, vector in conditional_probs.items():\n    denom = total_num_words - len(mega_docs[label]) + len(valid_words.keys())\n    for word in vector.keys():\n        conditional_probs[label][word] = np.log((tf_idf_by_label[label][word]+1)/(len(mega_docs[label]) + len(valid_words.keys())))\n        # Odd, the values of complement_probs are the same regardless of the word, why is that\n        # print(label, word, total_frequencies[word], frequencies[label][word])\n        complement_probs[label][word] = np.log((tf_idf_total[word] - tf_idf_by_label[label][word] + 1)/denom)\n'

In [162]:
    for label, vector in conditional_probs.items():
        denom = total_num_words - len(mega_docs[label]) + len(valid_words.keys())
        for word in vector.keys():
            conditional_probs[label][word] = np.log((frequencies[label][word]+1)/(len(mega_docs[label]) + len(valid_words.keys())))
            # Odd, the values of complement_probs are the same regardless of the word, why is that
            # print(label, word, total_frequencies[word], frequencies[label][word])
            complement_probs[label][word] = np.log((total_frequencies[word] - frequencies[label][word] + 1)/denom)

In [163]:
    complement_probs_normalized = {label: {word: 0.0 for word in valid_words} for label in prior_probs.keys()}
    for label, vector in complement_probs.items():
        print(label, normalize_term)
        normalize_term = np.sqrt(sum([(complement_probs[label][word]**2) for word in valid_words]))
        for word in vector.keys():
            complement_probs_normalized[label][word] = complement_probs[label][word] / normalize_term
    '''
    for label, vector in complement_probs_normalized.items():
        length = np.sqrt(sum([(complement_probs_normalized[label][word]**2) for word in valid_words]))
        print(label, length)
    '''

cocoa 1913.8088404915427
sorghum 1782.4417889739893
oat 1782.5175775484267
barley 1783.0584915217912
corn 1782.6948407506004
wheat 1779.890148611328
grain 1779.8148637124964
sunseed 1777.2785634702038
oilseed 1782.7611379714065
soybean 1780.9243528018635
sun-oil 1781.380202839575
soy-oil 1783.1195900952264
lin-oil 1782.8618050757598
veg-oil 1783.1391525120123
earn 1782.0028426146205
acq 1774.5090471752076
copper 1790.8327264736997
housing 1783.2668274903901
money-supply 1783.00187915611
coffee 1781.465702940765
ship 1782.4182074001592
sugar 1783.5692824509222
trade 1781.7007164741863
reserves 1778.7578651493916
soy-meal 1782.2502581932663
meal-feed 1782.8448810969771
rye 1782.7691231573979
cotton 1783.1551749240655
livestock 1782.6415543290207
carcass 1782.6311404856667
crude 1782.599645692607
nat-gas 1780.5027447603354
gnp 1782.3437008611002
cpi 1780.943974920371
interest 1782.2211932006949
money-fx 1779.0133372252426
bop 1776.1090693374847
rice 1781.4605005972173
tea 1782.64461216123

'\nfor label, vector in complement_probs_normalized.items():\n    length = np.sqrt(sum([(complement_probs_normalized[label][word]**2) for word in valid_words]))\n    print(label, length)\n'

In [87]:
    for label, vector in complement_probs.items():
        if label != "earn":
            continue
        print("Label:", label, len(mega_docs[label]), len(vector.keys()))
        for word, score in sorted(vector.items(), key=itemgetter(1), reverse=True):
            if score == 0.0:
                continue
            print(word, score)
        print("\n")

Label: earn 151040 23799
prev -9.34629703640943
countertrade -9.364285597954886
apr -9.413414761835133
bushels -9.42332570763097
nino -9.437133848333884
rains -9.43744768614196
eep -9.440739077800085
soybean -9.440823780324289
eiu -9.453666345098949
sorghum -9.456500348018807
maize -9.467084685704481
mar -9.46936376623988
csr -9.471070052824205
palm -9.471888453657163
aires -9.472921909994486
buenos -9.472921909994486
rice -9.473631319211053
unc -9.485374491373332
bales -9.491880341771756
harvest -9.494801645136684
enrollment -9.495370371747216
barley -9.501277097596708
glickman -9.503340626868306
cordoba -9.504207999529205
sunflower -9.504783726721524
poehl -9.504874165944292
soybeans -9.509311657197422
planted -9.509586900279436
cotton -9.514977142626627
hectares -9.516754980086557
beef -9.517734861983163
huckaby -9.518489547677047
oecd -9.522200382239607
cocoa -9.524737892824348
decoupling -9.52614521008702
wheat -9.52678104496071
indonesia -9.528401295998668
buffer -9.5297232731321

agpm -9.8792935063942
bagged -9.8792935063942
peanut -9.8792935063942
sunmeal -9.8792935063942
citruspulp -9.8792935063942
lo -9.8792935063942
rye -9.8792935063942
signups -9.8792935063942
pri -9.8792935063942
mamman -9.8792935063942
neutral -9.879312327225165
starch -9.879406348458467
congressman -9.879406348458467
coordinated -9.879406348458467
rare -9.879406348458467
rescheduling -9.879406348458467
peseta -9.879406348458467
grams -9.879406348458467
nato -9.879503737546845
hole -9.879503737546845
responsive -9.879503737546845
cuban -9.879503737546845
gilt -9.879503737546845
pioneer -9.879553270314567
assumption -9.880567640337134
calm -9.88123490827374
afternoon -9.881877080092128
friendly -9.882628875521949
worried -9.882636371320697
allies -9.88318543023964
deliver -9.88318543023964
fail -9.884782631289804
critical -9.88479984515555
attempting -9.88514869382692
advisers -9.886170072204903
lend -9.886170072204903
swaps -9.886439018715066
costa -9.887033382483287
microchips -9.887033

counselor -9.988166914558533
landing -9.988166914558533
reichhold -9.988166914558533
rebuilding -9.988166914558533
mij -9.988166914558533
graan -9.988166914558533
midmississippi -9.988166914558533
departure -9.988166914558533
pincus -9.988166914558533
remarkable -9.988166914558533
psa -9.988166914558533
proponents -9.988166914558533
kohl -9.988166914558533
tribunal -9.988166914558533
shv -9.988166914558533
seafarers -9.988166914558533
montedison -9.988166914558533
iso -9.988166914558533
members -9.988232064977465
comprising -9.988394397001267
package -9.98993679042877
parties -9.98997206020261
duty -9.990007633354129
mob -9.990177010631319
oxy -9.990177010631319
motion -9.990177010631319
steps -9.990182451215347
slide -9.990445588600611
negotiated -9.990880787654207
injunction -9.991144718783248
denis -9.991282858340464
alaska -9.991296334855559
discussions -9.991418422355673
cardenas -9.991658507842477
compensatory -9.991658507842477
gruppo -9.991658507842477
deflator -9.9916585078424

ongpin -10.079067719199136
lerner -10.079067719199136
pivot -10.079067719199136
oii -10.079067719199136
intercare -10.079067719199136
ranco -10.079067719199136
greyhound -10.079067719199136
sluggishness -10.079067719199136
educators -10.079067719199136
sputnik -10.079067719199136
billboard -10.079067719199136
hata -10.079067719199136
kangyo -10.079067719199136
kidokoro -10.079067719199136
tanaka -10.079067719199136
unreservedly -10.079067719199136
flattening -10.079067719199136
aberrational -10.079067719199136
evaporated -10.079067719199136
tea -10.079584535254462
prevailing -10.079584535254462
exchanged -10.080065177668912
officially -10.080612915285304
bpd -10.080893295425728
perhaps -10.082014292003379
bp -10.082356889239941
running -10.082428362194552
finding -10.08269804814204
bullish -10.08269804814204
contrary -10.08273010131839
publish -10.08273010131839
milled -10.08273010131839
dominant -10.082919103130195
emerging -10.082919103130195
disappointed -10.083237379142343
receives

sucres -10.15189371975485
habibie -10.15189371975485
seton -10.15189371975485
tesfay -10.15189371975485
adedeji -10.15189371975485
alcad -10.15189371975485
rgc -10.15189371975485
inl -10.15189371975485
gillette -10.15189371975485
obrinsky -10.15189371975485
rotation -10.15189371975485
vongarlem -10.15189371975485
goodwood -10.15189371975485
telecommunication -10.15189371975485
spares -10.15189371975485
skewed -10.15189371975485
wisenbaker -10.15189371975485
lichtblau -10.15189371975485
seasonable -10.15189371975485
muldoon -10.15189371975485
boltz -10.15189371975485
bindings -10.15189371975485
panamax -10.15189371975485
xerox -10.15189371975485
insects -10.15189371975485
jwt -10.15189371975485
simex -10.15189371975485
carteret -10.15189371975485
indium -10.15189371975485
feasability -10.15189371975485
gurria -10.15189371975485
hemsley -10.15189371975485
amadeus -10.15189371975485
surat -10.15189371975485
mixes -10.15189371975485
kyats -10.15189371975485
haubourdin -10.15189371975485
ka

bangkok -10.226496638633012
disadvantage -10.226496638633012
praised -10.226496638633012
withdrawing -10.226496638633012
women -10.226496638633012
counterpart -10.226496638633012
preparations -10.226496638633012
cax -10.226496638633012
bleak -10.226496638633012
eliminating -10.226496638633012
barred -10.226496638633012
matched -10.226496638633012
leon -10.226496638633012
sweeping -10.226496638633012
liberalise -10.226496638633012
unwanted -10.226496638633012
miner -10.226496638633012
terminates -10.226496638633012
extraction -10.226496638633012
hopeful -10.226496638633012
consummated -10.226496638633012
conflicts -10.226496638633012
sues -10.226496638633012
died -10.226496638633012
journal -10.226496638633012
walt -10.226496638633012
pleas -10.226496638633012
pazzionotto -10.226496638633012
expertise -10.226496638633012
hussein -10.226496638633012
germany -10.226612122653105
advisory -10.226733078362502
charles -10.226972049786518
buyout -10.22722660200689
manner -10.22744700542312
hit

simultaneous -10.303092412850136
graded -10.303092412850136
span -10.303092412850136
reminder -10.303092412850136
fluctuated -10.303092412850136
scenarios -10.303092412850136
rosenbaum -10.303092412850136
refine -10.303092412850136
abundance -10.303092412850136
persuading -10.303092412850136
genova -10.303092412850136
construct -10.303092412850136
liquified -10.303092412850136
alteration -10.303092412850136
coffin -10.303092412850136
encountering -10.303092412850136
ripe -10.303092412850136
owen -10.303092412850136
furnace -10.303092412850136
hawaiian -10.303092412850136
russians -10.303092412850136
deadly -10.303092412850136
liberia -10.303092412850136
shoji -10.303092412850136
amends -10.303092412850136
cove -10.303092412850136
anne -10.303092412850136
eli -10.303092412850136
peasants -10.303092412850136
stave -10.303092412850136
nilsson -10.303092412850136
amt -10.303092412850136
lard -10.303092412850136
titanium -10.303092412850136
grove -10.303092412850136
aged -10.303092412850136

whose -10.353540985034781
speaking -10.353718300970082
repayment -10.353722311429578
stopped -10.353843371339813
discovery -10.353978479425514
fibre -10.354261802426016
worker -10.354293691799741
strain -10.354293691799741
frankfurt -10.354293691799741
challenged -10.354293691799741
breached -10.354293691799741
wmx -10.354293691799741
sure -10.354499547516669
holds -10.354732048633814
extend -10.354828055829895
station -10.35525392123538
infrastructure -10.355280180095363
anxious -10.355280180095363
fired -10.355280180095363
southeastern -10.355280180095363
entered -10.355305523830497
submitted -10.355350143245795
hedging -10.355499077901234
soon -10.355637322599797
certificates -10.356101378550257
republic -10.356352786349959
run -10.356378595196615
ensure -10.356872288189804
help -10.35700173056171
consider -10.35721730262456
calculated -10.357420814695244
transferred -10.357596106215752
respect -10.358209330950846
saudi -10.358220449251622
herald -10.35844295294742
adviser -10.35844

dissatisfaction -10.428077074345884
ocf -10.428077074345884
hired -10.428077074345884
snap -10.428077074345884
itel -10.428077074345884
conceivable -10.428077074345884
asher -10.428077074345884
conversation -10.428077074345884
newfoundland -10.428077074345884
sdrs -10.428077074345884
andres -10.428077074345884
openness -10.428077074345884
groundwork -10.428077074345884
den -10.428077074345884
ok -10.428077074345884
spite -10.428077074345884
gather -10.428077074345884
symb -10.428077074345884
instrumentation -10.428077074345884
mania -10.428077074345884
walked -10.428077074345884
acquirers -10.428077074345884
unfortunate -10.428077074345884
merit -10.428077074345884
rpch -10.428077074345884
erik -10.428077074345884
particulary -10.428077074345884
drawings -10.428077074345884
weighing -10.428077074345884
incorporating -10.428077074345884
brink -10.428077074345884
objected -10.428077074345884
scene -10.428077074345884
manipulate -10.428077074345884
leroy -10.428077074345884
enjoin -10.428

recording -10.508501874104928
reconsider -10.508501874104928
advantages -10.508501874104928
offerings -10.508501874104928
anticipate -10.508501874104928
unissued -10.508501874104928
accurate -10.508501874104928
visa -10.508501874104928
exploring -10.508501874104928
contacts -10.508501874104928
elders -10.50852734035728
optimism -10.508850528897804
conducted -10.510285711474863
process -10.510397836321026
production -10.510507718448169
necessary -10.511153150494097
carey -10.511377186876386
sc -10.511377186876386
nitrogen -10.511377186876386
incorporated -10.511377186876386
parities -10.511377186876386
brisbane -10.511377186876386
practises -10.511377186876386
enjoy -10.511377186876386
liner -10.511377186876386
boone -10.511377186876386
legally -10.511377186876386
shoot -10.511377186876386
inroads -10.511377186876386
idaho -10.511377186876386
achievement -10.511377186876386
improves -10.511377186876386
challenging -10.511377186876386
bus -10.511377186876386
managerial -10.51144693587262

farmlands -10.541934456188914
zealander -10.541934456188914
reservoir -10.541934456188914
koichi -10.541934456188914
vows -10.541934456188914
confident -10.542204913118857
entity -10.543023903734655
transaction -10.543375086853262
card -10.543496744466397
buy -10.543666800390405
internal -10.543834919501524
efforts -10.544307412691563
examination -10.544496009611729
telesis -10.544496009611729
elaborating -10.544496009611729
sliding -10.544496009611729
benefitting -10.544496009611729
retains -10.544496009611729
eventual -10.544496009611729
existed -10.544496009611729
nsc -10.544496009611729
inevitably -10.544496009611729
amp -10.544496009611729
outlines -10.544496009611729
civil -10.545970746913047
security -10.546189166460596
almost -10.546306832147625
depend -10.54636536685411
icn -10.547150725897378
someone -10.547259113452627
advanced -10.547617550634595
participation -10.548064029995896
raw -10.548243740512069
reacted -10.548958901698851
recognized -10.548958901698851
worry -10.54

francoise -10.683844296615145
skidding -10.683844296615145
foray -10.683844296615145
seventies -10.683844296615145
plumbing -10.683844296615145
lip -10.683844296615145
leaps -10.683844296615145
stomach -10.683844296615145
mesmerized -10.683844296615145
discord -10.683844296615145
jimmy -10.683844296615145
lyonnais -10.683844296615145
smorada -10.683844296615145
deleted -10.683844296615145
hoenemeyer -10.683844296615145
azores -10.683844296615145
adriatic -10.683844296615145
tvt -10.683844296615145
ppt -10.683844296615145
genoc -10.683844296615145
constanza -10.683844296615145
mokha -10.683844296615145
buenaventura -10.683844296615145
benham -10.683844296615145
tweendecker -10.683844296615145
redel -10.683844296615145
disks -10.683844296615145
infringement -10.683844296615145
stryker -10.683844296615145
norma -10.683844296615145
inspeech -10.683844296615145
nearbys -10.683844296615145
hvy -10.683844296615145
tcf -10.683844296615145
suspiciously -10.683844296615145
steepening -10.6838442

rimmer -10.683844296615145
misalignment -10.683844296615145
gilliam -10.683844296615145
cablesystems -10.683844296615145
compton -10.683844296615145
aranibar -10.683844296615145
efps -10.683844296615145
generics -10.683844296615145
cruise -10.683844296615145
navon -10.683844296615145
echolab -10.683844296615145
dispatching -10.683844296615145
disasterous -10.683844296615145
modes -10.683844296615145
traveled -10.683844296615145
gay -10.683844296615145
mcclure -10.683844296615145
realises -10.683844296615145
nagano -10.683844296615145
sfem -10.683844296615145
uncoupling -10.683844296615145
ncso -10.683844296615145
rangoon -10.683844296615145
reclassified -10.683844296615145
inspectors -10.683844296615145
injects -10.683844296615145
fallows -10.683844296615145
paddocks -10.683844296615145
pulses -10.683844296615145
faq -10.683844296615145
bremen -10.683844296615145
shg -10.683844296615145
consignments -10.683844296615145
recycle -10.683844296615145
unofficially -10.683844296615145
overta

zulia -10.72450921452659
exempting -10.72450921452659
abusive -10.72450921452659
precluding -10.72450921452659
janatha -10.72450921452659
fading -10.72450921452659
hyo -10.72450921452659
xovr -10.72450921452659
hugo -10.72450921452659
driftwood -10.72450921452659
marker -10.72450921452659
gfc -10.72450921452659
ratify -10.72450921452659
springfield -10.72450921452659
defers -10.72450921452659
muskogee -10.72450921452659
belkacem -10.72450921452659
nabi -10.72450921452659
releated -10.72450921452659
measurex -10.72450921452659
excahnge -10.72450921452659
forsee -10.72450921452659
algiers -10.72450921452659
wll -10.72450921452659
admissions -10.72450921452659
wrap -10.72450921452659
judged -10.72450921452659
tactic -10.72450921452659
julian -10.72450921452659
balloon -10.72450921452659
hanging -10.72450921452659
cloud -10.72450921452659
wondering -10.72450921452659
preparatory -10.72450921452659
waf -10.72450921452659
twenty -10.72450921452659
tedi -10.72450921452659
son -10.724509214526

revlon -10.883995193450058
cost -10.884036063894
good -10.88403718233647
bauxite -10.88436322983943
invested -10.884443486575497
effect -10.886551149082793
valuation -10.887137556012785
floated -10.887137556012785
gas -10.887208695149516
stauffer -10.887609342063273
ducommun -10.887609342063273
deprived -10.887609342063273
brougher -10.887609342063273
performers -10.887609342063273
cgee -10.887609342063273
reinstated -10.887609342063273
fiscally -10.887609342063273
orion -10.887609342063273
persists -10.887609342063273
eaten -10.887609342063273
maxxam -10.887609342063273
sis -10.887609342063273
cheaply -10.887609342063273
pop -10.887609342063273
modular -10.887609342063273
tube -10.887609342063273
metromedia -10.887609342063273
lieberman -10.887609342063273
municipalities -10.887609342063273
liked -10.887609342063273
quartz -10.887609342063273
fragmented -10.887609342063273
valtek -10.887609342063273
beverages -10.887609342063273
hadson -10.887609342063273
tremendously -10.887609342063

exhcnmage -11.0933984174292
intw -11.0933984174292
ubks -11.0933984174292
approvcal -11.0933984174292
renedered -11.0933984174292
masy -11.0933984174292
summe -11.0933984174292
yasuda -11.0933984174292
undecided -11.0933984174292
lightened -11.0933984174292
lighten -11.0933984174292
acccounts -11.0933984174292
primagas -11.0933984174292
kingston -11.0933984174292
bgenf -11.0933984174292
guarini -11.0933984174292
bronfman -11.0933984174292
rosner -11.0933984174292
bowcan -11.0933984174292
degunzberg -11.0933984174292
ivanhoe -11.0933984174292
acra -11.0933984174292
trasaction -11.0933984174292
reinstating -11.0933984174292
stockholm -11.0933984174292
merchandiser -11.0933984174292
closeout -11.0933984174292
norwest -11.0933984174292
afternoons -11.0933984174292
strangely -11.0933984174292
holdover -11.0933984174292
wedndsday -11.0933984174292
avearge -11.0933984174292
wires -11.0933984174292
spokeman -11.0933984174292
thunholm -11.0933984174292
anders -11.0933984174292
contravene -11.09

ofp -11.0933984174292
utensil -11.0933984174292
populaires -11.0933984174292
caisses -11.0933984174292
amdc -11.0933984174292
channeled -11.0933984174292
modeled -11.0933984174292
octanos -11.0933984174292
petrochemicals -11.0933984174292
motions -11.0933984174292
ramifications -11.0933984174292
contends -11.0933984174292
prefered -11.0933984174292
moss -11.0933984174292
hampers -11.0933984174292
neighbour -11.0933984174292
urgings -11.0933984174292
seperately -11.0933984174292
tilted -11.0933984174292
buccaneers -11.0933984174292
unchecked -11.0933984174292
sigler -11.0933984174292
tesitified -11.0933984174292
seperation -11.0933984174292
apolonio -11.0933984174292
midrate -11.0933984174292
dynastar -11.0933984174292
barrie -11.0933984174292
unsworth -11.0933984174292
thermoplastics -11.0933984174292
industruments -11.0933984174292
lastly -11.0933984174292
groceries -11.0933984174292
rionda -11.0933984174292
epsi -11.0933984174292
rupee -11.0933984174292
pbx -11.0933984174292
padukka 

yaobang -11.0933984174292
unawares -11.0933984174292
bourgeois -11.0933984174292
phrase -11.0933984174292
doha -11.0933984174292
organiaation -11.0933984174292
hwt -11.0933984174292
cruden -11.0933984174292
budgeting -11.0933984174292
seputro -11.0933984174292
ramly -11.0933984174292
rachman -11.0933984174292
hardjoko -11.0933984174292
untoward -11.0933984174292
flagging -11.0933984174292
misgivings -11.0933984174292
multilaterals -11.0933984174292
troublesome -11.0933984174292
unacceptably -11.0933984174292
pooled -11.0933984174292
syndications -11.0933984174292
enact -11.0933984174292
respectful -11.0933984174292
capturing -11.0933984174292
catchword -11.0933984174292
profoundly -11.0933984174292
inference -11.0933984174292
planner -11.0933984174292
spate -11.0933984174292
leds -11.0933984174292
linder -11.0933984174292
velasco -11.0933984174292
summoned -11.0933984174292
additives -11.0933984174292
scarcity -11.0933984174292
businesess -11.0933984174292
vishnu -11.0933984174292
habe

grinder -11.0933984174292
compliment -11.0933984174292
stuff -11.0933984174292
wolkin -11.0933984174292
esther -11.0933984174292
deacon -11.0933984174292
orientated -11.0933984174292
endless -11.0933984174292
indistinguishable -11.0933984174292
permanence -11.0933984174292
thereof -11.0933984174292
reconsidered -11.0933984174292
reconsiders -11.0933984174292
reconsidering -11.0933984174292
phaseout -11.0933984174292
insulators -11.0933984174292
domes -11.0933984174292
norwood -11.0933984174292
xtr -11.0933984174292
ftr -11.0933984174292
hylbert -11.0933984174292
brinkman -11.0933984174292
baa -11.0933984174292
knobbs -11.0933984174292
fwf -11.0933984174292
ferroalloy -11.0933984174292
logo -11.0933984174292
mainenance -11.0933984174292
finalizes -11.0933984174292
halifax -11.0933984174292
scotian -11.0933984174292
thaddeus -11.0933984174292
optimstic -11.0933984174292
begain -11.0933984174292
teriffic -11.0933984174292
securitization -11.0933984174292
demonstrates -11.0933984174292
pal

ssiaa -11.0933984174292
barend -11.0933984174292
anxiety -11.0933984174292
intn -11.0933984174292
sensation -11.0933984174292
westerners -11.0933984174292
faw -11.0933984174292
overran -11.0933984174292
electrocuted -11.0933984174292
breawater -11.0933984174292
crtyz -11.0933984174292
commonwealty -11.0933984174292
rollover -11.0933984174292
seax -11.0933984174292
tails -11.0933984174292
plummeted -11.0933984174292
disenchanted -11.0933984174292
rallying -11.0933984174292
rausen -11.0933984174292
stocker -11.0933984174292
intex -11.0933984174292
blackman -11.0933984174292
syo -11.0933984174292
uhler -11.0933984174292
mississauga -11.0933984174292
foreclosing -11.0933984174292
composition -11.0933984174292
bokma -11.0933984174292
beleggineng -11.0933984174292
beleggingen -11.0933984174292
hendrik -11.0933984174292
slaine -11.0933984174292
pair -11.0933984174292
tow -11.0933984174292
proceeded -11.0933984174292
grind -11.0933984174292
bankworkers -11.0933984174292
fleischer -11.093398417

banca -11.536388133636956
boeing -11.536388133636956
anchor -11.536388133636956
perform -11.536388133636956
qsv -11.538305778788493
repurchasing -11.538305778788493
kriwet -11.538305778788493
ruhr -11.538305778788493
appraisals -11.538305778788493
payables -11.538305778788493
deutz -11.538305778788493
stones -11.538305778788493
subdue -11.538305778788493
improbability -11.538305778788493
bbd -11.538305778788493
accentuated -11.538305778788493
camillo -11.538305778788493
gancia -11.538305778788493
trak -11.538305778788493
gundie -11.538305778788493
conner -11.538305778788493
estimation -11.538305778788493
cater -11.538305778788493
pertti -11.538305778788493
voutilainen -11.538305778788493
valleycast -11.538305778788493
bruks -11.538305778788493
nippert -11.538305778788493
mgcpv -11.538305778788493
benficiaries -11.538305778788493
ladder -11.538305778788493
doctors -11.538305778788493
congestive -11.538305778788493
diabetic -11.538305778788493
mrk -11.538305778788493
prostate -11.5383057

turner -12.334881955595945
criminal -12.334881955595945
declared -12.33556115808531
rocky -12.348873334532483
pan -12.350872038166154
taxable -12.370347777953848
jan -12.37740049352614
alusuisse -12.378999546126765
effected -12.378999546126767
norcen -12.383173738157591
cinema -12.38669357775815
barnes -12.399894356222262
jucker -12.399894356222262
mcdonald -12.399894356222262
flowers -12.413059188626846
zayre -12.413059188626846
periods -12.413088357963117
dividends -12.414632655877014
guardian -12.4239457997713
earnings -12.428000672794331
brennan -12.438534929728979
capitalized -12.441368336719615
accountants -12.448590001218093
mellon -12.451620446874339
volvo -12.454336179340617
fairfax -12.454336179340617
pretax -12.459707599631225
dec -12.474926363533575
nov -12.498213243639585
distributions -12.500270160867467
jaguar -12.500659447835035
sandoz -12.500659447835035
includes -12.534564566349754
att -12.54152468772978
franklin -12.547660785794696
prior -12.589705809246922
liquidati

dlta -13.097824752957175
solds -13.097824752957175
dispositions -13.097824752957175
aidc -13.097824752957175
ocelot -13.097824752957175
questioner -13.097824752957175
fiat -13.097824752957175
flick -13.097824752957175
oub -13.097824752957175
stockbroking -13.097824752957175
hoon -13.097824752957175
loh -13.097824752957175
ventured -13.097824752957175
fuji -13.097824752957175
reckitt -13.097824752957175
colman -13.097824752957175
bayer -13.097824752957175
translation -13.097824752957175
polyurethanes -13.097824752957175
coating -13.097824752957175
howden -13.097824752957175
nederlandsche -13.097824752957175
koninklijke -13.097824752957175
staalfabrieken -13.097824752957175
fo -13.097824752957175
sludge -13.097824752957175
landfill -13.097824752957175
remediation -13.097824752957175
panoche -13.097824752957175
agengy -13.097824752957175
califoirnia -13.097824752957175
ent -13.097824752957175
settlem -13.097824752957175
vine -13.097824752957175
pleasing -13.097824752957175
yellowknife -13

bankiers -13.097824752957175
beleggingscompagnie -13.097824752957175
rabo -13.097824752957175
verzekeringsgroep -13.097824752957175
lanschot -13.097824752957175
homemade -13.097824752957175
bjic -13.097824752957175
bpco -13.097824752957175
bonneville -13.097824752957175
admirably -13.097824752957175
tuned -13.097824752957175
clri -13.097824752957175
tcel -13.097824752957175
gwti -13.097824752957175
groundwater -13.097824752957175
qpon -13.097824752957175
ptras -13.097824752957175
chic -13.097824752957175
disposing -13.097824752957175
strng -13.097824752957175
semegran -13.097824752957175
probablility -13.097824752957175
abbett -13.097824752957175
shre -13.097824752957175
nwph -13.097824752957175
raym -13.097824752957175
nicor -13.097824752957175
handleman -13.097824752957175
hdl -13.097824752957175
perini -13.097824752957175
banknote -13.097824752957175
ibk -13.097824752957175
sbm -13.097824752957175
asbs -13.097824752957175
reute -13.097824752957175
boveri -13.097824752957175
sidy -13

gldc -13.097824752957175
vested -13.097824752957175
knudsen -13.097824752957175
mrn -13.097824752957175
dcny -13.097824752957175
corpoartion -13.097824752957175
dcy -13.097824752957175
saj -13.097824752957175
ati -13.097824752957175
atim -13.097824752957175
sav -13.097824752957175
lfio -13.097824752957175
horne -13.097824752957175
eldb -13.097824752957175
eldorado -13.097824752957175
admac -13.097824752957175
qt -13.097824752957175
jpac -13.097824752957175
pps -13.097824752957175
paco -13.097824752957175
bionomic -13.097824752957175
bsii -13.097824752957175
sasles -13.097824752957175
fbgia -13.097824752957175
infl -13.097824752957175
megaphone -13.097824752957175
res -13.097824752957175
rgs -13.097824752957175
srv -13.097824752957175
crmr -13.097824752957175
cramer -13.097824752957175
oar -13.097824752957175
clearning -13.097824752957175
moritorium -13.097824752957175
ogden -13.097824752957175
bailout -13.097824752957175
rlifa -13.097824752957175
novr -13.097824752957175
novar -13.0978

In [129]:
    for label, vector in conditional_probs.items():
        if label != "earn":
            continue
        print("Label:", label, len(mega_docs[label]))
        for word, score in sorted(vector.items(), key=itemgetter(1), reverse=True):
            if score == 0.0:
                continue
            print(word, score)
        print("\n")

Label: earn 151040
vs -2.093389793444158
mln -2.513607560692844
cts -2.621883787375367
loss -2.7115554653861937
net -2.831634325519871
dlrs -3.0144289914899667
profit -3.2060943261564905
shr -3.22446572268939
year -3.4961714100857306
revs -3.5856025361278485
lt -3.6189110864319036
qtr -3.6363724712912835
said -3.7107534845810584
oper -3.7445276412041073
share -3.8324802304652796
billion -3.951180655610537
company -3.9931456594449157
dividend -4.021353120911253
quarter -4.053434606699402
sales -4.097241982142668
inc -4.101733390594929
note -4.105267931227764
earnings -4.15920958199126
avg -4.202222027111682
shrs -4.212581350035083
record -4.262320398494335
april -4.329886982785653
corp -4.357562782671637
pct -4.3634532761985625
div -4.3781944271284985
stock -4.398516868568333
prior -4.416201806252432
includes -4.511072586682071
per -4.538448527330369
split -4.553551353627624
qtly -4.5855537427382
gain -4.606875913124366
pay -4.609106711790941
first -4.622225031263904
nine -4.63707389891

television -7.5672086323463885
calendar -7.571642714587134
depressed -7.574897227165144
campbell -7.5749787425054365
bfg -7.576345373560068
adsteam -7.576345373560068
vogtle -7.576345373560068
outokumpu -7.576345373560068
gencorp -7.577557101546615
filing -7.580934810240197
bay -7.581435572669731
dairy -7.581435572669731
transamerica -7.581504067890333
see -7.582278547575254
southwestern -7.584940609210164
required -7.587309679402043
subordinated -7.58951647328301
going -7.59043396005353
valley -7.593177779797925
conversion -7.593177779797925
boosted -7.593339238303638
accrued -7.594304942389196
illinois -7.597086202548868
many -7.597458199828254
chain -7.598762456685437
falling -7.59956887759935
northern -7.600389919328568
double -7.6007606315153975
portion -7.6007606315153975
selling -7.602545390687667
personnel -7.6031447905784875
truck -7.6031447905784875
gpu -7.60898214453198
avery -7.60898214453198
fiber -7.60898214453198
nwa -7.60898214453198
woolworths -7.60898214453198
singapo

lift -8.255473425007368
pro -8.257041223690006
jacobs -8.257041223690006
quantech -8.258392779514999
abn -8.258392779514999
tokheim -8.258392779514999
accuray -8.258392779514999
knutson -8.258392779514999
irs -8.258392779514999
dyr -8.258392779514999
harding -8.258392779514999
robins -8.258392779514999
coleman -8.258392779514999
weatherford -8.258392779514999
nissan -8.258392779514999
polaroid -8.258392779514999
christians -8.258392779514999
stockbroking -8.258392779514999
coleco -8.258392779514999
foresman -8.258392779514999
telxon -8.258392779514999
boddington -8.258392779514999
laurentian -8.258392779514999
spar -8.258392779514999
asea -8.258392779514999
selz -8.258392779514999
furman -8.258392779514999
invacare -8.258392779514999
tofutti -8.258392779514999
laenderbank -8.258392779514999
fruehauf -8.258392779514999
stuttgart -8.258392779514999
sulpetro -8.258392779514999
shing -8.258392779514999
taikoo -8.258392779514999
sheehy -8.258392779514999
money -8.262550591662679
except -8.2

mccrann -8.711445161441985
emerald -8.711445161441985
mohawk -8.711445161441985
initials -8.711445161441985
sport -8.711445161441985
whipple -8.711445161441985
bdm -8.711445161441985
corby -8.711445161441985
kingsport -8.711445161441985
poy -8.711445161441985
hargreaves -8.711445161441985
panin -8.711445161441985
crest -8.711445161441985
lummus -8.711445161441985
widdrington -8.711445161441985
galaxy -8.711445161441985
transponders -8.711445161441985
gevirtz -8.711445161441985
dealers -8.713952160096014
obtaining -8.71634532780413
giant -8.71634532780413
gradually -8.71634532780413
equally -8.71634532780413
electrical -8.71634532780413
discovery -8.71634532780413
proposing -8.71634532780413
handling -8.71634532780413
edward -8.71634532780413
upturn -8.71634532780413
try -8.716977125934775
else -8.722860654913998
turned -8.722860654913998
preference -8.722860654913998
roughly -8.722860654913998
offsetting -8.722860654913998
aggressively -8.722860654913998
proceeding -8.722860654913998
c

depreciating -9.09969473215806
equitorial -9.09969473215806
lessors -9.09969473215806
transponder -9.09969473215806
phlcorp -9.09969473215806
condemnation -9.09969473215806
approach -9.103583405492692
stabilize -9.103583405492692
thus -9.103583405492692
example -9.106978580063355
belgian -9.106978580063355
matter -9.106978580063355
really -9.110311929914525
nothing -9.110311929914525
gap -9.110311929914525
intend -9.113585897015165
bringing -9.113585897015165
commitment -9.119964735963968
leave -9.119964735963968
priced -9.119964735963968
expire -9.119964735963968
setting -9.119964735963968
running -9.123073810018111
appropriate -9.12613192537478
yields -9.12613192537478
suggested -9.129140901431116
seeks -9.129140901431116
talk -9.132102459531351
mexico -9.132102459531351
course -9.132102459531351
remarks -9.137889758339046
decide -9.140718511420493
sent -9.140718511420493
gv -9.142104809365525
genetic -9.142104809365525
hmsb -9.142104809365525
pirie -9.142104809365525
crn -9.14210480

expiring -9.266967689650018
pittsburgh -9.266967689650018
antonio -9.266967689650018
shifting -9.266967689650018
swift -9.266967689650018
totals -9.266967689650018
training -9.266967689650018
materially -9.266967689650018
dismissal -9.266967689650018
procurement -9.266967689650018
congress -9.270588789103495
fidelity -9.27980419871098
accumulating -9.27980419871098
defending -9.27980419871098
pose -9.27980419871098
pesetas -9.27980419871098
borrower -9.27980419871098
reacted -9.27980419871098
hedging -9.27980419871098
comfortable -9.27980419871098
tangible -9.27980419871098
facts -9.27980419871098
viable -9.27980419871098
dept -9.27980419871098
allegations -9.27980419871098
tropical -9.27980419871098
roger -9.27980419871098
denominated -9.27980419871098
conventional -9.27980419871098
offerings -9.27980419871098
plains -9.27980419871098
scenario -9.27980419871098
anticipate -9.27980419871098
transition -9.27980419871098
ample -9.27980419871098
recognized -9.27980419871098
hoc -9.2798041

crna -9.742905177286241
multiples -9.742905177286241
heminghaus -9.742905177286241
delcared -9.742905177286241
marketshare -9.742905177286241
kji -9.742905177286241
estmates -9.742905177286241
roadway -9.742905177286241
plazas -9.742905177286241
dion -9.742905177286241
dionics -9.742905177286241
waxm -9.742905177286241
waxman -9.742905177286241
wtel -9.742905177286241
bkrs -9.742905177286241
fwch -9.742905177286241
cnlg -9.742905177286241
conolog -9.742905177286241
sho -9.742905177286241
starrett -9.742905177286241
mltf -9.742905177286241
convest -9.742905177286241
cep -9.742905177286241
dylex -9.742905177286241
ins -9.742905177286241
callon -9.742905177286241
clnp -9.742905177286241
csba -9.742905177286241
ciri -9.742905177286241
ciro -9.742905177286241
sasco -9.742905177286241
psli -9.742905177286241
awk -9.742905177286241
pw -9.742905177286241
atsushi -9.742905177286241
nsany -9.742905177286241
rationalizations -9.742905177286241
miglio -9.742905177286241
nold -9.742905177286241
nol

eaton -9.742905177286241
vance -9.742905177286241
eavn -9.742905177286241
adx -9.742905177286241
stdl -9.742905177286241
strb -9.742905177286241
strober -9.742905177286241
regulary -9.742905177286241
shawmut -9.742905177286241
shas -9.742905177286241
cjiia -9.742905177286241
cji -9.742905177286241
brenco -9.742905177286241
bren -9.742905177286241
bmtc -9.742905177286241
reorganized -9.742905177286241
dyson -9.742905177286241
mccaughan -9.742905177286241
kubota -9.742905177286241
aluminimum -9.742905177286241
cleansing -9.742905177286241
presupposes -9.742905177286241
alternations -9.742905177286241
previoius -9.742905177286241
aepi -9.742905177286241
aep -9.742905177286241
benficiaries -9.742905177286241
ladder -9.742905177286241
reversions -9.742905177286241
ffws -9.742905177286241
farwest -9.742905177286241
hemo -9.742905177286241
hemotec -9.742905177286241
dryr -9.742905177286241
cream -9.742905177286241
turbo -9.742905177286241
turismo -9.742905177286241
sociedad -9.742905177286241

astrocom -9.742905177286241
pae -9.742905177286241
amsh -9.742905177286241
iinc -9.742905177286241
pti -9.742905177286241
emulator -9.742905177286241
gmtif -9.742905177286241
kiddie -9.742905177286241
kidd -9.742905177286241
telecredit -9.742905177286241
tcrd -9.742905177286241
authroization -9.742905177286241
scc -9.742905177286241
lasr -9.742905177286241
qualifed -9.742905177286241
lesco -9.742905177286241
lsco -9.742905177286241
seis -9.742905177286241
varlen -9.742905177286241
vrln -9.742905177286241
aati -9.742905177286241
primark -9.742905177286241
pmk -9.742905177286241
sax -9.742905177286241
stealing -9.742905177286241
faultered -9.742905177286241
propelled -9.742905177286241
slew -9.742905177286241
unwrapped -9.742905177286241
vax -9.742905177286241
medal -9.742905177286241
games -9.742905177286241
exhibition -9.742905177286241
reichart -9.742905177286241
olympics -9.742905177286241
cmi -9.742905177286241
drh -9.742905177286241
atlantis -9.742905177286241
datc -9.7429051772862

creative -9.812820200815214
midafternoon -9.812820200815214
hia -9.812820200815214
metallgesellschaft -9.812820200815214
midmorning -9.812820200815214
stagnation -9.812820200815214
hydro -9.812820200815214
norsk -9.812820200815214
saga -9.812820200815214
fledgling -9.812820200815214
defenses -9.812820200815214
logical -9.812820200815214
ted -9.812820200815214
meston -9.812820200815214
cch -9.812820200815214
towbin -9.812820200815214
unterberg -9.812820200815214
modulaire -9.812820200815214
modx -9.812820200815214
morton -9.812820200815214
regal -9.812820200815214
delisted -9.812820200815214
cardiovascular -9.812820200815214
hvt -9.812820200815214
vitamins -9.812820200815214
culminating -9.812820200815214
dipping -9.812820200815214
frontier -9.812820200815214
wilf -9.812820200815214
fashion -9.812820200815214
passive -9.812820200815214
implies -9.812820200815214
parity -9.812820200815214
estimating -9.812820200815214
softened -9.812820200815214
geographical -9.812820200815214
cracker -9

closures -9.901348197119974
overly -9.901348197119974
calif -9.901348197119974
hbj -9.901348197119974
consequently -9.901348197119974
funeral -9.901348197119974
words -9.91344763430753
dominated -9.91344763430753
realise -9.91344763430753
alfred -9.91344763430753
harold -9.91344763430753
platform -9.91344763430753
mt -9.91344763430753
renewal -9.91344763430753
locally -9.91344763430753
arguing -9.91344763430753
dampen -9.91344763430753
wales -9.91344763430753
notified -9.91344763430753
representation -9.91344763430753
complement -9.91344763430753
directed -9.91344763430753
withheld -9.91344763430753
adopts -9.91344763430753
recording -9.91344763430753
alabama -9.91344763430753
francisco -9.91344763430753
rated -9.91344763430753
heated -9.91344763430753
reconsider -9.91344763430753
assessment -9.91344763430753
abolish -9.91344763430753
extract -9.91344763430753
fought -9.91344763430753
planters -9.91344763430753
advantages -9.91344763430753
donaldson -9.91344763430753
young -9.913447634

portugal -12.071620829445909
increaes -12.071620829445909
dominican -12.071620829445909
turkey -12.071620829445909
decreases -12.071620829445909
anywhere -12.071620829445909
trillion -12.071620829445909
causing -12.071620829445909
childhood -12.071620829445909
persecution -12.071620829445909
prison -12.071620829445909
freed -12.071620829445909
stunned -12.071620829445909
ivan -12.071620829445909
nazi -12.071620829445909
accuse -12.071620829445909
herstatt -12.071620829445909
jailed -12.071620829445909
prosecution -12.071620829445909
crash -12.071620829445909
paranoia -12.071620829445909
danny -12.071620829445909
trial -12.071620829445909
sentenced -12.071620829445909
collapsed -12.071620829445909
manipulating -12.071620829445909
overturned -12.071620829445909
dattel -12.071620829445909
interval -12.071620829445909
stand -12.071620829445909
crucial -12.071620829445909
combat -12.071620829445909
rubio -12.071620829445909
ramnath -12.071620829445909
luis -12.071620829445909
critical -12.0

entrenched -12.071620829445909
pronounced -12.071620829445909
constructive -12.071620829445909
tolerate -12.071620829445909
leuzzi -12.071620829445909
factored -12.071620829445909
leslie -12.071620829445909
supports -12.071620829445909
leaning -12.071620829445909
reassessing -12.071620829445909
electrics -12.071620829445909
conclusion -12.071620829445909
advisers -12.071620829445909
merging -12.071620829445909
potato -12.071620829445909
biffex -12.071620829445909
baltic -12.071620829445909
amalgamation -12.071620829445909
soya -12.071620829445909
instructed -12.071620829445909
imminent -12.071620829445909
cgct -12.071620829445909
lodged -12.071620829445909
lm -12.071620829445909
telephoniques -12.071620829445909
battling -12.071620829445909
alain -12.071620829445909
ericsson -12.071620829445909
anonyme -12.071620829445909
bouygues -12.071620829445909
matra -12.071620829445909
sat -12.071620829445909
gerard -12.071620829445909
madelin -12.071620829445909
longuet -12.071620829445909
cons

policymaking -12.071620829445909
urgent -12.071620829445909
unravel -12.071620829445909
hasten -12.071620829445909
imply -12.071620829445909
sunday -12.071620829445909
reality -12.071620829445909
reminder -12.071620829445909
realisation -12.071620829445909
signaled -12.071620829445909
explain -12.071620829445909
policymakers -12.071620829445909
tongue -12.071620829445909
everybody -12.071620829445909
soaring -12.071620829445909
sovereignty -12.071620829445909
adhere -12.071620829445909
departing -12.071620829445909
mulford -12.071620829445909
undermined -12.071620829445909
tolerance -12.071620829445909
stimulative -12.071620829445909
unsettled -12.071620829445909
policymaker -12.071620829445909
upset -12.071620829445909
stimulating -12.071620829445909
flouting -12.071620829445909
enviroment -12.071620829445909
alternate -12.071620829445909
conglommerate -12.071620829445909
intrawest -12.071620829445909
exhcnmage -12.071620829445909
intw -12.071620829445909
ubks -12.071620829445909
ssbk

unveiled -12.071620829445909
rattled -12.071620829445909
sendai -12.071620829445909
skills -12.071620829445909
honour -12.071620829445909
kilobit -12.071620829445909
obstacles -12.071620829445909
leads -12.071620829445909
chipmaker -12.071620829445909
revising -12.071620829445909
tyln -12.071620829445909
furnace -12.071620829445909
tylan -12.071620829445909
quel -12.071620829445909
mauritian -12.071620829445909
deodorised -12.071620829445909
rbd -12.071620829445909
stearine -12.071620829445909
bleached -12.071620829445909
lombard -12.071620829445909
overview -12.071620829445909
finely -12.071620829445909
knowledge -12.071620829445909
pains -12.071620829445909
learn -12.071620829445909
draining -12.071620829445909
drained -12.071620829445909
commanded -12.071620829445909
excesses -12.071620829445909
undertone -12.071620829445909
generous -12.071620829445909
fletcher -12.071620829445909
fcl -12.071620829445909
nzfp -12.071620829445909
amcor -12.071620829445909
wattie -12.071620829445909


anxiously -12.071620829445909
expedite -12.071620829445909
contact -12.071620829445909
praising -12.071620829445909
sudan -12.071620829445909
arrive -12.071620829445909
tanzania -12.071620829445909
mauritania -12.071620829445909
colza -12.071620829445909
bangladesh -12.071620829445909
ethiopia -12.071620829445909
illubabor -12.071620829445909
cyclone -12.071620829445909
victims -12.071620829445909
displaced -12.071620829445909
fao -12.071620829445909
wollo -12.071620829445909
vanuatu -12.071620829445909
prat -12.071620829445909
sm -12.071620829445909
receptive -12.071620829445909
hamper -12.071620829445909
initiate -12.071620829445909
rtb -12.071620829445909
seeming -12.071620829445909
pursued -12.071620829445909
trademarks -12.071620829445909
mayp -12.071620829445909
ang -12.071620829445909
edp -12.071620829445909
mep -12.071620829445909
partnners -12.071620829445909
authorisations -12.071620829445909
restitution -12.071620829445909
sgp -12.071620829445909
schering -12.071620829445909

stored -12.071620829445909
efficiently -12.071620829445909
borders -12.071620829445909
adherence -12.071620829445909
geelong -12.071620829445909
cartier -12.071620829445909
segregate -12.071620829445909
submission -12.071620829445909
appendix -12.071620829445909
asks -12.071620829445909
purpose -12.071620829445909
forcefully -12.071620829445909
sympathizing -12.071620829445909
dictates -12.071620829445909
minds -12.071620829445909
withdrawals -12.071620829445909
machakos -12.071620829445909
irrigate -12.071620829445909
nyeri -12.071620829445909
determines -12.071620829445909
kirinyaga -12.071620829445909
embu -12.071620829445909
meru -12.071620829445909
thika -12.071620829445909
majeure -12.071620829445909
pmt -12.071620829445909
cherry -12.071620829445909
plantation -12.071620829445909
arabica -12.071620829445909
monsooned -12.071620829445909
bangalore -12.071620829445909
robusta -12.071620829445909
bbb -12.071620829445909
liqudity -12.071620829445909
rbi -12.071620829445909
malhotra 

hgic -12.071620829445909
harleysville -12.071620829445909
kr -12.071620829445909
kroger -12.071620829445909
metalicos -12.071620829445909
rivalling -12.071620829445909
duffour -12.071620829445909
assocation -12.071620829445909
associes -12.071620829445909
carburos -12.071620829445909
csac -12.071620829445909
rvial -12.071620829445909
igon -12.071620829445909
reconfirm -12.071620829445909
consumes -12.071620829445909
malt -12.071620829445909
opc -12.071620829445909
sustainable -12.071620829445909
gobert -12.071620829445909
pgrt -12.071620829445909
robust -12.071620829445909
cushion -12.071620829445909
txc -12.071620829445909
peters -12.071620829445909
maciej -12.071620829445909
plexman -12.071620829445909
brake -12.071620829445909
sooner -12.071620829445909
jam -12.071620829445909
yardstick -12.071620829445909
horse -12.071620829445909
misconception -12.071620829445909
plug -12.071620829445909
waited -12.071620829445909
corridors -12.071620829445909
whilst -12.071620829445909
politics -

distortion -12.071620829445909
tackles -12.071620829445909
complements -12.071620829445909
measuring -12.071620829445909
matures -12.071620829445909
array -12.071620829445909
collaterals -12.071620829445909
counterparty -12.071620829445909
assignment -12.071620829445909
avaition -12.071620829445909
okay -12.071620829445909
relatives -12.071620829445909
commerice -12.071620829445909
ceding -12.071620829445909
hyo -12.071620829445909
melted -12.071620829445909
shoots -12.071620829445909
exposed -12.071620829445909
teresa -12.071620829445909
tomaszewska -12.071620829445909
meteorology -12.071620829445909
spells -12.071620829445909
truk -12.071620829445909
intransigence -12.071620829445909
bracing -12.071620829445909
incorporating -12.071620829445909
softening -12.071620829445909
manuevering -12.071620829445909
melville -12.071620829445909
xovr -12.071620829445909
exovir -12.071620829445909
torrential -12.071620829445909
sunflowers -12.071620829445909
heaviest -12.071620829445909
paralysed

stagnates -12.071620829445909
underlines -12.071620829445909
approached -12.071620829445909
legislate -12.071620829445909
linkages -12.071620829445909
poses -12.071620829445909
equalizer -12.071620829445909
waiver -12.071620829445909
arnott -12.071620829445909
flaws -12.071620829445909
dubious -12.071620829445909
punta -12.071620829445909
este -12.071620829445909
contradicted -12.071620829445909
continually -12.071620829445909
reviews -12.071620829445909
sheikh -12.071620829445909
fernando -12.071620829445909
suppose -12.071620829445909
baseless -12.071620829445909
rumour -12.071620829445909
alvite -12.071620829445909
clever -12.071620829445909
staffers -12.071620829445909
proponents -12.071620829445909
arland -12.071620829445909
foley -12.071620829445909
stenholm -12.071620829445909
moos -12.071620829445909
substitution -12.071620829445909
crossroads -12.071620829445909
assesses -12.071620829445909
pertains -12.071620829445909
exerted -12.071620829445909
devalue -12.071620829445909
br

skbs -12.071620829445909
smbs -12.071620829445909
persuaded -12.071620829445909
combatting -12.071620829445909
duesseldorf -12.071620829445909
pointer -12.071620829445909
symposium -12.071620829445909
overshooting -12.071620829445909
respark -12.071620829445909
realised -12.071620829445909
responsiveness -12.071620829445909
mountains -12.071620829445909
twicce -12.071620829445909
pocono -12.071620829445909
cjn -12.071620829445909
honeymoon -12.071620829445909
maximizing -12.071620829445909
pwj -12.071620829445909
mitigated -12.071620829445909
nni -12.071620829445909
soyfood -12.071620829445909
pest -12.071620829445909
remarked -12.071620829445909
schlecht -12.071620829445909
tietmeyer -12.071620829445909
uganda -12.071620829445909
salaam -12.071620829445909
dar -12.071620829445909
bomani -12.071620829445909
rebel -12.071620829445909
feburary -12.071620829445909
ugandan -12.071620829445909
makumbi -12.071620829445909
sanpao -12.071620829445909
issuable -12.071620829445909
rothschilds -1

fluid -12.071620829445909
valorem -12.071620829445909
purified -12.071620829445909
posti -12.071620829445909
nicolas -12.071620829445909
datel -12.071620829445909
tagaris -12.071620829445909
paccar -12.071620829445909
pcar -12.071620829445909
psg -12.071620829445909
controllers -12.071620829445909
seniority -12.071620829445909
dockworkers -12.071620829445909
lugar -12.071620829445909
enrollment -12.071620829445909
predominate -12.071620829445909
enroll -12.071620829445909
signups -12.071620829445909
pittston -12.071620829445909
armendariz -12.071620829445909
figueras -12.071620829445909
despatch -12.071620829445909
balked -12.071620829445909
overwhelming -12.071620829445909
conant -12.071620829445909
erection -12.071620829445909
authors -12.071620829445909
stories -12.071620829445909
obolensky -12.071620829445909
deceased -12.071620829445909
fpa -12.071620829445909
carlin -12.071620829445909
sulfide -12.071620829445909
goldstrike -12.071620829445909
mineralization -12.071620829445909
f

bipartisan -12.071620829445909
disagreements -12.071620829445909
belfast -12.071620829445909
bryson -12.071620829445909
communicate -12.071620829445909
outsanding -12.071620829445909
acquisititon -12.071620829445909
torino -12.071620829445909
paolo -12.071620829445909
rescind -12.071620829445909
associaiton -12.071620829445909
ramirez -12.071620829445909
portend -12.071620829445909
bodes -12.071620829445909
maria -12.071620829445909
stikers -12.071620829445909
hints -12.071620829445909
automakers -12.071620829445909
disparity -12.071620829445909
strengths -12.071620829445909
resevoir -12.071620829445909
mepsi -12.071620829445909
riskier -12.071620829445909
receding -12.071620829445909
extricate -12.071620829445909
mindscsape -12.071620829445909
mindscape -12.071620829445909
cbs -12.071620829445909
sweeten -12.071620829445909
confidentiality -12.071620829445909
fenner -12.071620829445909
vying -12.071620829445909
dedham -12.071620829445909
pierce -12.071620829445909
hollister -12.071620

ingenuity -12.071620829445909
minsiter -12.071620829445909
oceanic -12.071620829445909
newsagent -12.071620829445909
finlays -12.071620829445909
casualties -12.071620829445909
pivot -12.071620829445909
gunboats -12.071620829445909
gunboat -12.071620829445909
ablaze -12.071620829445909
nationality -12.071620829445909
heidweiler -12.071620829445909
voltage -12.071620829445909
afobakka -12.071620829445909
desi -12.071620829445909
bouterse -12.071620829445909
billiton -12.071620829445909
guerilla -12.071620829445909
henk -12.071620829445909
stalled -12.071620829445909
initiation -12.071620829445909
tulare -12.071620829445909
cattani -12.071620829445909
schuman -12.071620829445909
marketer -12.071620829445909
cmmc -12.071620829445909
sid -12.071620829445909
watchmaking -12.071620829445909
watches -12.071620829445909
timex -12.071620829445909
tissot -12.071620829445909
wavehill -12.071620829445909
purchse -12.071620829445909
lacklustre -12.071620829445909
suzaki -12.071620829445909
uneasy -1

puree -12.071620829445909
hides -12.071620829445909
sholes -12.071620829445909
mats -12.071620829445909
inexact -12.071620829445909
lively -12.071620829445909
barrett -12.071620829445909
joensson -12.071620829445909
volatily -12.071620829445909
apiece -12.071620829445909
traced -12.071620829445909
bachem -12.071620829445909
variant -12.071620829445909
maxcom -12.071620829445909
taxpayer -12.071620829445909
nonwithheld -12.071620829445909
nondurables -12.071620829445909
nontax -12.071620829445909
sytem -12.071620829445909
panamax -12.071620829445909
bunker -12.071620829445909
timechartering -12.071620829445909
resonance -12.071620829445909
mediq -12.071620829445909
corbett -12.071620829445909
jeoffrey -12.071620829445909
greyerz -12.071620829445909
budd -12.071620829445909
amtech -12.071620829445909
substaintial -12.071620829445909
frota -12.071620829445909
petroleiros -12.071620829445909
fronape -12.071620829445909
cadenas -12.071620829445909
nonoc -12.071620829445909
dbp -12.071620829

kaysersberg -12.071620829445909
ardeshir -12.071620829445909
rubble -12.071620829445909
nowruz -12.071620829445909
raided -12.071620829445909
platforms -12.071620829445909
injects -12.071620829445909
lebegue -12.071620829445909
les -12.071620829445909
echos -12.071620829445909
licencing -12.071620829445909
fallows -12.071620829445909
paddocks -12.071620829445909
awf -12.071620829445909
pulses -12.071620829445909
quarries -12.071620829445909
shb -12.071620829445909
faq -12.071620829445909
bremen -12.071620829445909
unwashed -12.071620829445909
excelso -12.071620829445909
shg -12.071620829445909
colombians -12.071620829445909
fnc -12.071620829445909
unq -12.071620829445909
quals -12.071620829445909
avaj -12.071620829445909
consignments -12.071620829445909
recycle -12.071620829445909
unofficially -12.071620829445909
overtaken -12.071620829445909
tanga -12.071620829445909
tanzanian -12.071620829445909
bottleneck -12.071620829445909
abstain -12.071620829445909
bradstreet -12.071620829445909

insterest -12.071620829445909
exasperating -12.071620829445909
bamberger -12.071620829445909
formulated -12.071620829445909
angl -12.071620829445909
auergesellschaft -12.071620829445909
boart -12.071620829445909
assn -12.071620829445909
precipitators -12.071620829445909
elex -12.071620829445909
electrostatic -12.071620829445909
shouls -12.071620829445909
secrets -12.071620829445909
wrongdoing -12.071620829445909
impede -12.071620829445909
predator -12.071620829445909
pannzoil -12.071620829445909
nonsubsidiary -12.071620829445909
unkonwn -12.071620829445909
porcine -12.071620829445909
somatotropin -12.071620829445909
ibs -12.071620829445909
painstaking -12.071620829445909
kadoorie -12.071620829445909
lau -12.071620829445909
redevelopment -12.071620829445909
lai -12.071620829445909
coincidence -12.071620829445909
evergo -12.071620829445909
liang -12.071620829445909
styles -12.071620829445909
countermoves -12.071620829445909
levelled -12.071620829445909
ealier -12.071620829445909
marlboro

In [133]:
    prior_probs_normalized = {label: np.log(prior_probs[label]) for label in prior_probs.keys()}
    normalize = np.sqrt(sum([prior_probs_normalized[label]**2 for label in prior_probs.keys()]))
    for label in prior_probs_normalized.keys():
        prior_probs_normalized[label] /= normalize
    for label, score in sorted(prior_probs.items(), key=itemgetter(1), reverse=True):
        print(label, score)


earn 0.37031793023555154
acq 0.2123825460162183
money-fx 0.0692495816707427
grain 0.055734328742437896
crude 0.050070794182005406
trade 0.04736774359634444
interest 0.04466469301068349
wheat 0.02728793924572017
ship 0.025357188827390912
corn 0.023426438409061657
money-supply 0.018020337237739735
dlr 0.01686188698674218
sugar 0.016218303513965762
oilseed 0.015960870124855194
coffee 0.014287553095636504
gnp 0.013000386150083665
gold 0.012099369288196679
veg-oil 0.011198352426309692
soybean 0.010039902175312138
livestock 0.009653752091646286
nat-gas 0.009653752091646286
bop 0.009653752091646286
cpi 0.008881451924314583
cocoa 0.00707941820054061
reserves 0.00707941820054061
carcass 0.0064358347277641914
copper 0.006049684644098339
jobs 0.005920967949543055
yen 0.005792251254987772
ipi 0.005277384476766637
iron-steel 0.005148667782211353
cotton 0.005019951087656069
barley 0.004762517698545501
rubber 0.004762517698545501
gas 0.004762517698545501
rice 0.004505084309434934
alum 0.0045050843094

In [164]:
    # Removing the stemmer actually improves accuracy on test set, who knew
    successes, earned, bottom_5,i = 0, 0, 0, 0
    dir_path = "C:\\Users\\ksing\\OneDrive\\Documents\\Text Classifiers\\test"
    for file in os.listdir(dir_path):
        filepath = dir_path + '\\' + file 
        num = int(file[0:len(file) - 4])
        text = vectorize_text(stop_words, valid_words, filepath)
        computed_labels = complement_naive_bayes(complement_probs, idf, text, prior_probs)
        # computed_labels = multinomial_naive_bayes(conditional_probs, idf, text, prior_probs)
        # computed_labels = weight_normalized_cnb(complement_probs_normalized, idf, text, 
        #                                        prior_probs_normalized)
        suc, e, b5 = bayes_accuracy_model(num, number_labels_test, computed_labels)
        # Even with using conditional_probs, earn appears in 1773/3019 samples
        
        # CNB brought earn labels down to 1170/3019, which is the best improvement so far
        
        # The slower the denominator function grows, the less we see bottom 5 labels appear in the top 5
        # This occurs (I think) because labels with less docs = larger denominator term = smaller number inside log
        # = more negative logarithm output = (freq * compl_prob[label][word]) is disproportionately smaller for smaller
        # classes. This function returns an argmin, which means that super negative terms are more likely to float to the
        # top, like the bottom 5 labels
                
        # Multinomial Naive Bayes: 84.09% (2538.627561327562) accuracy on test set (????), 1773 "Earn" labels
        # Complement Naive Bayes: 85.09% (2568.913203463204) accuracy on test set, 1687 "Earn" labels
        # Weight Normalized CNB w/ TF-IDF transformation: 76.35% (2304.986291486292), 2131 "Earn" labels
        # CNB with IDF transformation: 86.76% (2619.324711399711), 1321 "Earn" labels
        # MNB with IDF transformation: 84.87% (2562.234704184704), 1521 "Earn" labels
        successes += suc
        earned += e
        bottom_5 += b5
        i += 1
    print(successes, earned, bottom_5, i)

14829
['nat-gas', 'crude'] [('trade', 3809.5237721040353), ('crude', 3799.688110476157), ('nat-gas', 3796.888901985003), ('cpi', 3787.4829008945862), ('dlr', 3787.286859407693), ('gas', 3786.1541020851596), ('yen', 3785.5895469365114), ('wpi', 3784.782326914773), ('ipi', 3784.631392072648), ('iron-steel', 3784.6098626390303)]
14832
['rubber', 'tin', 'sugar', 'corn', 'rice', 'grain', 'trade'] [('bop', 3141.7369846836996), ('trade', 3141.513965991165), ('gold', 3132.6733956700455), ('rubber', 3129.0116316031617), ('sugar', 3128.798603100639), ('cpi', 3128.187600965676), ('tin', 3127.1308440509274), ('rice', 3124.6725674306904), ('money-supply', 3124.651140465423), ('gnp', 3124.4292380122615)]
14840
['rubber', 'coffee', 'lumber', 'palm-oil', 'veg-oil'] [('coffee', 9195.869643891803), ('rubber', 9182.09851960458), ('veg-oil', 9163.950509516559), ('palm-oil', 9157.72687406301), ('sugar', 9135.831611891273), ('cocoa', 9134.949254925006), ('gold', 9134.752874379348), ('tea', 9133.38594224139)

15156
['acq'] [('earn', 4776.2382526951515), ('acq', 4719.8247457531925), ('pet-chem', 4707.1530803083415), ('rapeseed', 4686.260858721682), ('cotton', 4685.909253710422), ('groundnut', 4685.353546101823), ('rice', 4685.352750274246), ('sunseed', 4685.214284355345), ('tea', 4685.088976678031), ('meal-feed', 4684.745176633838)]
15194
['earn'] [('gold', 15116.01466787316), ('gnp', 15111.462208718522), ('alum', 15108.070060841303), ('tea', 15107.816073486116), ('pet-chem', 15107.639711855672), ('orange', 15107.414677568402), ('palm-oil', 15107.112582967075), ('barley', 15106.32415183324), ('rubber', 15105.795317980657), ('gas', 15105.730260496153)]
15206
['rice'] [('trade', 5958.065307320856), ('grain', 5894.024092369876), ('wheat', 5892.313222698207), ('livestock', 5888.498082798502), ('carcass', 5886.484922138455), ('oilseed', 5885.506535243285), ('soybean', 5883.635999286738), ('ship', 5883.491249164171), ('cotton', 5883.180625645007), ('coffee', 5882.949863300462)]
15212
['interest', 

15454
['yen'] [('money-fx', 8416.467547371993), ('dlr', 8332.793620173856), ('yen', 8305.451382367319), ('interest', 8301.813715578732), ('gnp', 8295.99318665049), ('ship', 8295.665459614369), ('retail', 8289.973104216151), ('jobs', 8289.550471518087), ('bop', 8289.209538958981), ('gold', 8288.986838352243)]
15455
['yen'] [('money-fx', 4252.319691878596), ('dlr', 4217.015074311975), ('trade', 4212.290352767114), ('yen', 4208.962543398838), ('jobs', 4207.33314724106), ('gnp', 4203.722804043825), ('ipi', 4199.1868221811155), ('interest', 4198.209117886925), ('tin', 4196.8197038861), ('rubber', 4196.523312637236)]
15478
['earn'] [('money-supply', 1965.5891321896893), ('earn', 1963.1387912981913), ('jobs', 1961.4336896490906), ('soybean', 1960.8837125980049), ('dlr', 1960.817418111937), ('veg-oil', 1960.7515949396122), ('bop', 1960.6959881217567), ('nat-gas', 1960.260277068535), ('gold', 1960.1550710551414), ('acq', 1960.1469950219316)]
15483
['yen'] [('money-fx', 2182.7026125467505), ('dl

15643
['cotton'] [('grain', 2750.892651461182), ('coffee', 2743.4651479814174), ('corn', 2743.3736769165853), ('ship', 2742.128160252681), ('cotton', 2740.5655930331754), ('wheat', 2739.3129071013304), ('oilseed', 2739.24110167575), ('soybean', 2737.9807357866807), ('gas', 2735.920729464796), ('rice', 2735.4390873469124)]
15646
['bop', 'trade'] [('trade', 754.3781491576297), ('money-fx', 754.2379093265041), ('dlr', 742.9803454710901), ('bop', 742.7722735082909), ('interest', 742.3622513663703), ('gnp', 742.2056244525226), ('money-supply', 740.4737314192438), ('yen', 739.3619015890022), ('alum', 738.9266525088998), ('reserves', 738.8585414457841)]
15648
['grain', 'corn'] [('grain', 537.6452148015941), ('sugar', 537.6058097639453), ('corn', 535.5899716858202), ('wheat', 531.7461055666479), ('barley', 529.4436616878337), ('oilseed', 528.5639688059454), ('trade', 528.4924118476785), ('veg-oil', 528.0670287216607), ('soybean', 527.5771799427308), ('coffee', 527.285436842188)]
15649
['barley

15869
['wheat', 'grain'] [('grain', 963.8518563160656), ('earn', 962.0462898972609), ('wheat', 958.4754803870482), ('corn', 953.3524739330304), ('oilseed', 950.8323145673718), ('soybean', 949.277755480726), ('sugar', 949.2632671972841), ('barley', 947.5953108340967), ('veg-oil', 947.4832286408998), ('rice', 947.4080829730932)]
15870
['wheat', 'grain'] [('earn', 1315.8893919210318), ('grain', 1307.4957392878214), ('wheat', 1300.4442775344094), ('corn', 1297.4485463296462), ('oilseed', 1294.119554504911), ('soybean', 1292.4510289904672), ('sugar', 1291.8255728879478), ('sorghum', 1290.6982870639522), ('barley', 1290.6371498185063), ('veg-oil', 1290.3876036093986)]
15871
['rice', 'oat', 'barley', 'cotton-oil', 'sorghum', 'cotton', 'soy-oil', 'veg-oil', 'meal-feed', 'soy-meal', 'soybean', 'oilseed', 'corn', 'grain'] [('veg-oil', 29774.446705928487), ('sunseed', 29734.046421573035), ('soybean', 29728.318153575176), ('soy-oil', 29725.270076580826), ('sun-oil', 29724.763275864505), ('wheat', 

16040
['crude', 'ship'] [('earn', -0.9933933713988728), ('acq', -1.5493661680557944), ('money-fx', -2.6700381747886364), ('grain', -2.8871590069479307), ('crude', -2.9943173913319745), ('trade', -3.04981379678149), ('interest', -3.1085719550035615), ('wheat', -3.6013104602784085), ('ship', -3.6746930062124323), ('corn', -3.7538900478736252)]
16052
['trade'] [('bop', 1434.3739909866163), ('trade', 1432.5771648677073), ('copper', 1427.9767323973456), ('cpi', 1421.4568234245464), ('meal-feed', 1420.6043673808454), ('money-supply', 1420.443643498293), ('reserves', 1419.3471060251131), ('ipi', 1419.2537517768133), ('gnp', 1418.8610755314023), ('jobs', 1418.8164418245667)]
16053
['dlr', 'money-fx', 'interest'] [('interest', 14599.603741335446), ('money-supply', 14583.477341048314), ('money-fx', 14578.85072061256), ('cpi', 14519.133068617008), ('dlr', 14511.97673256821), ('retail', 14509.250161790556), ('housing', 14506.82517030184), ('jobs', 14505.376739322019), ('copper', 14505.067131518395

16171
['gnp'] [('trade', 9989.399223381837), ('gnp', 9986.976718650292), ('money-fx', 9954.663075095837), ('reserves', 9947.29537153353), ('interest', 9939.917237956004), ('veg-oil', 9936.872325463839), ('jobs', 9936.338965105904), ('bop', 9936.037190939778), ('tin', 9935.310264653755), ('iron-steel', 9935.234988404136)]
16195
['wpi'] [('interest', 5021.629757210177), ('bop', 5018.217826636051), ('money-supply', 5016.466200583451), ('ipi', 5015.617409360007), ('cpi', 5015.151873299279), ('gnp', 5014.502098994687), ('wpi', 5012.5212442946795), ('jobs', 5010.990548392527), ('retail', 5010.7379215220135), ('gas', 5010.289834186131)]
16196
['gnp'] [('iron-steel', 4681.867616511491), ('money-fx', 4667.6965716613995), ('gnp', 4664.234623538451), ('interest', 4652.1668356724385), ('trade', 4649.576580381965), ('cotton', 4647.540302197313), ('reserves', 4645.895129589533), ('cocoa', 4644.257949490475), ('cpi', 4644.20307550289), ('bop', 4644.149999793864)]
16200
['dlr'] [('money-fx', 17835.806

16519
['livestock', 'acq'] [('ship', 2118.305958805359), ('acq', 2113.620275572454), ('livestock', 2105.4130807611937), ('carcass', 2101.2094718322746), ('jobs', 2088.808008455739), ('yen', 2088.2595863214733), ('pet-chem', 2088.1934802725063), ('veg-oil', 2087.9959488056256), ('gold', 2087.7217948572998), ('hog', 2087.5437697619536)]
16588
['earn'] [('acq', 4450.040938617536), ('rand', 4360.700407055462), ('hog', 4359.889682526407), ('livestock', 4359.7846968080285), ('gnp', 4359.518532121251), ('yen', 4359.275564768782), ('rice', 4359.249547309703), ('copper', 4359.201231746055), ('jobs', 4359.095467401198), ('alum', 4359.083647189634)]
16600
['earn'] [('acq', 5544.530035839394), ('earn', 5531.767343879053), ('retail', 5456.932472049303), ('ipi', 5455.459511344368), ('jobs', 5454.027977436595), ('lei', 5453.996187245347), ('housing', 5453.631525942453), ('income', 5453.119591497225), ('gnp', 5453.118167254431), ('cpi', 5453.056790403312)]
16601
['rice', 'grain'] [('grain', 1847.65236

17470
['interest'] [('money-fx', 4249.981294304175), ('interest', 4240.562914292725), ('money-supply', 4219.196269983013), ('crude', 4217.70321067707), ('ship', 4214.134406386519), ('palm-oil', 4212.05344378703), ('veg-oil', 4210.620404541995), ('reserves', 4209.37407587211), ('dlr', 4208.359583234573), ('alum', 4206.347043084825)]
17473
['fuel'] [('grain', 1422.6771628712397), ('wheat', 1418.7898571388318), ('veg-oil', 1415.7134149772314), ('sugar', 1414.7191980082537), ('ship', 1414.5963183948904), ('corn', 1413.8405492378888), ('livestock', 1412.9951434848438), ('copper', 1412.5562518056645), ('gas', 1412.5117645155253), ('gold', 1412.289543507499)]
17477
['coconut-oil', 'palm-oil', 'meal-feed', 'copra-cake', 'coconut', 'veg-oil'] [('oilseed', 17140.361911361062), ('coconut', 17126.864914072972), ('veg-oil', 17089.106879370036), ('meal-feed', 17060.805555827716), ('palm-oil', 17058.82975414998), ('coconut-oil', 17057.393369896155), ('soybean', 17055.537327049493), ('copra-cake', 170

17827
['hog', 'acq'] [('acq', 1400.1685611775883), ('livestock', 1372.9865963506677), ('hog', 1370.3298461864372), ('gold', 1361.5274659425788), ('carcass', 1361.5083886334307), ('nat-gas', 1360.978668601218), ('iron-steel', 1360.6733971392), ('cpi', 1360.6516358270192), ('soybean', 1360.651045641019), ('copper', 1360.638715562659)]
17829
['iron-steel'] [('earn', 2529.6149827060926), ('iron-steel', 2511.6777273954967), ('money-supply', 2499.628566911191), ('coffee', 2498.1202202865616), ('carcass', 2497.6046225307564), ('livestock', 2497.454828388145), ('reserves', 2496.653017731823), ('rubber', 2496.434034420785), ('copper', 2496.4163124445713), ('ship', 2496.1659664871745)]
17870
['alum'] [('interest', 5708.390687708323), ('alum', 5683.709490827159), ('crude', 5663.822475994771), ('iron-steel', 5662.569409055815), ('pet-chem', 5660.289674498595), ('cpi', 5656.31174099824), ('dlr', 5655.944952742646), ('reserves', 5654.957378815355), ('yen', 5654.91166397142), ('tin', 5654.33484543834

18095
['gnp'] [('money-fx', 7501.221061247316), ('trade', 7497.216693885389), ('gnp', 7478.300542600834), ('dlr', 7470.148795435583), ('interest', 7463.5100847565145), ('money-supply', 7462.290804274406), ('veg-oil', 7461.700014817249), ('cpi', 7460.100594787808), ('jobs', 7459.152631318094), ('yen', 7458.850820804138)]
18099
['carcass'] [('livestock', 6872.841593242436), ('carcass', 6825.072644126314), ('grain', 6818.412520654509), ('veg-oil', 6814.808450883307), ('rice', 6810.141252118004), ('wheat', 6809.355519161578), ('sun-oil', 6807.971811312018), ('palm-oil', 6807.941141008901), ('cotton', 6807.491818557727), ('rubber', 6807.109599564311)]
18106
['interest'] [('trade', 629.4739152398763), ('crude', 613.7204433749396), ('interest', 613.4483961399641), ('gas', 613.1242676323576), ('ship', 610.9246589682036), ('money-fx', 610.3655472962058), ('money-supply', 609.8985080426849), ('cpi', 609.7188847973509), ('sugar', 609.5512395620783), ('dlr', 609.5131888900823)]
18136
['dmk', 'mone

18480
['fuel', 'naphtha', 'gas'] [('gas', 3578.2103358398626), ('fuel', 3510.159301930835), ('crude', 3507.924009018486), ('heat', 3497.63789253557), ('jet', 3495.267425918486), ('naphtha', 3495.094221885696), ('nat-gas', 3491.922982657236), ('pet-chem', 3491.7501572165575), ('veg-oil', 3484.836327672063), ('palm-oil', 3483.8835082399487)]
18482
['rapeseed', 'soy-meal', 'oilseed', 'sorghum', 'corn', 'grain', 'carcass', 'livestock', 'meal-feed'] [('corn', 4895.632285858497), ('grain', 4893.382145065519), ('oilseed', 4891.735736533094), ('meal-feed', 4891.28826112846), ('soybean', 4888.018479839749), ('sorghum', 4884.270392583019), ('livestock', 4881.271517225087), ('carcass', 4878.279532168757), ('rapeseed', 4875.341695293957), ('soy-meal', 4874.681867156926)]
18489
['platinum', 'silver'] [('gold', 3168.3727659535543), ('silver', 3077.987017988997), ('copper', 3071.5595388294555), ('platinum', 3068.18980083538), ('nickel', 3067.2880910274926), ('zinc', 3067.0363538939587), ('bop', 3064.

18911
['housing', 'income', 'gnp'] [('gnp', 7117.890914192965), ('cpi', 7090.405679017437), ('retail', 7082.305010027133), ('housing', 7080.403939474895), ('ipi', 7079.883536530397), ('jobs', 7078.896918096489), ('income', 7078.721714369639), ('money-supply', 7078.683864965675), ('bop', 7077.247766933797), ('coffee', 7075.72153694008)]
18922
['acq'] [('earn', 1943.1061834539903), ('acq', 1937.4801534857406), ('jobs', 1908.216635276044), ('gold', 1907.9345077666824), ('copper', 1907.70187811581), ('cpi', 1907.6193152424073), ('barley', 1907.5248733071019), ('ship', 1907.3891273047536), ('alum', 1907.358997263104), ('gas', 1907.2559882493397)]
18932
['nat-gas'] [('acq', 1533.0881401286792), ('nat-gas', 1472.9514107886037), ('crude', 1470.3045617062094), ('gold', 1463.8335374497362), ('alum', 1463.7270163200492), ('livestock', 1462.5924158618234), ('rubber', 1462.4535180639073), ('yen', 1462.1673397722946), ('copper', 1462.1027210464738), ('gas', 1461.9715119070675)]
18943
['zinc'] [('alu

19271
['nat-gas', 'acq'] [('acq', 1402.3127882973245), ('crude', 1370.4201209281696), ('nat-gas', 1367.786089800869), ('gold', 1361.5020319730613), ('lumber', 1360.2669192117207), ('money-fx', 1358.5643132843004), ('livestock', 1358.3253836094793), ('alum', 1358.0799417088326), ('copper', 1358.0763233003618), ('ship', 1357.9529805939778)]
19275
['barley', 'oat', 'wheat', 'grain'] [('grain', 3500.7069342588275), ('wheat', 3485.6976630064946), ('corn', 3472.336718684602), ('rice', 3465.50849946498), ('cotton', 3464.575925089597), ('oilseed', 3464.2421695735816), ('sorghum', 3463.523569920561), ('barley', 3463.2006504274214), ('iron-steel', 3462.4818546856727), ('soybean', 3462.0275360028986)]
19295
['platinum'] [('gold', 5537.051168658488), ('trade', 5507.811954387778), ('platinum', 5501.491112443411), ('silver', 5497.737552981927), ('copper', 5496.84632660548), ('nickel', 5495.389098622846), ('palladium', 5494.199721502618), ('orange', 5493.784249161697), ('tea', 5493.466328135495), ('z

19692
['zinc'] [('silver', 1515.9292806688434), ('gold', 1512.2533104810252), ('alum', 1504.8181995137686), ('copper', 1503.0671762401093), ('iron-steel', 1503.0598426478823), ('zinc', 1500.8001436685604), ('coffee', 1499.3341311604588), ('lead', 1497.204915189473), ('sugar', 1496.6793208340785), ('rice', 1495.7174297309941)]
19721
['soybean', 'oilseed', 'corn', 'grain'] [('grain', 7268.188839185656), ('wheat', 7241.192374240375), ('corn', 7226.965235565442), ('livestock', 7214.547903264967), ('rice', 7213.325161683091), ('oilseed', 7211.065415636776), ('soybean', 7208.629551997533), ('cotton', 7205.699672106374), ('sorghum', 7205.6814828211445), ('nat-gas', 7205.113979348982)]
19756
['crude', 'ship'] [('trade', 2548.7872993717806), ('ship', 2544.940903713207), ('crude', 2540.4551582615045), ('gas', 2514.4706764039265), ('wheat', 2513.149554087752), ('grain', 2511.3316841507126), ('cotton', 2510.593841117623), ('nat-gas', 2510.493614127799), ('rice', 2510.413197694506), ('carcass', 251

20208
['rapeseed', 'sunseed', 'soybean', 'oilseed', 'soy-meal', 'meal-feed'] [('meal-feed', 3936.7801147928744), ('oilseed', 3929.5783419198033), ('soy-meal', 3929.3716116274054), ('grain', 3928.1166853865748), ('corn', 3925.4376335284833), ('rapeseed', 3923.323794111387), ('soybean', 3922.338789613094), ('veg-oil', 3921.9902953599776), ('wheat', 3920.1896513938623), ('soy-oil', 3918.8716694396785)]
20232
['veg-oil'] [('grain', 7055.147187662047), ('trade', 7048.894220386431), ('wheat', 7048.391375060572), ('coffee', 7037.414977023826), ('veg-oil', 7031.905915567783), ('iron-steel', 7029.720324301175), ('carcass', 7028.238869808118), ('livestock', 7027.819003881146), ('soybean', 7027.629401188394), ('tea', 7027.57603288062)]
20248
['jobs', 'trade'] [('earn', -0.9933933713988728), ('acq', -1.5493661680557944), ('money-fx', -2.6700381747886364), ('grain', -2.8871590069479307), ('crude', -2.9943173913319745), ('trade', -3.04981379678149), ('interest', -3.1085719550035615), ('wheat', -3.60

20756
['ship', 'crude'] [('trade', 5033.911245507136), ('ship', 5029.968244465586), ('crude', 5027.641667015792), ('cpi', 4985.779520222908), ('livestock', 4985.139301301223), ('potato', 4985.122866555444), ('carcass', 4983.540448634752), ('veg-oil', 4983.37863302055), ('copper', 4982.924733016364), ('dlr', 4982.765743491959)]
20764
['interest', 'money-fx'] [('money-fx', 18580.137054525723), ('trade', 18509.390600083156), ('dlr', 18493.287156198312), ('interest', 18476.297877973422), ('yen', 18468.639172085637), ('gnp', 18465.564751210688), ('rubber', 18462.68052344648), ('ship', 18462.246559575015), ('cpi', 18461.602903032508), ('copper', 18460.688707550245)]
20769
['interest'] [('trade', 2987.0604515217788), ('interest', 2986.2981515886604), ('money-fx', 2977.2569179770494), ('ship', 2969.8773418668657), ('dlr', 2968.958873578102), ('gnp', 2966.972026507236), ('yen', 2966.298413404098), ('retail', 2965.5630597684103), ('money-supply', 2965.531473033307), ('ipi', 2965.4581536224096)]


21525
['bop', 'jobs', 'gnp'] [('gnp', 7585.302934123445), ('veg-oil', 7567.113509245884), ('palm-oil', 7563.372278924951), ('jobs', 7562.75202703305), ('trade', 7561.706059284294), ('cocoa', 7561.046196599715), ('tin', 7560.423991715657), ('rubber', 7560.211836754458), ('bop', 7556.869622653968), ('cpi', 7553.768297513602)]
21530
['cotton', 'grain'] [('grain', 3464.5819406764926), ('corn', 3446.678474504291), ('wheat', 3444.3995388881094), ('sugar', 3444.388744735844), ('oilseed', 3443.3412856034315), ('cotton', 3441.493524668613), ('soybean', 3441.3433525300384), ('bop', 3438.845195744677), ('sorghum', 3438.421275407009), ('sunseed', 3437.7374709640503)]
21532
['cpi'] [('money-fx', 5155.557497973888), ('jobs', 5144.50447546359), ('cpi', 5131.8436941823165), ('gnp', 5130.46029961055), ('money-supply', 5123.802452331231), ('ship', 5123.375410224577), ('coconut', 5122.082107255733), ('ipi', 5121.654667787195), ('nickel', 5120.538188473497), ('interest', 5119.9316448431955)]
21535
['rand'