In [19]:
import pandas as pd
import numpy as np
import gensim
import re
import os
import logging
from h2o.estimators.word2vec import H2OWord2vecEstimator
from h2o.estimators import H2OGradientBoostingEstimator
from h2o.estimators import H2ORandomForestEstimator
import h2o
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/toprak.ucar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
csv_files = [pos_csv for pos_csv in os.listdir("data/") if pos_csv.endswith('.csv')]
print(csv_files)
df = pd.DataFrame()

for file in csv_files:
    df = df.append(pd.read_csv("data/" + file))

['articles1.csv', 'articles3.csv', 'articles2.csv']


In [84]:
df.columns

Index(['Unnamed: 0', 'id', 'title', 'publication', 'content', 'isBreitbart'], dtype='object')

In [3]:
df = df.drop(['author', 'date', 'year', 'month', 'url'], axis=1)

In [4]:
df['title'] = df['title'].str.lower()

In [5]:
df['publication'] = df['publication'].str.lower()

In [6]:
df['content'] = df['content'].str.lower()

In [7]:
df['content'] = df['content'].str.replace('[^a-zA-Z]', ' ')

In [10]:
stop_words = stopwords.words('english')

In [11]:
df['content'] = df['content'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))

In [13]:
df.groupby(['publication']).count()

Unnamed: 0_level_0,Unnamed: 0,id,title,content
publication,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
atlantic,7179,7179,7179,7179
breitbart,23781,23781,23781,23781
business insider,6757,6757,6757,6757
buzzfeed news,4854,4854,4854,4854
cnn,11488,11488,11488,11488
fox news,4354,4354,4354,4354
guardian,8681,8681,8681,8681
national review,6203,6203,6203,6203
new york post,17493,17493,17493,17493
new york times,7803,7803,7803,7803


In [14]:
len(df)

142570

In [15]:
df["isBreitbart"] = np.where(df['publication'] == 'breitbart', "1", "0")

In [16]:
words = []
for count in range (len(df)):
    if (type(df.iloc[count]['content']) != float):
        words.append(df.iloc[count]['content'].split())

In [20]:
model = gensim.models.Word2Vec(words, window=15, 
                                       size= 50, iter=10, 
                                       min_count=1, workers = 4)

2018-12-24 23:40:05,938 : INFO : collecting all words and their counts
2018-12-24 23:40:05,939 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-12-24 23:40:07,285 : INFO : PROGRESS: at sentence #10000, processed 5516956 words, keeping 97866 word types
2018-12-24 23:40:07,889 : INFO : PROGRESS: at sentence #20000, processed 8188013 words, keeping 117856 word types
2018-12-24 23:40:08,581 : INFO : PROGRESS: at sentence #30000, processed 10985634 words, keeping 134567 word types
2018-12-24 23:40:09,452 : INFO : PROGRESS: at sentence #40000, processed 14817409 words, keeping 151661 word types
2018-12-24 23:40:10,536 : INFO : PROGRESS: at sentence #50000, processed 17604723 words, keeping 163973 word types
2018-12-24 23:40:11,681 : INFO : PROGRESS: at sentence #60000, processed 21958109 words, keeping 187955 word types
2018-12-24 23:40:12,398 : INFO : PROGRESS: at sentence #70000, processed 26014229 words, keeping 203162 word types
2018-12-24 23:40:13,442 : IN

2018-12-24 23:41:14,812 : INFO : EPOCH 1 - PROGRESS: at 76.89% examples, 931417 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:15,819 : INFO : EPOCH 1 - PROGRESS: at 78.55% examples, 932578 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:16,825 : INFO : EPOCH 1 - PROGRESS: at 79.86% examples, 931791 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:17,827 : INFO : EPOCH 1 - PROGRESS: at 81.36% examples, 933706 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:18,835 : INFO : EPOCH 1 - PROGRESS: at 82.84% examples, 935714 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:19,839 : INFO : EPOCH 1 - PROGRESS: at 84.24% examples, 936752 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:41:20,841 : INFO : EPOCH 1 - PROGRESS: at 86.75% examples, 934801 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:21,843 : INFO : EPOCH 1 - PROGRESS: at 89.78% examples, 935741 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:41:22,853 : INFO : EPOCH 1 - PROGRESS: at 92.53% examples, 935133 words/s, in_qsiz

2018-12-24 23:42:24,452 : INFO : EPOCH 2 - PROGRESS: at 96.26% examples, 938407 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:42:25,452 : INFO : EPOCH 2 - PROGRESS: at 97.73% examples, 937547 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:42:26,455 : INFO : EPOCH 2 - PROGRESS: at 99.21% examples, 938954 words/s, in_qsize 6, out_qsize 1
2018-12-24 23:42:26,910 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-12-24 23:42:26,920 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-12-24 23:42:26,926 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-12-24 23:42:26,927 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-12-24 23:42:26,928 : INFO : EPOCH - 2 : training on 57349221 raw words (56400912 effective words) took 60.0s, 940052 effective words/s
2018-12-24 23:42:27,941 : INFO : EPOCH 3 - PROGRESS: at 1.06% examples, 914967 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:42:28,962 : INFO : EPOCH 3 - PR

2018-12-24 23:43:29,560 : INFO : EPOCH 4 - PROGRESS: at 1.05% examples, 910282 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:30,566 : INFO : EPOCH 4 - PROGRESS: at 1.90% examples, 825602 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:31,580 : INFO : EPOCH 4 - PROGRESS: at 2.93% examples, 855512 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:32,583 : INFO : EPOCH 4 - PROGRESS: at 4.03% examples, 883005 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:33,589 : INFO : EPOCH 4 - PROGRESS: at 5.12% examples, 903578 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:34,603 : INFO : EPOCH 4 - PROGRESS: at 7.50% examples, 924636 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:43:35,606 : INFO : EPOCH 4 - PROGRESS: at 9.77% examples, 912114 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:36,606 : INFO : EPOCH 4 - PROGRESS: at 12.66% examples, 933458 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:43:37,614 : INFO : EPOCH 4 - PROGRESS: at 15.49% examples, 948226 words/s, in_qsize 7, ou

2018-12-24 23:44:40,186 : INFO : EPOCH 5 - PROGRESS: at 3.29% examples, 963129 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:41,186 : INFO : EPOCH 5 - PROGRESS: at 4.38% examples, 963185 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:42,190 : INFO : EPOCH 5 - PROGRESS: at 5.45% examples, 966244 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:43,199 : INFO : EPOCH 5 - PROGRESS: at 8.39% examples, 984390 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:44,215 : INFO : EPOCH 5 - PROGRESS: at 11.27% examples, 990614 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:45,218 : INFO : EPOCH 5 - PROGRESS: at 13.84% examples, 990447 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:46,221 : INFO : EPOCH 5 - PROGRESS: at 16.47% examples, 991153 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:47,222 : INFO : EPOCH 5 - PROGRESS: at 18.78% examples, 986461 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:44:48,228 : INFO : EPOCH 5 - PROGRESS: at 21.36% examples, 989288 words/s, in_qsize 7,

2018-12-24 23:45:50,647 : INFO : EPOCH 6 - PROGRESS: at 22.36% examples, 866651 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:45:51,659 : INFO : EPOCH 6 - PROGRESS: at 23.53% examples, 852183 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:45:52,671 : INFO : EPOCH 6 - PROGRESS: at 25.21% examples, 859506 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:45:53,672 : INFO : EPOCH 6 - PROGRESS: at 26.92% examples, 865002 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:45:54,676 : INFO : EPOCH 6 - PROGRESS: at 28.30% examples, 857439 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:45:55,684 : INFO : EPOCH 6 - PROGRESS: at 29.67% examples, 848296 words/s, in_qsize 8, out_qsize 1
2018-12-24 23:45:56,687 : INFO : EPOCH 6 - PROGRESS: at 32.27% examples, 855093 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:45:57,709 : INFO : EPOCH 6 - PROGRESS: at 34.78% examples, 848687 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:45:58,725 : INFO : EPOCH 6 - PROGRESS: at 35.88% examples, 842465 words/s, in_qsiz

2018-12-24 23:47:00,583 : INFO : EPOCH 7 - PROGRESS: at 27.00% examples, 815000 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:01,595 : INFO : EPOCH 7 - PROGRESS: at 28.74% examples, 820327 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:02,617 : INFO : EPOCH 7 - PROGRESS: at 30.80% examples, 826477 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:03,621 : INFO : EPOCH 7 - PROGRESS: at 33.86% examples, 831385 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:04,624 : INFO : EPOCH 7 - PROGRESS: at 35.85% examples, 840131 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:05,629 : INFO : EPOCH 7 - PROGRESS: at 37.24% examples, 845181 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:06,631 : INFO : EPOCH 7 - PROGRESS: at 38.93% examples, 852340 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:47:07,635 : INFO : EPOCH 7 - PROGRESS: at 40.55% examples, 856320 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:47:08,647 : INFO : EPOCH 7 - PROGRESS: at 42.15% examples, 857477 words/s, in_qsiz

2018-12-24 23:48:10,451 : INFO : EPOCH 8 - PROGRESS: at 33.76% examples, 689838 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:11,465 : INFO : EPOCH 8 - PROGRESS: at 35.54% examples, 695771 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:48:12,469 : INFO : EPOCH 8 - PROGRESS: at 36.45% examples, 692994 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:13,476 : INFO : EPOCH 8 - PROGRESS: at 37.41% examples, 691371 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:14,521 : INFO : EPOCH 8 - PROGRESS: at 38.36% examples, 686400 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:48:15,570 : INFO : EPOCH 8 - PROGRESS: at 39.14% examples, 676985 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:16,579 : INFO : EPOCH 8 - PROGRESS: at 40.00% examples, 671564 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:17,628 : INFO : EPOCH 8 - PROGRESS: at 41.04% examples, 667563 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:48:18,630 : INFO : EPOCH 8 - PROGRESS: at 42.26% examples, 668056 words/s, in_qsiz

2018-12-24 23:49:21,097 : INFO : EPOCH 9 - PROGRESS: at 31.61% examples, 797871 words/s, in_qsize 6, out_qsize 1
2018-12-24 23:49:22,100 : INFO : EPOCH 9 - PROGRESS: at 34.61% examples, 801105 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:49:23,110 : INFO : EPOCH 9 - PROGRESS: at 36.00% examples, 804006 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:49:24,118 : INFO : EPOCH 9 - PROGRESS: at 36.76% examples, 791230 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:49:25,119 : INFO : EPOCH 9 - PROGRESS: at 37.78% examples, 787808 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:49:26,126 : INFO : EPOCH 9 - PROGRESS: at 39.51% examples, 795153 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:49:27,127 : INFO : EPOCH 9 - PROGRESS: at 40.50% examples, 787228 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:49:28,152 : INFO : EPOCH 9 - PROGRESS: at 41.14% examples, 770745 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:49:29,171 : INFO : EPOCH 9 - PROGRESS: at 42.57% examples, 771896 words/s, in_qsiz

2018-12-24 23:50:30,235 : INFO : EPOCH 10 - PROGRESS: at 27.59% examples, 780850 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:31,251 : INFO : EPOCH 10 - PROGRESS: at 29.44% examples, 789650 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:32,262 : INFO : EPOCH 10 - PROGRESS: at 31.73% examples, 797611 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:33,270 : INFO : EPOCH 10 - PROGRESS: at 34.32% examples, 793919 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:34,303 : INFO : EPOCH 10 - PROGRESS: at 35.61% examples, 788809 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:35,309 : INFO : EPOCH 10 - PROGRESS: at 36.68% examples, 786089 words/s, in_qsize 8, out_qsize 0
2018-12-24 23:50:36,313 : INFO : EPOCH 10 - PROGRESS: at 37.87% examples, 786745 words/s, in_qsize 7, out_qsize 1
2018-12-24 23:50:37,321 : INFO : EPOCH 10 - PROGRESS: at 39.39% examples, 789550 words/s, in_qsize 7, out_qsize 0
2018-12-24 23:50:38,329 : INFO : EPOCH 10 - PROGRESS: at 41.03% examples, 795124 words/s

In [32]:
model.wv.similar_by_word("istanbul")

  if np.issubdtype(vec.dtype, np.int):


[('reina', 0.7801131010055542),
 ('bosporus', 0.7633792161941528),
 ('ortakoy', 0.7598312497138977),
 ('ankara', 0.7545477151870728),
 ('ataturk', 0.7528706192970276),
 ('hurriyet', 0.7358903884887695),
 ('atat', 0.7162894606590271),
 ('arik', 0.715834379196167),
 ('besiktas', 0.7115811109542847),
 ('ahmet', 0.711054801940918)]

In [50]:
model.wv.similar_by_word("messi")

  if np.issubdtype(vec.dtype, np.int):


[('ronaldo', 0.9000247716903687),
 ('neymar', 0.8803908824920654),
 ('higuain', 0.8501801490783691),
 ('barcelona', 0.8403756618499756),
 ('piqu', 0.8349080085754395),
 ('higua', 0.8344173431396484),
 ('griezmann', 0.8323855996131897),
 ('carrasco', 0.8292516469955444),
 ('atletico', 0.8267635107040405),
 ('iniesta', 0.8264540433883667)]

In [47]:
model.wv.similar_by_word("ataturk")

  if np.issubdtype(vec.dtype, np.int):


[('atat', 0.8326884508132935),
 ('kemal', 0.807235598564148),
 ('istanbul', 0.7528706192970276),
 ('havalimani', 0.7232919335365295),
 ('turkishminutetm', 0.6904064416885376),
 ('ahmet', 0.6871216297149658),
 ('hurriyet', 0.6819521188735962),
 ('yildiz', 0.6789950132369995),
 ('alpay', 0.6765890717506409),
 ('mustafa', 0.6731521487236023)]

In [28]:
model.wv.most_similar(positive=['paris', 'turkey'], negative=['france'])

  if np.issubdtype(vec.dtype, np.int):


[('ankara', 0.7198780179023743),
 ('turkish', 0.6650469303131104),
 ('syria', 0.639181911945343),
 ('damascus', 0.6344844102859497),
 ('istanbul', 0.6245085597038269),
 ('akinci', 0.618219256401062),
 ('erdogan', 0.6135762333869934),
 ('uae', 0.604555606842041),
 ('antakya', 0.6014748811721802),
 ('tehran', 0.5996435284614563)]

In [31]:
model.wv.most_similar(positive=['trump', 'turkey'], negative=['america'])

  if np.issubdtype(vec.dtype, np.int):


[('turkish', 0.6588573455810547),
 ('erdogan', 0.6462050676345825),
 ('yildirim', 0.6361896991729736),
 ('marashipov', 0.6323192119598389),
 ('erdo', 0.6155811548233032),
 ('davutoglu', 0.6034064292907715),
 ('ankara', 0.5946260690689087),
 ('davuto', 0.5712416172027588),
 ('pilz', 0.5670062303543091),
 ('plotters', 0.5552436113357544)]

In [52]:
model.wv.most_similar(positive=['messi', 'madrid'], negative=['barcelona'])

  if np.issubdtype(vec.dtype, np.int):


[('ronaldo', 0.8275047540664673),
 ('neymar', 0.8230786323547363),
 ('higuain', 0.8138059377670288),
 ('atletico', 0.8111591339111328),
 ('cristiano', 0.8060434460639954),
 ('higua', 0.7991237044334412),
 ('isco', 0.7953017354011536),
 ('benzema', 0.7921320199966431),
 ('carrasco', 0.790255606174469),
 ('juve', 0.7867611050605774)]

In [63]:
model.wv.most_similar(positive=['louvre', 'italy'], negative=['france'])

  if np.issubdtype(vec.dtype, np.int):


[('uffizi', 0.689817488193512),
 ('branczik', 0.6873118877410889),
 ('rueger', 0.6867389678955078),
 ('pompeii', 0.6823211312294006),
 ('porto', 0.677849292755127),
 ('seine', 0.6729899048805237),
 ('gallery', 0.6708073019981384),
 ('nuovo', 0.6648528575897217),
 ('frescoes', 0.6636790037155151),
 ('duomo', 0.6594634056091309)]

In [68]:
model.save(fname_or_handle="model/" + "model_for_news")

2018-12-25 00:15:03,625 : INFO : saving Word2Vec object under model/model_for_news, separately None
2018-12-25 00:15:03,626 : INFO : storing np array 'vectors' to model/model_for_news.wv.vectors.npy
2018-12-25 00:15:03,850 : INFO : not storing attribute vectors_norm
2018-12-25 00:15:03,851 : INFO : storing np array 'syn1neg' to model/model_for_news.trainables.syn1neg.npy
2018-12-25 00:15:04,543 : INFO : saved model/model_for_news


In [122]:
def calculate_avg_vecs(column):
    words = getattr(column, 'content').split()
    vecs = []
    for word in words:
        vecs.append(model[word])
    if len(vecs) == 0:
        return np.zeros(50)
    vec_sum = np.sum(vecs, axis = 0)
    vec_avg = np.divide(vec_sum, len(vecs))
    return vec_avg

In [72]:
def calculate_text_length_by_words(text):
    return len(text.split())

In [73]:
def calculate_text_length_by_chars(text):
    return len(text)

In [75]:
def generate_publisher_dictionary(df):
    publication_array = df['publication'].unique()
    publication_index = np.arange(len(publication_array))
    return dict(zip(publication_array, publication_index))

In [78]:
vec_col_names = []
for x in range(50):
    vec_col_names.append('v' + str(x))

In [123]:
def generate_columns_for_model(df):
    feature_columns = pd.DataFrame()
    for row in df.itertuples():
        if(type(getattr(row, 'content')) == str):
            vecs = calculate_avg_vecs(row)
            vec_df = pd.DataFrame([vecs.tolist()], columns=vec_col_names)
            column = [getattr(row, 'content')]
            col_df = pd.DataFrame([column], columns=['content'])
            col_df['number_of_words'] = calculate_text_length_by_words(getattr(row, 'content'))
            col_df['len_of_text'] = calculate_text_length_by_chars(getattr(row, 'content'))
            col_df['publication'] = publisher_dict.get(getattr(row, 'publication'))
            feature_df = pd.concat([col_df, vec_df], axis=1)
            feature_columns = feature_columns.append(feature_df)
    return feature_columns

In [124]:
publisher_dict = generate_publisher_dictionary(df)
feature_columns = generate_columns_for_model(df)

  """


In [126]:
feature_columns.columns

Index(['content', 'number_of_words', 'len_of_text', 'publication', 'v0', 'v1',
       'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10', 'v11', 'v12',
       'v13', 'v14', 'v15', 'v16', 'v17', 'v18', 'v19', 'v20', 'v21', 'v22',
       'v23', 'v24', 'v25', 'v26', 'v27', 'v28', 'v29', 'v30', 'v31', 'v32',
       'v33', 'v34', 'v35', 'v36', 'v37', 'v38', 'v39', 'v40', 'v41', 'v42',
       'v43', 'v44', 'v45', 'v46', 'v47', 'v48', 'v49'],
      dtype='object')

In [None]:
training = feature_columns.groupby('is').apply(lambda x : x.sample(frac = 0.7))
test = pd.concat([feature_columns,training]).drop_duplicates(subset='text',keep=False)       

In [118]:
 # modeling
        h2o.init(max_mem_size="90G")
        h2o.connect()

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

content            washington congressional republicans new fear ...
number_of_words                                                  502
len_of_text                                                     4002
publication                                                        0
v0                                                           0.87953
v1                                                           1.22148
v2                                                           2.35176
v3                                                          0.326413
v4                                                          -1.52299
v5                                                         -0.858053
v6                                                         -0.584676
v7                                                          -1.97759
v8                                                          -2.54625
v9                                                            0.1973
v10                               