In [1]:
#import Reddit posts as csv
import pandas as pd

all_posts = pd.read_csv('all_posts_reddit_onionandnotonion_2.csv', index_col=0, sep='|')
all_posts.head()

Unnamed: 0,title,score,id,subreddit,url,num_comments,body,created,Onion?
0,Louisiana eye doctor offers free eye exams for...,114318,aic7pm,nottheonion,https://www.wwltv.com/article/sports/nfl/saint...,2102,,1548120000.0,0
1,Man rescued from Taliban didn't believe Donald...,103947,76rjtv,nottheonion,http://www.newsweek.com/man-rescued-taliban-di...,5288,,1508199000.0,0
2,"Nat Geo hires Jeff Goldblum to walk around, be...",100839,923ww4,nottheonion,https://news.avclub.com/nat-geo-hires-jeff-gol...,1537,,1532652000.0,0
3,Black security guard who stops shooter is then...,100022,9wl2d7,nottheonion,https://thehill.com/homenews/news/416255-black...,2399,,1542106000.0,0
4,Hunter dies after shot elephant falls on him,95389,6cgr3h,nottheonion,http://www.news24.com/SouthAfrica/News/hunter-...,1712,,1495405000.0,0


In [2]:
all_posts_news = pd.read_csv('all_posts_reddit_onionandnews_csv', index_col=0, sep='|')
all_posts_news.head()

Unnamed: 0,title,score,id,subreddit,url,num_comments,body,created,Onion?
0,"'No Way To Prevent This,’ Says Only Nation Whe...",34996,7b0y34,TheOnion,https://www.theonion.com/no-way-to-prevent-thi...,3043,,1509951000.0,1
1,Trump Warns Removing Confederate Statues Could...,27425,7to2ak,TheOnion,https://politics.theonion.com/trump-warns-remo...,1882,,1517211000.0,1
2,"‘No Way To Prevent This,’ Says Only Nation Whe...",21138,7xl7h3,TheOnion,https://www.theonion.com/no-way-to-prevent-thi...,1200,,1518670000.0,1
3,Roy Moore Retires From Politics To Spend More ...,18073,7jgh6i,TheOnion,https://politics.theonion.com/roy-moore-retire...,126,,1513165000.0,1
4,"Mike Pence Disappointed In The 200,000 Husband...",16752,5pbaag,TheOnion,http://www.theonion.com/article/mike-pence-dis...,332,,1485044000.0,1


In [3]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()

In [4]:
def spacy_ner(title):
    """function to apply English library to title"""
    spacy_title = nlp(title)
    return spacy_title

In [5]:
import re

def replace(string, substitutions):
    """Function that uses regex to substitute something in a string"""
    substrings = sorted(substitutions, key=len, reverse=True)
    regex = re.compile('|'.join(map(re.escape, substrings)))
    return regex.sub(lambda match: substitutions[match.group(0)], string)

In [6]:
def get_entities(title):
    """Make lists of entities and entity types and return them"""
    spacy_title = spacy_ner(title)
    spacy_title_ents = [str(X) for X in spacy_title.ents]
    spacy_title_ents_types = [X.label_ for X in spacy_title.ents]
    return spacy_title_ents, spacy_title_ents_types

In [7]:
def remove_entities(title):
    """Substitutes entities with empty strings and return title with no entities"""
    entities, ent_types = get_entities(title)
    if entities == []:
        return title
    else:
        substitutions = {}
        for X in entities:
            substitutions[X] = ''
        output = replace(title, substitutions)
        return output

In [8]:
import re
import nltk

#Lemmatization
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download('wordnet')
nltk.download('stopwords')

lemmatizer=WordNetLemmatizer()

stop_words = set(stopwords.words('english'))

import string

def preprocessing(title):
    """Function to preprocess title"""
    title_noentities = remove_entities(title) #remove entities
    title_lower = title_noentities.lower() #make lowercase
    title_lower_nonumbers = re.sub(r'\d+','', str(title_lower)) #remove numbers
    no_punctuation = re.sub(r'[^\w\s]','', title_lower_nonumbers) #remove punctuation
    tokenized_title = word_tokenize(no_punctuation) #tokenize title
    new_title = []
    for word in tokenized_title:
        new_word = lemmatizer.lemmatize(word) #lemmatize each word
        new_title.append(new_word) #make title into list of words
    final_title = [i for i in new_title if not i in stop_words] #get rid of stopwords
    return final_title

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/melaniemalinas/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/melaniemalinas/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [9]:
def preprocessing_entities(title):
    """Preprocess title and append entities and entity types"""
    title_entities, title_ents_types = get_entities(title)
    title_noentities = preprocessing(title)
    title_all = title_noentities + title_entities + title_ents_types
    return title_all

In [10]:
#apply preprocessing function to all titles
all_titles_ents = all_posts['title'].apply(lambda x: preprocessing_entities(x)) 

In [11]:
#make dataframe with only title and whether or not it is the Onion
final_posts_df = pd.DataFrame({'title':all_titles_ents, 'Onion?':all_posts['Onion?']})

In [12]:
#making X and y arrays
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

def dummy_fun(doc):
    """just a dummy function for the CountVectorizer"""
    return doc

def make_xy(df, vectorizer=None):
    """make titles and Onion status into X and y arrays for machine learning"""    
    if vectorizer is None:
        vectorizer = CountVectorizer(tokenizer=dummy_fun,
        preprocessor=dummy_fun)
    X = vectorizer.fit_transform(df['title'])
    X = X.tocsc()  # some versions of sklearn return COO format
    y = df['Onion?'].values.astype(np.int)
    return X, y

In [13]:
from sklearn.model_selection import KFold
def cv_score(clf, X, y, scorefunc):
    """get CV score from a classifier input"""
    result = 0.
    nfold = 5
    for train, test in KFold(nfold, random_state=42).split(X): # split data into train/test groups, 5 times
        clf.fit(X[train], y[train]) # fit the classifier, passed is as clf.
        result += scorefunc(clf, X[test], y[test]) # evaluate score function on held-out data
    return result / nfold # average

In [14]:
from sklearn.metrics import f1_score, make_scorer
#make a scorer from the F1 score
f1_scorer = make_scorer(f1_score)

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

#the grid of parameters to search over
alphas = [.1, 1, 5, 10, 50]
best_min_df = 5

#Find the best value for alpha and min_df, and the best classifier
best_alpha = None
maxscore=-np.inf
for alpha in alphas:   
    vectorizer = CountVectorizer(min_df=best_min_df, tokenizer=dummy_fun, \
    preprocessor=dummy_fun)
    Xthis, ythis = make_xy(final_posts_df, vectorizer)
    X_train_this, X_test_this, y_train_this, y_test_this = \
    train_test_split(Xthis, ythis, test_size=0.2, random_state=42)
    clf = MultinomialNB(alpha=alpha).fit(X_train_this, y_train_this)
    score = cv_score(clf,X_train_this,y_train_this, f1_scorer)
    if score > maxscore:
        maxscore = score
        best_alpha = alpha

In [16]:
print("alpha: {}".format(best_alpha))

alpha: 1


In [17]:
vectorizer = CountVectorizer(min_df = best_min_df, tokenizer=dummy_fun, preprocessor=dummy_fun)
X_2, y_2 = make_xy(final_posts_df, vectorizer)
X_train_2, X_test_2, y_train_2, y_test_2 = \
    train_test_split(X_2, y_2, test_size=0.2, random_state=42)                                                   
clf = MultinomialNB(alpha=best_alpha).fit(X_train_2, y_train_2)
#Print the accuracy on the test and training dataset
training_accuracy = clf.score(X_train_2, y_train_2)
test_accuracy = clf.score(X_test_2, y_test_2)

print("Accuracy on training data: {:2f}".format(training_accuracy))
print("Accuracy on test data:     {:2f}".format(test_accuracy))

Accuracy on training data: 0.845223
Accuracy on test data:     0.745547


In [18]:
from sklearn import metrics

pred = clf.predict(X_test_2)
classification_report = metrics.classification_report(y_test_2,pred)
print(classification_report)

              precision    recall  f1-score   support

           0       0.72      0.81      0.76       200
           1       0.78      0.68      0.72       193

    accuracy                           0.75       393
   macro avg       0.75      0.74      0.74       393
weighted avg       0.75      0.75      0.74       393



In [19]:
words = np.array(vectorizer.get_feature_names()) #get list of words

#make identity matrix so that each row has only one word
x = np.eye(X_test_2.shape[1])
probs = clf.predict_log_proba(x)[:, 0]
ind = np.argsort(probs)

good_words = words[ind[:10]]
bad_words = words[ind[-10:]]

good_prob = probs[ind[:10]]
bad_prob = probs[ind[-10:]]

print("Onion words\t     P(Onion | word)")
for w, p in zip(good_words, good_prob):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))
    
print("Not Onion words\t     P(Not Onion | word)")
for w, p in zip(bad_words, bad_prob):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))

Onion words	     P(Onion | word)
               trump 0.96
           regularly 0.95
             happens 0.94
           announced 0.93
           announces 0.93
                shit 0.93
              nation 0.92
            evidence 0.92
            anything 0.92
            everyone 0.92
Not Onion words	     P(Not Onion | word)
                help 0.12
             officer 0.11
                 LOC 0.11
             protest 0.10
             accused 0.09
               China 0.08
             ORDINAL 0.06
               Texas 0.06
            arrested 0.06
              police 0.05


In [20]:
x, y = make_xy(final_posts_df, vectorizer)

prob = clf.predict_proba(x)[:, 0] #probability of being not the onion
predict = clf.predict(x)

y = np.asarray(y)
misclassified = np.where(y != predict) #identify where values don't match prediction

series_misclassified = pd.Series(prob[misclassified], index=list(misclassified))

#sort series

series_misclassified_sorted = series_misclassified.sort_values() #get sorted values of misclassified

indices_misclassified = list(series_misclassified_sorted.index.values) #get indices of misclassified and sort

#get first and last indices
lowest_prob = indices_misclassified[0:5]
highest_prob = indices_misclassified[-5:]

#make lists of lowest probabilities and highest probabilities
lowest_prob_list = [item for t in lowest_prob for item in t] 
highest_prob_list = [item for t in highest_prob for item in t]

###
print("Actually not the Onion but mis-classified as the Onion")
print('---------------------------')
for row in lowest_prob_list:
    print(all_posts.title.iloc[row]) #index into those rows in all_posts
    print("")

print("Actually the Onion but mis-classified as not the Onion")
print('--------------------------')
for row in highest_prob_list:
    print(all_posts.title.iloc[row]) #index into those rows in all_posts
    print("")

Actually not the Onion but mis-classified as the Onion
---------------------------
‘Live pee or die’: N.H. governor steps in to let woman keep her ‘PB4WEGO’ license plates

White supremacists taking DNA tests sad to discover they’re not 100% white

DeVos backlash Sees Parents Threatening to Homeschool Kids

Bush says Trump ‘makes me look pretty good’ by comparison: report

Rapper Soulja Boy Releases New Handheld Game Console and It Looks Terrible

Actually the Onion but mis-classified as not the Onion
--------------------------
Blog: If You’re Not A Police Officer, You Can’t Understand The Pressure You Feel In The Split Second When You Have To Decide Whether Or Not To Shoot An Unarmed Civilian 8 Times

Police Repeatedly Shoot Tim Cook After Mistaking iPhone For Gun

Dallas Cops Plant Black Suspect At Murder Scene

Trump Confident U.S. Military Strike On Syria Wiped Out Russian Scandal

Heartbroken Russian Ambassador Thought Special Meetings With Jeff Sessions Were Very Memorable



Going to try CountVectorizer with bi-grams by editing ngram_range

In [21]:
def make_xy_bigrams(df, vectorizer=None):
    """Same as make_xy but with ngram_range"""    
    if vectorizer is None:
        vectorizer = CountVectorizer(tokenizer=dummy_fun,
        preprocessor=dummy_fun, ngram_range=(1,2))
    X = vectorizer.fit_transform(df['title'])
    X = X.tocsc()  # some versions of sklearn return COO format
    y = df['Onion?'].values.astype(np.int)
    return X, y

In [22]:
#the grid of parameters to search over
alphas_2 = [.1, 1, 5, 10, 50]
best_min_df_2 = 5

#Find the best value for alpha and the best classifier
best_alpha_2 = None
maxscore_2=-np.inf

for alpha in alphas_2:   
    vectorizer = CountVectorizer(min_df=best_min_df_2, tokenizer=dummy_fun, \
    preprocessor=dummy_fun, ngram_range=(1,2))
    Xthis, ythis = make_xy_bigrams(final_posts_df, vectorizer)
    X_train_this, X_test_this, y_train_this, y_test_this = \
    train_test_split(Xthis, ythis, test_size=0.2, random_state=42)
    clf = MultinomialNB(alpha=alpha).fit(X_train_this, y_train_this)
    score = cv_score(clf,X_train_this,y_train_this, f1_scorer)
    if score > maxscore_2:
        maxscore_2 = score
        best_alpha_2 = alpha

In [23]:
print("alpha: {}".format(best_alpha_2))

alpha: 1


In [24]:
vectorizer_3 = CountVectorizer(min_df = best_min_df_2, tokenizer=dummy_fun, preprocessor=dummy_fun,
                              ngram_range=(1,2))
X_3, y_3 = make_xy_bigrams(final_posts_df, vectorizer_3)
X_train_3, X_test_3, y_train_3, y_test_3= train_test_split(X_3, y_3, test_size=0.2, random_state=42)                                            
clf_3 = MultinomialNB(alpha=best_alpha_2).fit(X_train_3, y_train_3)
#Print the accuracy on the test and training dataset
training_accuracy_3 = clf_3.score(X_train_3, y_train_3)
test_accuracy_3 = clf_3.score(X_test_3, y_test_3)

print("Accuracy on training data: {:2f}".format(training_accuracy_3))
print("Accuracy on test data:     {:2f}".format(test_accuracy_3))

Accuracy on training data: 0.847134
Accuracy on test data:     0.750636


In [25]:
words_3 = np.array(vectorizer_3.get_feature_names())

#make identity matrix so that each row has only one word
x_3 = np.eye(X_test_3.shape[1])
probs_3 = clf_3.predict_log_proba(x_3)[:, 0]
ind = np.argsort(probs_3)

good_words_3 = words_3[ind[:10]]
bad_words_3 = words_3[ind[-10:]]

good_prob_3 = probs_3[ind[:10]]
bad_prob_3 = probs_3[ind[-10:]]

print("Onion words\t     P(Onion | word or phrase)")
for w, p in zip(good_words_3, good_prob_3):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))
    
print("Not Onion words\t     P(Not Onion | word or phrase)")
for w, p in zip(bad_words_3, bad_prob_3):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))

Onion words	     P(Onion | word or phrase)
               trump 0.96
           regularly 0.95
             happens 0.94
   regularly happens 0.94
    nation regularly 0.94
         way prevent 0.94
          say nation 0.94
         prevent say 0.94
           announces 0.93
                shit 0.93
Not Onion words	     P(Not Onion | word or phrase)
                 LOC 0.11
             officer 0.11
             protest 0.10
           Texas GPE 0.10
             accused 0.09
               China 0.08
             ORDINAL 0.06
               Texas 0.06
            arrested 0.06
              police 0.05


In [26]:
#add code to get classification report and mis-classified sentences
pred_3 = clf_3.predict(X_test_3)
classification_report_3 = metrics.classification_report(y_test_3,pred_3)
print(classification_report_3)

              precision    recall  f1-score   support

           0       0.73      0.81      0.77       200
           1       0.78      0.68      0.73       193

    accuracy                           0.75       393
   macro avg       0.75      0.75      0.75       393
weighted avg       0.75      0.75      0.75       393



In [27]:
x_3, y_3 = make_xy_bigrams(final_posts_df, vectorizer_3)

prob_3 = clf_3.predict_proba(x_3)[:, 0] #probability of being not the onion
predict_3 = clf_3.predict(x_3)

y_3 = np.asarray(y_3)
misclassified_3 = np.where(y_3 != predict_3) #identify where values don't match prediction

series_misclassified_3 = pd.Series(prob_3[misclassified_3], index=list(misclassified_3))

#sort series

series_misclassified_sorted_3 = series_misclassified_3.sort_values() #get sorted values of misclassified

indices_misclassified_3 = list(series_misclassified_sorted_3.index.values) #get indices of misclassified and sort

lowest_prob_3 = indices_misclassified_3[0:5]
highest_prob_3 = indices_misclassified_3[-5:]

lowest_prob_list_3 = [item for t in lowest_prob_3 for item in t] 
highest_prob_list_3 = [item for t in highest_prob_3 for item in t]

###
print("Actually not the Onion but mis-classified as the Onion")
print('---------------------------')
for row in lowest_prob_list_3:
    print(all_posts.title.iloc[row])
    print("")

print("Actually the Onion but mis-classified as not the Onion")
print('--------------------------')
for row in highest_prob_list_3:
    print(all_posts.title.iloc[row])
    print("")

Actually not the Onion but mis-classified as the Onion
---------------------------
‘Live pee or die’: N.H. governor steps in to let woman keep her ‘PB4WEGO’ license plates

White supremacists taking DNA tests sad to discover they’re not 100% white

Losers are more likely to believe in conspiracy theories, study finds

DeVos backlash Sees Parents Threatening to Homeschool Kids

Tobacco smokers could gain 86 million years of life if they switch to vaping, study finds

Actually the Onion but mis-classified as not the Onion
--------------------------
Dallas Cops Plant Black Suspect At Murder Scene

‘C’mon, C’mon,’ Says Matt Damon Desperately Searching For Own Name On List Of IMDB User Dolphinsoul60’s Top 100 Actors

Blog: If You’re Not A Police Officer, You Can’t Understand The Pressure You Feel In The Split Second When You Have To Decide Whether Or Not To Shoot An Unarmed Civilian 8 Times

Heartbroken Russian Ambassador Thought Special Meetings With Jeff Sessions Were Very Memorable

Trum

It looks like bigrams improve the model slightly, so I will use bigrams. I then did analysis on posts from the Onion compared to real news posts from r/news. I will do this using bigrams.

In [28]:
all_titles_ents_news = all_posts_news['title'].apply(lambda x: preprocessing_entities(x))
final_posts_df_news = pd.DataFrame({'title':all_titles_ents_news, 'Onion?':all_posts_news['Onion?']})

In [29]:
#the grid of parameters to search over
alphas_3 = [.1, 1, 5, 10, 50]
best_min_df_3 = 5

#Find the best value for alpha and the best classifier
best_alpha_3 = None
maxscore_3=-np.inf

for alpha in alphas_3:   
    vectorizer = CountVectorizer(min_df=best_min_df_3, tokenizer=dummy_fun, \
    preprocessor=dummy_fun, ngram_range=(1,2))
    Xthis, ythis = make_xy_bigrams(final_posts_df_news, vectorizer)
    X_train_this, X_test_this, y_train_this, y_test_this = \
    train_test_split(Xthis, ythis, test_size=0.2, random_state=42)
    clf = MultinomialNB(alpha=alpha).fit(X_train_this, y_train_this)
    score = cv_score(clf,X_train_this,y_train_this, f1_scorer)
    if score > maxscore_3:
        maxscore_3 = score
        best_alpha_3 = alpha

In [30]:
print("alpha: {}".format(best_alpha_3))

alpha: 0.1


In [31]:
vectorizer_4 = CountVectorizer(min_df = best_min_df_3, tokenizer=dummy_fun, preprocessor=dummy_fun,
                              ngram_range=(1,2))
X_4, y_4 = make_xy_bigrams(final_posts_df_news, vectorizer_4)
X_train_4, X_test_4, y_train_4, y_test_4= train_test_split(X_4, y_4, test_size=0.2, random_state=42)                                            
clf_4 = MultinomialNB(alpha=best_alpha_3).fit(X_train_4, y_train_4)
#Print the accuracy on the test and training dataset
training_accuracy_4 = clf_4.score(X_train_4, y_train_4)
test_accuracy_4 = clf_4.score(X_test_4, y_test_4)

print("Accuracy on training data: {:2f}".format(training_accuracy_4))
print("Accuracy on test data:     {:2f}".format(test_accuracy_4))

Accuracy on training data: 0.877193
Accuracy on test data:     0.807500


In [32]:
words_4 = np.array(vectorizer_4.get_feature_names())

x_4 = np.eye(X_test_4.shape[1])
probs_4 = clf_4.predict_log_proba(x_4)[:, 0]
ind = np.argsort(probs_4)

good_words_4 = words_4[ind[:10]]
bad_words_4 = words_4[ind[-10:]]

good_prob_4 = probs_4[ind[:10]]
bad_prob_4 = probs_4[ind[-10:]]

print("Onion words\t     P(Onion | word or phrase)")
for w, p in zip(good_words_4, good_prob_4):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))
    
print("News words\t     P(News | word or phrase)")
for w, p in zip(bad_words_4, bad_prob_4):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))

Onion words	     P(Onion | word or phrase)
                like 1.00
             prevent 1.00
          study find 0.99
             happens 0.99
           regularly 0.99
         prevent say 0.99
             fucking 0.99
          say nation 0.99
         way prevent 0.99
    nation regularly 0.99
News words	     P(News | word or phrase)
           GPE MONEY 0.01
                 sue 0.01
               fired 0.01
          California 0.01
        CARDINAL GPE 0.01
        GPE CARDINAL 0.01
              charge 0.01
               judge 0.01
            arrested 0.01
             ORDINAL 0.00


In [33]:
#add code to get classification report and mis-classified sentences
pred_4 = clf_4.predict(X_test_4)
classification_report_4 = metrics.classification_report(y_test_4,pred_4)
print(classification_report_4)

              precision    recall  f1-score   support

           0       0.79      0.85      0.82       206
           1       0.83      0.76      0.79       194

    accuracy                           0.81       400
   macro avg       0.81      0.81      0.81       400
weighted avg       0.81      0.81      0.81       400



In [34]:
x_4, y_4 = make_xy_bigrams(final_posts_df_news, vectorizer_4)

prob_4 = clf_4.predict_proba(x_4)[:, 0] #probability of being news
predict_4 = clf_4.predict(x_4)

y_4 = np.asarray(y_4)
misclassified_4 = np.where(y_4 != predict_4)

series_misclassified_4 = pd.Series(prob_4[misclassified_4], index=list(misclassified_4))

#sort series

series_misclassified_sorted_4 = series_misclassified_4.sort_values() #sort from smallest to largest

indices_misclassified_4 = list(series_misclassified_sorted_4.index.values) #get indices of misclassified

lowest_prob_4 = indices_misclassified_4[0:5] #get lowest probabilites
highest_prob_4 = indices_misclassified_4[-5:] #get highest probabilities

lowest_prob_list_4 = [item for t in lowest_prob_4 for item in t]  #get 
highest_prob_list_4 = [item for t in highest_prob_4 for item in t]

###
print("Actually news but mis-classified as the Onion")
print('---------------------------')
for row in lowest_prob_list_4:
    print(all_posts_news.title.iloc[row])
    print("")

print("Actually the Onion but mis-classified as news")
print('--------------------------')
for row in highest_prob_list_4:
    print(all_posts_news.title.iloc[row])
    print("")

Actually news but mis-classified as the Onion
---------------------------
The Italian government has approved a law ordering parents to vaccinate children or face fines. The authorities have noted a rise in measles cases, which the cabinet blames on "the spread of anti-scientific theories."

Decade in the Red: Trump Tax Figures Show Over $1 Billion in Business Losses

George Clooney Calls for Online Release of 'The Interview "That's the most important part. We cannot be told we can't see something by Kim Jong Un, of all f***ing people."

Facebook "allowed Microsoft's Bing search engine to see the names of virtually all Facebook users' friends without consent, the records show, and gave Netflix and Spotify the ability to read Facebook users' private messages."

Millennials earn 20% less than Boomers did at same stage of life

Actually the Onion but mis-classified as news
--------------------------
Man Who Crossed US In Balloon Only Talks About Horse Abuse

PR Firm Advises U.S. To Cut Ti