# Sentiment Analysis on Reddit by Political Subreddits

In [1]:
import pandas as pd
import numpy as np
import json as j

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
reddit_data = pd.read_json('09_28_2019.json')

In [3]:
subreddits = np.unique(np.array(reddit_data['subreddit'].tolist()))

In [4]:
reddit_data.shape

(24622, 62)

In [5]:
reddit_data.columns

Index(['all_awardings', 'approved_at_utc', 'approved_by', 'archived', 'author',
       'author_cakeday', 'author_flair_background_color',
       'author_flair_css_class', 'author_flair_richtext',
       'author_flair_template_id', 'author_flair_text',
       'author_flair_text_color', 'author_flair_type', 'author_fullname',
       'author_patreon_flair', 'awarders', 'banned_at_utc', 'banned_by',
       'body', 'body_html', 'can_gild', 'can_mod_post', 'collapsed',
       'collapsed_reason', 'controversiality', 'created', 'created_utc',
       'depth', 'distinguished', 'downs', 'edited', 'gilded', 'gildings', 'id',
       'is_submitter', 'likes', 'link_id', 'locked', 'mod_note',
       'mod_reason_by', 'mod_reason_title', 'mod_reports', 'name', 'no_follow',
       'num_reports', 'parent_id', 'permalink', 'removal_reason',
       'report_reasons', 'saved', 'score', 'score_hidden', 'send_replies',
       'steward_reports', 'stickied', 'subreddit', 'subreddit_id',
       'subreddit_name_pre

In [6]:
kept_columns = ['subreddit','author','id','parent_id','body', 'score','ups','downs','saved','all_awardings','controversiality']

In [7]:
for c in reddit_data.columns:
    if c not in kept_columns:
        reddit_data = reddit_data.drop(columns=c, axis=1)

In [8]:
reddit_data.sample(10)

Unnamed: 0,all_awardings,author,body,controversiality,downs,id,parent_id,saved,score,subreddit,ups
7018,[],SgtDonowitz,You’re telling me Monty Python was a revisioni...,0,0,f1hdld6,t1_f1h3gh5,False,42,history,42
16388,[],paintbucketholder,Obama was talking about how Citizens United al...,0,0,f1q3d9i,t1_f1plkvp,False,101,politics,101
14758,[],RoutineProcedure,1) The president does not pilot the drones.\n\...,0,0,f1r8sjc,t1_f1r519b,False,1,politics,1
5837,[],isntAnything,They should totally respect your choice. Some ...,0,0,f1phw5v,t1_f1pguj7,False,2,environment,2
12211,[],MikeOfAllPeople,"This is correct. For anyone wondering, these e...",0,0,f1oid2f,t1_f1ohyh6,False,109,news,109
2637,[],eskjcSFW,The ironic part would be when the people that ...,0,0,f1pxtea,t1_f1px3uo,False,13,Economics,13
5219,[],zdss,That's not how that works.,0,0,f1ngneg,t1_f1n8e7y,False,3,ElizabethWarren,3
7786,[],Regretful_Attorney,Better than having them on the streets harassi...,0,0,f1na83u,t3_da3avw,False,-7,law,-7
20325,[],ps2fats,world event? lol,0,0,f0vd0bx,t3_d6ck39,False,-1,worldevents,-1
19697,[],the8track,"She’s supporting an inquiry, not necessarily i...",0,0,f1ng391,t3_da38tm,False,46,tulsi,46


In [9]:
reddit_data.shape

(24622, 11)

I think we want to do this for every subreddit

In [10]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/frostburn/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [11]:
import re
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2

In [12]:
predictions = []
for i, sr in enumerate(subreddits):
    
    # another dict?
    pred_scores = {}
    
    print(i)
    data = reddit_data[reddit_data['subreddit'] == sr]
    
    if len(data) < 20:
#         print('hello')
#         print(len(data))
        continue

    stemmer = SnowballStemmer('english')
    words = stopwords.words("english")

    data['cleaned'] = data['body'].apply(lambda x: " ".join([stemmer.stem(i) for i in re.sub("[^a-zA-Z]", " ", x).split() if i not in words]).lower())

    X_train, X_test, y_train, y_test = train_test_split(data['cleaned'], data.score, test_size=0.2)

    pipeline = Pipeline([('vect', TfidfVectorizer(ngram_range=(1, 2), stop_words="english", sublinear_tf=True)),
                         ('chi',  SelectKBest(chi2, k='all')),
                         ('clf', LinearSVC(C=1.0, penalty='l1', max_iter=3000, dual=False))])


    model = pipeline.fit(X_train, y_train)

    vectorizer = model.named_steps['vect']
    chi = model.named_steps['chi']
    clf = model.named_steps['clf']

#     feature_names = vectorizer.get_feature_names()
#     feature_names = [feature_names[i] for i in chi.get_support(indices=True)]
#     feature_names = np.asarray(feature_names)

#     target_names = ['1', '2', '3', '4', '5']
#     print("top 10 keywords per class:")
#     for i, label in enumerate(target_names):
#         indeces = 10 if len(clf.coef_[i]) > 10 else len(clf.coef_[i]) 
#         top10 = np.argsort(clf.coef_[i])[-indeces:]
#         print("%s: %s" % (label, " ".join(feature_names[top10])))

    print("accuracy score: " + str(model.score(X_test, y_test)))
    
    test_speech = [
        
        'Donald Trump will be the president.', 
        'Bernie Sanders will be the president.',
        'Elizabeth Warren will be the president.',
        'Beto O\'Rourke will be the president.',
        'Cory Booker will be the president.',
        'Andrew Yang will be the president',
        'Tulsi Gabbard will be the president'
        
    ]
    
    pred_scores[sr] = list(map(int,model.predict(test_speech)))
    
    predictions.append(pred_scores)
#     print(sr)
#     print(model.predict(test_speech))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


0
1
accuracy score: 0.7272727272727273
2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.46153846153846156
3
accuracy score: 0.4444444444444444
4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.6
5
accuracy score: 0.2222222222222222
6
accuracy score: 0.5
7


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.13402061855670103
8
accuracy score: 0.45454545454545453
9


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.2222222222222222
10


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.2421875
11


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.21621621621621623
12
accuracy score: 0.375
13
14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.47619047619047616
15
accuracy score: 0.6666666666666666
16


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.15384615384615385
17


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.29310344827586204
18


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.21844660194174756
19
accuracy score: 0.21739130434782608
20


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.375
21
accuracy score: 0.14285714285714285
22


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.5769230769230769
23


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.4888888888888889
24


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.24242424242424243
25


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.32075471698113206
26


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.3821656050955414
27
accuracy score: 0.42857142857142855
28


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.36363636363636365
29
30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.21212121212121213
31
accuracy score: 0.3333333333333333
32


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.21666666666666667
33
accuracy score: 0.275
34


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.1875
35
accuracy score: 0.36363636363636365
36


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.625
37
accuracy score: 0.3333333333333333
38


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.38686131386861317
39


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.16666666666666666
40


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.3548387096774194
41


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.17391304347826086
42
accuracy score: 0.5882352941176471
43


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.28804347826086957
44
accuracy score: 0.7777777777777778
45


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.0423728813559322
46


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.08917197452229299
47
48


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.0851063829787234
49


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.6614173228346457
50
accuracy score: 0.23076923076923078
51


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.3684210526315789
52
accuracy score: 0.14285714285714285
53
accuracy score: 0.673469387755102
54


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.2840909090909091
55
accuracy score: 0.3
56


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.12903225806451613
57


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.1603960396039604
58


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


accuracy score: 0.18009478672985782




In [13]:
predictions

[{'AmericanPolitics': [2, 1, 1, 1, 1, 1, 1]},
 {'BernieSanders': [2, 2, 2, 2, 2, 2, 2]},
 {'Beto2020': [1, 1, 1, 1, 1, 1, 1]},
 {'BetoORourke': [1, 1, 1, 2, 1, 1, 1]},
 {'BillWeld': [1, 1, 1, 1, 1, 1, 1]},
 {'Communist': [1, 1, 1, 1, 1, 1, 1]},
 {'Conservative': [1, 1, 1, 8, 1, 1, 1]},
 {'Delaney2020': [1, 1, 1, 1, 1, 1, 1]},
 {'Democrats2020': [2, 2, 2, 2, 2, 2, 2]},
 {'Economics': [1, 1, 1, 1, 1, 1, 1]},
 {'ElizabethWarren': [4, 1, 11, 1, 1, 1, 7]},
 {'JoeBiden': [2, 1, 1, 1, 1, 1, 1]},
 {'Kamala': [12, 1, 1, 1, 1, 1, 1]},
 {'Kossacks_for_Sanders': [3, 3, 7, 3, 3, 3, 3]},
 {'LGBTnews': [1, 1, 1, 1, 1, 1, 1]},
 {'Liberal': [1, 1, 1, 1, 1, 1, 1]},
 {'Libertarian': [2, 1, 1, 1, 1, 1, 1]},
 {'LibertarianLeft': [2, 2, 2, 2, 2, 2, 2]},
 {'LibertarianSocialism': [1, 1, 1, 1, 1, 1, 1]},
 {'Marianne2020': [1, 1, 1, 1, 1, 1, 1]},
 {'Marxism': [1, 1, 1, 1, 1, 1, 1]},
 {'Objectivism': [1, 1, 1, 1, 1, 1, 1]},
 {'Pete_Buttigieg': [1, 1, 1, 1, 1, 1, 1]},
 {'Republican': [1, 1, 1, 1, 1, 1, 1]},
 {'S

In [14]:
d = predictions[0]

In [15]:

with open("score_predictions.json", 'w') as f:
    f.write(j.dumps(predictions))


for sr in subreddits:
    
    data = reddit_data[reddit_data['subreddit'] == sr]
    print(sr)
    print(len(data))
    
    if len(data) < 20:
        print('hello')
        print(len(data))
        continue

    #data = data[data.score != 3]
    data['sentiment'] = data['score'] > 0
    #print(data.head())
    
    
    #print(len(data), len(data.sentiment))
    
    X_train, X_test, y_train, y_test = train_test_split(data, data.sentiment, test_size=0.2)
    
    
    count = CountVectorizer()
    temp = count.fit_transform(X_train.body)

    tdif = TfidfTransformer()
    temp2 = tdif.fit_transform(temp)

    text_regression = LogisticRegression()
    model = text_regression.fit(temp2, y_train)

    prediction_data = tdif.transform(count.transform(X_test.body))

    predicted = model.predict(prediction_data)
    
    print(np.mean(predicted == y_test))
    
    text = input()
    
    print(model.predict(tdif.transform(count.transform([text]))))