# ScatterText Visualization Setup

In [1]:
import pandas as pd
import pickle
import numpy as np

import scattertext as st

import emoji
from preprocessing_funcs import get_hashtags, remove_url, remove_rt_prefix, demojify

In [2]:
# Load the tweet DataFrame (month, handle, label, tweet, vader_score) from its
# pickle file. NOTE: pickle.load can execute arbitrary code embedded in the
# file, so only point this at pickles produced by this project.
pickle_path = '../data_files/tweets_with_feels.pickle'
with open(pickle_path, 'rb') as read_file:
    df = pickle.load(read_file)

In [3]:
# Preview the first rows to check the loaded columns and label values.
df.head()

Unnamed: 0,month,handle,label,tweet,vader_score
1,2016-10,detroitdailynew,clinton,"Clinton: Trump should’ve apologized more, atta...",-0.2484
4,2017-08,mrclydepratt,clinton,RT @Shareblue: Pence and his lawyers decided w...,0.0
13,2016-08,pamela_moore13,clinton,"Dave Chappelle: ""Black Lives Matter"" is the wo...",-0.6792
42,2017-03,paulinett,clinton,RT @KStreetHipster: Hamner-Brown is already on...,0.0
43,2017-03,hyddrox,clinton,RT @TrumpSuperPAC: Obama's a Muslim &amp; this...,0.4184


In [4]:
# Keep only the tweets labelled 'trump' or 'clinton'.
# .copy() makes the result an independent frame, so modifying its columns
# later does not operate on a slice of `df` (which pandas reports as
# SettingWithCopyWarning and which may silently not do what is intended).
trump_clinton_df = df[df['label'].isin(['trump', 'clinton'])].copy()

In [5]:
# Display the label-filtered frame (pandas elides the middle rows).
trump_clinton_df

Unnamed: 0,month,handle,label,tweet,vader_score
1,2016-10,detroitdailynew,clinton,"Clinton: Trump should’ve apologized more, atta...",-0.2484
4,2017-08,mrclydepratt,clinton,RT @Shareblue: Pence and his lawyers decided w...,0.0000
13,2016-08,pamela_moore13,clinton,"Dave Chappelle: ""Black Lives Matter"" is the wo...",-0.6792
42,2017-03,paulinett,clinton,RT @KStreetHipster: Hamner-Brown is already on...,0.0000
43,2017-03,hyddrox,clinton,RT @TrumpSuperPAC: Obama's a Muslim &amp; this...,0.4184
...,...,...,...,...,...
203463,2017-01,daileyjadon,trump,RT @paul_lander: Donald Trump Mistakenly Tweet...,-0.8126
203464,2016-07,dailysandiego,trump,Trump transition tests outsider's governing ab...,0.3182
203470,2016-09,onlinememphis,trump,ELECTION GUIDE: Federal and State elections ht...,0.0000
203474,2016-10,watchmewalkin,trump,RT @BuzzFeed: This 22-year-old got out of an a...,-0.1531


In [6]:
# Clean the tweet text: strip URLs, then the leading "RT @user:" prefix, then
# run demojify (project helper; presumably converts or removes emoji - confirm
# against preprocessing_funcs).
# .assign() returns a new frame rather than writing column-by-column into a
# frame that was selected out of `df`, which avoids SettingWithCopyWarning.
trump_clinton_df = trump_clinton_df.assign(
    tweet=(
        trump_clinton_df['tweet']
        .map(remove_url)
        .map(remove_rt_prefix)
        .map(demojify)
    )
)

In [7]:
# Display the cleaned frame: retweet prefixes and URLs should no longer appear
# in the tweet column.
trump_clinton_df

Unnamed: 0,month,handle,label,tweet,vader_score
1,2016-10,detroitdailynew,clinton,"Clinton: Trump should’ve apologized more, atta...",-0.2484
4,2017-08,mrclydepratt,clinton,Pence and his lawyers decided which of his of...,0.0000
13,2016-08,pamela_moore13,clinton,"Dave Chappelle: ""Black Lives Matter"" is the wo...",-0.6792
42,2017-03,paulinett,clinton,Hamner-Brown is already on its way here. It's...,0.0000
43,2017-03,hyddrox,clinton,Obama's a Muslim &amp; this video from @FoxNe...,0.4184
...,...,...,...,...,...
203463,2017-01,daileyjadon,trump,Donald Trump Mistakenly Tweets to the Wrong I...,-0.8126
203464,2016-07,dailysandiego,trump,Trump transition tests outsider's governing ab...,0.3182
203470,2016-09,onlinememphis,trump,ELECTION GUIDE: Federal and State elections,0.0000
203474,2016-10,watchmewalkin,trump,This 22-year-old got out of an awkward convo ...,-0.1531


In [8]:
# Drop tweets with no text left after cleaning. Strip before comparing so that
# whitespace-only remnants (e.g. a tweet that consisted only of a URL plus
# surrounding spaces) are removed as well, not just exact empty strings.
trump_clinton_df = trump_clinton_df[trump_clinton_df['tweet'].str.strip().ne('')]

In [9]:
# Display the frame after removing tweets that were empty once cleaned.
trump_clinton_df

Unnamed: 0,month,handle,label,tweet,vader_score
1,2016-10,detroitdailynew,clinton,"Clinton: Trump should’ve apologized more, atta...",-0.2484
4,2017-08,mrclydepratt,clinton,Pence and his lawyers decided which of his of...,0.0000
13,2016-08,pamela_moore13,clinton,"Dave Chappelle: ""Black Lives Matter"" is the wo...",-0.6792
42,2017-03,paulinett,clinton,Hamner-Brown is already on its way here. It's...,0.0000
43,2017-03,hyddrox,clinton,Obama's a Muslim &amp; this video from @FoxNe...,0.4184
...,...,...,...,...,...
203463,2017-01,daileyjadon,trump,Donald Trump Mistakenly Tweets to the Wrong I...,-0.8126
203464,2016-07,dailysandiego,trump,Trump transition tests outsider's governing ab...,0.3182
203470,2016-09,onlinememphis,trump,ELECTION GUIDE: Federal and State elections,0.0000
203474,2016-10,watchmewalkin,trump,This 22-year-old got out of an awkward convo ...,-0.1531


In [10]:
# Build the Scattertext corpus: each row's `tweet` is a document and `label`
# is its category. Scattertext's lightweight whitespace tokenizer is used as
# the nlp callable.
corpus_builder = st.CorpusFromPandas(
    trump_clinton_df,
    category_col='label',
    text_col='tweet',
    nlp=st.whitespace_nlp_with_sentences,
)
corpus = corpus_builder.build()

In [11]:
# Render the interactive Scattertext explorer as an HTML string, comparing the
# 'trump' category (labelled "Trump") against everything else ("Clinton").
explorer_options = dict(
    category='trump',
    category_name='Trump',
    not_category_name='Clinton',
    minimum_term_frequency=200,       # frequency cut-off for terms to be plotted
    pmi_threshold_coefficient=5,      # PMI-based filter (see Scattertext docs)
    width_in_pixels=1000,
    metadata=trump_clinton_df['handle'],  # account handle passed as per-tweet metadata
)
html = st.produce_scattertext_explorer(corpus, **explorer_options)

In [12]:

# Write the explorer page to disk as UTF-8 bytes. The `with` block closes the
# file handle deterministically instead of leaving it to garbage collection.
with open('6b_scattertext_trump_clinton.html', 'wb') as html_file:
    html_file.write(html.encode('utf-8'))