# Safeguarding Voters from State-Sponsored Content
## Authors: Siddharth Nanda, Dhyey Parikh, Clara Na, Dale Wilson

In [1]:
# Package imports
import pandas as pd
import nltk
# Helper module that provides polarizeFrame, addToFrame, hashTagPhrases and mostMentions (all used below)
import functions
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Set up Plotly for inline notebook output; the iplot() calls at the end of the notebook rely on this.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather than embedding it
# in the notebook — confirm against the installed plotly version.
init_notebook_mode(connected=True)

In [2]:
# Load the raw tweets, then keep three of their columns under friendlier names
text_data = pd.read_csv('tweets.csv')

# New column name -> column name in the CSV
source_columns = {"Date": 'created_str', "Time": 'created_at', "Tweet": 'text'}
df = pd.DataFrame({renamed: text_data[raw] for renamed, raw in source_columns.items()})

In [3]:
# Order the tweets by 'Time', discard rows in which every column is missing
# (how='all'), and renumber the rows. reset_index() is called without drop=True,
# so the previous row labels are kept in a new 'index' column.
df = (
    df.sort_values(by=['Time'])
      .dropna(axis=0, how='all')
      .reset_index()
)

In [4]:
# A second dataframe with polarity data appended is created with the polarizeFrame function,
# which receives the 'Date' and 'Tweet' columns of df.
# Later cells read the 'Text', 'Date' and 'Compound' columns of the result.
# NOTE(review): polarizeFrame lives in the `functions` module and is not visible here —
# presumably 'Compound' is a per-tweet compound sentiment score; confirm there.
df1 = functions.polarizeFrame(df['Date'], df['Tweet'])

In [5]:
# Lower-cased copy of the 'Text' column, saved in its own variable.
# This is the value the hashtag and mention cells below pass to their helper functions,
# so differently-cased spellings are presumably tallied together — confirm in `functions`.
text = df1['Text'].str.lower()

In [6]:
# Top 10 used hashtags: 'hashTagPhrases' produces the tally from the lower-cased text,
# and 'addToFrame' turns it into a dataframe labelled 'Hashtags'.
# The funnel-chart cell reads the 'Hashtags' and 'Frequency' columns of h_ten.
hashtag_counts = functions.hashTagPhrases(text, 10)
h_ten = functions.addToFrame('Hashtags', hashtag_counts)

In [7]:
# Top 10 mentioned users: 'mostMentions' produces the tally from the lower-cased text,
# and 'addToFrame' turns it into a dataframe labelled 'Mentions'.
mention_counts = functions.mostMentions(text, 10)
m_ten = functions.addToFrame('Mentions', mention_counts)

In [8]:
# Funnel chart of the top hashtags: one four-cornered section per hashtag,
# with the most frequent hashtag spanning the full plot width.
values = h_ten['Frequency']
phases = h_ten['Hashtags']
colors = [
    'rgb(32,155,160)',
    'rgb(253,93,124)',
    'rgb(28,119,139)',
    'rgb(182,231,235)',
    'rgb(35,154,160)',
    'rgb(45,32,193)',
    'rgb(123,167,33)',
    'rgb(34,55,78)',
    'rgb(109,33,165)',
    'rgb(76,89,145)',
]

n_phase = len(phases)
plot_width = 400

# Height of a single section and the vertical gap between two sections
section_h = 100
section_d = 10

# Scale factor that maps the largest frequency onto the full plot width
unit_width = plot_width / max(values)

# Width of every section after scaling
phase_w = [int(value * unit_width) for value in values]

# Total drawing height: all sections plus the gaps between them
height = section_h * n_phase + section_d * (n_phase - 1)

# A section tapers from its own width at the top to the width of the following
# section at the bottom; the final section has no successor and keeps its width.
bottom_w = phase_w[1:] + phase_w[-1:]

# Plot shapes, and the Y position of each section's name/value text
shapes = []
label_y = []

row_pitch = section_h + section_d
for idx, (upper_w, lower_w) in enumerate(zip(phase_w, bottom_w)):
    top_y = height - idx * row_pitch
    bottom_y = top_y - section_h

    # SVG path mirrored around x = 0: top-right, bottom-right, bottom-left, top-left
    outline = 'M {0} {1} L {2} {3} L -{2} {3} L -{0} {1} Z'.format(
        upper_w / 2, top_y, lower_w / 2, bottom_y
    )
    shapes.append({
        'type': 'path',
        'path': outline,
        'fillcolor': colors[idx],
        'line': {
            'width': 1,
            'color': colors[idx]
        }
    })

    # Text is centred vertically within its section
    label_y.append(top_y - (section_h) / 2)

# Both text columns use the same font
label_font = dict(color='rgb(200,200,200)', size=15)

# Hashtag names, placed at x = -350
label_trace = go.Scatter(
    x=[-350] * n_phase,
    y=label_y,
    mode='text',
    text=phases,
    textfont=label_font
)

# Hashtag frequencies, placed at x = 350
value_trace = go.Scatter(
    x=[350] * n_phase,
    y=label_y,
    mode='text',
    text=values,
    textfont=label_font
)

data = [label_trace, value_trace]

# Neither axis shows tick labels or a zero line
hidden_axis = dict(showticklabels=False, zeroline=False)

layout = go.Layout(
    title="<b>Top Ten Hashtags Used by Russian Bots</b>",
    titlefont=dict(size=20, color='rgb(203,203,203)'),
    shapes=shapes,
    height=560,
    width=800,
    showlegend=False,
    paper_bgcolor='rgba(44,58,71,1)',
    plot_bgcolor='rgba(44,58,71,1)',
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
    margin=go.Margin(l=50, r=50, b=50, t=100, pad=4)
)

fig = go.Figure(data=data, layout=layout)

In [9]:
# Horizontal bar chart of the five most mentioned accounts.
# NOTE(review): the handles and counts below are hard-coded rather than read
# from m_ten — confirm they still match the data whenever tweets.csv changes.


def _mention_bar(handle, count, fill, outline):
    """Return one horizontal bar for a single mentioned account.

    handle  -- account name, used as the bar's category on the y axis
    count   -- number of mentions, used as the bar's length
    fill    -- colour of the bar body
    outline -- colour of the 2px line drawn around the bar
    """
    return go.Bar(
        y=[handle],
        x=[count],
        orientation='h',
        marker=dict(
            color=fill,
            line=dict(color=outline, width=2)
        )
    )


# (handle, mentions, fill colour, outline colour), largest count first
mention_rows = [
    ('@realdonaldtrump', 4325, 'rgba(32,155,160, 0.6)', 'rgba(253,93,124, 1.0)'),
    ('@midnight', 2414, 'rgba(28,119,139, 0.6)', 'rgba(182,231,235, 1.0)'),
    ('@blicqer', 2181, 'rgba(35,154,160, 0.6)', 'rgba(45,32,193, 1.0)'),
    ('@hillaryclinton', 2080, 'rgba(123,167,33, 0.6)', 'rgba(34,55,78, 1.0)'),
    ('@conservatexian', 1105, 'rgba(109,33,165, 0.6)', 'rgba(76,89,145, 1.0)'),
]

trace1, trace2, trace3, trace4, trace5 = [_mention_bar(*row) for row in mention_rows]

# Traces are handed over smallest-first, presumably so that the most-mentioned
# account ends up at the top of the chart — verify in the rendered figure.
data = [trace5, trace4, trace3, trace2, trace1]
layout = go.Layout(
    title="<b>Top Five Mentions by Russian Bots</b>",
    titlefont=dict(
        size=20,
        color='rgb(203,203,203)'
    ),
    font=dict(family='Courier New, monospace', size=10, color='#cbcbcb'),
    barmode='stack',
    height=560,
    width=800,
    showlegend=False,
    paper_bgcolor='rgba(44,58,71,1)',
    plot_bgcolor='rgba(44,58,71,1)',
    margin=go.Margin(
        l=150,
        r=50,
        b=100,
        t=100,
        pad=4
    )
)

fig1 = go.Figure(data=data, layout=layout)

In [10]:
# Time series of the compound polarity; rows whose score is exactly 0 are left out
nonzero = df1[df1['Compound'] != 0]
trace = go.Scatter(
    x=nonzero.Date,
    y=nonzero['Compound'],
    name="Compound Sentiment",
    line={'color': '#17BECF'},
    opacity=0.8,
)

data = [trace]

# Range-selector buttons: look-back windows given in months, plus a final "all" button
look_back = [(1, '1m'), (3, '3m'), (6, '6m'), (12, '12m'), (24, '2y')]
range_buttons = [
    {'count': months, 'label': caption, 'step': 'month', 'stepmode': 'backward'}
    for months, caption in look_back
]
range_buttons.append({'step': 'all'})

layout = {
    'title': 'Compound Polarity over Time',
    'titlefont': {'size': 20, 'color': 'rgb(203, 203, 203)'},
    'font': {'family': 'Courier New, monospace', 'size': 10, 'color': '#cbcbcb'},
    'xaxis': {
        'rangeselector': {
            'buttons': range_buttons,
            'font': {'size': 11, 'color': '#000000'},
        },
        'rangeslider': {},
        'type': 'date',
    },
    'paper_bgcolor': 'rgba(44, 58, 71, 1)',
    'plot_bgcolor': 'rgba(44, 58, 71, 1)',
}

fig2 = {'data': data, 'layout': layout}

In [11]:
# Pie chart with the share of positive, negative and neutral tweets.
# The tallies are taken over the whole 'Compound' column at once, so every row
# (including the last one) is counted and the result does not depend on the
# frame's index labels. Rows with a missing score match none of the three tests.
compound = df1['Compound']
pos_count = int((compound > 0).sum())
neg_count = int((compound < 0).sum())
neu_count = int((compound == 0).sum())

labels = ['Positive', 'Negative', 'Neutral']
values = [pos_count, neg_count, neu_count]

trace = go.Pie(labels=labels, values=values)
data = [trace]

layout4 = go.Layout(
    # Bold tag is closed properly, matching the titles of the other figures
    title="<b>Overall Sentiment Analysis</b>",
    titlefont=dict(
        size=20,
        color='rgb(255, 255, 255)'
    ),
    font=dict(family='Courier New, monospace', size=10, color='#FFFFFF'),
    height=560,
    width=800,
    paper_bgcolor='rgba(44,58,71,1)'
)

fig3 = dict(data=data, layout=layout4)

In [None]:
# Render the Top 10 Hashtags funnel first, then the Top 5 Mentions bar chart
for figure in (fig, fig1):
    iplot(figure)

In [None]:
# Render the Compound Polarity over Time chart first, then the Overall Sentiment pie
for figure in (fig2, fig3):
    iplot(figure)