#### load libraries

In [141]:
import pandas as pd
import numpy as np
import re

# plot 
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf
import plotly.tools as tls 
cf.set_config_file(offline=False, world_readable=True, theme='ggplot')
credentials = tls.get_credentials_file()
from IPython.display import Image

#### load data

In [31]:
df = pd.read_csv("tweets_table-5.csv")

In [32]:
df.head()

Unnamed: 0,keyword,date,tweets
0,deletefacebook,2018-05-13 07:08:55,rt tomthunkitsmind trumpti polit firm offer en...
1,deletefacebook,2018-05-13 07:07:58,rt duckduckgo pleas keep mind deletefacebook d...
2,deletefacebook,2018-05-13 07:06:01,rt inthematrixxx qalert 51218peopl unit hold p...
3,deletefacebook,2018-05-13 06:38:34,rt tammygallant8 kindamuslim jonathon5760 mdav...
4,deletefacebook,2018-05-13 06:34:10,rt tomthunkitsmind trumpti polit firm offer en...


#### convert the dates

In [33]:
dates = [pd.to_datetime(x).date() for x in df.date]
df = df.assign(dates = pd.Series(dates))

In [34]:
df.head()

Unnamed: 0,keyword,date,tweets,dates
0,deletefacebook,2018-05-13 07:08:55,rt tomthunkitsmind trumpti polit firm offer en...,2018-05-13
1,deletefacebook,2018-05-13 07:07:58,rt duckduckgo pleas keep mind deletefacebook d...,2018-05-13
2,deletefacebook,2018-05-13 07:06:01,rt inthematrixxx qalert 51218peopl unit hold p...,2018-05-13
3,deletefacebook,2018-05-13 06:38:34,rt tammygallant8 kindamuslim jonathon5760 mdav...,2018-05-13
4,deletefacebook,2018-05-13 06:34:10,rt tomthunkitsmind trumpti polit firm offer en...,2018-05-13


In [35]:
df.shape

(252011, 4)

In [36]:
df = df.drop_duplicates()

#### drop duplicates, now down to 46K rows

In [37]:
df.shape

(46627, 4)

In [41]:
df.keyword.unique()

array(['deletefacebook', '#deletefacebook', 'facebook', 'mark zuckerberg',
       'cambridge analytica', 'aleksandr kogan'], dtype=object)

#### remove hashtag from keywords

In [85]:
clean_keywords = [re.sub(r'[^\w]', '', word) for word in df.keyword]
df = df.assign(clean_keywords = pd.Series(clean_keywords))

In [86]:
df.clean_keywords.unique()

array(['deletefacebook', 'facebook', 'markzuckerberg',
       'cambridgeanalytica', 'aleksandrkogan', nan], dtype=object)

In [87]:
df.dates.unique()

array([datetime.date(2018, 5, 13), datetime.date(2018, 5, 12),
       datetime.date(2018, 5, 11), datetime.date(2018, 5, 10),
       datetime.date(2018, 5, 14), datetime.date(2018, 5, 15),
       datetime.date(2018, 5, 16), datetime.date(2018, 5, 9),
       datetime.date(2018, 5, 8), datetime.date(2018, 5, 6),
       datetime.date(2018, 5, 7), datetime.date(2018, 5, 17),
       datetime.date(2018, 5, 18), datetime.date(2018, 5, 19)], dtype=object)

#### calculate totals by day and keyword

In [88]:
df_tweet_totals = df.groupby(['dates', 'clean_keywords'])['tweets'].count().reset_index()

In [89]:
df_tweet_totals.head()

Unnamed: 0,dates,clean_keywords,tweets
0,2018-05-10,deletefacebook,642
1,2018-05-11,deletefacebook,2109
2,2018-05-12,deletefacebook,1956
3,2018-05-12,markzuckerberg,877
4,2018-05-13,deletefacebook,645


In [123]:
df_tweet_totals[df_tweet_totals.clean_keywords == 'facebook']

Unnamed: 0,dates,clean_keywords,tweets
5,2018-05-13,facebook,553
9,2018-05-14,facebook,829


In [147]:
facebook = go.Scatter(x=df_tweet_totals.dates, 
                      y=df_tweet_totals[df_tweet_totals.clean_keywords == 'facebook'].tweets, 
                      name = "facebook", 
                      line = dict(color = '#17BECF'), opacity = 0.8)

deletefacebook = go.Scatter(x=df_tweet_totals.dates, 
                            y=df_tweet_totals[df_tweet_totals.clean_keywords == 'deletefacebook'].tweets, 
                            name = "deletefacebook", 
                            line = dict(color = '#35A43D'), opacity = 0.8)

markzuckerberg = go.Scatter(x=df_tweet_totals.dates, 
                            y=df_tweet_totals[df_tweet_totals.clean_keywords == 'markzuckerberg'].tweets, 
                            name = "markzuckerberg", 
                            line = dict(color = '#6EA9B5'), opacity = 0.8)

cambridgeanalytica = go.Scatter(x=df_tweet_totals.dates, 
                                y=df_tweet_totals[df_tweet_totals.clean_keywords == 'cambridgeanalytica'].tweets, 
                                name = "cambridgeanalytica", 
                                line = dict(color = '#846EB5'), opacity = 0.8)

aleksandrkogan = go.Scatter(x=df_tweet_totals.dates, 
                                y=df_tweet_totals[df_tweet_totals.clean_keywords == 'aleksandrkogan'].tweets, 
                                name = "aleksandrkogan", 
                                line = dict(color = '#B56E8D'), opacity = 0.8)

data = [facebook, deletefacebook, markzuckerberg, cambridgeanalytica, aleksandrkogan]

layout = dict(
    title = "Total Unique Tweets by Keyword and Date",
    xaxis = dict(range = ['2018-05-10','2018-05-15'])
)

fig = dict(data = data, layout = layout)
py.iplot(fig)

In [148]:
py.image.save_as(fig, filename='trend_chart.png')