In [2]:
##################################################################
# In this Notebook I conducted a basic background analysis
# by doing the following:
# 1) Creating a timeline by day
# 2) Creating a timeline by hour
# PLEASE NOTE: The final outputs for this file are the HTML files
# in the images folder titled "IRA_Act_Day.html" and "IRA_Act_Hour.html"
#################################################################

# Here is my code for the basic Background Analysis
# Execute the two shared setup notebooks inside this kernel. `pd` and `px` are used
# below without being imported in this file, so they presumably come from
# ImportStatements.ipynb, and `fill_missing_range` presumably comes from
# functions.ipynb -- TODO confirm.
%run ImportStatements.ipynb
%run functions.ipynb
# Switch off pandas' chained-assignment (SettingWithCopy) warning for the session;
# later cells add columns to filtered subsets of a DataFrame, which would otherwise
# trigger it.
pd.set_option('mode.chained_assignment', None)

### Importation of Data

In [3]:
# I first imported each of my CSVs by year, and then formatted them into manageable dataframes

years = [2014, 2015, 2016, 2017, 2018]

# Collect one (Date, Num_Tweets) frame per yearly file and stack them once at the
# end, instead of re-copying the accumulated frame with pd.concat on every pass.
daily_frames = []
for year in years:
    data = pd.read_csv("data/" + str(year) + "/IRA" + str(year) + ".csv")
    # Parse the timestamps a single time per file, then count tweets per calendar day.
    # The zero-padded 'YYYY/MM/DD' label sorts chronologically even as plain text.
    day_counts = pd.to_datetime(data['publish_date']).dt.strftime('%Y/%m/%d').value_counts()
    temp = pd.DataFrame({'Date': day_counts.index,
                         'Num_Tweets': day_counts.values})
    daily_frames.append(temp)

# `initial` keeps the same layout as before: a 'Date' label column and a
# 'Num_Tweets' count column, with each file's rows stacked in year order.
initial = pd.concat(daily_frames)

### Reformatting for Graphs

In [4]:
# Zero-filled scaffold: one row for every calendar day in the window I want to display
per1 = pd.date_range(start='11/15/2014', end='12/31/2018', freq='1D')

master_Data = pd.DataFrame({
    "Date": per1.strftime('%Y/%m/%d'),  # text labels in 'YYYY/MM/DD' form
    "Num_Tweets": 0,
})

# Make sure the real counts are integers before they are summed
initial["Num_Tweets"] = initial["Num_Tweets"].astype(int)

# Stack the scaffold on top of the real counts and total per day, so that days
# without any tweets still appear (with a count of 0) and the timeline is complete
stacked_days = pd.concat([master_Data, initial])
merged_data = stacked_days.groupby(['Date']).sum().reset_index()


In [5]:
# Finally, I graphed and formatted
# Turn the 'YYYY/MM/DD' text labels back into real timestamps for the x-axis.
# Each row's own label is parsed (rather than overwriting the column positionally
# with the per1 range), so every count stays attached to its own day and the cell
# does not depend on merged_data having exactly one row per day of that range.
merged_data['Date'] = pd.to_datetime(merged_data['Date'], format='%Y/%m/%d')

fig_IRA_Act_Day = px.line(merged_data, x="Date", y="Num_Tweets")
fig_IRA_Act_Day.update_yaxes(title_text='# Tweets')

# (date on the x-axis, height on the y-axis, label) for each event called out on the chart
events = [
    ('2017-8-12', 6200, "Charlottesville Rally"),
    ('2016-10-6', 3000, "Run up to 2016 Election"),
    ('2015-7-22', 2800, "Hillary WikiLeaks Activity"),
]

fig_IRA_Act_Day.update_layout(
    title={
        'text': "IRA Activity by Day",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    template="simple_white",
    # Every label shares the same placement settings; only position and text differ.
    # showarrow=False means the labels are drawn as plain text at (x, y).
    annotations=[
        dict(
            x=event_date,
            y=label_height,
            xref="x",
            yref="y",
            text=label,
            showarrow=False,
            arrowhead=1,
            ax=0,
            ay=-40
        )
        for event_date, label_height, label in events
    ])

fig_IRA_Act_Day.write_html("images/IRA_Act_Day.html")

### Graphing by Hour

In [6]:
# I did the same process as above, but instead differentiated by hour
years = [2014, 2015, 2016, 2017, 2018]

# One (Hour, Num_Tweets, Year) frame per yearly file, stacked once after the loop
hourly_frames = []
for year in years:
    data = pd.read_csv("data/" + str(year) + "/IRA" + str(year) + ".csv")
    # Parse the timestamps a single time per file. Each one is rounded to its
    # nearest full hour before the hour-of-day is taken, so e.g. 14:40 is counted
    # under hour 15.
    hour_counts = pd.to_datetime(data['publish_date']).dt.round('H').dt.hour.value_counts()
    temp = pd.DataFrame({'Hour': hour_counts.index,
                         'Num_Tweets': hour_counts.values,
                         'Year': str(year)})  # the year is stored as text here
    hourly_frames.append(temp)

initial = pd.concat(hourly_frames)
    

In [7]:
# Total tweets per hour of day across all years. Only Num_Tweets is summed:
# 'Year' holds text labels, which a blanket .sum() would try to add up as well.
total = initial.groupby("Hour", as_index=False)["Num_Tweets"].sum()

In [8]:
# Readable 12-hour clock labels ("12:00 AM" through "11:00 PM"), one per hour of
# the day, so that the graph's x-axis looks proper
replacement = [
    f"{hour % 12 or 12}:00 {'AM' if hour < 12 else 'PM'}"
    for hour in range(24)
]
# Assigned by position: row i of `total` receives the i-th label
total['Time'] = replacement

### Focusing on data by year

In [9]:
# Here I separated the years by time, specifically by hour
separate_years = initial.groupby(["Hour", 'Year']).sum().reset_index()


def _hourly_for_year(year, fill_gaps=False):
    """Return the rows of `separate_years` for one year, labelled for plotting.

    year      -- the year as an int; rows are matched on its text form, and the
                 'Year' column of the result is rewritten to the int.
    fill_gaps -- when True, the rows are first passed through
                 fill_missing_range(frame, 'Hour', 0, 24, 1).
    """
    # .copy() so the columns added below land on an independent frame instead of
    # on a selection taken from separate_years
    frame = separate_years.loc[separate_years['Year'] == str(year)].copy()
    if fill_gaps:
        # presumably pads the hours that have no rows for this year -- the helper
        # is defined outside this notebook, TODO confirm
        frame = fill_missing_range(frame, 'Hour', 0, 24, 1)
    frame['Year'] = year
    # Assigned by position, so the frame must have exactly one row per label
    frame['Time'] = replacement
    return frame


# 2014 and 2018 are the two years that are routed through fill_missing_range
year2014Data = _hourly_for_year(2014, fill_gaps=True)
year2015Data = _hourly_for_year(2015)
year2016Data = _hourly_for_year(2016)
year2017Data = _hourly_for_year(2017)
year2018Data = _hourly_for_year(2018, fill_gaps=True)

# The all-years series gets its own label so it shows up as a separate line
total['Year'] = 'total'

list_of_data = [total, year2014Data, year2015Data, year2016Data, year2017Data, year2018Data]

In [10]:
# I then combined the data into a single data frame that I could plot by hour
# (sort=True puts the combined frame's columns in sorted order)
all_data = pd.concat(list_of_data, sort=True)

# One line per distinct value of 'Year'
fig_IRA_Act_Hour = px.line(all_data, x="Time", y="Num_Tweets", color='Year')
fig_IRA_Act_Hour.update_yaxes(title_text='# Tweets')

# Centered chart title on the plain white template
hour_title = dict(text="IRA Activity by Hour", x=0.5, xanchor='center', yanchor='top')
fig_IRA_Act_Hour.update_layout(title=hour_title, template="simple_white")

fig_IRA_Act_Hour.write_html("images/IRA_Act_Hour.html")

Key Results: 
- Tweet activity spiked around 3 PM
- Clear focus on responding to newsworthy events
- High volume around the Charlottesville rally