In [None]:
import tweepy
import itertools
import time
import numpy as np
import json
import datetime
import os
import pandas as pd

import networkx as nx

# Load data

In [None]:
# Merge the tweet dumps written by the individual extraction processes
# (files tweets1.json .. tweets4.json) into one dictionary keyed by user.
tweets = {}

for part in range(1, 5):
    with open('../../data/tweets/tweets{}.json'.format(part)) as tweets_file:
        tweets.update(json.load(tweets_file))

# Flatten the per-user dictionaries into a single dictionary keyed by tweet.
allTweets = {}
for userTweets in tweets.values():
    allTweets.update(userTweets)

In [None]:
# Read the extracted user information into a dictionary.
users = {}

with open('../../data/users.json') as users_file:
    users.update(json.load(users_file))

In [None]:
# Driver and race metadata tables, each read from its own CSV file.
races = pd.read_csv('../../data/races_info.csv')
drivers = pd.read_csv('../../data/drivers_info.csv')

# Preprocessing (leave only tweets with driver hashtag)

In [None]:
# For each tweet, check whether one of the drivers' names appears as a hashtag
# (only then does it count as a fanboost vote) and keep the relevant tweets.
# Driver names are read from the `screenName` column of `drivers`; hashtags are
# compared case-insensitively, the same way the link-creation step compares them.

# lower-cased hashtag text -> screen name as written in the drivers table;
# setdefault keeps the first table entry if two names collide after lower-casing
driverByHashtag = {}
for screenName in drivers['screenName'].dropna():
    driverByHashtag.setdefault(str(screenName).lower(), str(screenName))

activeUsers = []
relevantTweets = []

for tweetID, tweet in allTweets.items():
    userID = tweet['user']['id']
    hashtags = tweet['entities']['hashtags']

    # The first hashtag naming a driver decides; every tweet is kept at most once.
    votedDriver = next(
        (driverByHashtag[hashtag['text'].lower()]
         for hashtag in hashtags
         if hashtag['text'].lower() in driverByHashtag),
        None,
    )
    if votedDriver is None:
        continue

    relevantTweets.append(tweet)
    activeUsers.append(userID)
    print("User {} voted for Driver {} with tweet {}".format(userID, votedDriver, tweetID))
print(f"\r {len(relevantTweets)} relevant tweets")

# Remember users actively participating in voting on Twitter (for node ids later):
# unique ids as plain Python ints, sorted ascending.
activeUsers = sorted({int(uid) for uid in activeUsers})
print(f"{len(activeUsers)} active users")

# Create Network from relevant Tweets

## Create nodes

In [None]:
# Node attributes keyed by node id; filled by the driver and user cells below.
nodes = dict()

In [None]:
# Transform drivers into json-nodes: one node per row of the `drivers` table,
# keyed by that driver's screen name.
for _, driver in drivers.iterrows():
    # a row without a screen name cannot be keyed (and could never be a link target)
    if pd.isna(driver['screenName']):
        continue

    #add to node dictionary
    jsonEntry = dict()
    jsonEntry['type'] = 'driver'
    jsonEntry['color'] = '#0099ff'
    jsonEntry['shape'] = 'rectangle'
    # per-row scalar values (not the whole column)
    jsonEntry['name'] = driver['Name']
    jsonEntry['screenName'] = driver['screenName']
    jsonEntry['location'] = driver['Country']

    nodes[driver['screenName']] = jsonEntry

In [None]:
#Transform active users into json-nodes
# Set to True to also copy each user's 'location' field onto the node.
locationExtraction = False

missingUsers = 0
for userID in activeUsers:
    userID = int(userID)

    # json.load yields string keys for a JSON object, so fall back to the
    # string form of the id when the integer id is not a key.
    user = users.get(userID)
    if user is None:
        user = users.get(str(userID))
    if user is None:
        # no extracted information for this user -> no node can be built
        missingUsers += 1
        continue

    #add to node dictionary
    jsonEntry = dict()
    jsonEntry['type'] = 'user'
    jsonEntry['color'] = '#808080'
    jsonEntry['shape'] = 'ellipse'
    jsonEntry['screenName'] = user['screen_name']
    # NOTE(review): assumes every user record carries a 'userID' field - confirm against users.json
    jsonEntry['id'] = user['userID']
    jsonEntry['twitterID'] = userID

    #location:
    if locationExtraction:
        location = user.get('location')
        if location is not None:
            jsonEntry['location'] = location

    nodes[user['screen_name']] = jsonEntry

if missingUsers:
    print(f"{missingUsers} active users without an entry in users were skipped")

## Create directed links from tweets

In [None]:
# Voting window per race: it opens 10 days before the race start date and
# closes 1 day after it.
delta2 = datetime.timedelta(days=10)
delta1 = datetime.timedelta(days=1)

races['start_date'] = pd.to_datetime(races['start_date'])
races['fanboost_start'] = races['start_date'].sub(delta2)
races['fanboost_end'] = races['start_date'].add(delta1)

In [None]:
#find race that twitter vote corresponds to
def getRace(tweet, name_column=None):
    """Return the identifier of the race whose voting window contains the tweet.

    Parameters
    ----------
    tweet : dict
        Tweet object with a ``created_at`` string in the format
        ``'%a %b %d %H:%M:%S %z %Y'``.
    name_column : str, optional
        Column of ``races`` whose value identifies a race. Defaults to the
        first column of ``races``.
        NOTE(review): the identifier column is not visible here - confirm it
        matches the values used for ``selected_race``.

    Returns
    -------
    The identifier of the first race (in table order) whose
    ``fanboost_start`` .. ``fanboost_end`` window (both ends inclusive, compared
    by calendar date) contains the tweet's date, or ``None`` if there is none.
    """
    date_time_obj = datetime.datetime.strptime(tweet["created_at"], '%a %b %d %H:%M:%S %z %Y')
    # compare plain calendar dates: the parsed tweet time is timezone-aware,
    # so it is reduced to a date before being compared with the window bounds
    date = date_time_obj.date()

    id_column = races.columns[0] if name_column is None else name_column

    for _, race in races.iterrows():
        start = race['fanboost_start']
        end = race['fanboost_end']
        # a race without a known start date has no voting window
        if pd.isna(start) or pd.isna(end):
            continue
        if start.date() <= date <= end.date():
            return race[id_column]
    return None

In [None]:
#create link for each voting (#fanboost + #driver/@driver): from twitter user to driver

#selected_races = races
selected_race = 'MexicoCity2'
edges = []

# (lower-cased screen name, screen name, display name) per driver, built once
# because it does not change from tweet to tweet
driverHandles = [
    (str(screenName).lower(), screenName, name)
    for screenName, name in zip(drivers['screenName'], drivers['Name'])
    if pd.notna(screenName)
]

skippedTweets = 0
for tweet in relevantTweets:
    #only consider the selected race
    race = getRace(tweet)
    if race != selected_race:
        continue

    tweetID = tweet['id']
    userID = tweet['user']['id']
    entities = tweet['entities']
    # lower-cased hashtag texts and mentioned screen names of this tweet
    hashtag_mentions = {hashtag['text'].lower() for hashtag in entities['hashtags']}
    hashtag_mentions.update(mention['screen_name'].lower() for mention in entities['user_mentions'])

    #check whether tweet contains a driver hashtag or driver annotation, if yes -> create link
    for handle, screenName, name in driverHandles:
        if handle not in hashtag_mentions:
            continue

        # json.load yields string keys for a JSON object, so fall back to the
        # string form of the id when the integer id is not a key
        user = users.get(userID)
        if user is None:
            user = users.get(str(userID))
        if user is None:
            # without a user record there is no screen name to use as source
            skippedTweets += 1
            break

        # one edge per vote; repeated votes become parallel edges
        jsonEntry = dict()
        # 'screen_name' is the field the user nodes are keyed by, so the edge
        # source lines up with the node ids
        jsonEntry["source"] = user['screen_name']
        jsonEntry["target"] = screenName
        jsonEntry["tweetId"] = tweetID
        edges.append(jsonEntry)

        print(f"User {userID} voted for Driver {name} with tweet {tweetID}")

        #one tweet can only vote for one driver -> first matching driver in table order wins
        break

if skippedTweets:
    print(f"{skippedTweets} votes skipped because the voting user has no entry in users")

In [None]:
#transform edge list into networkx Multigraph and save
df_edges = pd.DataFrame.from_records(edges)

# Start from an empty directed multigraph so the written file has the same
# graph type whether or not any edges were found.
G = nx.MultiDiGraph()
if len(edges) != 0:
    # 'tweetId' is the key the edge records carry, so it is the column name here
    G = nx.convert_matrix.from_pandas_edgelist(df_edges, 'source', 'target', edge_attr = ['tweetId'], create_using=nx.MultiDiGraph)
    # attach the prepared attributes to the nodes that occur in the graph
    nx.set_node_attributes(G, nodes)

nx.write_graphml_lxml(G, f'../../data/networks/network_voting_{selected_race}.graphml')