In [1]:
import sys
sys.path.append('../../')
import os 


import numpy as np
import json
from datetime import datetime
import os
import pandas as pd

import networkx as nx
from src.utils import read_users, read_friends

## Initialize

In [2]:
# Locate the project configuration relative to the repository root.
main_dir = '../../'
config_path = main_dir + 'config/config.json'

# Fail early if the configuration file is not where the notebook expects it.
assert os.path.exists(config_path)

# Parse the JSON configuration; the cells below read directories and limits from it.
with open(config_path) as config_file:
    config = json.load(config_file)

In [3]:
# Load the user records through the project helper. Later cells look users up
# with the string form of their id, e.g. all_user_dict[str(node)].
all_user_dict = read_users(main_dir, config)

Reading user information files: ['users_2.json', 'users_1.json', 'users_0.json']


## Follower Network

### Create full follower Network
(list entry 'a b' if b is a friend of a, i.e. a follows b and b is in the user list)

In [None]:
# Load the friends data through the project helper. The cells below iterate it
# as a mapping from a user to the collection of ids that user follows.
friends_dict = read_friends(main_dir, config)

In [None]:
# Construct the edge list from the friends data: one 'a b' entry per pair
# where a follows b and b is one of the known users.
links = []

for user, curr_friends in friends_dict.items():
    for friends_id in curr_friends:
        # all_user_dict is looked up by the string form of the user id
        # elsewhere in this notebook, so membership is tested the same way.
        # (The previous check used `all_user_ids`, which is never defined and
        # raised a NameError on a fresh kernel.)
        if str(friends_id) in all_user_dict:
            links.append(f'{user} {friends_id}')

print(f"Number of edges: {len(links)}")

In [None]:
# Build the directed follower graph from the 'a b' edge strings.
# Node labels are converted to int; this step needs a bit of time.
G = nx.parse_edgelist(links, create_using=nx.DiGraph, nodetype=int)

# Persist the full follower network as GraphML in the configured graphs directory.
graph_path = os.path.join(main_dir, config['graphs_dir'])
full_graph_file = os.path.join(graph_path, "follower_network_full.graphml")
nx.write_graphml_lxml(G, full_graph_file)

### Create Influencer Follower Network
Get subnetwork containing only nodes with a lot of followers ("micro" influencer: > 1000 followers)

In [None]:
# Collect the nodes whose in-degree (number of followers inside the network)
# exceeds the configured threshold, as (node, in_degree) pairs.
# The threshold is read from the config: the bare name `influencer_lower_limit`
# was never defined, so the comparison raised a NameError on a fresh kernel.
influencer_lower_limit = config['influencer_lower_limit']
print(f"Influencer follower count - lower limit: {influencer_lower_limit}")

high_indeg_nodes = [
    (node, val)
    for (node, val) in G.in_degree()
    if val > influencer_lower_limit
]
len(high_indeg_nodes)

In [None]:
# Summarise the "influencer" nodes in a table: screen name, id, in-degree and
# out-degree, ordered from the highest in-degree downwards.
influencer_ids = [node for (node, val) in high_indeg_nodes]
most_popular = pd.DataFrame({
    'screen_name': [all_user_dict[str(node)]['screen_name'] for node in influencer_ids],
    'id': influencer_ids,
    'indegree': [val for (node, val) in high_indeg_nodes],
    'outdegree': [G.out_degree(node) for node in influencer_ids],
}).sort_values(by=['indegree'], ascending=False)

# Lookup sets used when restricting the network to influencers.
pop_ids = set(most_popular['id'])
pop_screen_names = set(most_popular['screen_name'])

# Store the table as CSV; the file name carries the threshold that was applied.
results_path = os.path.join(main_dir, config['results_dir'])
influencer_csv = f"{config['influencer_fname']}_{config['influencer_lower_limit']}.csv"
most_popular.to_csv(os.path.join(results_path, influencer_csv), index=False)

In [None]:
# Index the influencer table by node id so that the screen name and the
# degrees from the full graph can be looked up per node.
pop_dict = {}
for _, row in most_popular.iterrows():
    pop_dict[row['id']] = {
        'screen_name': row['screen_name'],
        'indegree': row['indegree'],
        'outdegree': row['outdegree'],
    }

In [None]:
# Construct the edge list restricted to influencers: one 'a b' entry per pair
# where a follows b, b is in the user list, and both a and b are influencers.
links = []

for user, curr_friends in friends_dict.items():
    # keep only edges that start at an influencer
    if int(user) not in pop_ids:
        continue

    for friends_id in curr_friends:
        # Keep only edges that end at a known user who is also an influencer.
        # all_user_dict is looked up by the string form of the user id
        # elsewhere in this notebook, so membership is tested the same way.
        # (The previous check used `all_user_ids`, which is never defined and
        # raised a NameError on a fresh kernel.)
        if str(friends_id) in all_user_dict and friends_id in pop_ids:
            links.append(f'{user} {friends_id}')

print(f"Number of edges: {len(links)}")

In [None]:
# Build the directed influencer graph from the 'a b' edge strings (int node labels).
G_influencer = nx.parse_edgelist(links, create_using=nx.DiGraph, nodetype=int)

# Attach profile information and the degrees measured on the full follower
# graph to every node of the influencer graph.
created_at_format = '%a %b %d %H:%M:%S %z %Y'
attrs = {}
for node in G_influencer.nodes():
    user_info = all_user_dict[str(node)]
    attrs[node] = {
        'id': user_info['id'],
        'screen_name': user_info['screen_name'],
        # keep the raw timestamp string and add a date-only version of it
        'created_at': user_info['created_at'],
        'created_at_date': datetime.strptime(user_info['created_at'], created_at_format).strftime("%Y-%m-%d"),
        'location': user_info['location'],
        'original_indegree': pop_dict[int(node)]['indegree'],
        'original_outdegree': pop_dict[int(node)]['outdegree'],
    }

nx.set_node_attributes(G_influencer, attrs)

# Persist the influencer network as GraphML; the file name carries the threshold.
graph_path = os.path.join(main_dir, config['graphs_dir'])
influencer_graph_file = os.path.join(graph_path, f"follower_network_influencer_{config['influencer_lower_limit']}.graphml")
nx.write_graphml_lxml(G_influencer, influencer_graph_file)

## Interaction Network

In [4]:
# Read the interaction records from the configured tweets directory.
tweets_path = os.path.join(main_dir, config['tweets_dir'])
interactions_file = os.path.join(tweets_path, 'interaction_tweets.json')

with open(interactions_file) as json_file:
    interactions = json.load(json_file)

In [13]:
# Add one directed edge per interaction whose receiver is in the user list.
# A MultiDiGraph is used, so repeated interactions between the same pair are
# kept as parallel edges.
network = {}
links = []
for user, annot_list in interactions.items():
    network_friends = [
        annot['receiver']
        for annot in annot_list
        if str(annot['receiver']) in all_user_dict
    ]
    links.extend(f"{user} {receiver}" for receiver in network_friends)
    # users without any interaction records get no entry in `network`
    if annot_list:
        network[user] = network_friends

G = nx.parse_edgelist(links, create_using=nx.MultiDiGraph, nodetype=int)

In [15]:
# Write the interaction graph to the configured graphs directory as GraphML.
graph_path = os.path.join(main_dir, config['graphs_dir'])
interaction_graph_file = os.path.join(graph_path, "interaction_graph.graphml")
nx.write_graphml_lxml(G, interaction_graph_file)