In [1]:
import os 
import json
import tweepy
import networkx as nx
import pandas as pd
import numpy as np

# Load Twitter API credentials from external file

Create a JSON file named `twitter_credentials.json` in the notebook's working directory, with the following structure:

```
{
    "api_key": "...",
    "api_secret": "...",
    "access_token": "...",
    "access_secret": "...",
    "bearer_token": "..."
}
```

In [2]:
# Path (relative to the working directory) of the JSON file with the API credentials
credentials_file = 'twitter_credentials.json'

with open(credentials_file) as credentials_data:
   api_data = json.load(credentials_data)

# Expose every credential under its own name; a missing key raises KeyError
api_key, api_secret, access_token, access_secret, bearer_token = (
    api_data[credential_name]
    for credential_name in (
        'api_key',
        'api_secret',
        'access_token',
        'access_secret',
        'bearer_token',
    )
)

In [3]:

# Options for the Twitter client used by every request in this notebook
client_options = {
    # Credentials loaded from the credentials file
    'consumer_key': api_key,
    'consumer_secret': api_secret,
    'access_token': access_token,
    'access_token_secret': access_secret,
    'bearer_token': bearer_token,
    # Ask tweepy to wait when the rate limit is reached
    'wait_on_rate_limit': True,
    # Responses are indexed as plain dicts (response['data']) by the helpers below
    'return_type': dict,
}

client = tweepy.Client(**client_options)

In [4]:
# Folder where every JSON file produced by this notebook is stored
data_folder = 'data'

# File (inside data_folder) holding the usernames to skip
excluded_users_file = 'excluded_usernames.json'

# Seed accounts the network is built from
accounts = [
    'mizzaro',
    'Miccighel_',
]

def serialize_json(folder, filename, data):
    """Write `data` as pretty-printed UTF-8 JSON to `{folder}/{filename}`.

    Args:
        folder: Destination folder; it is created (with parents) if missing.
        filename: Name of the file inside `folder`; an existing file is overwritten.
        data: Any JSON-serializable object.
    """
    # exist_ok=True already covers the "folder exists" case, so no separate
    # existence check is needed
    os.makedirs(folder, exist_ok=True)
    path = f"{folder}/{filename}"
    # The with-statement closes the file; no explicit close() is required
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"Data serialized to path: {path}")

def read_json(path, verbose=True):
    """Load the JSON document stored at `path`.

    Args:
        path: Location of the JSON file.
        verbose: When True, print a confirmation after a successful read.

    Returns:
        The decoded JSON content, or an empty dict when `path` does not exist
        (a message is printed in that case regardless of `verbose`).
    """
    if not os.path.exists(path):
        print(f"No data found at path: {path}")
        return {}

    with open(path, "r", encoding="utf8") as source:
        content = json.load(source)
    if verbose:
        print(f"Data read from path: {path}")
    return content

# check if username is excluded
def is_excluded(username):
    """Return True when `username` is listed in the excluded-users file.

    The file is `{data_folder}/{excluded_users_file}` and is expected to hold a
    dict with an 'excluded' list. When the file does not exist, read_json
    returns an empty dict; that case is treated as "nobody is excluded"
    instead of raising KeyError.
    """
    excluded_users_data = read_json(f"{data_folder}/{excluded_users_file}", False)
    return username in excluded_users_data.get('excluded', [])

# add username to excluded lists
def add_to_excluded(username):
    """Append `username` to the excluded-users file unless it is already there.

    The file is `{data_folder}/{excluded_users_file}` and holds a dict with an
    'excluded' list. When the file (or the 'excluded' key) is missing, the list
    is started from scratch rather than raising KeyError, so the first
    exclusion creates the file.
    """
    excluded_users_data = read_json(f"{data_folder}/{excluded_users_file}", False)
    # setdefault keeps the list attached to the dict that gets serialized
    excluded_users_list = excluded_users_data.setdefault('excluded', [])

    if username in excluded_users_list:
        print(f"User {username} is already present in excluded list")
        return

    excluded_users_list.append(username)
    serialize_json(data_folder, excluded_users_file, excluded_users_data)

def get_tweetsv2(username, quantity, userid=None):
    """Return the tweets of `username`, from the local cache or from Twitter.

    When `{data_folder}/{username}_tweets.json` exists, its content is returned
    and no request is made. Otherwise the tweets are requested through the
    module-level `client`.

    Args:
        username: Twitter handle of the user.
        quantity: Value passed as `max_results` to the API request.
        userid: Numeric id of the user; when omitted it is looked up by username.

    Returns:
        A list of tweet dicts (with 'created_at', 'lang' and 'public_metrics'
        requested from the API); empty when the response carries no 'data'.
    """
    tweets_file = f"{data_folder}/{username}_tweets.json"

    if os.path.exists(tweets_file):
        # Get Tweets from file
        print(f"Processing user with username {username} from file")
        return list(read_json(tweets_file))

    # Get Tweets from Twitter
    if userid:
        user = {'id': userid, 'username': username}
    else:
        response = client.get_user(username=username)
        user = response['data']
    print(f"Processing user with id {user['id']} and username {user['username']}")
    # Get the `quantity` most recent tweets
    # NOTE(review): the API bounds max_results; confirm `quantity` is in the accepted range
    response = client.get_users_tweets(
        id=user['id'], tweet_fields=['created_at', 'lang', 'public_metrics'],
        max_results=quantity
    )
    # The response may carry no 'data' entry (e.g. nothing to return);
    # treat that as "no tweets" instead of failing with KeyError
    return list(response.get('data') or [])

def get_followersv2(username, quantity, userid=None, save_all=True):
    """Select `quantity` eligible followers of `username`.

    Followers are read from `{data_folder}/{username}_followers.json` when that
    file exists; otherwise a single page (up to 1000 users) is requested
    through the module-level `client`.

    A follower is eligible when it is not in the excluded-users file, is not
    protected, has at least 5 tweets, and has both followers and following
    counts in the range [5, 1000).

    Args:
        username: Twitter handle of the user.
        quantity: Number of eligible followers required.
        userid: Numeric id of the user; when omitted it is looked up by username.
        save_all: When True and the data came from the API, the complete
            follower list is written to `{username}_followers.json`. Ignored
            when the list was read from file.

    Returns:
        The first `quantity` eligible followers. When fewer are found,
        `username` is added to the excluded-users file and [] is returned.
    """
    all_followers_file = f"{data_folder}/{username}_followers.json"

    data_for_page = 1000
    data_total = quantity

    if not os.path.exists(all_followers_file):
        # Get Followers from Twitter
        if userid:
            user = {'id': userid, 'username': username}
        else:
            response = client.get_user(username=username)
            user = response['data']
        print(f"Processing user with id {user['id']} and username {user['username']}")
        # Get first 1000 followers
        response = client.get_users_followers(
            id=user['id'],
            user_fields=['name', 'description', 'location', 'protected', 'public_metrics'],
            max_results=data_for_page
        )
        # The response may carry no 'data' entry (e.g. no followers);
        # treat that as an empty list instead of failing with KeyError
        candidates = response.get('data') or []
    else:
        # Get Followers from file
        print(f"Processing user with username {username} from file")
        candidates = read_json(all_followers_file)
        save_all = False

    # Load the exclusion list once: it does not change while candidates are
    # filtered, so re-reading the file for every candidate is wasted work
    excluded_usernames = set(
        read_json(f"{data_folder}/{excluded_users_file}", False).get('excluded', [])
    )

    followers_of_user = []
    for data in candidates:
        # Skip excluded username
        if data['username'] in excluded_usernames:
            continue
        metrics = data['public_metrics']
        if len(followers_of_user) < data_total and\
            not data['protected'] and\
            metrics['tweet_count'] >= 5 and\
            5 <= metrics['followers_count'] < 1000 and\
            5 <= metrics['following_count'] < 1000:
            followers_of_user.append(data)

    if save_all:
        # The cache holds every follower received, including excluded ones
        serialize_json(data_folder, f"{username}_followers.json", list(candidates))
    print(f"Found {len(followers_of_user)} followers for user {username}")

    if len(followers_of_user) < data_total:
        # Not enough eligible followers: exclude this user from future selections
        add_to_excluded(username)
        return []
    return followers_of_user

def get_followingv2(username, quantity, userid=None, save_all=False):
    """Select `quantity` eligible accounts followed by `username`.

    Accounts are read from `{data_folder}/{username}_following.json` when that
    file exists; otherwise a single page (up to 1000 users) is requested
    through the module-level `client`.

    An account is eligible when it is not in the excluded-users file, is not
    protected, has at least 5 tweets, and has both followers and following
    counts in the range [5, 1000).

    Args:
        username: Twitter handle of the user.
        quantity: Number of eligible accounts required.
        userid: Numeric id of the user; when omitted it is looked up by username.
        save_all: When True and the data came from the API, the complete
            following list is written to `{username}_following.json`. Ignored
            when the list was read from file.

    Returns:
        The first `quantity` eligible accounts. When fewer are found,
        `username` is added to the excluded-users file and [] is returned.
    """
    all_following_file = f"{data_folder}/{username}_following.json"

    data_for_page = 1000
    data_total = quantity

    if not os.path.exists(all_following_file):
        # Get Following from Twitter
        if userid:
            user = {'id': userid, 'username': username}
        else:
            response = client.get_user(username=username)
            user = response['data']
        print(f"Processing user with id {user['id']} and username {user['username']}")
        # Get first 1000 following
        response = client.get_users_following(
            id=user['id'],
            user_fields=['protected', 'public_metrics'],
            max_results=data_for_page
        )
        # The response may carry no 'data' entry (e.g. nobody followed);
        # treat that as an empty list instead of failing with KeyError
        candidates = response.get('data') or []
    else:
        # Get Following from file
        print(f"Processing user with username {username} from file")
        candidates = read_json(all_following_file)
        save_all = False

    # Load the exclusion list once: it does not change while candidates are
    # filtered, so re-reading the file for every candidate is wasted work
    excluded_usernames = set(
        read_json(f"{data_folder}/{excluded_users_file}", False).get('excluded', [])
    )

    following_of_user = []
    for data in candidates:
        # Skip excluded username
        if data['username'] in excluded_usernames:
            continue
        metrics = data['public_metrics']
        if len(following_of_user) < data_total and\
            not data['protected'] and\
            metrics['tweet_count'] >= 5 and\
            5 <= metrics['followers_count'] < 1000 and\
            5 <= metrics['following_count'] < 1000:
            following_of_user.append(data)

    if save_all:
        # The cache holds every account received, including excluded ones
        serialize_json(data_folder, f"{username}_following.json", list(candidates))
    print(f"Found {len(following_of_user)} following of user {username}")

    if len(following_of_user) < data_total:
        # Not enough eligible accounts: exclude this user from future selections
        add_to_excluded(username)
        return []
    return following_of_user

# First level followers

In [5]:

# Pick 5 eligible followers for each seed account. get_followersv2 saves the
# complete follower list of the account by default (save_all=True).
first_level_followers = []
for account in accounts:
    first_level_followers += get_followersv2(account, 5)

# Persist the selection so later cells can reload it from disk
serialize_json(data_folder, "first_level_followers.json", first_level_followers)

Processing user with username mizzaro from file
Data read from path: data/mizzaro_followers.json
Found 5 followers for user mizzaro
Processing user with username Miccighel_ from file
Data read from path: data/Miccighel__followers.json
Found 5 followers for user Miccighel_
Data serialized to path: data/first_level_followers.json


# First level following

In [6]:

# Pick 5 eligible followed accounts for each seed account. The complete
# following list is not saved here: get_followingv2 defaults to save_all=False.
first_level_following = []
for account in accounts:
    first_level_following += get_followingv2(account, 5)

# Persist the selection so later cells can reload it from disk
serialize_json(data_folder, "first_level_following.json", first_level_following)

Processing user with username mizzaro from file
Data read from path: data/mizzaro_following.json
Found 5 following of user mizzaro
Processing user with username Miccighel_ from file
Data read from path: data/Miccighel__following.json
Found 5 following of user Miccighel_
Data serialized to path: data/first_level_following.json


# Second level followers

In [7]:
second_level_followers = []

# Followers of the first-level followers: pick 5 each and keep the complete
# follower list on disk (default save_all=True) - all information about
# follower leafs is needed
first_level_followers_list = read_json(f"{data_folder}/first_level_followers.json")
for follower in first_level_followers_list:
    second_level_followers += get_followersv2(follower['username'], 5, follower['id'])

# Followers of the first-level following: pick 5 each without saving the
# complete follower list - information about following leafs is not needed
first_level_following_list = read_json(f"{data_folder}/first_level_following.json")
for following in first_level_following_list:
    second_level_followers += get_followersv2(following['username'], 5, following['id'], False)

serialize_json(data_folder, "second_level_followers.json", second_level_followers)

Data read from path: data/first_level_followers.json
Processing user with username OlegZendel from file
Data read from path: data/OlegZendel_followers.json
Found 5 followers for user OlegZendel
Processing user with username LyndaSaidLhadj from file
Data read from path: data/LyndaSaidLhadj_followers.json
Found 5 followers for user LyndaSaidLhadj
Processing user with username acai2021 from file
Data read from path: data/acai2021_followers.json
Found 5 followers for user acai2021
Processing user with username rodger_benham from file
Data read from path: data/rodger_benham_followers.json
Found 5 followers for user rodger_benham
Processing user with username ColoursRiot from file
Data read from path: data/ColoursRiot_followers.json
Found 5 followers for user ColoursRiot
Processing user with username Outsider289 from file
Data read from path: data/Outsider289_followers.json
Found 5 followers for user Outsider289
Processing user with username mizzaro from file
Data read from path: data/mizzar

# Second level following

In [8]:
second_level_following = []

# Following of the first-level followers: pick 5 each and save the complete
# following list - all information about follower leafs is needed
first_level_followers_list = read_json(f"{data_folder}/first_level_followers.json")
for follower in first_level_followers_list:
    second_level_following += get_followingv2(follower['username'], 5, follower['id'], True)

# Following of the first-level following: pick 5 each without saving the
# complete following list - information about following leafs is not needed
first_level_following_list = read_json(f"{data_folder}/first_level_following.json")
for following in first_level_following_list:
    second_level_following += get_followingv2(following['username'], 5, following['id'])

serialize_json(data_folder, "second_level_following.json", second_level_following)

Data read from path: data/first_level_followers.json
Processing user with username OlegZendel from file
Data read from path: data/OlegZendel_following.json
Found 5 following of user OlegZendel
Processing user with username LyndaSaidLhadj from file
Data read from path: data/LyndaSaidLhadj_following.json
Found 5 following of user LyndaSaidLhadj
Processing user with username acai2021 from file
Data read from path: data/acai2021_following.json
Found 5 following of user acai2021
Processing user with username rodger_benham from file
Data read from path: data/rodger_benham_following.json
Found 5 following of user rodger_benham
Processing user with username ColoursRiot from file
Data read from path: data/ColoursRiot_following.json
Found 5 following of user ColoursRiot
Processing user with username Outsider289 from file
Data read from path: data/Outsider289_following.json
Found 5 following of user Outsider289
Processing user with username mizzaro from file
Data read from path: data/mizzaro_foll

# Last 5 tweets of first and second level followers

In [9]:
# Tweets of the first-level followers: download and cache only what is missing
first_level_followers_list = read_json(f"{data_folder}/first_level_followers.json")

for follower in first_level_followers_list:
    tweets_filename = f"{follower['username']}_tweets.json"
    if os.path.exists(f"{data_folder}/{tweets_filename}"):
        # Tweets of this user are already on disk
        continue
    tweets = get_tweetsv2(follower['username'], 5, follower['id'])
    serialize_json(data_folder, tweets_filename, tweets)

# Tweets of the second-level followers: same procedure
second_level_followers_list = read_json(f"{data_folder}/second_level_followers.json")

for follower in second_level_followers_list:
    tweets_filename = f"{follower['username']}_tweets.json"
    if os.path.exists(f"{data_folder}/{tweets_filename}"):
        # Tweets of this user are already on disk
        continue
    tweets = get_tweetsv2(follower['username'], 5, follower['id'])
    serialize_json(data_folder, tweets_filename, tweets)

Data read from path: data/first_level_followers.json
Data read from path: data/second_level_followers.json
Processing user with id 2438581501 and username shruti_nirantar
Data serialized to path: data/shruti_nirantar_tweets.json
Processing user with id 711075897592274944 and username AminSakzad
Data serialized to path: data/AminSakzad_tweets.json


# Third level followers and following

In [10]:
# Followers of the second-level followers. With quantity 0 no follower is ever
# selected, so the list stays empty; the calls matter because they save the
# complete follower list of every leaf (default save_all=True).
third_level_followers = []
for follower in second_level_followers_list:
    third_level_followers += get_followersv2(follower['username'], 0, follower['id'])

# Following of the second-level followers. Same idea: quantity 0 selects
# nothing, save_all=True saves the complete following list of every leaf.
third_level_following = []
for follower in second_level_followers_list:
    third_level_following += get_followingv2(follower['username'], 0, follower['id'], True)

Processing user with username Marwah_k from file
Data read from path: data/Marwah_k_followers.json
Found 0 followers for user Marwah_k
Processing user with username ivrik from file
Data read from path: data/ivrik_followers.json
Found 0 followers for user ivrik
Processing user with username ameertawfik from file
Data read from path: data/ameertawfik_followers.json
Found 0 followers for user ameertawfik
Processing user with username aleksbobic from file
Data read from path: data/aleksbobic_followers.json
Found 0 followers for user aleksbobic
Processing user with username debforit from file
Data read from path: data/debforit_followers.json
Found 0 followers for user debforit
Processing user with username gael_dias from file
Data read from path: data/gael_dias_followers.json
Found 0 followers for user gael_dias
Processing user with username Nthakur20 from file
Data read from path: data/Nthakur20_followers.json
Found 0 followers for user Nthakur20
Processing user with username AthmaniAmine 

# Create nodes with all attributes (id, profile details, 5 tweets, public metrics)

In [111]:
import itertools
nodes = []

# One (id, attributes) pair per first- and second-level follower
for follower in itertools.chain(first_level_followers_list, second_level_followers_list):
    node_attributes = {
        'username': follower['username'],
        'name': follower['name'],
        # Location is optional in the stored profile
        'location': follower.get('location'),
        'metrics': follower['public_metrics'],
        # Tweets cached on disk for this user (read silently)
        'tweets': read_json(f"{data_folder}/{follower['username']}_tweets.json", False),
    }
    nodes.append((follower['id'], node_attributes))

# Create Social Network Graph

In [130]:
from pyvis.network import Network

# Add project members name as attribute of the graph
graph = nx.MultiDiGraph(members=['Agrate Giulio','Bibalo Simone','De Nardi Gianluca Giuseppe Maria','Giunta Marco'])

graph.add_nodes_from(nodes)

# Add edges with following or follower attribute. Only users that are already
# nodes are linked, so no node is added while iterating over graph.nodes.
for node in graph.nodes:
    node_username = graph.nodes[node]['username']
    user_followers = read_json(f"{data_folder}/{node_username}_followers.json", False)
    for follower in user_followers:
        if graph.has_node(follower['id']):
            graph.add_edge(node, follower['id'], key='follower', title=f"follower - {node_username} -> {follower['username']}")
    user_following = read_json(f"{data_folder}/{node_username}_following.json", False)
    for following in user_following:
        if graph.has_node(following['id']):
            graph.add_edge(node, following['id'], key='following', title=f"following - {node_username} <- {following['username']}", color='#DB6400')

nt = Network(
    height="100%", 
    width="100%", 
    bgcolor="#222222", 
    font_color="white",
    heading="Social Network Graph",
    directed=True,
)

nt.set_edge_smooth('dynamic')
nt.from_nx(graph)
neighbor_map = nt.get_adj_list()
for node in nt.nodes:
    # Show the handle as label, the display name as tooltip and size the node
    # by the number of entries in its adjacency list
    node['label'] = node['username']
    node['title'] = node['name']
    node['value'] = len(neighbor_map[node['id']])

# The page is written inside html/: create the folder up front so the cell
# does not depend on it already existing (same approach as serialize_json)
os.makedirs("html", exist_ok=True)
nt.show("html/social_network_graphs.html")

# Followers network sub-graphs

In [131]:
# Keep only the edges whose key is 'follower' and build the induced sub-graph
follower_edges = [
    (source, target, key)
    for source, target, key in graph.edges(keys=True)
    if key == 'follower'
]
followers_graph = graph.edge_subgraph(follower_edges)

follower_nt = Network(
    height="100%",
    width="100%",
    bgcolor="#222222",
    font_color="white",
    heading="Followers Network Graph",
    directed=True,
)

follower_nt.set_edge_smooth('dynamic')
follower_nt.from_nx(followers_graph)
follower_neighbor_map = follower_nt.get_adj_list()
for node in follower_nt.nodes:
    # Handle as label, display name as tooltip, size from the adjacency list
    node.update(
        label=node['username'],
        title=node['name'],
        value=len(follower_neighbor_map[node['id']]),
    )
follower_nt.show("html/follower_network_graphs.html")

# Following network sub-graphs

In [132]:
# Keep only the edges whose key is 'following' and build the induced sub-graph
following_edges = [
    (source, target, key)
    for source, target, key in graph.edges(keys=True)
    if key == 'following'
]
following_graph = graph.edge_subgraph(following_edges)

following_nt = Network(
    height="100%",
    width="100%",
    bgcolor="#222222",
    font_color="white",
    heading="Following Network Graph",
    directed=True,
)

following_nt.set_edge_smooth('dynamic')
following_nt.from_nx(following_graph)
following_neighbor_map = following_nt.get_adj_list()
for node in following_nt.nodes:
    # Handle as label, display name as tooltip, size from the adjacency list
    node.update(
        label=node['username'],
        title=node['name'],
        value=len(following_neighbor_map[node['id']]),
    )
following_nt.show("html/following_network_graphs.html")