In [114]:
import os 
import json
import pprint
import tweepy

# Load Twitter API credentials from external file

Create a json file named `twitter_credentials.json`

```
{
    "api_key": "...",
    "api_secret": "...",
    "access_token": "...",
    "access_secret": "...",
    "bearer_token": "..."
}
```

In [115]:
credentials_file = 'twitter_credentials.json'

with open(credentials_file) as credentials_data:
   api_data = json.load(credentials_data)

api_key = api_data['api_key']
api_secret = api_data['api_secret']
access_token = api_data['access_token']
access_secret = api_data['access_secret']
bearer_token = api_data['bearer_token']

In [116]:

client = tweepy.Client(
    consumer_key=api_key, 
    consumer_secret=api_secret, 
    access_token=access_token, 
    access_token_secret=access_secret, 
    bearer_token=bearer_token,
    wait_on_rate_limit=True,
    return_type=dict
)

In [117]:
accounts = ["mizzaro", "Miccighel_"]

data_folder = "datav2"
tweets_folder = f"{data_folder}/tweets"

excluded_users_file = "excluded_usernames.json"

def serialize_json(folder, filename, data):
    if not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)
    with open(f"{folder}/{filename}", 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
        f.close()
    print(f"Data serialized to path: {folder}/{filename}")

def read_json(path, verbose=True):
    if os.path.exists(path):
        with open(path, "r", encoding="utf8") as file:
            data = json.load(file)
        if verbose:
            print(f"Data read from path: {path}")
        return data
    else:
        print(f"No data found at path: {path}")
        return {}

# check if username is excluded
def is_excluded(username):
    excluded_users_data = read_json(f"{data_folder}/{excluded_users_file}", False)
    excluded_users_list = excluded_users_data['excluded']

    if username in excluded_users_list:
        return True
    else:
        return False

# add username to excluded lists
def add_to_excluded(username):
    excluded_users_data = read_json(f"{data_folder}/{excluded_users_file}", False)
    excluded_users_list = excluded_users_data['excluded']

    if username not in excluded_users_list:
        excluded_users_data['excluded'].append(username)
        serialize_json(data_folder, excluded_users_file, excluded_users_data)
    else:
        print(f"User {username} is already present in excluded list")

def get_followers(user, quantity):
    
    print(f"Processing user {user}")
    
    followers_of_user = []
    
    for item in tweepy.Cursor(
        api.followers, 
        screen_name=user, 
        skip_status=True, 
        include_user_entities=False
        ).items():
        
        json_data = item._json
        
        found_follower = {}
        found_follower["id"] = json_data["id"]
        found_follower["name"] = json_data["name"]
        found_follower["screen_name"] = json_data["screen_name"]
        found_follower["location"] = json_data["location"]
        found_follower["description"] = json_data["description"]
        found_follower["protected"] = json_data["protected"]
        found_follower["followers_count"] = json_data["followers_count"]
        found_follower["friends_count"] = json_data["friends_count"]
        
        if (found_follower not in followers_of_user) and\
            not found_follower["protected"] and\
            (found_follower["followers_count"] < 1000) and\
            (found_follower["friends_count"] < 1000):
            followers_of_user.append(found_follower)

        if len(followers_of_user) == quantity:
            break
            
    print(f"Found {len(followers_of_user)} followers for user {user}")

    return followers_of_user


def get_friends(user, quantity):
    
    print(f"Processing user {user}")
    
    friends_of_user = []
    
    for item in tweepy.Cursor(
        api.friends, 
        screen_name=user, 
        skip_status=True, 
        include_user_entities=False
        ).items():
        
        json_data = item._json
        
        found_friend = {}
        found_friend["id"] = json_data["id"]
        found_friend["name"] = json_data["name"]
        found_friend["screen_name"] = json_data["screen_name"]
        found_friend["location"] = json_data["location"]
        found_friend["description"] = json_data["description"]
        found_friend["protected"] = json_data["protected"]
        found_friend["friends_count"] = json_data["friends_count"]
        found_friend["followers_count"] = json_data["followers_count"]
        
        # append only when following/follower < 1000
        if (found_friend not in friends_of_user) and\
            not found_friend["protected"] and\
           (found_friend["friends_count"] < 1000) and\
            (found_friend["followers_count"] < 1000):
            friends_of_user.append(found_friend)

        # exit when friends == quantity
        if len(friends_of_user) == quantity:
            break
            
    print(f"Found {len(friends_of_user)} friends of user {user}")
    
    return friends_of_user

def get_tweets(user, quantity):

    tweets = []
    for item in tweepy.Cursor(
        api.user_timeline, 
        screen_name=user,
        tweet_mode="extended"
    ).items(quantity):
        raw_tweet = item._json
        tweet = {}
        tweet["id"]= raw_tweet["id"]
        tweet["created_at"] = raw_tweet["created_at"]
        tweet["full_text"] = raw_tweet["full_text"],
        tweet["favorite_count"]= raw_tweet["favorite_count"]
        tweets.append(tweet)
    return tweets

def get_tweetsv2(username, quantity, userid=None):
    tweets = []
    tweets_file = f"{data_folder}/{username}_tweets.json"

    data_total = quantity

    if not os.path.exists(tweets_file):
        # Get Tweets from Twitter
        if userid:
            user = {}
            user['id'] = userid
            user['username'] = username
        else:
            response = client.get_user(username=username)
            user = response['data']
        print(f"Processing user with id {user['id']} and username {user['username']}")
        # Get 5 most recent tweets
        response = client.get_users_tweets(
                    id=user['id'], tweet_fields=['created_at', 'lang', 'public_metrics'],
                    max_results=data_total
                    )
    else:
        # Get Tweets from file
        print(f"Processing user with username {username} from file")
        response = {}
        response['data'] = read_json(tweets_file)
    
    for data in response['data']:
        tweets.append(data)
    return tweets

def get_followersv2(username, quantity, userid=None, save_all=True):
    followers_of_user = []
    all_followers_of_user = []
    all_followers_file = f"{data_folder}/{username}_followers.json"

    data_for_page = 1000
    data_total = quantity

    if not os.path.exists(all_followers_file):
        # Get Followers from Twitter
        if userid:
            user = {}
            user['id'] = userid
            user['username'] = username
        else:
            response = client.get_user(username=username)
            user = response['data']
        print(f"Processing user with id {user['id']} and username {user['username']}")
        # Get first 1000 followers
        response = client.get_users_followers(
                    id=user['id'], user_fields=['name', 'description', 'location', 'protected', 'public_metrics'], 
                    max_results=data_for_page
                    )
    else:
        # Get Followers from file
        print(f"Processing user with username {username} from file")
        response = {}
        response['data'] = read_json(all_followers_file)
        save_all = False
    
    for data in response['data']:
        if save_all:
            all_followers_of_user.append(data)
        # Skip excluded username
        if is_excluded(data['username']):
            continue
        if(len(followers_of_user) < data_total) and\
            not data['protected'] and\
            data['public_metrics']['tweet_count'] >= 5 and\
            5 <= data['public_metrics']['followers_count'] < 1000 and\
            5 <= data['public_metrics']['following_count'] < 1000:
            followers_of_user.append(data)
    if save_all:                
        serialize_json(data_folder, f"{username}_followers.json", all_followers_of_user)
    print(f"Found {len(followers_of_user)} followers for user {username}")
    if len(followers_of_user) < data_total:
        add_to_excluded(username)
    return followers_of_user

def get_followingv2(username, quantity, userid=None, save_all=True):
    following_of_user = []
    all_following_of_user = []
    all_following_file = f"{data_folder}/{username}_following.json"

    data_for_page = 1000
    data_total = quantity

    if not os.path.exists(all_following_file):
        # Get Following from Twitter
        if userid:
            user = {}
            user['id'] = userid
            user['username'] = username
        else:
            response = client.get_user(username=username)
            user = response['data']
        print(f"Processing user with id {user['id']} and username {user['username']}")
        # Get first 1000 following
        response = client.get_users_following(
                    id=user['id'], user_fields=['protected', 'public_metrics'], 
                    max_results=data_for_page
                    )
    else:
        # Get Following from file
        print(f"Processing user with username {username} from file")
        response = {}
        response['data'] = read_json(all_following_file)
        save_all = False
    
    for data in response['data']:
        if save_all:
            all_following_of_user.append(data)
        # Skip excluded username
        if is_excluded(data['username']):
            continue
        if(len(following_of_user) < data_total) and\
            not data['protected'] and\
            data['public_metrics']['tweet_count'] >= 5 and\
            5 <= data['public_metrics']['followers_count'] < 1000 and\
            5 <= data['public_metrics']['following_count'] < 1000:
            following_of_user.append(data)
    if save_all:                
        serialize_json(data_folder, f"{username}_following.json", all_following_of_user)
    print(f"Found {len(following_of_user)} following of user {username}")
    if len(following_of_user) < data_total:
        add_to_excluded(username)
    return following_of_user

# First level followers

In [118]:

first_level_followers = {}
for account in accounts:
    first_level_followers[account] = get_followersv2(account, 5)
    
serialize_json(data_folder, "first_level_followers.json", first_level_followers)

Processing user with username mizzaro from file
Data read from path: datav2/mizzaro_followers.json
Found 5 followers for user mizzaro
Processing user with username Miccighel_ from file
Data read from path: datav2/Miccighel__followers.json
Found 5 followers for user Miccighel_
Data serialized to path: datav2/first_level_followers.json


# First level following

In [119]:

first_level_following = {}
for account in accounts:
    first_level_following[account] = get_followingv2(account, 5)
    
serialize_json(data_folder, "first_level_following.json", first_level_following)

Processing user with username mizzaro from file
Data read from path: datav2/mizzaro_following.json
Found 5 following of user mizzaro
Processing user with username Miccighel_ from file
Data read from path: datav2/Miccighel__following.json
Found 5 following of user Miccighel_
Data serialized to path: datav2/first_level_following.json


# Second level followers

In [120]:
second_level_followers = {}

# Get followers from first-level-followers
first_level_followers_data = read_json(f"{data_folder}/first_level_followers.json")

for followers_list in first_level_followers_data.values():
    for follower in followers_list:
        second_level_followers[follower['username']] = get_followersv2(follower['username'], 5, follower['id'])
        
# Get followers from first-level-following
first_level_following_data = read_json(f"{data_folder}/first_level_following.json")

for followers_list in first_level_following_data.values():
    for follower in followers_list:
        second_level_followers[follower['username']] = get_followersv2(follower['username'], 5, follower['id'])

serialize_json(data_folder, "second_level_followers.json", second_level_followers)

Data read from path: datav2/first_level_followers.json
Processing user with username OlegZendel from file
Data read from path: datav2/OlegZendel_followers.json
Found 5 followers for user OlegZendel
Processing user with username LyndaSaidLhadj from file
Data read from path: datav2/LyndaSaidLhadj_followers.json
Found 5 followers for user LyndaSaidLhadj
Processing user with username acai2021 from file
Data read from path: datav2/acai2021_followers.json
Found 5 followers for user acai2021
Processing user with username rodger_benham from file
Data read from path: datav2/rodger_benham_followers.json
Found 5 followers for user rodger_benham
Processing user with username ColoursRiot from file
Data read from path: datav2/ColoursRiot_followers.json
Found 5 followers for user ColoursRiot
Processing user with username Outsider289 from file
Data read from path: datav2/Outsider289_followers.json
Found 5 followers for user Outsider289
Processing user with username mizzaro from file
Data read from pat

In [121]:
first_level_followers_data = read_json(f"{data_folder}/second_level_followers.json")
for following_list in first_level_followers_data.values():
    for follower in following_list:
        print(follower['username'])
        

Data read from path: datav2/second_level_followers.json
Marwah_k
ivrik
ameertawfik
aleksbobic
debforit
gael_dias
Nthakur20
AthmaniAmine
iiia_unipd
OthmaniDr
TilburgU_DCA
gabriellearuta
lesleywilton
sencerdeniz
rconfalonieri
jparapar
ameertawfik
TWebACM
DanaChatter
DeepakUniyal08
ciscaredda0111
BrillantVacant
LuvYahBoo24
Martha_Young
Damiano_David
Miccighel_
Sogno_Danzante
boreale01
nico_peresson
Giulia13568642
OlegZendel
LyndaSaidLhadj
acai2021
rodger_benham
ColoursRiot
JoziOpera
Tenor_Rguez27
MarigonaQerkezi
BarraliRoberto
DariaMasiero
Baggio_Giacomo
12_lollo
vagabiondaa
silviajoy21
Acewings_sk
ita_crypto
simonegiaco
Salvaacampora
cantaht
LucaToffolini
gianmariomaggio
mizzaro
DanielaPetrell1
3DOMFBK
diunito
crc_gk
mizzaro
haskhosravi
Commsgrad
bevan_koopman
johnedwardLEIG1
shelley_marshal
Scotty25953295
mizzaro
DrJJRich
Outsider289
mizzaro
EUROARTISTS
VincenzoCarli15
Ilele99
ramonplopes
sumitsidana17
Dr_Hai_Dong
bitcoinub
vuongthuavu6
ibblastoise
marcoliati
Barbe_z
smrtalks
IvanLocalz

# Second level following

In [122]:
second_level_following = {}

# Get following from first-level-followers
first_level_followers_data = read_json(f"{data_folder}/first_level_followers.json")

for following_list in first_level_followers_data.values():
    for following in following_list:
        second_level_following[following['username']] = get_followingv2(following['username'], 5, following['id'])

# Get following from first-level-following
first_level_following_data = read_json(f"{data_folder}/first_level_following.json")

for following_list in first_level_following_data.values():
    for following in following_list:
        second_level_following[following['username']] = get_followingv2(following['username'], 5, following['id'])

serialize_json(data_folder, "second_level_following.json", second_level_following)

Data read from path: datav2/first_level_followers.json
Processing user with username OlegZendel from file
Data read from path: datav2/OlegZendel_following.json
Found 5 following of user OlegZendel
Processing user with username LyndaSaidLhadj from file
Data read from path: datav2/LyndaSaidLhadj_following.json
Found 5 following of user LyndaSaidLhadj
Processing user with username acai2021 from file
Data read from path: datav2/acai2021_following.json
Found 5 following of user acai2021
Processing user with username rodger_benham from file
Data read from path: datav2/rodger_benham_following.json
Found 5 following of user rodger_benham
Processing user with username ColoursRiot from file
Data read from path: datav2/ColoursRiot_following.json
Found 5 following of user ColoursRiot
Processing user with username Outsider289 from file
Data read from path: datav2/Outsider289_following.json
Found 5 following of user Outsider289
Processing user with username mizzaro from file
Data read from path: dat

# Last 5 tweets of first and second level followers

In [123]:
# Get tweets from first-level-followers
first_level_followers_data = read_json(f"{data_folder}/first_level_followers.json")

for followers_list in first_level_followers_data.values():
    for follower in followers_list:
        if not os.path.exists(f"{data_folder}/{follower['username']}.json"):
            tweets = get_tweetsv2(follower['username'], 5, follower['id'])
            serialize_json(data_folder, f"{follower['username']}_tweets.json", tweets)

# Get tweets from second-level-followers
second_level_followers_data = read_json(f"{data_folder}/second_level_followers.json")

for followers_list in second_level_followers_data.values():
    for follower in followers_list:
        if not os.path.exists(f"{data_folder}/{follower['username']}.json"):
            tweets = get_tweetsv2(follower['username'], 5, follower['id'])
            serialize_json(data_folder, f"{follower['username']}_tweets.json", tweets)

Data read from path: datav2/first_level_followers.json
Processing user with username OlegZendel from file
Data read from path: datav2/OlegZendel_tweets.json
Data serialized to path: datav2/tweets/OlegZendel_tweets.json
Processing user with username LyndaSaidLhadj from file
Data read from path: datav2/LyndaSaidLhadj_tweets.json
Data serialized to path: datav2/tweets/LyndaSaidLhadj_tweets.json
Processing user with username acai2021 from file
Data read from path: datav2/acai2021_tweets.json
Data serialized to path: datav2/tweets/acai2021_tweets.json
Processing user with username rodger_benham from file
Data read from path: datav2/rodger_benham_tweets.json
Data serialized to path: datav2/tweets/rodger_benham_tweets.json
Processing user with username ColoursRiot from file
Data read from path: datav2/ColoursRiot_tweets.json
Data serialized to path: datav2/tweets/ColoursRiot_tweets.json
Processing user with username Outsider289 from file
Data read from path: datav2/Outsider289_tweets.json
Dat

# Third level followers and following

In [124]:
second_level_followers_data = read_json(f"{data_folder}/second_level_followers.json")

# Get followers from second-level-followers
third_level_followers = {}
for followers_list in second_level_followers_data.values():
    for follower in followers_list:
        third_level_followers[follower['username']] = get_followersv2(follower['username'], 0, follower['id'])

# Get following from second-level-followers
third_level_following = {}
for following_list in second_level_followers_data.values():
    for following in following_list:
        third_level_following[following['username']] = get_followingv2(following['username'], 0, following['id'])

Data read from path: datav2/second_level_followers.json
Processing user with username Marwah_k from file
Data read from path: datav2/Marwah_k_followers.json
Found 0 followers for user Marwah_k
Processing user with username ivrik from file
Data read from path: datav2/ivrik_followers.json
Found 0 followers for user ivrik
Processing user with username ameertawfik from file
Data read from path: datav2/ameertawfik_followers.json
Found 0 followers for user ameertawfik
Processing user with username aleksbobic from file
Data read from path: datav2/aleksbobic_followers.json
Found 0 followers for user aleksbobic
Processing user with username debforit from file
Data read from path: datav2/debforit_followers.json
Found 0 followers for user debforit
Processing user with username gael_dias from file
Data read from path: datav2/gael_dias_followers.json
Found 0 followers for user gael_dias
Processing user with username Nthakur20 from file
Data read from path: datav2/Nthakur20_followers.json
Found 0 fo