# **Communities Visualiztion**


## **Imports**


In [1]:
import os
import sys
from warnings import filterwarnings
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv

%matplotlib inline
%load_ext autotime

load_dotenv()
filterwarnings("ignore")

module_path = os.path.abspath(os.path.join("../"))
parent_dir = os.path.dirname(os.getcwd())
if module_path not in sys.path:
    sys.path.append(module_path)

time: 2.66 ms (started: 2022-12-31 00:32:41 -05:00)


In [26]:
import tweepy
import numpy as np
import pandas as pd
import dask.dataframe as dd
import ast
from scipy.stats import gmean
import networkx as nx
import seaborn as sns
from utils.graph import get_ego_graph, draw_nx_graph, draw_plotly_graph

PATH = os.getcwd()
PROJECT = str(Path(PATH).parents[0])
TWITTER_USERNAME = os.getenv("TWITTER_USERNAME")
TWITTER_API_KEY = os.getenv("TWITTER_API_KEY")
TWITTER_API_SECRET = os.getenv("TWITTER_API_SECRET")
TWITTER_API_BEARER_TOKEN = os.getenv("TWITTER_API_BEARER_TOKEN")
CLOUD_STORAGE_BUCKET = os.getenv("CLOUD_STORAGE_BUCKET")
TWITTER_USERNAME=os.getenv("TWITTER_USERNAME")
TWITTER_USER_ID=os.getenv("TWITTER_FOCAL_NODE_ID")

client = tweepy.Client(TWITTER_API_BEARER_TOKEN, wait_on_rate_limit=True)

time: 1.77 ms (started: 2022-12-31 00:42:51 -05:00)


## **Read**


### **Node Centrality (Subset)**


In [3]:
df_cen = dd.read_csv(f"{CLOUD_STORAGE_BUCKET}/measures/node/node_measures.csv").compute()
print(df_cen.shape)
df_cen.head()

(1117046, 3)


Unnamed: 0,measure_name,node,measure_value
0,pagerank,44196397,1.5e-05
1,pagerank,433365936,1.2e-05
2,pagerank,2248872301,1.2e-05
3,pagerank,35761106,1.2e-05
4,pagerank,1542483269542940679,1.2e-05


time: 9.8 s (started: 2022-12-31 00:32:54 -05:00)


### **Node Features**


In [4]:
nodes = dd.read_csv(f"{CLOUD_STORAGE_BUCKET}/features/node/node_features*.csv", dtype={'withheld': 'object'}).compute()
print(nodes.shape)
nodes.head()

(206084, 7)


Unnamed: 0,id,name,profile_image_url,public_metrics,username,verified,withheld
0,794433401591693312,Aran Komatsuzaki,https://pbs.twimg.com/profile_images/150798283...,"{'followers_count': 16111, 'following_count': ...",arankomatsuzaki,False,
1,2509504696,Jericho Brown,https://pbs.twimg.com/profile_images/111968593...,"{'followers_count': 40468, 'following_count': ...",jerichobrown,True,
2,1938030980,Angel Wicky,https://pbs.twimg.com/profile_images/144139505...,"{'followers_count': 355162, 'following_count':...",Angel_Wicky_II,True,
3,335544910,AC Justice Project,https://pbs.twimg.com/profile_images/145415812...,"{'followers_count': 1049, 'following_count': 4...",ACJProject,False,
4,2622261,Brian Ulicny,https://pbs.twimg.com/profile_images/730781652...,"{'followers_count': 796, 'following_count': 24...",bulicny,False,


time: 18.8 s (started: 2022-12-31 00:33:03 -05:00)


### **Graph**


In [5]:
df = dd.read_csv(f"{CLOUD_STORAGE_BUCKET}/ties/ties*.csv").compute()
df.following = df.following.apply(ast.literal_eval)
df = df.explode('following')

#if you need a full graph including the user
user = client.get_user(
    username=TWITTER_USERNAME,
    user_fields=["id"],
).data.id
df_following = df.copy().dropna()
df_user = pd.DataFrame({"user": user, "following": df_following.user.unique()})
df_with_user = pd.concat([df_user, df_following])

#create  a graph
edges = df_with_user.copy().dropna()
edges.columns = ['source', 'target']
edges['source'] = edges['source'].astype(int)
edges['target'] = edges['target'].astype(int)
edges['weight'] = 1
print(df.shape, edges.shape)

G = nx.from_pandas_edgelist(
    edges,
    create_using=nx.DiGraph(),
    edge_attr=True
)
print(f"Nodes: {len(G.nodes())}, Edges: {len(G.edges())}")
df.head()

(239795, 2) (240033, 3)
Nodes: 159578, Edges: 237327


Unnamed: 0,user,following
0,12,69170362
0,12,1499167615121387522
0,12,594103
0,12,58868301
0,12,721049861399949313


time: 5.45 s (started: 2022-12-31 00:33:22 -05:00)


In [18]:
c_s = list(nx.strongly_connected_components(G))
c_s.sort(key=len, reverse=True)

time: 964 ms (started: 2022-12-31 00:38:04 -05:00)


## **Statuses & Retweets**

In [20]:
def authenticate(api_bearer_token):
    import tweepy

    client = tweepy.Client(api_bearer_token, wait_on_rate_limit=True)
    return client


def get_users(client, user_fields, user_names=None, user_ids=None):

    if user_ids:
        return client.get_users(
            ids=user_ids,
            user_fields=user_fields,
        ).data
    elif user_names:
        return client.get_users(
            usernames=user_names,
            user_fields=user_fields,
        ).data
    else:
        raise ValueError(
            "Either one of user_names or user_ids should be provided"
        )
def get_users_following(
    client, user_id, max_results=1000, total_limit=5000, sleep_time=0.1
):
    import tweepy

    following = []
    for neighbor in tweepy.Paginator(
        client.get_users_following, id=user_id, max_results=max_results
    ).flatten(limit=total_limit):
        time.sleep(sleep_time)
        following.append(neighbor.id)
    print(f"User: {user_id}, Following: {len(following)}")
    return {"user": user_id, "following": following}

time: 1.42 ms (started: 2022-12-31 00:41:57 -05:00)


In [28]:
import time
user_d1_nodes = get_users_following(client, user_id=TWITTER_USER_ID)
len(user_d1_nodes.get('following'))

User: 1427408443296657408, Following: 241


241

time: 24.4 s (started: 2022-12-31 00:43:10 -05:00)


In [65]:
nodes.sample(5)

Unnamed: 0,id,name,profile_image_url,public_metrics,username,verified,withheld
125108,850892377627742209,Tyler Derr,https://pbs.twimg.com/profile_images/133466196...,"{'followers_count': 1034, 'following_count': 9...",TylersNetwork,False,
198490,998327796672811008,ClarkeMD,https://pbs.twimg.com/profile_images/998329452...,"{'followers_count': 46398, 'following_count': ...",MdTeryn,False,
90508,2413696970,Technoblade,https://pbs.twimg.com/profile_images/128495990...,"{'followers_count': 3587760, 'following_count'...",Technothepig,True,
105876,759140784180457472,Neeraj Salvankar,https://pbs.twimg.com/profile_images/151260896...,"{'followers_count': 72, 'following_count': 215...",neerajsal1kar,False,
104531,198588215,Optimus Prime,https://pbs.twimg.com/profile_images/146904969...,"{'followers_count': 678, 'following_count': 50...",TemporalAnalyst,False,


time: 7.77 ms (started: 2022-12-31 01:14:24 -05:00)


In [36]:
# test_users= user_d1_nodes.get('following')
test_users = [1938030980]#, 1433718835127926787, 2465283662]
def get_content_type(
                    user_id, 
                     content_type = "tweets", 
                     tweet_fields=['context_annotations','in_reply_to_user_id','public_metrics','entities','created_at']):
    if content_type == "tweets":
        return client.get_users_tweets(id=user_id, tweet_fields=tweet_fields).data
    elif content_type == "mentions":
        return client.get_users_mentions(id=user_id, tweet_fields=tweet_fields).data
    elif content_type == "likes":
        return client.get_liked_tweets(id=user_id, tweet_fields=tweet_fields).data
    
d = get_content_type(user_id=test_users[0], content_type="tweets")
d

[<Tweet id=1608402302234234880 text='Join me by the pool 😎\n\n#italy #travelblogger #traveler #traveling #travelling #traveler #travellife #europe #europeunion #europeanart #milan #milano #roma #rome #vacation #vacations #Italian #playboy #fhm #magazine https://t.co/UsrcBSeWw2'>,
 <Tweet id=1606560242917056512 text='Wishing you beautiful and peaceful Christmas filled with love 🤗❤️🎄🎁\nPřeji vám krásné a klidně Vánoce plné lásky 🤗❤️🎄🎁\n\n#christmas #Christmasgifts #MerryChristmas #horsegirl #horses #horse #horselover #horselovers #horsetrainer #animallover #rescueanimal https://t.co/6NypbtN2Aw'>,
 <Tweet id=1606408122142269463 text='Christmas liveshow is coming this Sunday!😎🤶🎄🎁\nFrom 9pm cet at https://t.co/Od0lp4hwNh @SecretFriendsX https://t.co/WKyaYdSnnL'>,
 <Tweet id=1605634142309318666 text='I’ve been asked what I want for Christmas, new year, birthday etc… ?? https://t.co/jCW8BKies7'>,
 <Tweet id=1605523356802916354 text='MUST SEE! Double penetration for Christmas 😱😎🍆🍆😜😋💦💦 Watch it

time: 233 ms (started: 2022-12-31 00:49:29 -05:00)


In [97]:
test_users= user_d1_nodes.get('following')
for c in ["tweets", "mentions", "likes"]:
    content_df = pd.DataFrame()
    time.sleep(0.5)
    for i, test_user in enumerate(test_users[:5]):
        print(f"users:{i}/{len(test_users)}")
        if i%100==0:
            time.sleep(10)
        data = get_content_type(user_id=test_user, content_type=c)
        try:
            for d in data:  
                content_df = content_df.append({
                                                        "user_id": test_user,
                                                        "timestamp": d.created_at,
                                                        "tweet_id": d.id,
                                                        "content": c,
                                                        "public_metrics": d.public_metrics,
                                                        "in_reply_to_user_id":d.in_reply_to_user_id,
                                                        "hashtags": [h.get('tag') for h in d.get('entities').get('hashtags', ())],
                                                        "context": [t.get('entity', ()).get('name').lower() for t in d.context_annotations]}, ignore_index=True)
        except:
            continue
    print(f"{c}: {content_df.shape}")
    content_df.to_csv(f"data/{c}_{time.strftime('%Y%m%d')}.csv", index=False) #%H%M%S
content_df.tail(10)

users:0/241
users:1/241
users:2/241
users:3/241
users:4/241
tweets: (34, 8)
users:0/241
users:1/241
users:2/241
users:3/241
users:4/241
mentions: (50, 8)
users:0/241
users:1/241
users:2/241
users:3/241
users:4/241
likes: (175, 8)


Unnamed: 0,user_id,timestamp,tweet_id,content,public_metrics,in_reply_to_user_id,hashtags,context
165,1067218780806307841,2022-09-21 22:41:31+00:00,1572717691219345408,likes,"{'retweet_count': 5, 'reply_count': 5, 'like_c...",,"[PostdocJEDIChampions22, StanfordAIMI]","[entertainment & leisure business, star wars, ..."
166,1067218780806307841,2022-09-21 18:33:47+00:00,1572655345075638275,likes,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",1.0672187808063078e+18,[],"[midjourney, digital artwork, ai image generat..."
167,1067218780806307841,2022-09-21 18:27:35+00:00,1572653783850840064,likes,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",1.0672187808063078e+18,[],[]
168,1067218780806307841,2022-09-19 22:43:29+00:00,1571993409694609408,likes,"{'retweet_count': 6, 'reply_count': 0, 'like_c...",,[NLProc],[technology business]
169,1067218780806307841,2022-09-20 04:07:07+00:00,1572074854459019265,likes,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1.2398361524183e+18,[],[]
170,1067218780806307841,2022-09-20 04:03:23+00:00,1572073913391394816,likes,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1.0672187808063078e+18,[],[]
171,1067218780806307841,2022-09-19 14:58:20+00:00,1571876349169508352,likes,"{'retweet_count': 10, 'reply_count': 1, 'like_...",,[],[]
172,1067218780806307841,2022-09-14 02:33:12+00:00,1569876891338969090,likes,"{'retweet_count': 2, 'reply_count': 0, 'like_c...",,[],"[technology business, economics]"
173,1067218780806307841,2022-09-16 00:56:16+00:00,1570577273857929217,likes,"{'retweet_count': 10, 'reply_count': 0, 'like_...",,[],"[stanford university, stanford university, gov..."
174,1067218780806307841,2022-09-15 20:51:45+00:00,1570515737856577536,likes,"{'retweet_count': 100, 'reply_count': 5, 'like...",,[],[]


time: 39.7 s (started: 2022-12-31 01:32:25 -05:00)


In [98]:
content_df_all = pd.concat([pd.read_csv(f"data/{c}_{time.strftime('%Y%m%d')}.csv") for c in ["tweets", "mentions", "likes"]], ignore_index=True)
content_df_all.in_reply_to_user_id = content_df_all.in_reply_to_user_id.fillna(0.0).astype(int)
print(content_df_all.shape)
content_df_all.head()

(259, 8)


Unnamed: 0,user_id,timestamp,tweet_id,content,public_metrics,in_reply_to_user_id,hashtags,context
0,3289153303,2022-12-30 19:40:48+00:00,1608910996969754624,tweets,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",2738381,[],"['financial services business', 'travel & tran..."
1,3289153303,2022-12-30 19:32:23+00:00,1608908877877370880,tweets,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",650733,[],"['transportation', 'automotive, aircraft & boa..."
2,3289153303,2022-12-30 19:05:19+00:00,1608902065933410305,tweets,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",905201,[],[]
3,3289153303,2022-12-30 18:59:53+00:00,1608900699773100033,tweets,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",650733,[],"['transportation', 'automotive, aircraft & boa..."
4,3289153303,2022-12-30 18:53:29+00:00,1608899091022647296,tweets,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1584642529,[],['andrew tate']


time: 12.1 ms (started: 2022-12-31 01:33:10 -05:00)


In [99]:
content_df_all[(content_df_all.user_id != content_df_all.in_reply_to_user_id) & (content_df_all.in_reply_to_user_id != 0)] \
    .groupby(["user_id", "in_reply_to_user_id"]) \
    .tweet_id \
    .count()\
    .reset_index() \
    .rename(columns={'user_id': 'source','in_reply_to_user_id': 'target', 'tweet_id': 'weight', })

Unnamed: 0,source,target,weight
0,19725644,13445732,1
1,19725644,31039201,1
2,19725644,38361272,1
3,19725644,39439838,1
4,19725644,44196397,3
5,19725644,350057999,1
6,19725644,385560580,1
7,19725644,472301455,1
8,19725644,1243859159159689216,1
9,175624200,10472362,1


time: 10.2 ms (started: 2022-12-31 01:33:13 -05:00)


In [105]:
nodes[nodes.id.isin([19725644, 175624200, 44196397, 18676154, 130745589])]

Unnamed: 0,id,name,profile_image_url,public_metrics,username,verified,withheld
1956,175624200,John Carmack,https://pbs.twimg.com/profile_images/110649778...,"{'followers_count': 953883, 'following_count':...",ID_AA_Carmack,True,
48066,44196397,Elon Musk,https://pbs.twimg.com/profile_images/150359143...,"{'followers_count': 81133488, 'following_count...",elonmusk,True,
56906,18676154,Shawn Presser,https://pbs.twimg.com/profile_images/107744830...,"{'followers_count': 4739, 'following_count': 2...",theshawwn,False,
64968,19725644,Neil deGrasse Tyson,https://pbs.twimg.com/profile_images/74188698/...,"{'followers_count': 14622136, 'following_count...",neiltyson,True,
84489,130745589,Fei-Fei Li,https://pbs.twimg.com/profile_images/841385099...,"{'followers_count': 397370, 'following_count':...",drfeifei,True,


time: 9.02 ms (started: 2022-12-31 01:36:09 -05:00)


In [85]:
tweets = client.get_users_mentions(id=test_user, tweet_fields=['context_annotations','created_at','geo'])

for tweet in tweets.data:
    # print(tweet)
    context = set([t.get('entity').get('name')for t in tweet.context_annotations])
print(context)

{'Automotive, Aircraft & Boat Business', 'Luxury Cars', 'Design', 'Automotive', 'Tesla Motors', 'Hybrid and electric vehicles', 'Auto Manufacturer - Auto', 'Automobile Brands'}
time: 355 ms (started: 2022-12-31 01:18:35 -05:00)


In [316]:
tweets_dataframe.groupby(['user_id', 'content']).tweet_id.count()

user_id              content 
1938030980           likes       95
                     mentions    10
                     tweets      10
2465283662           likes       99
                     mentions    10
                     tweets      10
1433718835127926787  likes       43
                     mentions    10
                     tweets      10
Name: tweet_id, dtype: int64

time: 10.9 ms (started: 2022-12-04 21:54:28 -05:00)


In [113]:
alters = {
            i: {"previous": set(), "current": set(), "new": set()}
            for i in range(1, 2 + 1)
        }
alters[1]["previous"] = set([1,2,7])
alters[1]["previous"] = alters.get(1).get("previous").difference(set([7]))
alters

{1: {'previous': {1, 2}, 'current': set(), 'new': set()},
 2: {'previous': set(), 'current': set(), 'new': set()}}

time: 2.81 ms (started: 2022-12-31 10:07:44 -05:00)
