## Installations and imports

In [0]:
%pip install --upgrade scipy networkx cdlib

Python interpreter will be restarted.
Collecting scipy
  Downloading scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
Collecting networkx
  Downloading networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting cdlib
  Downloading cdlib-0.2.6-py3-none-any.whl (228 kB)
Collecting pulp
  Downloading PuLP-2.7.0-py3-none-any.whl (14.3 MB)
Collecting markov-clustering
  Downloading markov_clustering-0.0.6.dev0-py3-none-any.whl (6.3 kB)
Collecting pyclustering
  Downloading pyclustering-0.10.1.2.tar.gz (2.6 MB)
Collecting pooch
  Downloading pooch-1.7.0-py3-none-any.whl (60 kB)
Collecting demon
  Downloading demon-2.0.6-py3-none-any.whl (7.3 kB)
Collecting future
  Downloading future-0.18.3.tar.gz (840 kB)
Collecting tqdm
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
Collecting chinese-whispers
  Downloading chinese_whispers-0.8.1-py3-none-any.whl (7.8 kB)
Collecting bimlpa
  Downloading bimlpa-0.1.2-py3-none-any.whl (7.0 kB)
Collecting dynetx
  Downloading dynetx-

In [0]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from cdlib import algorithms
import random

## Graphs

In [0]:
# Pull the follow edges and the per-user aggregate metrics into pandas;
# missing metric values are replaced with 0.
follows_df = spark.sql(
    'SELECT signer_id, follows, type FROM hive_metastore.sit.graph_follows'
).toPandas()
metrics_df = (
    spark.sql('SELECT * FROM hive_metastore.sit.users_agg_metrics')
    .toPandas()
    .fillna(0)
)

# Build the follow graph from FOLLOW actions only (97% of actions are FOLLOWS)
is_follow = follows_df['type'] == 'FOLLOW'
G_follows = nx.from_pandas_edgelist(
    follows_df[is_follow],
    source='signer_id',
    target='follows',
)

# Run both community-detection algorithms on the same follow graph
louvain_follows = algorithms.louvain(
    G_follows,
    weight='weight',
    resolution=1.,
    randomize=False,
)
walktrap_follows = algorithms.walktrap(G_follows)


In [0]:
def _membership_frame(clustering, community_col):
    '''Flatten a clustering result into one row per (user, community index).

    Parameters
    ----------
    clustering : object exposing `.overlap` and `.communities`
        `.communities` is iterated as a sequence of member collections; the
        position of each collection is used as the community id.
    community_col : str
        Name of the output column holding the community id.

    Returns
    -------
    pd.DataFrame with columns ['signer_id', community_col].

    Raises
    ------
    ValueError
        If the clustering is overlapping: a user could then appear in several
        communities and the merges on 'signer_id' below would duplicate rows.
    '''
    if clustering.overlap:
        raise ValueError(
            f"cannot build '{community_col}': clustering has overlapping communities"
        )
    rows = [
        (user, community_id)
        for community_id, members in enumerate(clustering.communities)
        for user in members
    ]
    return pd.DataFrame(rows, columns=['signer_id', community_col])


# Map each user to its Louvain / Walktrap community
lf_df = _membership_frame(louvain_follows, 'louvain_community')
wt_df = _membership_frame(walktrap_follows, 'walktrap_community')

# Number of members per community id
lcom_size_dict = dict(lf_df['louvain_community'].value_counts())
wtcom_size_dict = dict(wt_df['walktrap_community'].value_counts())

# One row per user present in both clusterings, with the size of each of its communities
communities_df = pd.merge(lf_df, wt_df, on='signer_id')
communities_df['louvain_size'] = communities_df['louvain_community'].map(lcom_size_dict)
communities_df['walktrap_size'] = communities_df['walktrap_community'].map(wtcom_size_dict)

# Left merge keeps every user from metrics_df; users without a community get NaN in the community columns
df = pd.merge(metrics_df, communities_df, on='signer_id', how='left')
df.shape

Out[123]: (7458, 36)

In [0]:
# First row, first column of the merged frame. A single positional lookup avoids the
# chained df.loc[0][0], whose second step relies on deprecated integer-position fallback on a Series.
df.iloc[0, 0]

Out[183]: 'auroraecosystemnews.near'

In [0]:
# Trending metric = 30-day weighted engagement per unit of 30-day weighted activity.
# metrics_df is loaded with fillna(0), so activity can be exactly 0. Mask those
# denominators to NaN so they yield NaN (sorted last) instead of +inf, which would
# otherwise float to the top of the descending ranking.
activity_30d = df['activity_weighted_30d']
df['trending_metric'] = df['engagement_weighted_30d'] / activity_30d.where(activity_30d != 0)

# Top 20 users by trending metric
trending_users_df = (
    df[['signer_id',
        'followers',
        'engagement_weighted_30d',
        'activity_weighted_30d',
        'louvain_community',
        'trending_metric'
        ]]
    .sort_values('trending_metric', ascending=False)
    .head(20)
)
trending_users_df

Unnamed: 0,signer_id,followers,engagement_weighted_30d,activity_weighted_30d,louvain_community,trending_metric
53,neardigitalcollective.near,55.0,108.63,3.65,4.0,29.761644
14,jumpdefisocial.near,137.0,62.39,8.97,5.0,6.955407
1,root.near,393.0,242.73,36.07,1.0,6.729415
36,mintbase.near,69.0,65.71,11.25,4.0,5.840889
13,nearmax.near,137.0,49.53,10.26,1.0,4.827485
0,auroraecosystemnews.near,1161.0,617.95,181.45,0.0,3.405621
65,nekosocial.near,47.0,49.89,16.21,5.0,3.07773
327,nearxartdao.near,9.0,13.89,5.64,9.0,2.462766
16,vlad.near,133.0,57.13,25.11,1.0,2.275189
52,chefsale.near,55.0,24.45,11.56,1.0,2.115052


In [0]:
# List of Top10 communities by total 30-day weighted engagement
top10coms = list(
    df.groupby('louvain_community')['engagement_weighted_30d']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .index
)

# Top 2 users by trending metric within each of the Top10 communities.
# A single .loc selection followed by .copy() yields an independent frame, so adding
# the 'rank' column below does not raise SettingWithCopyWarning.
top10coms_aux = df.loc[
    df['louvain_community'].isin(top10coms),
    ['signer_id',
     'followers',
     'engagement_weighted_30d',
     'activity_weighted_30d',
     'louvain_community',
     'louvain_size',
     'trending_metric'
     ],
].copy()
# method='first' breaks ties by row order so ranks within a community are unique (1.0, 2.0, ...)
top10coms_aux['rank'] = top10coms_aux.groupby('louvain_community')['trending_metric'].rank(method='first', ascending=False)
# rank < 3.0 keeps ranks 1 and 2; rows with a NaN rank are dropped by the comparison
top10coms_df = top10coms_aux[top10coms_aux['rank']<3.0].sort_values('trending_metric', ascending=False)
top10coms_df

Unnamed: 0,signer_id,followers,engagement_weighted_30d,activity_weighted_30d,louvain_community,louvain_size,trending_metric,rank
53,neardigitalcollective.near,55.0,108.63,3.65,4.0,308.0,29.761644,1.0
14,jumpdefisocial.near,137.0,62.39,8.97,5.0,259.0,6.955407,1.0
1,root.near,393.0,242.73,36.07,1.0,572.0,6.729415,1.0
36,mintbase.near,69.0,65.71,11.25,4.0,308.0,5.840889,2.0
13,nearmax.near,137.0,49.53,10.26,1.0,572.0,4.827485,2.0
0,auroraecosystemnews.near,1161.0,617.95,181.45,0.0,960.0,3.405621,1.0
65,nekosocial.near,47.0,49.89,16.21,5.0,259.0,3.07773,2.0
327,nearxartdao.near,9.0,13.89,5.64,9.0,80.0,2.462766,1.0
93,mundoaurora.near,35.0,99.85,53.01,0.0,960.0,1.883607,2.0
302,moonbasedao.near,10.0,6.28,5.61,12.0,41.0,1.11943,1.0


In [0]:
# Plain list of the selected users' ids, in the table's order
top10_signer_ids = top10coms_df['signer_id'].tolist()
print(top10_signer_ids)

['neardigitalcollective.near', 'jumpdefisocial.near', 'root.near', 'auroraecosystemnews.near', 'nearxartdao.near', 'moonbasedao.near', 'capricanna.near', 'evangel.near', 'davidnvg1511.near', 'leanhtuan151191.near']


In [0]:
def n_trending_users(n):
    '''Pick n distinct users at random from the top 20 trending users.

    Reads the 'signer_id' column of the module-level `trending_users_df` and
    samples without replacement; `random.sample` raises ValueError when n
    exceeds the number of available users.
    '''
    candidate_ids = trending_users_df['signer_id'].tolist()
    return random.sample(candidate_ids, n)

def trending_users_outside_community (user, n):
    '''Return a random list of n users from the top 20 trending users in a different community than the input user.

    Parameters
    ----------
    user : value compared against df['signer_id']
        The user the suggestions are for. Never included in the result.
    n : int
        Number of users to sample (without replacement).

    Returns
    -------
    list of signer ids taken from the module-level `trending_users_df`.

    Raises
    ------
    IndexError
        If `user` is not present in df['signer_id'].
    ValueError
        Raised by `random.sample` when fewer than n candidates remain.
    '''
    user_rows = df.loc[df['signer_id'] == user, 'louvain_community']
    if user_rows.empty:
        # Same exception type as the bare positional lookup would raise, but with a usable message
        raise IndexError(f"user {user!r} not found in df['signer_id']")
    user_community = user_rows.iloc[0]

    # NaN != x is always True, so a user (or trending user) without a community is
    # treated as "outside"; the explicit self-exclusion keeps such a user from being
    # suggested to themselves.
    outside = trending_users_df['louvain_community'] != user_community
    not_self = trending_users_df['signer_id'] != user
    candidates = trending_users_df.loc[outside & not_self, 'signer_id'].tolist()
    return random.sample(candidates, n)


In [0]:
# Three trending suggestions from outside rojoser.near's Louvain community
trending_users_outside_community(user='rojoser.near', n=3)

Out[198]: ['nekosocial.near', 'neardigitalcollective.near', 'jumpdefisocial.near']

In [0]:
# Three random picks from the trending-users table
n_trending_users(n=3)

Out[175]: ['neardigitalcollective.near', 'chefsale.near', 'vlad.near']