## Installations and imports

In [0]:
%pip install --upgrade scipy networkx cdlib

Python interpreter will be restarted.
Collecting scipy
  Downloading scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
Collecting networkx
  Downloading networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting cdlib
  Downloading cdlib-0.2.6-py3-none-any.whl (228 kB)
Collecting pulp
  Downloading PuLP-2.7.0-py3-none-any.whl (14.3 MB)
Collecting markov-clustering
  Downloading markov_clustering-0.0.6.dev0-py3-none-any.whl (6.3 kB)
Collecting pyclustering
  Downloading pyclustering-0.10.1.2.tar.gz (2.6 MB)
Collecting pooch
  Downloading pooch-1.7.0-py3-none-any.whl (60 kB)
Collecting demon
  Downloading demon-2.0.6-py3-none-any.whl (7.3 kB)
Collecting future
  Downloading future-0.18.3.tar.gz (840 kB)
Collecting tqdm
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
Collecting chinese-whispers
  Downloading chinese_whispers-0.8.1-py3-none-any.whl (7.8 kB)
Collecting bimlpa
  Downloading bimlpa-0.1.2-py3-none-any.whl (7.0 kB)
Collecting dynetx
  Downloading dynetx-

In [0]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from cdlib import algorithms
import random



Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap', 'karateclub', 'wurlitzer', 'leidenalg', 'graph_tool'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'karateclub', 'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer', 'leidenalg', 'infomap'}


## Graphs

In [0]:
# Pull the follow edges and the per-user aggregate metrics from the metastore into pandas.
follows_query = 'SELECT signer_id, follows, type FROM hive_metastore.sit.graph_follows'
metrics_query = 'SELECT * FROM hive_metastore.sit.users_agg_metrics'
follows_df = spark.sql(follows_query).toPandas()
metrics_df = spark.sql(metrics_query).toPandas().fillna(0)  # missing metrics are treated as 0

# Build the graph from FOLLOW edges only (original note: 97% of actions are FOLLOWS).
# No edge attributes are attached, so the edges carry no 'type' or 'weight' data.
follow_edges = follows_df[follows_df['type'] == 'FOLLOW']
G_follows = nx.from_pandas_edgelist(follow_edges, source='signer_id', target='follows')

# Community detection with two algorithms on the same graph.
# NOTE(review): randomize=False is presumably meant to keep the Louvain partition stable
# between runs -- the hard-coded community IDs used later in this notebook rely on that; confirm.
louvain_follows = algorithms.louvain(G_follows, weight='weight', resolution=1., randomize=False)
walktrap_follows = algorithms.walktrap(G_follows)


In [0]:
def _crisp_membership_frame(clustering, community_col):
    """Flatten a non-overlapping clustering into one row per user.

    Parameters
    ----------
    clustering
        Object exposing ``overlap`` (bool) and ``communities`` (an iterable of
        node collections), as returned by the cdlib algorithms used above.
    community_col : str
        Name of the output column that holds the community index.

    Returns
    -------
    pd.DataFrame
        Columns ``signer_id`` and ``community_col``; the community index is the
        position of the community inside ``clustering.communities``.

    Raises
    ------
    ValueError
        If the clustering is overlapping: a user could then belong to several
        communities and the one-row-per-user mapping would be ambiguous.
    """
    if clustering.overlap:
        raise ValueError(
            f"Cannot build '{community_col}': expected a non-overlapping clustering"
        )
    rows = [
        (user, community_idx)
        for community_idx, members in enumerate(clustering.communities)
        for user in members
    ]
    return pd.DataFrame(rows, columns=['signer_id', community_col])


# One membership table per algorithm (user -> community index).
lf_df = _crisp_membership_frame(louvain_follows, 'louvain_community')
wt_df = _crisp_membership_frame(walktrap_follows, 'walktrap_community')

# Number of members in each community, keyed by community index.
lcom_size_dict = dict(lf_df['louvain_community'].value_counts())
wtcom_size_dict = dict(wt_df['walktrap_community'].value_counts())

# Combine both assignments per user and attach the size of each user's community.
communities_df = pd.merge(lf_df, wt_df, on='signer_id')
communities_df['louvain_size'] = communities_df['louvain_community'].map(lcom_size_dict)
communities_df['walktrap_size'] = communities_df['walktrap_community'].map(wtcom_size_dict)

# Left join keeps every user from the metrics table; users absent from the follow
# graph get NaN in the community columns (which is why those columns are floats).
df = pd.merge(metrics_df, communities_df, on='signer_id', how='left')
df.shape

Out[3]: (7974, 36)

## Top 20 Trending users

In [0]:
TOP_N_TRENDING = 20  # number of users kept in the trending leaderboard

# Trending metric = 30-day weighted engagement per unit of 30-day weighted activity.
# The metrics table was loaded with fillna(0), so activity can be exactly 0; mask
# those denominators to NaN so that x/0 = inf cannot float to the top of the ranking.
df['trending_metric'] = (
    df['engagement_weighted_30d'] / df['activity_weighted_30d'].replace(0, np.nan)
)

trending_users_df = (
    df[['signer_id', 'followers', 'trending_metric', 'louvain_community']]
    .sort_values('trending_metric', ascending=False)  # NaN metrics sort last
    .head(TOP_N_TRENDING)
    .reset_index(drop=True)
    # Integer display columns. Like the original int() cast, this raises if a
    # leaderboard user has no Louvain community (NaN after the left merge).
    .astype({'followers': int, 'louvain_community': int})
    .rename(columns={'signer_id': 'user_name', 'louvain_community': 'com_ID'})
)
trending_users_df.index = trending_users_df.index + 1  # 1-based rank as the index
trending_users_df

Unnamed: 0,user_name,followers,trending_metric,com_ID
1,nearmax.near,141,5.836047,1
2,root.near,423,5.239044,1
3,calimero.near,10,3.492447,1
4,nearxartdao.near,10,2.550336,8
5,near-nigeria.near,9,2.372126,16
6,chefsale.near,60,2.361512,1
7,auroraecosystemnews.near,1182,2.279561,0
8,partners.learnclub.near,165,2.278485,10
9,nfcommunity.near,8,2.105897,5
10,mob.near,281,2.094708,1


In [0]:
# Human-readable names for Louvain communities, keyed by community ID.
# Keys are integers to match the integer com_ID column of trending_users_df.
# NOTE(review): the IDs are only meaningful for the partition computed above --
# re-check this table whenever the graph data or the Louvain settings change.
id_to_name = {0: 'Aurora Network',
              1: 'Core Devs',
              3: 'Near Ukraine',
              5: 'Near Foundation',
              8: 'NearXArt Dao',
              10: 'Learn Near Club',
              16: 'Near Nigeria'}

# Attach the community name to each trending user; IDs without an entry map to NaN.
trending_users_df['com_name'] = trending_users_df['com_ID'].map(id_to_name)
trending_users_df

Unnamed: 0,user_name,followers,trending_metric,com_ID,com_name
1,nearmax.near,141,5.836047,1,Core Devs
2,root.near,423,5.239044,1,Core Devs
3,calimero.near,10,3.492447,1,Core Devs
4,nearxartdao.near,10,2.550336,8,NearXArt Dao
5,near-nigeria.near,9,2.372126,16,Near Nigeria
6,chefsale.near,60,2.361512,1,Core Devs
7,auroraecosystemnews.near,1182,2.279561,0,Aurora Network
8,partners.learnclub.near,165,2.278485,10,Learn Near Club
9,nfcommunity.near,8,2.105897,5,Near Foundation
10,mob.near,281,2.094708,1,Core Devs


In [0]:
# Render the trending table (including the com_name column) with the notebook's display() helper.
display(trending_users_df)

user_name,followers,trending_metric,com_ID,com_name
nearmax.near,141,5.836046511627907,1,Core Devs
root.near,423,5.239043824701195,1,Core Devs
calimero.near,10,3.492447129909366,1,Core Devs
nearxartdao.near,10,2.5503355704697985,8,NearXArt Dao
near-nigeria.near,9,2.3721264367816093,16,Near Nigeria
chefsale.near,60,2.361512027491409,1,Core Devs
auroraecosystemnews.near,1182,2.279561087454962,0,Aurora Network
partners.learnclub.near,165,2.278484591461691,10,Learn Near Club
nfcommunity.near,8,2.10589651022864,5,Near Foundation
mob.near,281,2.0947080648310874,1,Core Devs


In [0]:
# Export only the fields the widget needs (user name + community ID) as a JSON
# array of records, written to the current working directory.
widget_payload = trending_users_df[['user_name', 'com_ID']]
widget_payload.to_json('trending_users.json', orient='records')

## Top 10 Communities and their Top trending user

In [0]:
# The 10 Louvain communities with the highest total 30-day weighted engagement.
top10coms = list(
    df.groupby('louvain_community')['engagement_weighted_30d']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .index
)

# Members of those communities, restricted to the columns shown in the result.
# .loc[...] + .copy() yields an independent frame, so adding the 'rank' column
# below does not write into a slice of df (avoids SettingWithCopyWarning).
leader_cols = ['signer_id',
               'followers',
               'engagement_weighted_30d',
               'activity_weighted_30d',
               'louvain_community',
               'louvain_size',
               'trending_metric']
top10coms_aux = df.loc[df['louvain_community'].isin(top10coms), leader_cols].copy()

# Rank users inside each community by trending metric (1 = highest; ties are
# broken by row order because method='first'), then keep each community's leader.
top10coms_aux['rank'] = (
    top10coms_aux.groupby('louvain_community')['trending_metric']
    .rank(method='first', ascending=False)
)
top10coms_df = (
    top10coms_aux[top10coms_aux['rank'].eq(1)]
    .sort_values('trending_metric', ascending=False)
)
top10coms_df

Unnamed: 0,signer_id,followers,engagement_weighted_30d,activity_weighted_30d,louvain_community,louvain_size,trending_metric,rank
20,nearmax.near,141.0,50.19,8.6,1.0,571.0,5.836047,1.0
326,nearxartdao.near,10.0,7.6,2.98,8.0,152.0,2.550336,1.0
356,near-nigeria.near,9.0,16.51,6.96,16.0,11.0,2.372126,1.0
0,auroraecosystemnews.near,1182.0,556.76,244.24,0.0,1078.0,2.279561,1.0
9,partners.learnclub.near,165.0,80.59,35.37,10.0,86.0,2.278485,1.0
396,nfcommunity.near,8.0,17.5,8.31,5.0,350.0,2.105897,1.0
194,nearukraineguild.near,20.0,36.4,20.89,3.0,357.0,1.742461,1.0
1336,63cf5a1d81ae3b1bcba60219a17664aba8622a2ec984fe...,1.0,4.63,3.98,9.0,116.0,1.163317,1.0
558,scopalaffairs.near,5.0,9.93,9.29,13.0,29.0,1.068891,1.0
1340,0xgh.near,1.0,5.65,7.65,40.0,2.0,0.738562,1.0


## Random trending user recommendation function

In [0]:
def n_trending_users(n):
    '''Return n users drawn at random, without replacement, from the top 20 trending users.

    Reads the notebook-level ``trending_users_df`` table. ``random.sample``
    raises ``ValueError`` when ``n`` is larger than the number of users in it.
    '''
    candidates = trending_users_df['user_name'].tolist()
    return random.sample(candidates, k=n)

def trending_users_outside_community(user, n):
    '''Return n random top-20 trending users from a different community than `user`.

    Parameters
    ----------
    user : str
        ``signer_id`` of the user receiving the recommendation.
    n : int
        Number of users to return.

    Returns
    -------
    list of str
        ``n`` user names sampled without replacement.

    Raises
    ------
    ValueError
        From ``random.sample`` when fewer than ``n`` candidates are available.

    Notes
    -----
    A user that is not present in ``df`` has no community to exclude, so the
    sample is taken from all trending users. This matches what already happens
    for a known user whose community is NaN (``com_ID != NaN`` is always true).
    '''
    user_rows = df.loc[df['signer_id'] == user, 'louvain_community']
    if user_rows.empty:
        # Unknown user: nothing to filter out (previously a bare IndexError).
        candidates = trending_users_df
    else:
        user_community = user_rows.iloc[0]
        candidates = trending_users_df[trending_users_df['com_ID'] != user_community]
    return random.sample(candidates['user_name'].tolist(), n)


In [0]:
# Example: 3 trending users from outside rojoser.near's Louvain community.
# The result changes between runs: the sampling uses `random` and no seed is set in the visible cells.
trending_users_outside_community('rojoser.near', 3)

Out[10]: ['knaak.near', 'linearprotocol.near', 'nfcommunity.near']

In [0]:
# Example: 3 users sampled at random from the trending table (unseeded, so output varies per run).
n_trending_users(3)

Out[11]: ['linearprotocol.near', 'near-nigeria.near', 'petersalomonsen.near']