## Installations and imports

In [0]:
%pip install --upgrade scipy networkx cdlib

Python interpreter will be restarted.
Collecting scipy
  Downloading scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
Collecting networkx
  Downloading networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting cdlib
  Downloading cdlib-0.2.6-py3-none-any.whl (228 kB)
Collecting pulp
  Downloading PuLP-2.7.0-py3-none-any.whl (14.3 MB)
Collecting markov-clustering
  Downloading markov_clustering-0.0.6.dev0-py3-none-any.whl (6.3 kB)
Collecting pyclustering
  Downloading pyclustering-0.10.1.2.tar.gz (2.6 MB)
Collecting pooch
  Downloading pooch-1.7.0-py3-none-any.whl (60 kB)
Collecting demon
  Downloading demon-2.0.6-py3-none-any.whl (7.3 kB)
Collecting future
  Downloading future-0.18.3.tar.gz (840 kB)
Collecting tqdm
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
Collecting chinese-whispers
  Downloading chinese_whispers-0.8.1-py3-none-any.whl (7.8 kB)
Collecting bimlpa
  Downloading bimlpa-0.1.2-py3-none-any.whl (7.0 kB)
Collecting dynetx
  Downloading dynetx-

In [0]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from cdlib import algorithms
import random



Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap', 'karateclub', 'wurlitzer', 'leidenalg', 'graph_tool'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'karateclub', 'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer', 'leidenalg', 'infomap'}


## Graphs

In [0]:
# Pull the follow edges and the per-user aggregate metrics into pandas.
# Missing metric values are replaced by 0.
follows_df = spark.sql(
    'SELECT signer_id, follows, type FROM hive_metastore.sit.graph_follows'
).toPandas()
metrics_df = (
    spark.sql('SELECT * FROM hive_metastore.sit.users_agg_metrics')
    .toPandas()
    .fillna(0)
)

# Build the graph from FOLLOW rows only (97% of actions are FOLLOWS).
# No edge attributes are loaded, so the edges carry no 'weight' value.
is_follow = follows_df['type'] == 'FOLLOW'
G_follows = nx.from_pandas_edgelist(
    follows_df[is_follow],
    source='signer_id',
    target='follows',
)

# Community detection with two algorithms on the same graph.
# NOTE(review): `weight='weight'` names an attribute the edges do not have;
# presumably louvain falls back to unit weights - confirm.
louvain_follows = algorithms.louvain(
    G_follows,
    weight='weight',
    resolution=1.,
    randomize=False,
)
walktrap_follows = algorithms.walktrap(G_follows)


In [0]:
def _membership_frame(clustering, community_col):
    '''Return a DataFrame with one row per user and the index of its community.

    clustering: clustering result exposing `.overlap` and `.communities`
        (an iterable of user collections).
    community_col: name of the output column holding the community index.

    Raises ValueError for an overlapping clustering: a user could belong to
    several communities, so a single community ID per user is not defined.
    '''
    if clustering.overlap:
        raise ValueError(
            f"Cannot build '{community_col}': the clustering is overlapping, "
            "so users do not map to a single community."
        )
    records = [
        (user, community_id)
        for community_id, members in enumerate(clustering.communities)
        for user in members
    ]
    return pd.DataFrame(records, columns=['signer_id', community_col])


# Map each user to its Louvain and Walktrap community index
lf_df = _membership_frame(louvain_follows, 'louvain_community')
wt_df = _membership_frame(walktrap_follows, 'walktrap_community')

# Number of users in each community, keyed by community index
lcom_size_dict = lf_df['louvain_community'].value_counts().to_dict()
wtcom_size_dict = wt_df['walktrap_community'].value_counts().to_dict()

# One row per user with both community assignments and the community sizes
communities_df = pd.merge(lf_df, wt_df, on='signer_id')
communities_df['louvain_size'] = communities_df['louvain_community'].map(lcom_size_dict)
communities_df['walktrap_size'] = communities_df['walktrap_community'].map(wtcom_size_dict)

# Left merge keeps every user in metrics_df; users absent from the graph
# get NaN in the community columns.
df = pd.merge(metrics_df, communities_df, on='signer_id', how='left')
df.shape

Out[3]: (8357, 37)

## Top 20 Trending users

In [0]:
# Trending metric: ratio of engagement_weighted_30d to activity_weighted_30d.
# A zero activity value makes the ratio +/-inf, which would sort to the top of
# the ranking; treat those ratios as undefined (NaN) instead.
df['trending_metric'] = (
    (df['engagement_weighted_30d'] / df['activity_weighted_30d'])
    .replace([np.inf, -np.inf], np.nan)
)

# Top 20 users by trending metric. Rows without a metric or without a Louvain
# community are dropped first: they cannot be ranked, and a NaN community
# cannot be cast to an integer ID.
trending_users_df = (
    df[['signer_id', 'followers', 'trending_metric', 'louvain_community']]
    .dropna(subset=['trending_metric', 'louvain_community'])
    .sort_values('trending_metric', ascending=False)
    .head(20)
    .reset_index(drop=True)
    .astype({'followers': 'int64', 'louvain_community': 'int64'})
    .rename(columns={'signer_id': 'user_name', 'louvain_community': 'com_ID'})
)
# 1-based index so it reads as the user's rank
trending_users_df.index = trending_users_df.index + 1
trending_users_df

Unnamed: 0,user_name,followers,trending_metric,com_ID
1,marieke.near,173,3.130435,2
2,microchipgnu.near,109,3.011236,3
3,near-nigeria.near,19,2.15942,11
4,calimero.near,12,2.089286,2
5,nearukraineguild.near,21,2.021739,3
6,partners.learnclub.near,151,1.841121,10
7,shemar268abel.near,4,1.78,3
8,yamirghofran.near,1,1.529412,6
9,kemo.near,12,1.5,3
10,mxjxn.near,9,1.431818,3


In [0]:
# Human-readable names for the Louvain communities, keyed by community ID
id_to_name = {
    0.0: 'Aurora Network',
    2.0: 'Core Devs',
    3.0: 'Near Ukraine',
    5.0: 'Near Foundation',
    8.0: 'NearXArt Dao',
    10.0: 'Learn Near Club',
    11.0: 'Near Nigeria',
    6.0: 'Amirghofran',
}

# Attach the community name to each trending user; IDs that are not in the
# mapping come out as NaN.
community_names = trending_users_df['com_ID'].map(id_to_name)
trending_users_df['com_name'] = community_names
trending_users_df

Unnamed: 0,user_name,followers,trending_metric,com_ID,com_name
1,marieke.near,173,3.130435,2,Core Devs
2,microchipgnu.near,109,3.011236,3,Near Ukraine
3,near-nigeria.near,19,2.15942,11,Near Nigeria
4,calimero.near,12,2.089286,2,Core Devs
5,nearukraineguild.near,21,2.021739,3,Near Ukraine
6,partners.learnclub.near,151,1.841121,10,Learn Near Club
7,shemar268abel.near,4,1.78,3,Near Ukraine
8,yamirghofran.near,1,1.529412,6,Amirghofran
9,kemo.near,12,1.5,3,Near Ukraine
10,mxjxn.near,9,1.431818,3,Near Ukraine


In [0]:
# Show the trending-users table through the notebook's `display` helper
display(trending_users_df)

user_name,followers,trending_metric,com_ID,com_name
marieke.near,173,3.130434782608696,2,Core Devs
microchipgnu.near,109,3.01123595505618,3,Near Ukraine
near-nigeria.near,19,2.1594202898550723,11,Near Nigeria
calimero.near,12,2.0892857142857144,2,Core Devs
nearukraineguild.near,21,2.0217391304347827,3,Near Ukraine
partners.learnclub.near,151,1.841121495327103,10,Learn Near Club
shemar268abel.near,4,1.78,3,Near Ukraine
yamirghofran.near,1,1.5294117647058825,6,Amirghofran
kemo.near,12,1.5,3,Near Ukraine
mxjxn.near,9,1.4318181818181817,3,Near Ukraine


In [0]:
# Export the user name and community ID of each trending user as a JSON list
# of records, for the widget implementation.
widget_columns = ['user_name', 'com_ID']
trending_users_df[widget_columns].to_json(
    'trending_users.json',
    orient='records',
)

## Top 10 Communities and their Top trending user

In [0]:
# IDs of the ten Louvain communities with the highest summed engagement_weighted_30d
top10coms = (
    df.groupby('louvain_community')['engagement_weighted_30d']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .index
    .tolist()
)

# Members of those communities. A single .loc selection followed by .copy()
# yields an independent frame, so adding the 'rank' column below does not
# write into a slice of df (which raises SettingWithCopyWarning).
member_columns = [
    'signer_id',
    'followers',
    'engagement_weighted_30d',
    'activity_weighted_30d',
    'louvain_community',
    'louvain_size',
    'trending_metric',
]
top10coms_aux = df.loc[df['louvain_community'].isin(top10coms), member_columns].copy()

# Rank users inside each community by trending metric (1 = highest);
# method='first' breaks ties by row order so each community has one rank-1 user.
top10coms_aux['rank'] = (
    top10coms_aux
    .groupby('louvain_community')['trending_metric']
    .rank(method='first', ascending=False)
)

# Keep only the first-ranked user of each community
top10coms_df = (
    top10coms_aux[top10coms_aux['rank'] == 1]
    .sort_values('trending_metric', ascending=False)
)
top10coms_df

Unnamed: 0,signer_id,followers,engagement_weighted_30d,activity_weighted_30d,louvain_community,louvain_size,trending_metric,rank
6,marieke.near,173.0,7.2,2.3,2.0,545.0,3.130435,1.0
23,microchipgnu.near,109.0,26.8,8.9,3.0,527.0,3.011236,1.0
209,near-nigeria.near,19.0,14.9,6.9,11.0,84.0,2.15942,1.0
10,partners.learnclub.near,151.0,19.7,10.7,10.0,89.0,1.841121,1.0
1415,yamirghofran.near,1.0,2.6,1.7,6.0,213.0,1.529412,1.0
86,mundoaurora.near,44.0,46.1,49.8,0.0,998.0,0.925703,1.0
25,nearinturkiye.near,103.0,56.9,65.3,5.0,335.0,0.871363,1.0
1421,0xgh.near,1.0,5.6,10.2,46.0,2.0,0.54902,1.0


## Random trending user recommendation functions

In [0]:
def n_trending_users(n):
    '''Pick n users at random from the top 20 trending users.

    Names are drawn without replacement from the `user_name` column of the
    notebook-level `trending_users_df`. `random.sample` raises ValueError
    when n is larger than the number of available users.
    '''
    candidate_names = trending_users_df['user_name'].tolist()
    return random.sample(candidate_names, n)

def trending_users_outside_community(user, n):
    '''Pick n random top-20 trending users from communities other than the user's.

    user: `signer_id` of the user the recommendation is for.
    n: number of user names to return.

    The user's Louvain community is looked up in the notebook-level `df`.
    When the user is not present in `df` there is no community to exclude,
    so the sample is drawn from all trending users (the same outcome a known
    user with a NaN community already gets, since NaN never equals a com_ID).

    Raises ValueError (from `random.sample`) when fewer than n candidates remain.
    '''
    user_communities = df.loc[df['signer_id'] == user, 'louvain_community']
    if user_communities.empty:
        # Unknown user: nothing to filter out
        candidates = trending_users_df
    else:
        user_community = user_communities.iloc[0]
        candidates = trending_users_df[trending_users_df['com_ID'] != user_community]
    return random.sample(candidates['user_name'].tolist(), n)


In [0]:
# Example: three random trending users from communities other than rojoser.near's
trending_users_outside_community('rojoser.near', 3)

Out[10]: ['meta-pool-official.near', 'shemar268abel.near', 'mxjxn.near']

In [0]:
# Example: three random users from the trending-users table
n_trending_users(3)

Out[11]: ['mob.near', 'marieke.near', 'ihyshan.near']

In [0]:
# Follower and following counts for a single user
df.loc[df['signer_id'] == 'mob.near', ['signer_id', 'followers', 'following']]

Unnamed: 0,signer_id,followers,following
2,mob.near,260.0,114.0
