In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import pickle

In [117]:
def _local_homophily(neighbor_counts, own_degree):
    """Compare a node's neighbours' counts against the node's own degree.

    Parameters
    ----------
    neighbor_counts : list of numbers
        One count per neighbour of the focal node.
    own_degree : int
        Degree of the focal node that the counts are compared with.

    Returns
    -------
    tuple of (float, float)
        ``(mean_ratio, similar_share)`` where ``mean_ratio`` is the mean
        neighbour count divided by ``own_degree`` and ``similar_share`` is the
        fraction of neighbours whose count lies in
        ``[own_degree / 2, own_degree * 2]``.  Both are NaN when there are no
        neighbours or ``own_degree`` is 0, because the ratios are undefined.
    """
    n_neighbors = len(neighbor_counts)
    if n_neighbors == 0 or own_degree == 0:
        # Nothing to compare against: report "undefined" instead of dividing by zero.
        return float('nan'), float('nan')

    mean_ratio = sum(neighbor_counts) / (own_degree * n_neighbors)
    n_similar = sum(1 for count in neighbor_counts
                    if own_degree / 2 <= count <= own_degree * 2)
    return float(mean_ratio), n_similar / n_neighbors


def node_calculations(file, ID):
    """Compute graph metrics for one account from a pickled edge list.

    The pickle is read into a DataFrame, a directed graph is built with one
    edge ``originating_id -> receiving_id`` per row, and the metrics below are
    evaluated for the node ``ID``.

    Parameters
    ----------
    file : str or path-like
        Path to a pickle readable by ``pd.read_pickle``.  The frame must have
        the columns ``originating_id``, ``receiving_id``,
        ``originating_follower_count``, ``originating_following_count``,
        ``receiving_follower_count`` and ``receiving_following_count``.
    ID : hashable
        Account ID of the node to analyse.

    Returns
    -------
    tuple
        ``(indeg, outdeg, clustercoeff, pagerank, indeg_Assort1,
        outdeg_Assort1, indeg_Assort2, outdeg_Assort2)``:

        * ``indeg`` / ``outdeg`` -- in- and out-degree of ``ID`` in the graph.
        * ``clustercoeff`` -- ``nx.clustering`` of ``ID``.
        * ``pagerank`` -- PageRank of ``ID`` over the whole graph.
        * ``*_Assort1`` -- mean stored count of the neighbours divided by the
          degree of ``ID`` (predecessors' ``'in'`` counts vs. ``indeg``,
          successors' ``'out'`` counts vs. ``outdeg``).
        * ``*_Assort2`` -- share of those neighbours whose stored count is
          between half and twice the degree of ``ID``.

        The four homophily values are NaN when ``ID`` has no neighbours in the
        corresponding direction.

    Raises
    ------
    KeyError
        If a neighbour of ``ID`` has no stored follower/following counts.
    """
    # SECURITY: unpickling can execute arbitrary code -- only load trusted files.
    full_list = pd.read_pickle(file)

    # Per-account counts.  A row with a missing originating_following_count
    # supplies the receiving account's counts; every other row supplies the
    # originating account's.  Later rows overwrite earlier ones.
    # The columns are zipped instead of using iterrows(): iterrows() coerces
    # each row to a single dtype, which would turn integer IDs into floats
    # (and round IDs above 2**53) whenever a count column holds NaN.
    attr = {}
    rows = zip(full_list['originating_id'],
               full_list['receiving_id'],
               full_list['originating_follower_count'],
               full_list['originating_following_count'],
               full_list['receiving_follower_count'],
               full_list['receiving_following_count'])
    for orig_id, recv_id, orig_in, orig_out, recv_in, recv_out in rows:
        if pd.isna(orig_out):
            attr[recv_id] = {'in': recv_in, 'out': recv_out}
        else:
            attr[orig_id] = {'in': orig_in, 'out': orig_out}

    G = nx.from_pandas_edgelist(full_list, 'originating_id', 'receiving_id',
                                create_using=nx.DiGraph)
    nx.set_node_attributes(G, attr)

    indeg = G.in_degree(ID)
    outdeg = G.out_degree(ID)
    clustercoeff = nx.clustering(G, ID)
    pagerank = nx.pagerank(G)[ID]

    # "Local homophily" of ID's degree structure, measured in two ways.
    # NOTE(review): the neighbours' counts come from the stored account
    # metadata while ID's own degree is measured in this graph -- confirm
    # that mixing the two scales is intended.
    following_counts = [attr[node]['out'] for node in G.successors(ID)]
    follower_counts = [attr[node]['in'] for node in G.predecessors(ID)]

    outdeg_Assort1, outdeg_Assort2 = _local_homophily(following_counts, outdeg)
    indeg_Assort1, indeg_Assort2 = _local_homophily(follower_counts, indeg)

    return (indeg, outdeg, clustercoeff, pagerank,
            indeg_Assort1, outdeg_Assort1, indeg_Assort2, outdeg_Assort2)

In [118]:
# Account under study and the pickled edge list collected around it.
ID = 456361810
file = 'human/edgelist_2_deg_456361810.pkl'

# Compute all eight metrics once, then expose each under its own name.
_metrics = node_calculations(file, ID)
(indeg, outdeg, clustercoeff, pagerank,
 indeg_A1, outdeg_A1, indeg_A2, outdeg_A2) = _metrics

# Show everything except the two raw degrees (the first two entries).
print(list(_metrics[2:]))

[0, 0.0715102154241595, 17.142857142857142, 15.276643990929704, 0.2857142857142857, 0.0]
