# Load Karate Club data

In [8]:
import pandas as pd #CSV file handler
# Edge table: later cells read its 'Subject1' and 'NamedFriend' columns.
df_edge_list = pd.read_csv(filepath_or_buffer='EdgeList.csv')
# Node table: later cells read its 'ID' and 'Age' columns.
df_node_attributes = pd.read_csv(filepath_or_buffer='NodeAttributes.csv')

In [19]:
# Shape is (rows, columns): there are 34 rows, i.e. 34 nodes/vertices/egos
df_node_attributes.shape

(34, 3)

In [20]:
# Shape is (rows, columns): there are 78 rows, i.e. 78 edges
df_edge_list.shape

(78, 2)

## Load a helper function for igraph
Note: igraph indexes vertices (nodes) in its own "hashed" format, which makes computation **a lot faster**. This comes at a cost in programmer time. The function defined below demonstrates one way to initialize the graph so that each node is assigned a "label".

In [21]:
from igraph import *

def network_from_edge_list(edge_list):
    '''
    Build an igraph ``Graph`` whose vertices are named after the labels
    found in ``edge_list``.

    Parameters
    ----------
    edge_list : iterable of pairs
        Each item is one edge, given as a pair of vertex labels, where each
        node is represented by a string-valued id, e.g.
        edge_list = [
            ('7003', '15720'),
            ('7003', '7121'),
            ('7003', '14046'),
            ('7003', '7364')
        ]
        Labels must be hashable. Any iterable is accepted (it is copied into
        a list once, because it has to be traversed twice).

    Returns
    -------
    Graph
        A graph created with ``Graph()`` defaults, holding one vertex per
        distinct label (in first-seen order, stored in the ``name`` vertex
        attribute) and one edge per item of ``edge_list``.

    Side effects
    ------------
    Prints ``g.summary()`` before returning.
    '''
    # Materialise once: the edges are walked here to collect the labels and
    # again by add_edges() below, which would exhaust a generator.
    edges = list(edge_list)

    # Keep the unique vertexes in first-seen order. The set gives constant-time
    # membership checks; scanning the growing list instead is quadratic.
    seen_labels = set()
    all_vertex_list = []
    for edge_tup in edges:
        for label in (edge_tup[0], edge_tup[-1]):
            if label not in seen_labels:
                seen_labels.add(label)
                all_vertex_list.append(label)

    # Initialize the graph
    # This is an important step, as it labels nodes per its original "label/Index"
    g = Graph()
    g.add_vertices(all_vertex_list)

    # Add the edges (endpoints are given by label, hence the step above)
    g.add_edges(edges)
    print(g.summary())
    return g



In [22]:
# Collect every edge as a [Subject1, NamedFriend] pair of node labels.
# Selecting the two columns and converting them in one step replaces the
# row-by-row `iterrows()` loop, which builds a Series object for every row.
edge_list = df_edge_list[['Subject1', 'NamedFriend']].values.tolist()

In [23]:
# Build the igraph network from the edge pairs (also prints its summary).
G_ig = network_from_edge_list(edge_list=edge_list)

IGRAPH UN-- 34 78 -- 
+ attr: name (v)


In [24]:
# This is a handy way to check if we have built a sensible network:
# ==> the summary should report the expected number of nodes and edges
#     (34 and 78 here, matching the two tables loaded above).
G_ig.summary()

'IGRAPH UN-- 34 78 -- \n+ attr: name (v)'

## Compute average friend attributes based on the network structure
Here, we use only the structure of the graph (who is connected to whom) and compute the average attributes of each node's friends using pandas.

In [26]:
def get_friend_list(graph, row):
    """Return the names of the vertices adjacent to the vertex for ``row``.

    ``row['ID']`` is formatted as a string and looked up through the ``name``
    vertex attribute of ``graph``. The result is the list of ``name`` values
    of that vertex's neighbors, in the order ``neighbors()`` yields them.

    NOTE(review): an ID that is not a vertex name in ``graph`` is not handled
    here -- presumably ``find`` raises in that case; confirm against igraph.
    """
    vertex_name = "{}".format(row['ID'])
    vertex = graph.vs.find(name=vertex_name)

    friend_names = []
    for neighbor in vertex.neighbors():
        friend_names.append(neighbor['name'])
    return friend_names

# Attach each node's neighbour names as a new 'friend_list' column.
# (There should be a native igraph way of doing this :)
df_node_attributes['friend_list'] = df_node_attributes.apply(
    lambda node_row: get_friend_list(G_ig, node_row),
    axis=1,
)

In [28]:
def calc_friend_age_avg(df_node_attributes, row):
    """Return the mean 'Age' of the nodes listed in ``row['friend_list']``.

    ``row['friend_list']`` holds node IDs (the column is generated by the
    previous cell); the rows of ``df_node_attributes`` whose 'ID' appears in
    that list are selected and their 'Age' values averaged.
    """
    is_friend = df_node_attributes['ID'].isin(row['friend_list'])
    friend_ages = df_node_attributes.loc[is_friend, 'Age']
    return friend_ages.mean()
# Average age of each node's friends, stored as a new 'friend_avg_age' column.
df_node_attributes['friend_avg_age'] = df_node_attributes.apply(
    lambda node_row: calc_friend_age_avg(df_node_attributes, node_row),
    axis=1,
)

In [29]:
# Display the node table, now including the derived 'friend_list' and
# 'friend_avg_age' columns added by the cells above.
df_node_attributes

Unnamed: 0,ID,Age,RandAttribute,friend_list,friend_avg_age
0,ID2,22,1.232323,"[ID1, ID3, ID4, ID8, ID14, ID18, ID20, ID22, I...",43.555556
1,ID1,77,1.787879,"[ID2, ID3, ID4, ID8, ID14, ID18, ID20, ID22, I...",40.8125
2,ID3,13,1.141414,"[ID2, ID1, ID4, ID8, ID14, ID9, ID10, ID28, ID...",48.8
3,ID4,37,1.383838,"[ID2, ID1, ID3, ID8, ID14, ID13]",42.0
4,ID5,20,1.212121,"[ID1, ID7, ID11]",56.666667
5,ID6,68,1.69697,"[ID1, ID7, ID11, ID17]",56.0
6,ID7,62,1.636364,"[ID1, ID5, ID6, ID17]",54.75
7,ID8,65,1.666667,"[ID2, ID1, ID3, ID4]",37.25
8,ID9,88,1.89899,"[ID1, ID3, ID31, ID33, ID34]",46.8
9,ID10,53,1.545455,"[ID3, ID34]",52.5
