# Load Karate Club data

In [1]:
import pandas as pd #CSV file handler
# Read the edge list and the node-attribute table from CSV into DataFrames.
df_edge_list, df_node_attributes = (
    pd.read_csv(csv_name) for csv_name in ('EdgeList.csv', 'NodeAttributes.csv')
)

## Construct the network in `NetworkX`

### Given complete edge info from a spreadsheet, we shall generate a Graph object from edge_list 
Note: there are other ways of generating the graph. With a fully populated `edge_list`, adding all the edges takes a single line.

In [2]:
import networkx as nx
# Start from an empty undirected graph; its edges are added in a later cell.
G = nx.Graph()

In [3]:
# Preview the first rows of the edge list to see its column names.
df_edge_list.head()

Unnamed: 0,Subject1,NamedFriend
0,ID2,ID1
1,ID2,ID3
2,ID2,ID4
3,ID2,ID8
4,ID2,ID14


In [4]:
# Collect one [Subject1, NamedFriend] pair per row of the edge-list table.
# Selecting the two columns and converting them in one step avoids a slow
# row-by-row `iterrows()` loop and yields the same list of two-element lists.
edge_list = df_edge_list[['Subject1', 'NamedFriend']].values.tolist()

In [5]:
# Add every edge in edge_list to G; the endpoint nodes are created automatically.
G.add_edges_from(edge_list)

### Add Node attributes
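* Note, use `G.nodes()` to get the full list of nodes,
* And, use `G.nodes['NodeLabel']` to directly access a node's attribute dictionary (written `G.node['NodeLabel']` in older NetworkX; that spelling was removed in version 2.4).
    * With `G.nodes['NodeLabel']['attribute_name']`, one can directly assign a value to `attribute_name` for the node.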

In [6]:
# First, here are all our nodes.
# Wrapping the call in list() shows a plain list on every NetworkX version
# (2.x and later return a NodeView from G.nodes() rather than a list).
list(G.nodes())

['ID2',
 'ID1',
 'ID3',
 'ID4',
 'ID8',
 'ID14',
 'ID18',
 'ID20',
 'ID22',
 'ID31',
 'ID5',
 'ID6',
 'ID7',
 'ID9',
 'ID11',
 'ID12',
 'ID13',
 'ID32',
 'ID10',
 'ID28',
 'ID29',
 'ID33',
 'ID17',
 'ID34',
 'ID26',
 'ID24',
 'ID25',
 'ID30',
 'ID27',
 'ID15',
 'ID16',
 'ID19',
 'ID21',
 'ID23']

In [7]:
# To add an "Age" attribute to a node, look the node up first and then
# assign into its attribute dict.
# G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
G.nodes['ID2']['Age'] = 1

In [8]:
# Show the attribute dict stored on node 'ID2'.
# G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
G.nodes['ID2']

{'Age': 1}

In [9]:
# Now we can assign node attributes for all nodes.
# Preview the attribute table first: each node has two attributes (Age, RandAttribute).
df_node_attributes.head()

Unnamed: 0,ID,Age,RandAttribute
0,ID2,22,1.232323
1,ID1,77,1.787879
2,ID3,13,1.141414
3,ID4,37,1.383838
4,ID5,20,1.212121


In [10]:
# Copy Age and RandAttribute from the table onto the matching graph node.
# Iterating the three columns in lockstep avoids the slower `iterrows()`.
# G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
for node_id, age, rand_attr in zip(
    df_node_attributes['ID'],
    df_node_attributes['Age'],
    df_node_attributes['RandAttribute'],
):
    G.nodes[node_id]['Age'] = age
    G.nodes[node_id]['RandAttribute'] = rand_attr

In [11]:
# Show the attribute dict stored on node 'ID2' after the bulk assignment.
# G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
G.nodes['ID2']

{'Age': 22, 'RandAttribute': 1.2323232323232325}

## Calculate the average of my friends' age

### To start with, who are my friends?
For additional syntax for getting the neighbor list, see: [networkx.Graph.neighbors — NetworkX](https://networkx.org/documentation/latest/reference/classes/generated/networkx.Graph.neighbors.html)

In [18]:
# Ask the graph directly for the nodes adjacent to 'ID2'.
# list() materialises the result so it displays as a list on every NetworkX
# version (2.x and later return an iterator from G.neighbors()).
list(G.neighbors('ID2'))

['ID1', 'ID3', 'ID4', 'ID8', 'ID14', 'ID18', 'ID20', 'ID22', 'ID31']

### Then, calculate the average age of my friends

In [19]:
import numpy as np

# Look up the Age attribute of each of ID2's neighbors, then average them.
# G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
neighbor_ages = [G.nodes[neighborID]['Age'] for neighborID in G.neighbors('ID2')]
np.mean(neighbor_ages)

43.55555555555556

### Populate the mean age of friends for all nodes

In [21]:
# List the columns currently present in the node-attribute table.
df_node_attributes.columns

Index(['ID', 'Age', 'RandAttribute'], dtype='object')

In [23]:
import numpy as np

# This combines NetworkX with pandas
def calc_neighbor_age(row, G):
    """Return the mean 'Age' of the neighbors of the node named in ``row``.

    Parameters
    ----------
    row : mapping-like (e.g. one row of a DataFrame)
        Must provide the node label under the key ``'ID'``.
    G : graph
        The previously built NetworkX graph; every neighbor node must carry
        an ``'Age'`` attribute.

    Returns
    -------
    float
        Average age over the node's neighbors, or NaN when it has none.
    """
    ego_id = row['ID']
    # G.nodes[...] is used because the older G.node[...] was removed in NetworkX 2.4.
    neighbor_ages = [
        G.nodes[neighbor_id]['Age'] for neighbor_id in G.neighbors(ego_id)
    ]
    if not neighbor_ages:
        # np.mean([]) would also give NaN, but only after emitting a RuntimeWarning.
        return np.nan
    return np.mean(neighbor_ages)
    
# Apply row-wise (axis=1) so each call receives one node's row; G is forwarded
# to calc_neighbor_age as its second argument through `args`.
df_node_attributes['avg_friend_age'] = df_node_attributes.apply(
    calc_neighbor_age, axis=1, args=(G,)
)

In [24]:
# Display the attribute table, which now includes the avg_friend_age column.
df_node_attributes

Unnamed: 0,ID,Age,RandAttribute,avg_friend_age
0,ID2,22,1.232323,43.555556
1,ID1,77,1.787879,40.8125
2,ID3,13,1.141414,48.8
3,ID4,37,1.383838,42.0
4,ID5,20,1.212121,56.666667
5,ID6,68,1.69697,56.0
6,ID7,62,1.636364,54.75
7,ID8,65,1.666667,37.25
8,ID9,88,1.89899,46.8
9,ID10,53,1.545455,52.5
