In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import collections

In [2]:
df = pd.read_csv('dolphin_edges.csv')
print(df)

      x   y
0     4   9
1     6  10
2     7  10
3     1  11
4     3  11
..   ..  ..
154  46  60
155  33  61
156   3  62
157  38  62
158  54  62

[159 rows x 2 columns]


In [3]:
fd = pd.read_csv('dolphin_nodes.csv')
print(fd)

          name sex
0         Beak   M
1   Beescratch   M
2       Bumper   M
3          CCL   F
4        Cross   M
..         ...  ..
57         Web   M
58    Whitetip   F
59         Zap   U
60         Zig   M
61      Zipfel   M

[62 rows x 2 columns]


In [4]:
# determine the number of nodes in the graph

num_nodes = max(df['x'].max(), df['y'].max())

In [5]:
# create an empty adjacency matrix

adj_matrix = np.zeros((num_nodes, num_nodes))

In [6]:
# iterate over the edges in the dataframe and fill in the adjacency matrix

for index, row in df.iterrows():
    source = row['x']
    target = row['y']
    adj_matrix[source - 1][target - 1] = 1
    adj_matrix[target - 1][source - 1] = 1  # since the graph is undirected

In [7]:
# print the adjacency matrix

print(adj_matrix.shape)

(62, 62)


In [8]:
# save adjacency matrix as Networkx Graph

df = pd.DataFrame(adj_matrix)
G = nx.from_pandas_adjacency(df)

In [9]:
# save Networkx Graph as dictionary of list (input to the function defined below)

graph = nx.to_dict_of_lists(G)
print(graph)

{0: [10, 14, 15, 40, 42, 47], 1: [17, 19, 26, 27, 28, 36, 41, 54], 2: [10, 42, 44, 61], 3: [8, 14, 59], 4: [51], 5: [9, 13, 56, 57], 6: [9, 13, 17, 54, 56, 57], 7: [19, 27, 30, 40, 54], 8: [3, 20, 28, 37, 45, 59], 9: [5, 6, 13, 17, 32, 41, 57], 10: [0, 2, 29, 42, 47], 11: [51], 12: [33], 13: [5, 6, 9, 17, 32, 41, 54, 57], 14: [0, 3, 16, 24, 33, 34, 37, 38, 40, 43, 50, 52], 15: [0, 18, 24, 40, 45, 55, 59], 16: [14, 20, 33, 37, 38, 50], 17: [1, 6, 9, 13, 22, 25, 27, 31, 57], 18: [15, 20, 21, 24, 29, 45, 51], 19: [1, 7, 30, 54], 20: [8, 16, 18, 28, 36, 38, 44, 47, 50], 21: [18, 29, 33, 37, 45, 51], 22: [17], 23: [36, 45, 51], 24: [14, 15, 18, 29, 45, 51], 25: [17, 26, 27], 26: [1, 25, 27], 27: [1, 7, 17, 25, 26], 28: [1, 8, 20, 30, 47], 29: [10, 18, 21, 24, 35, 43, 45, 51, 52], 30: [7, 19, 28, 42, 47], 31: [17], 32: [9, 13, 60], 33: [12, 14, 16, 21, 34, 37, 38, 40, 43, 50], 34: [14, 33, 37, 44, 49], 35: [29], 36: [1, 20, 23, 37, 39, 40, 59], 37: [8, 14, 16, 21, 33, 34, 36, 40, 43, 45, 61]

In [10]:
nx.closeness_centrality(G)

{0: 0.3465909090909091,
 1: 0.3719512195121951,
 2: 0.2824074074074074,
 3: 0.30808080808080807,
 4: 0.24897959183673468,
 5: 0.23828125,
 6: 0.2675438596491228,
 7: 0.3652694610778443,
 8: 0.3630952380952381,
 9: 0.25206611570247933,
 10: 0.3128205128205128,
 11: 0.24897959183673468,
 12: 0.2687224669603524,
 13: 0.27111111111111114,
 14: 0.3765432098765432,
 15: 0.3388888888888889,
 16: 0.32972972972972975,
 17: 0.3096446700507614,
 18: 0.3370165745856354,
 19: 0.3160621761658031,
 20: 0.391025641025641,
 21: 0.3333333333333333,
 22: 0.23735408560311283,
 23: 0.3333333333333333,
 24: 0.3128205128205128,
 25: 0.25523012552301255,
 26: 0.2772727272727273,
 27: 0.3160621761658031,
 28: 0.3652694610778443,
 29: 0.32275132275132273,
 30: 0.32275132275132273,
 31: 0.23735408560311283,
 32: 0.21631205673758866,
 33: 0.3652694610778443,
 34: 0.3160621761658031,
 35: 0.24497991967871485,
 36: 0.4178082191780822,
 37: 0.39869281045751637,
 38: 0.32972972972972975,
 39: 0.33516483516483514,
 40

# Alternatively

In [11]:
# define a function to compute the shortest paths from a given node to all other nodes
def bfs_shortest_paths(graph, start):
    queue = collections.deque([start])
    visited = set([start])
    distances = {start: 0}
    while queue:
        node = queue.popleft()
        for neighbor in graph[node]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
                distances[neighbor] = distances[node] + 1
    return distances


In [12]:
# define a function to compute the closeness centrality of a node
def closeness_centrality_node(graph, node):
    distances = bfs_shortest_paths(graph, node)
    total_distance = sum(distances.values())
    num_nodes = len(distances) - 1  # exclude the starting node
    if num_nodes > 0:
        return (num_nodes-2) / total_distance # excluding the additionally node 0 from the graph
    else:
        return 0.0

In [13]:
# compute the closeness centrality of all nodes in the graph
centrality = {}
for node in graph:
    centrality[node] = closeness_centrality_node(graph, node)

In [14]:
# print the results
for node, centrality in centrality.items():
    print(f"Node {node} has closeness centrality {centrality:.4f}")

Node 0 has closeness centrality 0.3352
Node 1 has closeness centrality 0.3598
Node 2 has closeness centrality 0.2731
Node 3 has closeness centrality 0.2980
Node 4 has closeness centrality 0.2408
Node 5 has closeness centrality 0.2305
Node 6 has closeness centrality 0.2588
Node 7 has closeness centrality 0.3533
Node 8 has closeness centrality 0.3512
Node 9 has closeness centrality 0.2438
Node 10 has closeness centrality 0.3026
Node 11 has closeness centrality 0.2408
Node 12 has closeness centrality 0.2599
Node 13 has closeness centrality 0.2622
Node 14 has closeness centrality 0.3642
Node 15 has closeness centrality 0.3278
Node 16 has closeness centrality 0.3189
Node 17 has closeness centrality 0.2995
Node 18 has closeness centrality 0.3260
Node 19 has closeness centrality 0.3057
Node 20 has closeness centrality 0.3782
Node 21 has closeness centrality 0.3224
Node 22 has closeness centrality 0.2296
Node 23 has closeness centrality 0.3224
Node 24 has closeness centrality 0.3026
Node 25 ha