# NetworkX

In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import adjusted_rand_score

# Load the football network. label='id' keys each node by its integer GML id;
# the team name stays available as the node's 'label' attribute.
G_football = nx.read_gml('Data/football1.gml', label='id')

# Accessing nodes, number of nodes, and edges

In [2]:
# Look up a node by its integer id; the result is that node's attribute dict.
G_football.nodes[1]

{'label': 'FloridaState',
 'wins': 11,
 'losses': 2,
 'conference': 'Atlantic Coast'}

In [3]:
# Read a single attribute (here the team name) from node 1.
G_football.nodes[1]['label']

'FloridaState'

In [4]:
# Number of nodes (one per team in the football network)
G_football.number_of_nodes()

115

In [5]:
# Number of edges (one per pair of teams that played each other)
G_football.number_of_edges()

613

# Computing Correlation Coefficients

In [6]:
# Toy data with an exact linear relationship (y = 2 * X), so the correlation
# coefficient should come out as 1 up to floating-point rounding.
X = [1, 2, 3, 4, 5]
y = [2 * value for value in X]

# np.corrcoef returns the correlation matrix of the two series; the
# off-diagonal entry is the coefficient between X and y.
correlation_matrix = np.corrcoef(X, y)
correlation_coefficient = correlation_matrix[0][1]
print(f'Correlation Coefficient: {correlation_coefficient}')

Correlation Coefficient: 0.9999999999999999


In [7]:
# Full matrix returned by np.corrcoef; entry [0, 1] is the coefficient printed above.
correlation_matrix

array([[1., 1.],
       [1., 1.]])

# Assortativity Coefficients

### Attribute Assortativity

This coefficient represents the tendency of nodes with the same attributes to form edges between one another. In this case, since we select "conference," we obtain a measure representing the tendency of universities in the same conference to play one another.

In [8]:
# Assortativity over the categorical 'conference' attribute of each node.
nx.attribute_assortativity_coefficient(G_football, attribute="conference")

0.6275381679111909

### Numeric Assortativity


The numeric assortativity coefficient represents the tendency of nodes with similar numeric attribute values to form edges with each other. In the context of the football dataset, since we selected the attribute 'wins', the coefficient tells us the tendency of teams with similar win counts to play against each other.

In [9]:
# Assortativity over the integer-valued 'wins' attribute of each node.
nx.numeric_assortativity_coefficient(G_football, attribute="wins")

-0.049806582644503085

### Degree Assortativity

The degree assortativity coefficient represents the tendency of nodes with similar degrees to form edges with one another. In the context of the football dataset, this tells us the tendency of teams to play other teams with a similar number of total games played.

In [10]:
# No attribute argument here: the node degrees themselves are compared.
nx.degree_assortativity_coefficient(G_football)

0.16244224957444287

## Working with Digraphs

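When working with digraphs, every node has both an in-degree and an out-degree, so we can compute degree assortativity using either one at each end of an edge. In `nx.degree_assortativity_coefficient`, `x` selects the degree type used for the source node of each edge and `y` selects the degree type used for the target node.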

1. In-Degree Assortativity (x='in', y='in')

Measures correlation between the in-degrees of connected nodes
Asks: "Do nodes with many incoming edges tend to connect to other nodes with many incoming edges?"

2. Out-Degree Assortativity (x='out', y='out')

Measures correlation between the out-degrees of connected nodes
Asks: "Do nodes with many outgoing edges tend to connect to other nodes with many outgoing edges?"

3. Out-In Degree Assortativity (x='out', y='in')

Measures correlation between a node's out-degree and its neighbors' in-degrees
Asks: "Do nodes that send many connections tend to connect to nodes that receive many connections?"

In [11]:
# Small directed example: 1 -> 2 -> 3 -> {1, 4} and 4 -> 2.
edges = [("1", "2"), ("2", "3"), ("3", "1"), ("3", "4"), ("4", "2")]
G = nx.DiGraph(edges)

# Each (x, y) pair chooses which degree is read at the source (x) and at the
# target (y) of every edge: in/in, out/out, and out/in respectively.
degree_kinds = [('in', 'in'), ('out', 'out'), ('out', 'in')]
in_degree_assortativity, out_degree_assortativity, out_in_assortativity = [
    nx.degree_assortativity_coefficient(G, x=x_kind, y=y_kind)
    for x_kind, y_kind in degree_kinds
]

print(f"In-Degree Assortativity: {in_degree_assortativity}")
print(f"Out-Degree Assortativity: {out_degree_assortativity}")
print(f"Out-In Degree Assortativity: {out_in_assortativity}")

In-Degree Assortativity: -0.4082482904638645
Out-Degree Assortativity: -0.4082482904638645
Out-In Degree Assortativity: -0.6666666666666686


# Structural Holes

### Constraint

In [12]:
# nx.constraint returns a dict mapping each node id to its constraint score.
constraints = nx.constraint(G_football)

# Walk the nodes in the dict's own order and print one "id : score" line each.
for node_id in constraints:
    constraint_value = constraints[node_id]
    print(f'{node_id} : {constraint_value}')

0 : 0.15866887697117074
1 : 0.18433120663707786
2 : 0.15886454507193143
3 : 0.15275783402203855
4 : 0.17942117982563763
5 : 0.16208239185485607
6 : 0.14832231723803693
7 : 0.16843778696051423
8 : 0.18895464790656374
9 : 0.19116700437887515
10 : 0.19056019283746561
11 : 0.16851853446076934
12 : 0.1713481086113662
13 : 0.1671528394387147
14 : 0.18007545658606267
15 : 0.1548315541781451
16 : 0.17938519302719155
17 : 0.16364182774400657
18 : 0.1793112732764713
19 : 0.18591357318489177
20 : 0.16690390000683014
21 : 0.1869044691847096
22 : 0.19868485759169455
23 : 0.1866161890210225
24 : 0.15073731251913072
25 : 0.20558149109426346
26 : 0.17265228558518605
27 : 0.16804585634287614
28 : 0.18398266238894745
29 : 0.19800833276415547
30 : 0.19441499897547984
31 : 0.17266412349557855
32 : 0.16732406711745557
33 : 0.24121673553719017
34 : 0.17516330294155777
35 : 0.1862871388566355
36 : 0.15141666128615702
37 : 0.20986011504982205
38 : 0.15836940234636204
39 : 0.17427469662819026
40 : 0.1630681913

### Effective Size

In [13]:
# nx.effective_size returns a dict mapping each node id to its effective size.
effective_sizes = nx.effective_size(G_football)

# Print one "id : effective size" line per node, in the dict's own order.
for node_id in effective_sizes:
    es = effective_sizes[node_id]
    print(f'{node_id} : {es}')

0 : 8.166666666666666
1 : 7.166666666666667
2 : 7.833333333333333
3 : 8.333333333333334
4 : 6.818181818181818
5 : 8.0
6 : 8.5
7 : 7.666666666666667
8 : 6.2727272727272725
9 : 6.2727272727272725
10 : 6.0
11 : 7.2
12 : 7.0
13 : 7.181818181818182
14 : 6.6
15 : 8.166666666666666
16 : 6.818181818181818
17 : 7.545454545454545
18 : 7.0
19 : 6.636363636363637
20 : 7.363636363636363
21 : 6.454545454545454
22 : 5.909090909090909
23 : 6.454545454545454
24 : 8.0
25 : 5.909090909090909
26 : 7.0
27 : 7.363636363636363
28 : 6.333333333333334
29 : 6.090909090909091
30 : 6.2727272727272725
31 : 7.181818181818182
32 : 7.181818181818182
33 : 4.2
34 : 7.181818181818182
35 : 6.636363636363637
36 : 7.0
37 : 5.7272727272727275
38 : 7.909090909090909
39 : 6.818181818181818
40 : 7.545454545454545
41 : 5.2
42 : 6.142857142857143
43 : 8.09090909090909
44 : 7.363636363636363
45 : 5.7272727272727275
46 : 5.7272727272727275
47 : 7.181818181818182
48 : 7.363636363636363
49 : 5.545454545454546
50 : 5.888888888888889


# Conducting K-Core decomposition

In [14]:
# Loaded without label='id' (unlike the football graph), so nodes are keyed by
# dolphin name rather than by numeric id.
G_dolphins = nx.read_gml("Data/dolphins.gml")

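In `core_numbers`, each key is a dolphin's name (the graph was loaded with the default node labels rather than `label='id'`), and each value is that dolphin's core number: the largest k for which the dolphin belongs to a k-core.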

In [15]:
# nx.core_number returns a dict mapping each node to its core number.
core_numbers = nx.core_number(G_dolphins)

# Print one "name : core number" line per dolphin, in the dict's own order.
for node_id in core_numbers:
    highest_core = core_numbers[node_id]
    print(f'{node_id} : {highest_core}')

Beak : 4
Beescratch : 4
Bumper : 3
CCL : 3
Cross : 1
DN16 : 3
DN21 : 4
DN63 : 4
Double : 4
Feather : 4
Fish : 4
Five : 1
Fork : 1
Gallatin : 4
Grin : 4
Haecksel : 4
Hook : 4
Jet : 4
Jonah : 4
Knit : 4
Kringel : 4
MN105 : 4
MN23 : 1
MN60 : 3
MN83 : 4
Mus : 3
Notch : 3
Number1 : 3
Oscar : 4
Patchback : 4
PL : 4
Quasi : 1
Ripplefluke : 2
Scabs : 4
Shmuddel : 3
SMN5 : 1
SN100 : 4
SN4 : 4
SN63 : 4
SN89 : 2
SN9 : 4
SN90 : 4
SN96 : 4
Stripes : 4
Thumper : 3
Topless : 4
TR120 : 2
TR77 : 4
TR82 : 1
TR88 : 2
TR99 : 4
Trigger : 4
TSN103 : 4
TSN83 : 2
Upbang : 4
Vau : 2
Wave : 2
Web : 4
Whitetip : 1
Zap : 4
Zig : 1
Zipfel : 2
