In [2]:
import json
import pandas as pd
import numpy as np
import networkx as nx
from pprint import pprint
import matplotlib.pyplot as plt

In [3]:
target = "cs"
G = nx.read_gml("../data/{0}/{0}_coauthorship_network.gml".format(target))
node_attributes = nx.get_node_attributes(G, "affiliation")


---

# Centrality measures

### Degree Centrality

Degree centrality measures the importance of a node in a network based on the number of connections it has.

Interpretation:
The table shows the top 10 nodes with the highest degree centrality in the network.

Implications:
Nodes with higher degree centrality are more connected within the network.
These individuals play a crucial role in maintaining connections across different parts of the network.

Considerations:
The interpretation of degree centrality should be analyzed in conjunction with the specific goals and characteristics of the network.

In [4]:
degree_centrality = nx.degree_centrality(G)
dc_data = pd.DataFrame({"Name": list(degree_centrality.keys()),
                        "Affiliation": [node_attributes[node] for node in degree_centrality.keys()],
                        "DegreeCentrality": list(degree_centrality.values())
                        }).sort_values(by="DegreeCentrality", ascending=False).reset_index(drop=True)

dc_data.head(10)

Unnamed: 0,Name,Affiliation,DegreeCentrality
0,"BONONI, LUCIANO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.142276
1,"SALMON CINOTTI, TULLIO",CENTRO RICERCA SISTEMI ELETTRONICI INGEGN.INF....,0.119919
2,"MARFIA, GUSTAVO",DIPARTIMENTO DELLE ARTI,0.119919
3,"VITALI, FABIO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.111789
4,"DI FELICE, MARCO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.109756
5,"BARTOLINI, ILARIA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.099593
6,"BAZZANI, ARMANDO","DIPARTIMENTO DI FISICA E ASTRONOMIA ""AUGUSTO R...",0.089431
7,"BELLAVISTA, PAOLO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.089431
8,"TORRONI, PAOLO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.081301
9,"ROCCETTI, MARCO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.079268


### Betweenness Centrality

Weighted betweenness centrality measures the influence of a node in facilitating communication between other nodes, considering the weights of edges.

Interpretation:
The table shows the top 10 nodes with the highest weighted betweenness centrality in the network.

Implications:
Nodes with higher weighted betweenness centrality act as bridges, connecting different parts of the network.
These individuals play a crucial role in maintaining efficient communication paths considering edge weights.

In [5]:
weighted_betweenness_centrality = nx.betweenness_centrality(G, weight="weight")

wbc_data = pd.DataFrame({"Name": list(weighted_betweenness_centrality.keys()),
                        "Affiliation": [node_attributes[node] for node in weighted_betweenness_centrality.keys()],
                        "WeightedBetweennessCentrality": list(weighted_betweenness_centrality.values())
                        }).sort_values(by="WeightedBetweennessCentrality", ascending=False).reset_index(drop=True)

wbc_data.head(10)

Unnamed: 0,Name,Affiliation,WeightedBetweennessCentrality
0,"TOMASI, FRANCESCA",DIPARTIMENTO DI FILOLOGIA CLASSICA E ITALIANIS...,0.163943
1,"LOLI PICCOLOMINI, ELENA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.157832
2,"MARFIA, GUSTAVO",DIPARTIMENTO DELLE ARTI,0.154433
3,"BARTOLINI, ILARIA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.140926
4,"ASPERTI, ANDREA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.13909
5,"VITALI, FABIO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.133884
6,"DEGLI ESPOSTI, MIRKO","DIPARTIMENTO DI FISICA E ASTRONOMIA ""AUGUSTO R...",0.133572
7,"BAZZANI, ARMANDO","DIPARTIMENTO DI FISICA E ASTRONOMIA ""AUGUSTO R...",0.125704
8,"MARTINI, SIMONE",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.111018
9,"ROCCETTI, MARCO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.095308


### Closeness Centrality

Weighted closeness centrality measures how easily a node can reach other nodes in the network, considering the weights of the edges.

Interpretation:
The table displays the top 10 nodes with the highest weighted closeness centrality in the network.

Implications:
Nodes with higher weighted closeness centrality are well-connected and can efficiently reach other nodes in the network, considering edge weights.
These individuals play a crucial role in information flow and communication.

In [6]:
weighted_closeness_centrality = nx.closeness_centrality(G, distance="weight")

wcc_data = pd.DataFrame({"Name": list(weighted_closeness_centrality.keys()),
                        "Affiliation": [node_attributes[node] for node in weighted_closeness_centrality.keys()],
                        "WeightedClosenessCentrality": list(weighted_closeness_centrality.values())
                        }).sort_values(by="WeightedClosenessCentrality", ascending=False).reset_index(drop=True)

wcc_data.head(10)

Unnamed: 0,Name,Affiliation,WeightedClosenessCentrality
0,"CONDELLO, FEDERICO",DIPARTIMENTO DI FILOLOGIA CLASSICA E ITALIANIS...,0.223332
1,"MARTINI, SIMONE",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.222826
2,"TOMASI, FRANCESCA",DIPARTIMENTO DI FILOLOGIA CLASSICA E ITALIANIS...,0.221622
3,"ROCCETTI, MARCO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.220529
4,"BARTOLINI, ILARIA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.218764
5,"MARFIA, GUSTAVO",DIPARTIMENTO DELLE ARTI,0.214379
6,"VITALI, FABIO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.212987
7,"DEGLI ESPOSTI, MIRKO","DIPARTIMENTO DI FISICA E ASTRONOMIA ""AUGUSTO R...",0.212527
8,"BONONI, LUCIANO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.210797
9,"ASPERTI, ANDREA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.209898


### Eigenvector centrality

Weighted eigenvector centrality assesses the importance of a node in a network, considering both the network topology and the weights of its connections.

Why it Changes:
Edge Weights Influence Strength:

Nodes with strong connections (lower weights) contribute more to the eigenvector centrality of their neighbors. The weights influence the flow of influence through the network.

Weighted Paths Matter:
Paths with lower weights contribute more to the eigenvector centrality. If a node is connected to nodes with strong connections, its centrality increases.
Higher Eigenvector Centrality for Some Nodes:

Implications:
Nodes with higher weighted eigenvector centrality are influential in the network, considering both the structure and weights of connections.
These individuals play a significant role in information flow and network dynamics

In [7]:
weighted_eigenvector_centrality = nx.eigenvector_centrality(G, weight="weight")

wec_data = pd.DataFrame({"Name": list(weighted_eigenvector_centrality.keys()),
                        "Affiliation": [node_attributes[node] for node in weighted_eigenvector_centrality.keys()],
                        "WeightedEigenvectorCentrality": list(weighted_eigenvector_centrality.values())
                        }).sort_values(by="WeightedEigenvectorCentrality", ascending=False).reset_index(drop=True)

wec_data.head(10)

Unnamed: 0,Name,Affiliation,WeightedEigenvectorCentrality
0,"DI FELICE, MARCO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.568878
1,"BONONI, LUCIANO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.562206
2,"BEDOGNI, LUCA",AFORM - AREA FORMAZIONE E DOTTORATO,0.371572
3,"TROTTA, ANGELO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.256683
4,"SALMON CINOTTI, TULLIO",CENTRO RICERCA SISTEMI ELETTRONICI INGEGN.INF....,0.21937
5,"MONTORI, FEDERICO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.175792
6,"D'ELIA, ALFREDO",CENTRO RICERCA SISTEMI ELETTRONICI INGEGN.INF....,0.136345
7,"SCIULLO, LUCA",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.104689
8,"GIGLI, LORENZO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.065988
9,"BELLAVISTA, PAOLO",DIPARTIMENTO DI INFORMATICA - SCIENZA E INGEGN...,0.062306


---

# Other measures

### Clustering

In [8]:
clustering = nx.clustering(G, weight="weight")
clustering_data = pd.DataFrame.from_dict(clustering, 
                                columns=["Clustering"],
                                orient="index")
clustering_data.sort_values(by=["Clustering"], ascending=False)

Unnamed: 0,Clustering
"MONTALI, MARCO",0.108872
"LUCCHI, ROBERTO",0.091796
"MONTECCHIARI, LEONARDO",0.071099
"BRACUTO, MICHELE",0.064661
"ROUHI, RAHIMEH",0.064291
...,...
"GABBRIELLINI, SIMONE",0.000000
"YAGLIKCI, NEFISE GIZEM",0.000000
"LINARES ZAILA, YISLEIDY",0.000000
"BURATTI, GIACOMO",0.000000


The weighted clustering coefficient measures the tendency of nodes in a network to form clusters or groups, taking into account the strengths of connections.

Interpretation:
The table presents nodes with their corresponding weighted clustering coefficients, indicating the level of clustering around each node.

Implications:
Nodes with higher weighted clustering coefficients have a greater tendency to form tightly-knit clusters in the network.
These individuals may be central in cohesive groups, indicating potential community structures.

Considerations:
A higher weighted clustering coefficient suggests stronger connections within local neighborhoods of nodes.

In [9]:
avg_cohesion = nx.average_clustering(G, weight="weight")
print("Cohesion: ", avg_cohesion)

Cohesion:  0.014078158686333917


The result avg_cohesion = 0.014078158686333917 represents the average clustering coefficient of the graph G when considering edge weights (weight="weight"). Let's break down the interpretation:

Clustering Coefficient:
The clustering coefficient measures the tendency of nodes in a graph to form clusters or groups.

Average Clustering Coefficient:
The average clustering coefficient is the average of the local clustering coefficients across all nodes in the graph.

It ranges from 0 to 1, where:
0 indicates no clustering (nodes are not connected in clusters).
1 indicates maximum clustering (all neighbors of a node are connected to each other).

Interpretation:
In your case, the average clustering coefficient is approximately 0.0141.
This suggests a relatively low level of clustering in the graph when considering edge weights.
Nodes in the graph do not form dense local clusters, indicating a more sparse and interconnected network.

Implications:
Sparse Connectivity: The low average clustering coefficient might indicate that the graph has a sparse or decentralized structure when considering edge weights.

Potential Isolation: Nodes may have connections, but these connections do not necessarily form tight-knit groups or communities.

Considerations:
The interpretation may vary based on the nature and purpose of your network.

Depending on your specific domain or application, a low average clustering coefficient might be expected or might raise further questions about the structure of the graph.

In [18]:
num_connected_components = nx.number_connected_components(G)
print("Connectedness: ", num_connected_components)

Connectedness:  1


Interpretation:
The network consists of 1 connected component.

Observations:
A single connected component implies that all nodes in the network are connected, forming a unified structure.

Implications:
Information and influence can easily flow between any pair of nodes within the network.
The network is highly cohesive and lacks isolated clusters of nodes.

Considerations:
The connectedness of the network may impact information diffusion, collaboration, and overall communication dynamics.
Understanding the connected components helps in assessing the global structure of the network.

In [11]:
def calc_compactness(graph):
    shortest_path_lengths = dict(nx.all_pairs_shortest_path_length(graph))

    total_compactness = 0
    total_pairs = 0

    for source, lengths in shortest_path_lengths.items():
        for target, distance in lengths.items():
            if source != target:
                total_compactness += 1 / distance
                total_pairs += 1

    if total_pairs == 0:
        return 0  # Avoid division by zero

    return total_compactness / total_pairs

compactness = calc_compactness(G)
print("Compactness: ", compactness)

Compactness:  0.28515237325619924


Compactness in a network refers to the efficiency of information flow, indicating how closely connected nodes are to each other.

Interpretation:
The compactness of the network is approximately 0.285.

Observations:
A compactness value closer to 1 suggests a highly interconnected network with efficient information flow.
A lower compactness value, like the one observed, indicates a network with some level of separation between nodes.

Implications:
The network may have distinct clusters or communities that are not fully connected.
Information diffusion may encounter some barriers, affecting the overall efficiency of communication.

Considerations:
Assessing compactness helps in understanding how well the network facilitates the exchange of information.
Depending on the network's purpose, a certain level of separation between nodes may be expected or could be an area for improvement.

In [12]:
transitivity = nx.transitivity(G)
print("Transitivity: ", transitivity)

Transitivity:  0.4332347525249707


Network Transitivity Analysis:
Transitivity in a network measures the likelihood that two nodes connected to the same node are also connected to each other. It provides insights into the clustering tendency of nodes.

Interpretation:
The transitivity of the network is approximately 0.433.

Observations:
Transitivity values range from 0 to 1.
A higher transitivity indicates a higher probability of nodes forming clusters.

Implications:
The network exhibits a moderate level of clustering.
Nodes in the network are somewhat likely to form local clusters or groups.

Considerations:
A higher transitivity might suggest the presence of well-defined communities.
The interpretation may vary based on the specific goals and characteristics of the network.

In [None]:

import matplotlib.colors as mcolors
from matplotlib.cm import ScalarMappable
import json
import pandas as pd
import numpy as np
import networkx as nx
from pprint import pprint
import matplotlib.pyplot as plt2
k_number = nx.k_core(G, None)

node_affiliations = nx.get_node_attributes(k_number, "affiliation")

unique_affiliations = set(node_affiliations.values())
node_labels = nx.get_node_attributes(k_number, "label")

norm = mcolors.Normalize(vmin=0, vmax=len(unique_affiliations))
scalar_map = ScalarMappable(norm=norm, cmap="viridis")
color_palette = {affiliation: scalar_map.to_rgba(i) for i, affiliation in enumerate(unique_affiliations)}

pos = nx.spring_layout(k_number, iterations=100, weight="weight", k=2)
plt.figure(figsize=(30, 30))

# Draw nodes with color, size, and labels for each node in the graph
node_sizes = [100 * k_number.degree(node) for node in k_number.nodes]
node_colors = [color_palette[node_affiliations[node]] for node in k_number.nodes]

nx.draw_networkx_nodes(k_number, pos, node_color=node_colors, node_size=node_sizes, alpha=0.7)
nx.draw_networkx_edges(k_number, pos, alpha=0.7)

# Initialize labels with empty strings for all nodes
labels = {node: node_labels[node] if node_labels.get(node) else '' for node in k_number.nodes}
bbox_props = {'boxstyle': 'round', 'facecolor': 'white', 'edgecolor': 'black', 'linewidth': 1, 'pad': 0.5, 'alpha': 1}

# Label nodes based on certain criteria (e.g., high degree centrality)
selected_nodes = [node for node, degree in k_number.degree() if degree > 10]
labels = {node: node for node in selected_nodes}
nx.draw_networkx_labels(k_number, pos, labels=labels, font_size=14, font_color='black', verticalalignment='top', bbox=bbox_props)

# Display the final graph
plt.show()

In [2]:
core_number = nx.core_number(G)
k_data = pd.DataFrame.from_dict(core_number,
                                    columns=["KCore"],
                                    orient="index")
k_data.sort_values(by=["KCore"], ascending=False)

NameError: name 'nx' is not defined

K-core decomposition is a graph analysis method that identifies the core of a network by iteratively removing nodes with the lowest degrees.

Interpretation:
The K-Core decomposition reveals core numbers for each node, representing the level of connectedness within the network.

Observations:
Nodes in the network are assigned a K-Core value, where higher values indicate a stronger level of connectivity within the core structure.

Implications:
Nodes with higher core numbers play a more central role in maintaining the overall connectivity of the network.
The network's core structure is essential for understanding its robustness and resilience.

Considerations:
K-core decomposition helps identify the backbone of the network.
Core numbers can provide insights into the hierarchical organization of nodes.

Note:
The K-core decomposition results in core numbers ranging from 1 to 22. Higher core numbers indicate nodes with increased connectivity within the core structure, highlighting their importance in maintaining network integrity

In [17]:
communities = nx.algorithms.community.greedy_modularity_communities(G, weight="weight")
community_mapping = {}
for i, community in enumerate(communities):
    for node in community:
        community_mapping[node] = i
        

c_data = pd.DataFrame.from_dict(community_mapping,
                                    columns=["Communities"],
                                    orient="index")
c_data.sort_values(by=["Communities"], ascending=False)

Unnamed: 0,Communities
"FERRETTI, STEFANO",11
"GALLEGUILLOS MICCONO, CRISTIAN ALEJANDRO",11
"BRIDI, THOMAS",11
"BORGHESI, ANDREA",11
"SIRBU, ALINA",11
...,...
"TAMBURINI, FABIO",0
"NAVIGLI, ROBERTO",0
"PEDRETTI, CARLO TEO",0
"CERVONE, LUCA",0


Greedy Modularity Communities:

Community Assignment:
The algorithm assigns nodes to communities based on the concept of modularity, aiming to maximize the quality of community structure.
Each node is placed in the community that results in the highest increase in modularity.

Community Mapping:
The result you provided is a mapping of nodes to their assigned communities, represented by numeric labels (e.g., 14, 13, 12).
Each node is associated with the community to which it belongs.

Interpreting the Results:

Community Labels: Nodes with the same label (e.g., 11) belong to the same community.

Community Sizes: Some communities might have more members than others. For example, nodes labeled 0 may represent smaller or less cohesive communities.

Modularity and Weighted Graphs:

Modularity Definition: Modularity measures the quality of a network's division into communities. Higher modularity values indicate a better community structure.

Weighted Graphs: When the graph is weighted, the strength of connections between nodes is considered. Weighted modularity takes into account both the presence and strength of edges.

Why Weight Changes the Result:

Edge Strength Influence: In a weighted graph, the strength of connections can significantly impact community detection.

Community Formation: Nodes may be more likely to be grouped together if they share strong weighted connections.

Optimization Objective: The algorithm aims to optimize the modularity score by adjusting community assignments. Weighted edges contribute to this optimization differently than unweighted edges.

Greedy Modularity Algorithm:

Basic Idea: Greedy Modularity algorithms iteratively add or remove nodes from communities to maximize the modularity score.

Steps:

Start with each node in its own community.
Greedily merge or split communities to maximize the modularity score.
Repeat until modularity cannot be further improved.
Considerations:

The choice of community detection algorithm depends on the specific characteristics and goals of your network.

The interpretation of community assignments may be context-dependent, and the results should be analyzed in conjunction with domain knowledge.

In [20]:
def homophily(G):
    num_same_ties = 0
    num_diff_ties = 0
    for n1, n2 in G.edges():
        if G.nodes[n1]['affiliation'] == G.nodes[n2]['affiliation']:
            num_same_ties += 1
        else:
            num_diff_ties += 1
    return (num_same_ties / (num_same_ties + num_diff_ties))
print("Homophily:", homophily(G))

Homophily: 0.33466733366683343


Homophily is a measure that assesses the tendency of nodes in a network to connect with others that share similar attributes.

Interpretation:
The calculated homophily coefficient of approximately 0.3347 suggests a moderate level of homophily in the network.

Observations:
The homophily coefficient represents the proportion of edges connecting nodes with similar affiliations compared to all edges in the network.

Implications:
A homophily coefficient greater than 0.5 would indicate a high tendency for nodes to connect with others of the same affiliation.
A coefficient around 0.3347 suggests a moderate, yet discernible, level of homophily.

Considerations:
Homophily coefficients close to 0 indicate a lack of preference for connections between nodes with similar attributes.
Understanding homophily helps in characterizing the social or organizational structure of the network.

Note:
The homophily coefficient of 0.3347 implies that nodes in the network are moderately inclined to form connections with others who share the same affiliation.

In [16]:
prof_list = ['PERONI, SILVIO', 'TOMASI, FRANCESCA', 'VITALI, FABIO', 'PESCARIN, SOFIA', 'GANGEMI, ALDO', 'ITALIA, PAOLA MARIA CARMELA', 'TAMBURINI, FABIO', 'DAQUINO, MARILENA', 'GIALLORENZO, SAVERIO', 'ZUFFRANO, ANNAFELICIA', 'IOVINE, GIULIO', 'BARTOLINI, ILARIA', 'SPEDICATO, GIORGIO', 'PALMIRANI, MONICA', 'BASKAKOVA, EKATERINA', 'FERRIANI, SIMONE']
def affiliation_homophiliy(G, nodes):
    data = []
    for node in nodes:
        affiliation = G.nodes[node]['affiliation']
        neighbors = list(G.neighbors(node))
        total_connections = len(neighbors)
        connections_outside_affiliation = sum(1 for neighbor in neighbors if G.nodes[neighbor]['affiliation'] != affiliation)
        
        ratio = 0 if total_connections == 0 else round(connections_outside_affiliation / total_connections, 2)

        data.append([node, connections_outside_affiliation, affiliation, total_connections, ratio])

    df = pd.DataFrame(data, columns=['name', 'connections_outside_affiliation', 'affiliation', 'total_connections', 'ratio'])
    return df.sort_values(by=['ratio'], ascending=False)
print(affiliation_homophiliy(G, prof_list))


KeyError: 'PESCARIN, SOFIA'