In [2]:
import pandas as pd
import networkx as nx
from itertools import combinations
from pyvis.network import Network
import re, os

In [3]:
# Location of the bibliographic export (relative to the notebook's working directory).
INFORMATICA_CSV = './data/informatica.csv'

# Load the raw records; this frame is kept untouched as the reference copy.
df_informatica = pd.read_csv(INFORMATICA_CSV)

# Preview the first rows to sanity-check the columns.
df_informatica.head()

Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,Volume,Issue,Art. No.,Page start,...,ISBN,CODEN,PubMed ID,Language of Original Document,Abbreviated Source Title,Document Type,Publication Stage,Open Access,Source,EID
0,"Mari, M.; Snidaro, L.","Mari, Marco (59388337700); Snidaro, Lauro (650...",59388337700; 6507499895,Survey of Neural Network Approaches to Target ...,2026,Information Fusion,127,,103789.0,,...,,,,English,Inf. Fusion,Article,Final,All Open Access; Hybrid Gold Open Access,Scopus,2-s2.0-105017548985
1,"Mari, M.; Snidaro, L.","Mari, Marco (59388337700); Snidaro, Lauro (650...",59388337700; 6507499895,Ensemble of KalmanNets with innovation-based a...,2026,Information Fusion,127,,103777.0,,...,,,,English,Inf. Fusion,Article,Final,,Scopus,2-s2.0-105017546340
2,"Ceschia, S.; Di Gaspero, L.; Rosati, R.M.; Sch...","Ceschia, Sara (35193496000); Di Gaspero, Luca ...",35193496000; 6505944235; 57406536600; 6701629145,Multi-neighborhood simulated annealing for the...,2026,International Transactions in Operational Rese...,33,1.0,,38.0,...,,,,English,Int. Trans. Oper. Res.,Article,Final,All Open Access; Hybrid Gold Open Access,Scopus,2-s2.0-85211480324
3,"Pagliarini, R.","Pagliarini, Roberto (25229039800)",25229039800,Differential Flux-Balance Analysis Infers Meta...,2026,Lecture Notes in Computer Science,16051 LNCS,,,155.0,...,9789819698936; 9789819698042; 9789819698110; 9...,,,English,Lect. Notes Comput. Sci.,Conference paper,Final,,Scopus,2-s2.0-105022894987
4,"Madni, H.A.; Shujat, H.; de Nardin, A.; Zottin...","Madni, Hussain Ahmad (57195220347); Shujat, Ha...",57195220347; 59705771800; 57259162600; 5795942...,FsBAD: Data-efficient feature reconstruction f...,2026,Pattern Recognition Letters,199,,,113.0,...,,,,English,Pattern Recogn. Lett.,Article,Final,All Open Access; Hybrid Gold Open Access,Scopus,2-s2.0-105021263351


In [4]:
# Work on an independent (deep) copy so the raw frame loaded above is never mutated.
df = df_informatica.copy(deep=True)

# Robust parser: authors are separated by ';' only; commas belong to a single name.
def parse_authors(x):
    """Split an "Author full names" cell into one string per author.

    The cell is expected to look like
    ``"Surname, Given (id); Surname, Given (id); ..."``. Authors are separated
    by ``;`` only. Commas are treated as part of one author's name, so entries
    without a comma (e.g. a consortium name) or with several commas
    (e.g. ``"Smith, J., Jr."``) do not shift or drop the authors that follow.

    Parameters
    ----------
    x : str or missing value
        Raw cell content. ``None``/``NaN`` are treated as "no authors".

    Returns
    -------
    list of str
        One entry per author, in source order, with the pieces of each name
        re-joined as ``"piece, piece"`` (single comma + space). Empty entries
        are skipped.
    """
    if pd.isna(x):
        return []

    authors = []
    for entry in x.split(";"):
        # Normalise spacing around the commas inside one author's name.
        pieces = [piece.strip() for piece in entry.split(",")]
        name = ", ".join(piece for piece in pieces if piece)
        if name:
            authors.append(name)
    return authors

# One list of author strings per publication row.
df["author_list"] = df["Author full names"].map(parse_authors)

In [5]:
# Optional cap on the number of authors per paper. A paper with n authors
# contributes n*(n-1)/2 pairs, so very long author lists grow the edge list
# quadratically. None keeps every paper (the original behaviour); set an
# integer to skip papers with more authors than that.
MAX_AUTHORS_PER_PAPER = None

edges = []

for authors in df["author_list"]:
    # De-duplicate within a paper: a repeated author would otherwise produce
    # a self-loop (a, a) and double-count that paper's pairs.
    unique_authors = sorted(set(authors))
    if len(unique_authors) < 2:
        continue
    if MAX_AUTHORS_PER_PAPER is not None and len(unique_authors) > MAX_AUTHORS_PER_PAPER:
        continue
    # Sorted input makes every pair canonical: (a, b) with a < b.
    edges.extend(combinations(unique_authors, 2))

print("Collaborazioni (con duplicati):", len(edges))


Collaborazioni (con duplicati): 26173614


In [14]:
# Undirected co-authorship graph; "weight" counts how many times a pair occurs in `edges`.
G = nx.Graph()

for u, v in edges:
    # First sighting of a pair creates the edge at weight 0, then every
    # occurrence (including the first) adds one.
    if not G.has_edge(u, v):
        G.add_edge(u, v, weight=0)
    G[u][v]["weight"] += 1

print("Nodi nel grafo:", G.number_of_nodes())
print("Archi nel grafo:", G.number_of_edges())


Nodi nel grafo: 15392
Archi nel grafo: 12638465


In [15]:
# Exact betweenness runs one shortest-path search per node, which did not
# finish on the full graph (the run was interrupted). Above this many nodes
# we estimate it from a fixed-size random sample of source nodes instead.
BETWEENNESS_SAMPLES = 200
# Seed for the sampling so repeated runs give the same estimate.
BETWEENNESS_SEED = 42

degree = nx.degree_centrality(G)
# Store the cheap metric first so it survives even if the slow step is interrupted.
nx.set_node_attributes(G, degree, "degree")

n_nodes = G.number_of_nodes()
betw = nx.betweenness_centrality(
    G,
    # k=None means exact computation; used when the graph is small enough.
    k=BETWEENNESS_SAMPLES if n_nodes > BETWEENNESS_SAMPLES else None,
    seed=BETWEENNESS_SEED,
)
nx.set_node_attributes(G, betw, "betweenness")


KeyboardInterrupt: 

In [None]:
import community as community_louvain

# Louvain is a randomised heuristic: fix the seed so re-running the notebook
# assigns the same community ids to the same nodes.
LOUVAIN_SEED = 42

# Maps each node to an integer community id.
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)
nx.set_node_attributes(G, partition, "community")

In [None]:
# Destination of the interactive visualisation.
OUTPUT_HTML = "html/coauthorship_all_authors.html"

# `Network` is the pyvis class imported at the top of the notebook.
net = Network(height="800px", width="100%", notebook=True)
net.barnes_hut()

# Nodes: size follows degree centrality (floored at 0.01), colour group follows
# the community id. Requires the "degree", "betweenness" and "community" node
# attributes to have been set by the preceding cells.
for node, data in G.nodes(data=True):
    net.add_node(
        node,
        title=f"{node}<br>Degree: {data['degree']:.3f}<br>Betweenness: {data['betweenness']:.3f}",
        value=max(data["degree"], 0.01)*50,
        group=data["community"]
    )

# Edges: thickness follows the number of co-authored papers.
for a, b, data in G.edges(data=True):
    net.add_edge(a, b, value=data["weight"])

# Make sure the output folder exists before writing the HTML file.
os.makedirs(os.path.dirname(OUTPUT_HTML), exist_ok=True)
net.show(OUTPUT_HTML, notebook=False)

"✔ Grafo generato: guarda html/coauthorship_all_authors.html"
