In [11]:
from matplotlib.font_manager import font_scalings
from pyvis.network import Network
import networkx as nx
import itertools
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import re
from collections import Counter

In [8]:
# Load the full dataset from the CSV file
df = pd.read_csv(filepath_or_buffer="../data/informatica.csv")

In [9]:
def extract_institutions(cell):
    """Split one cell into a list of institution names.

    The cell is converted to a string and split on ";". Each piece is
    stripped of surrounding whitespace and empty pieces are discarded.

    Parameters
    ----------
    cell : object
        A single cell value; missing values (as detected by ``pd.isna``)
        are accepted.

    Returns
    -------
    list of str
        The non-empty, stripped pieces in their original order. An empty
        list is returned when the cell is missing.
    """
    if pd.isna(cell):
        return []

    institutions = []
    for chunk in str(cell).split(";"):
        name = chunk.strip()
        # Skip pieces that are empty after stripping (e.g. from ";;" or a trailing ";")
        if name:
            institutions.append(name)
    return institutions

In [16]:
# Tally how often each institution name appears across all records
counter = Counter(
    name
    for cell in df["Affiliations"]
    for name in extract_institutions(cell)
)

# -----------------------------
# 2) Keep only institutions with at least `min_occ` occurrences
# -----------------------------
min_occ = 10
istituzioni_valid = {name for name in counter if counter[name] >= min_occ}

print(f"Istituzioni presenti almeno {min_occ} volte: {len(istituzioni_valid)}")

Istituzioni presenti almeno 10 volte: 161


In [17]:
# Build an undirected co-occurrence graph: nodes are the retained
# institutions, and an edge's "weight" counts the records in which both
# endpoints appear together.
G_filt = nx.Graph()

for affiliations in df["Affiliations"]:
    # Apply the frequency filter, then drop names repeated within the same
    # record (dict.fromkeys keeps the first occurrence and preserves order).
    # Without this, a record listing an institution twice would produce an
    # (a, a) pair -> self-loop, and would count the same (a, b) pair more
    # than once for a single record.
    inst_list = list(dict.fromkeys(
        inst
        for inst in extract_institutions(affiliations)
        if inst in istituzioni_valid
    ))

    # Fewer than two distinct institutions cannot form an edge
    if len(inst_list) < 2:
        continue

    # Add nodes; nodes already in the graph are left as they are
    G_filt.add_nodes_from(inst_list)

    # Create edges, incrementing the weight of pairs seen before
    for a, b in itertools.combinations(inst_list, 2):
        if G_filt.has_edge(a, b):
            G_filt[a][b]["weight"] += 1
        else:
            G_filt.add_edge(a, b, weight=1)

print("Grafo filtrato creato!")
print(f"Nodi: {G_filt.number_of_nodes()}")
print(f"Archi: {G_filt.number_of_edges()}")

Grafo filtrato creato!
Nodi: 161
Archi: 1988


In [22]:
# Render the filtered institution graph as an interactive pyvis HTML page.
net = Network(height="900px", width="100%", filter_menu=True, bgcolor="white", font_color="black")
net.barnes_hut()  # use pyvis's Barnes-Hut physics configuration with its defaults

# Add nodes: the institution name serves as id, label and hover title
for node in G_filt.nodes():
    net.add_node(
        node,
        label=node,
        title=node,
        size=12,
        color="#4A90E2"
    )

# Add edges: the co-occurrence weight is passed as the edge `value` and
# shown in the hover title; edges without a "weight" attribute fall back to 1
for u, v, data in G_filt.edges(data=True):
    peso = data.get("weight", 1)
    net.add_edge(
        u, v,
        value=peso,
        title=f"Collaborazioni: {peso}"
    )

# Write the HTML file exactly once. The original also called
# net.save_graph(output_path) afterwards, which only re-wrote the same file.
# The confirmation is printed only after the write has completed.
output_path = "grafo_istituzioni_filtrate.html"
net.write_html(output_path)
print("File generato:", output_path)

File generato: grafo_istituzioni_filtrate.html
