In [22]:
import pandas as pd
import ast
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD

# Load the author and course tables.
df_authors = pd.read_csv('../data/authors/authors_final.csv')
df_courses = pd.read_csv('../data/courses/courses.csv')

# Initialise the RDF graph and bind the prefixes used when serializing.
g = Graph()
DS = Namespace("http://example.org/ds#")
g.bind("ds", DS)
g.bind("rdfs", RDFS)

# Map ORCID -> author row.
# Rows with a missing ORCID are left out: str(NaN) would otherwise become the
# key "nan", and every ORCID-less author would collapse onto the single node
# <http://example.org/ds/nan> when author URIs are built from these keys.
author_info = {
    str(row['orcid']).strip(): row
    for _, row in df_authors.iterrows()
    if pd.notna(row['orcid'])
}

# Optional author columns that are copied as-is into the graph when present:
# (column of df_authors, predicate). 'hindex' is handled separately because it
# is cast to an xsd:integer literal.
AUTHOR_OPTIONAL_FIELDS = [
    ('SSD', DS.hasSSD),
    ('openalex id', DS.hasOpenAlexID),
    ('topics', DS.hasTopic),
    ('past_institutions_id', DS.pastInstitutionsID),
    ('DS Department', DS.hasDSDepartment),
    ('ins_id', DS.hasInstitutionID),
]


def add_author_details(graph, author_uri, info):
    """Attach name, label and all non-missing optional attributes of an author.

    Parameters
    ----------
    graph : graph object that receives the triples (its ``add`` method is used).
    author_uri : subject node of the author.
    info : author record indexable by column name (a row of ``df_authors``).
    """
    name = str(info['Name'])
    graph.add((author_uri, DS.fullName, Literal(name)))
    graph.add((author_uri, RDFS.label, Literal(name)))  # label displayed by RDF tools

    if pd.notna(info['hindex']):
        graph.add((author_uri, DS.hasHIndex,
                   Literal(int(info['hindex']), datatype=XSD.integer)))

    for column, predicate in AUTHOR_OPTIONAL_FIELDS:
        if pd.notna(info[column]):
            graph.add((author_uri, predicate, Literal(info[column])))


# Add courses and the author -> course (ds:teaches) relations.
for _, row in df_courses.iterrows():
    course_name = row['Course']
    course_id = course_name.replace(' ', '_')
    course_uri = URIRef(f"http://example.org/ds/{course_id}")

    g.add((course_uri, RDF.type, DS.Course))
    g.add((course_uri, DS.courseName, Literal(course_name)))
    g.add((course_uri, RDFS.label, Literal(course_name)))

    # The 'orcid' cell holds the textual form of a Python list of ORCIDs.
    try:
        orcid_list = ast.literal_eval(row['orcid'])
    except Exception as e:
        # Best effort: keep the course node, report the problem, go on.
        print(f"⚠️ Errore parsing ORCID per '{course_name}': {e}")
        continue

    for orcid in orcid_list:
        orcid = orcid.strip().replace('"', '').replace("'", "")
        author_uri = URIRef(f"http://example.org/ds/{orcid}")

        # Minimal author description, available even without a df_authors row.
        g.add((author_uri, RDF.type, DS.Author))
        g.add((author_uri, DS.hasORCID, Literal(orcid)))
        g.add((author_uri, DS.teaches, course_uri))

        # Enrich when the ORCID is known in df_authors.
        if orcid in author_info:
            add_author_details(g, author_uri, author_info[orcid])
        else:
            print(f"⚠️ ORCID non trovato in df_authors: {orcid}")

# FINAL STEP: add the authors that do not teach any course.
# This loop must run once, after ALL courses have been processed. It used to be
# nested in the course loop, so it rescanned every author for every course and
# was skipped entirely whenever a course's ORCID list failed to parse.
for orcid, info in author_info.items():
    orcid = orcid.strip()
    author_uri = URIRef(f"http://example.org/ds/{orcid}")

    # No ds:teaches triple for this author -> it has not been described yet.
    if (author_uri, DS.teaches, None) not in g:
        g.add((author_uri, RDF.type, DS.Author))
        g.add((author_uri, DS.hasORCID, Literal(orcid)))
        add_author_details(g, author_uri, info)
            
# --- Add the papers to the graph ---
df_papers = pd.read_csv('../data/papers/papers.csv')  # adjust the path if needed

for _, row in df_papers.iterrows():
    # The DOI is used to build the paper URI; rows without it are skipped.
    if pd.isna(row['doi']):
        continue

    doi = row['doi'].strip()
    paper_uri = URIRef(f"http://example.org/ds/paper/{doi.replace('/', '_')}")

    g.add((paper_uri, RDF.type, DS.Paper))
    g.add((paper_uri, DS.hasDOI, Literal(doi)))

    # Descriptive metadata: a missing cell is simply omitted instead of being
    # written to the graph as a NaN literal.
    for column, predicate in (('title', DS.hasTitle),
                              ('type', DS.hasType),
                              ('topics', DS.hasTopic)):
        if pd.notna(row[column]):
            g.add((paper_uri, predicate, Literal(row[column])))

    # int(NaN) raises ValueError and would abort the whole cell, so the year
    # is only added when it is present.
    if pd.notna(row['year']):
        g.add((paper_uri, DS.hasYear, Literal(int(row['year']), datatype=XSD.gYear)))

    # Link the authors through their ORCIDs ('author_orcids' holds the textual
    # form of a Python list).
    try:
        orcid_list = ast.literal_eval(row['author_orcids'])
    except Exception as e:
        print(f"⚠️ Errore parsing ORCID in paper {doi}: {e}")
        continue

    for orcid in orcid_list:
        if orcid is None or pd.isna(orcid):
            continue
        orcid = orcid.strip()
        if not orcid:
            # An empty ORCID would mint the meaningless node <http://example.org/ds/>.
            continue
        author_uri = URIRef(f"http://example.org/ds/{orcid}")
        g.add((paper_uri, DS.hasAuthor, author_uri))
        g.add((author_uri, DS.authored, paper_uri))  # optional inverse relation

# --- Add the institutions and link them to the authors ---
df_institutions = pd.read_csv('../data/institution/institutions.csv')  # adjust the path if needed

# Map institution ID -> institution row.
# Rows with a missing ID are left out: str(NaN) would become the key "nan",
# creating a bogus <nan> institution that every author with a missing ins_id
# would then be linked to.
institution_info = {
    str(row['ins_id']).strip(): row
    for _, row in df_institutions.iterrows()
    if pd.notna(row['ins_id'])
}

# Add the institutions to the graph.
for ins_id, row in institution_info.items():
    inst_uri = URIRef(ins_id)  # the ID is used directly as the node URI
    g.add((inst_uri, RDF.type, DS.Institution))

    # Missing cells are omitted instead of being stored as NaN literals.
    if pd.notna(row['ins_name']):
        g.add((inst_uri, DS.institutionName, Literal(row['ins_name'])))
        g.add((inst_uri, RDFS.label, Literal(row['ins_name'])))
    if pd.notna(row['ins_type']):
        g.add((inst_uri, DS.institutionType, Literal(row['ins_type'])))
    if pd.notna(row['ins_country']):
        g.add((inst_uri, DS.institutionCountry, Literal(row['ins_country'])))

# Link each author to its institution (only when the ins_id is known).
for orcid, info in author_info.items():
    if pd.isna(info['ins_id']):
        continue  # author without an institution

    orcid = orcid.strip()
    author_uri = URIRef(f"http://example.org/ds/{orcid}")
    ins_id = str(info['ins_id']).strip()

    if ins_id in institution_info:
        g.add((author_uri, DS.hasInstitution, URIRef(ins_id)))




# Serialize the whole graph as Turtle and show only the beginning of it,
# so the cell output stays short.
ttl_output = g.serialize(format="turtle")
ttl_preview = ttl_output[:2000]
print(ttl_preview)


@prefix ds: <http://example.org/ds#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/ds/0000-0001-9601-0403> a ds:Author ;
    rdfs:label "Mirko Cesarini" ;
    ds:fullName "Mirko Cesarini" ;
    ds:hasDSDepartment 1 ;
    ds:hasHIndex 15 ;
    ds:hasInstitution <https://openalex.org/I66752286> ;
    ds:hasInstitutionID "https://openalex.org/I66752286" ;
    ds:hasORCID "0000-0001-9601-0403" ;
    ds:hasOpenAlexID "https://openalex.org/A5049259722" ;
    ds:hasSSD "ING-INF/05" ;
    ds:hasTopic "['Data Quality and Management', 'Semantic Web and Ontologies', 'Data Mining Algorithms and Applications', 'Privacy-Preserving Technologies in Data', 'Advanced Database Systems and Queries', 'Service-Oriented Architecture and Web Services', 'Big Data and Business Intelligence', 'Web Data Mining and Analysis', 'Business Strategy and Innovation', 'Business Process Modeling and Analysis', 'E-Government and Public Serv

In [23]:
# Write the graph to disk in Turtle format; the bare expression at the end
# echoes the destination path as the cell output.
ttl_path = "../data/knowledge_base.ttl"
g.serialize(format="turtle", destination=ttl_path)

ttl_path

'../data/knowledge_base.ttl'

In [17]:
from rdflib import Graph

# SPARQL: every (author name, course name) pair connected by ds:teaches,
# ordered by course name.
authors_by_course_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName ?courseName
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:teaches ?course .
  ?course a ds:Course ;
          ds:courseName ?courseName .
}
ORDER BY ?courseName
"""
results = g.query(authors_by_course_sparql)

# One "author → course" line per solution.
for row in results:
    print(f"{row.authorName} → {row.courseName}")


Paola Rebora → Big Data in Public Health
Fabio Mercorio → Business Intelligence
Claudio Ferretti → Cybersecurity for Data Science
Andrea Maurino → Data Management
Marco Fattore → Data Science Lab
Dario Pescini → Data Science Lab in Biosciences
Enrico Moretto → Data Science Lab in Business and Marketing
Marco Paganoni → Data Science Lab in Environment and Physics
Davide Paolo Bernasconi → Data Science Lab in Medicine
Luca Presotto → Data Science Lab in Medicine
Matteo Palmonari → Data Semantics
Enza Messina → Decision Models
Simone Bianco → Digital Signal and Image Management
Marco Guerzoni → Economics for Data Science
Gianfranco Forte → Financial Markets Analytics
Gianluca Della Vedova → Foundations of Computer Science
Paolo Napoletano → Foundations of Deep Learning
Pier Giovanni Bissiri → Foundations of Probability and Statistics
Davide Chicco → Green Computing
Gabriele Gianini → Green Computing
Gianna Monti → High Dimensional Data Analysis
Fabio Antonio Stella → Machine Learning
Elis

In [11]:
# SPARQL: names of the authors teaching the course "Text Mining and Search".
text_mining_teachers_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:teaches ?course .
  ?course a ds:Course ;
          ds:courseName "Text Mining and Search" .
}
"""
results = g.query(text_mining_teachers_sparql)

# One name per line.
for row in results:
    print(row.authorName)


Gabriella Pasi
Marco Viviani


In [12]:
# SPARQL: name and OpenAlex ID of the authors teaching "Text Mining and Search".
teachers_openalex_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName ?openalexID
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:teaches ?course ;
          ds:hasOpenAlexID ?openalexID .
  ?course a ds:Course ;
          ds:courseName "Text Mining and Search" .
}
"""
results = g.query(teachers_openalex_sparql)

# One "name — OpenAlex ID" line per solution.
for row in results:
    print(f"{row.authorName} — {row.openalexID}")


Gabriella Pasi — https://openalex.org/A5066756066
Marco Viviani — https://openalex.org/A5074645479


In [6]:
# SPARQL: name and ds:hasTopic value of the authors teaching "Text Mining and Search".
teachers_topics_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName ?topics
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:hasTopic ?topics ;
          ds:teaches ?course .
  ?course a ds:Course ;
          ds:courseName "Text Mining and Search" .
}
"""
results = g.query(teachers_topics_sparql)

# One "name — topics" line per solution.
for row in results:
    print(f"{row.authorName} — {row.topics}")


Gabriella Pasi — ['Data Management and Algorithms', 'Semantic Web and Ontologies', 'Information Retrieval and Search Behavior', 'Topic Modeling', 'Recommender Systems and Techniques', 'Web Data Mining and Analysis', 'Multi-Criteria Decision Making', 'Advanced Database Systems and Queries', 'Rough Sets and Fuzzy Logic', 'Advanced Text Analysis Techniques', 'Natural Language Processing Techniques', 'Misinformation and Its Impacts', 'Text and Document Classification Technologies', 'Fuzzy Logic and Control Systems', 'Logic, Reasoning, and Knowledge', 'Image Retrieval and Classification Techniques', 'Spam and Phishing Detection', 'Complex Network Analysis Techniques', 'Data Quality and Management', 'Expert finding and Q&A systems', 'Data Mining Algorithms and Applications', 'Advanced Image and Video Retrieval Techniques', 'Biomedical Text Mining and Ontologies', 'Sentiment Analysis and Opinion Mining', 'Big Data and Business Intelligence']
Marco Viviani — ['Misinformation and Its Impacts', 

In [13]:
# SPARQL: authors whose ds:hasTopic value contains "ontologies"
# (case-insensitive substring match on the literal's string form).
ontology_authors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName ?topic
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:hasTopic ?topic .
  FILTER(CONTAINS(LCASE(STR(?topic)), "ontologies"))
}
"""
results = g.query(ontology_authors_sparql)

# One "name — topic" line per solution.
for row in results:
    print(f"{row.authorName} — {row.topic}")


Mirko Cesarini — ['Data Quality and Management', 'Semantic Web and Ontologies', 'Data Mining Algorithms and Applications', 'Privacy-Preserving Technologies in Data', 'Advanced Database Systems and Queries', 'Service-Oriented Architecture and Web Services', 'Big Data and Business Intelligence', 'Web Data Mining and Analysis', 'Business Strategy and Innovation', 'Business Process Modeling and Analysis', 'E-Government and Public Services', 'Management, Economics, and Public Policy', 'Advanced Graph Neural Networks', 'Recommender Systems and Techniques', 'Complex Network Analysis Techniques', 'Graph Theory and Algorithms', 'E-Learning and Knowledge Management', 'Access Control and Trust', 'Text and Document Classification Technologies', 'Internet Traffic Analysis and Secure E-voting', 'Open Education and E-Learning', 'Sentiment Analysis and Opinion Mining', 'Legal and Labor Studies', 'Digital and Cyber Forensics', 'Mobile Agent-Based Network Management']
Davide Chicco — ['Gene expression a

In [14]:
# SPARQL: same "ontologies" topic filter, restricted to authors whose
# ds:hasDSDepartment value has the string form "1".
ontology_ds_authors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?authorName ?topic
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:hasTopic ?topic ;
          ds:hasDSDepartment ?dept .
  FILTER(CONTAINS(LCASE(STR(?topic)), "ontologies"))
  FILTER(STR(?dept) = "1")
}
"""
results = g.query(ontology_ds_authors_sparql)

# One "name — topic" line per solution.
for row in results:
    print(f"{row.authorName} — {row.topic}")


Mirko Cesarini — ['Data Quality and Management', 'Semantic Web and Ontologies', 'Data Mining Algorithms and Applications', 'Privacy-Preserving Technologies in Data', 'Advanced Database Systems and Queries', 'Service-Oriented Architecture and Web Services', 'Big Data and Business Intelligence', 'Web Data Mining and Analysis', 'Business Strategy and Innovation', 'Business Process Modeling and Analysis', 'E-Government and Public Services', 'Management, Economics, and Public Policy', 'Advanced Graph Neural Networks', 'Recommender Systems and Techniques', 'Complex Network Analysis Techniques', 'Graph Theory and Algorithms', 'E-Learning and Knowledge Management', 'Access Control and Trust', 'Text and Document Classification Technologies', 'Internet Traffic Analysis and Secure E-voting', 'Open Education and E-Learning', 'Sentiment Analysis and Opinion Mining', 'Legal and Labor Studies', 'Digital and Cyber Forensics', 'Mobile Agent-Based Network Management']
Davide Chicco — ['Gene expression a

In [18]:
# SPARQL: every (paper title, author name) pair linked by ds:hasAuthor,
# ordered by title.
papers_authors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?paperTitle ?authorName
WHERE {
  ?paper a ds:Paper ;
         ds:hasTitle ?paperTitle ;
         ds:hasAuthor ?author .
  ?author ds:fullName ?authorName .
}
ORDER BY ?paperTitle
"""
results = g.query(papers_authors_sparql)

# One "title ← author" line per solution.
for row in results:
    print(f"{row.paperTitle} ← {row.authorName}")


A Hodrick–Prescott filter with automatically selected breaks ← Matteo Pelagatti
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Riccardo Berto
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Paolo Napoletano
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Marco Savi
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Marzia Di Filippo
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Alessandro Avellone
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Michael Belingheri
A Mobile App Leveraging Citizenship Engagement to Perfo

In [20]:
# SPARQL: title and DOI of the papers authored by "Fabio Mercorio", ordered by DOI.
mercorio_papers_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?paperTitle ?doi
WHERE {
  ?author a ds:Author ;
          ds:fullName "Fabio Mercorio" ;
          ds:authored ?paper .
  ?paper ds:hasTitle ?paperTitle ;
         ds:hasDOI ?doi .
}
ORDER BY ?doi
"""
results = g.query(mercorio_papers_sparql)

# One "title — DOI" line per solution.
for row in results:
    print(f"{row.paperTitle} — DOI: {row.doi}")


eXDiL: A Tool for Classifying and eXplaining Hospital Discharge Letters — DOI: 10.1007/978-3-030-57321-8_9
NEO: A Tool for Taxonomy Enrichment with New Emerging Occupations — DOI: 10.1007/978-3-030-62466-8_35
TaxoRef: Embeddings Evaluation for AI-driven Taxonomy Refinement — DOI: 10.1007/978-3-030-86523-8_37
Augmenting XAI with LLMs: A Case Study in Banking Marketing Recommendation — DOI: 10.1007/978-3-031-63787-2_11
SEEDOT: Tool for Enhancing Sentiment Lexicon with Machine Learning — DOI: 10.1007/978-3-031-74633-8_28
GraphLMI: A data driven system for exploring labor market information through graph databases — DOI: 10.1007/s11042-020-09115-x
Embeddings Evaluation Using a Novel Measure of Semantic Similarity — DOI: 10.1007/s12559-021-09987-7
ContrXT: Generating contrastive explanations from any text classifier — DOI: 10.1016/j.inffus.2021.11.016
A survey on XAI and natural language explanations — DOI: 10.1016/j.ipm.2022.103111
XAI for myo-controlled prosthesis: Explaining EMG data for

In [21]:
# SPARQL: distinct names of the other authors on papers authored by
# "Matteo Pelagatti", ordered by name.
pelagatti_coauthors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT DISTINCT ?coauthorName
WHERE {
  # Trova paper scritti da Matteo Pelagatti
  ?author a ds:Author ;
          ds:fullName "Matteo Pelagatti" ;
          ds:authored ?paper .

  # Trova altri autori dello stesso paper
  ?paper ds:hasAuthor ?coauthor .
  ?coauthor ds:fullName ?coauthorName .

  # Escludi lui stesso
  FILTER(?coauthor != ?author)
}
ORDER BY ?coauthorName
"""
results = g.query(pelagatti_coauthors_sparql)

# One co-author per line.
for row in results:
    print(f"Coautore: {row.coauthorName}")


Coautore: Alessandro Fassò
Coautore: Elisa Ossola
Coautore: Giacomo Sbrana
Coautore: Gianluca Gucciardi
Coautore: Lucia Parisio
Coautore: Manfred Mudelsee
Coautore: Paolo Maranzano


In [27]:
# SPARQL: co-authors of "Matteo Pelagatti" together with the name of the
# institution they are linked to via ds:hasInstitution, ordered by institution.
coauthor_institutions_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT DISTINCT ?coauthorName ?institutionName
WHERE {
  # Trova paper scritti da Matteo Pelagatti
  ?pelagatti a ds:Author ;
             ds:fullName "Matteo Pelagatti" ;
             ds:authored ?paper .

  # Trova altri autori dello stesso paper
  ?paper ds:hasAuthor ?coauthor .
  ?coauthor ds:fullName ?coauthorName .

  # Collegamento istituzione
  ?coauthor ds:hasInstitution ?inst .
  ?inst ds:institutionName ?institutionName .

  # Escludi Pelagatti stesso
  FILTER(?coauthor != ?pelagatti)
}
ORDER BY ?institutionName
"""
results = g.query(coauthor_institutions_sparql)

# One "co-author — institution" line per solution.
for row in results:
    print(f"{row.coauthorName} — {row.institutionName}")


Giacomo Sbrana — NEOMA Business School
Alessandro Fassò — University of Gastronomic Sciences
Elisa Ossola — University of Milano-Bicocca
Lucia Parisio — University of Milano-Bicocca
Paolo Maranzano — University of Milano-Bicocca
Manfred Mudelsee — University of Potsdam
Gianluca Gucciardi — Università degli Studi del Piemonte Orientale “Amedeo Avogadro”


In [28]:
# SPARQL: names of the institutions whose ds:institutionCountry is "CH".
swiss_institutions_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?institutionName
WHERE {
  ?inst a ds:Institution ;
        ds:institutionCountry "CH" ;
        ds:institutionName ?institutionName .
}
"""
results = g.query(swiss_institutions_sparql)

# One institution per line.
for row in results:
    print(f"Istituzione in CH: {row.institutionName}")


Istituzione in CH: ETH Zurich
Istituzione in CH: Colgate-Palmolive (Switzerland)
Istituzione in CH: University of Applied Sciences and Arts of Southern Switzerland
Istituzione in CH: Università della Svizzera italiana


In [30]:
# SPARQL: pairs (ETH Zurich author, co-author flagged as DS department) that
# share at least one paper.
#
# The department flag is bound to ?dept and compared through STR(): the value
# of ds:hasDSDepartment is stored as a numeric literal, which a triple pattern
# with the plain string literal "1" never matches (the query returned no rows).
eth_ds_collaborations_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT DISTINCT ?authorETHName ?collaboratorDSName
WHERE {
  # Autore affiliato a ETH Zurich
  ?ethAuthor a ds:Author ;
             ds:hasInstitution ?ethInst ;
             ds:fullName ?authorETHName ;
             ds:authored ?paper .
  ?ethInst ds:institutionName "ETH Zurich" .

  # Coautore sullo stesso paper
  ?paper ds:hasAuthor ?collaborator .
  ?collaborator ds:fullName ?collaboratorDSName ;
                ds:hasDSDepartment ?dept .
  FILTER(STR(?dept) = "1")

  # Evita di contare due volte lo stesso autore
  FILTER(?collaborator != ?ethAuthor)
}
ORDER BY ?authorETHName ?collaboratorDSName
"""
results = g.query(eth_ds_collaborations_sparql)

# One "ETH author ⇄ DS collaborator" line per solution.
for row in results:
    print(f"{row.authorETHName} (ETH Zurich) ⇄ {row.collaboratorDSName} (Data Science Dept)")


In [31]:
# SPARQL: distinct names of the authors linked to the institution named "ETH Zurich".
eth_authors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT DISTINCT ?authorName
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:hasInstitution ?inst .
  ?inst ds:institutionName "ETH Zurich" .
}
"""
results = g.query(eth_authors_sparql)

# One author per line.
for row in results:
    print(f"Autore affiliato a ETH Zurich: {row.authorName}")


Autore affiliato a ETH Zurich: Noemi Gozzi
Autore affiliato a ETH Zurich: D. Giardini


In [40]:
# SPARQL: distinct co-authors of the author(s) whose full name contains
# "noemi gozzi" (case-insensitive), ordered by name.
gozzi_coauthors_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT DISTINCT ?coauthorName
WHERE {
  # Trova l'autore Noemi Gozzi
  ?author a ds:Author ;
          ds:fullName ?name ;
          ds:authored ?paper .
  FILTER(CONTAINS(LCASE(STR(?name)), "noemi gozzi"))

  # Trova i coautori sullo stesso paper
  ?paper ds:hasAuthor ?coauthor .
  ?coauthor ds:fullName ?coauthorName .

  FILTER(?coauthor != ?author)
}
ORDER BY ?coauthorName
"""
results = g.query(gozzi_coauthors_sparql)

# One co-author per line.
for row in results:
    print(f"Coautore di Noemi Gozzi: {row.coauthorName}")


Coautore di Noemi Gozzi: Fabio Mercorio
Coautore di Noemi Gozzi: Lorenzo Malandri


In [38]:
# SPARQL: title and DOI of the papers authored by "Noemi Gozzi", ordered by DOI.
gozzi_papers_sparql = """
PREFIX ds: <http://example.org/ds#>

SELECT ?paperTitle ?doi
WHERE {
  ?author a ds:Author ;
          ds:fullName "Noemi Gozzi" ;
          ds:authored ?paper .
  ?paper ds:hasTitle ?paperTitle ;
         ds:hasDOI ?doi .
}
ORDER BY ?doi
"""
results = g.query(gozzi_papers_sparql)

# One "title — DOI" line per solution.
for row in results:
    print(f"{row.paperTitle} — DOI: {row.doi}")


XAI for myo-controlled prosthesis: Explaining EMG data for hand gesture classification — DOI: 10.1016/j.knosys.2021.108053
