## KG building 

In [None]:
from rdflib.namespace import RDF, RDFS, XSD, OWL

In [39]:
import pandas as pd
import ast
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD, OWL

# === Load the source tables ===
df_authors = pd.read_csv('../data/authors/authors_final.csv')
df_courses = pd.read_csv('../data/courses/courses.csv')
df_papers = pd.read_csv('../data/papers/papers.csv')
df_wiki = pd.read_csv('../data/institution/institutions_wiki.csv')
df_topic_entities = pd.read_csv('../data/topics/topics.csv')  # columns: topic, wikidata_uri

# === Create the graph and register the prefixes used when serialising ===
g = Graph()
DS = Namespace("http://dsgraph.org/ds#")
for prefix, namespace in (("ds", DS), ("rdfs", RDFS), ("owl", OWL)):
    g.bind(prefix, namespace)

# === Map ORCID -> author row ===
# Rows without an ORCID are left out: str(NaN) would otherwise turn into the
# literal key "nan", and every such author would share one bogus URI.
author_info = {
    str(row['orcid']).strip(): row
    for _, row in df_authors.iterrows()
    if pd.notna(row['orcid']) and str(row['orcid']).strip()
}

# === Map topic label -> local topic URI (linked to Wikidata when known) ===
topic_map = {}
for _, row in df_topic_entities.iterrows():
    # pandas reads an empty cell as NaN (a float); without a label there is
    # nothing to name the node with, so the row is skipped.
    if pd.isna(row["topic"]):
        continue
    topic_label = str(row["topic"]).strip()
    topic_uri = URIRef(f"http://dsgraph.org/ds/topic/{topic_label.replace(' ', '_')}")
    topic_map[topic_label] = topic_uri
    g.add((topic_uri, RDF.type, DS.Topic))
    g.add((topic_uri, RDFS.label, Literal(topic_label)))

    # A topic without a Wikidata match keeps its local node but gets no
    # owl:sameAs link (calling .strip() on NaN would raise AttributeError).
    wikidata_uri = row["wikidata_uri"]
    if pd.notna(wikidata_uri) and str(wikidata_uri).strip():
        g.add((topic_uri, OWL.sameAs, URIRef(str(wikidata_uri).strip())))

# === Add courses, their teachers, and every author from the authors table ===
def _add_author_details(graph, author_uri, orcid, info=None):
    """Add the type/ORCID triples of an author plus any known profile fields.

    ``info`` is the author's row taken from ``author_info`` (None when the
    ORCID is not in the authors table). Fields whose value is NaN are skipped.
    """
    graph.add((author_uri, RDF.type, DS.Author))
    graph.add((author_uri, DS.hasORCID, Literal(orcid)))
    if info is None:
        return

    graph.add((author_uri, DS.fullName, Literal(info['Name'])))
    graph.add((author_uri, RDFS.label, Literal(info['Name'])))
    if pd.notna(info['hindex']):
        graph.add((author_uri, DS.hasHIndex, Literal(int(info['hindex']), datatype=XSD.integer)))

    # Optional columns that are copied verbatim as plain literals.
    optional_fields = (
        ('SSD', DS.hasSSD),
        ('openalex id', DS.hasOpenAlexID),
        ('past_institutions_id', DS.pastInstitutionsID),
        ('DS Department', DS.hasDSDepartment),
        ('ins_id', DS.hasInstitutionID),
    )
    for column, predicate in optional_fields:
        if pd.notna(info[column]):
            graph.add((author_uri, predicate, Literal(info[column])))


for _, row in df_courses.iterrows():
    course_name = row['Course']
    course_id = course_name.replace(' ', '_')
    course_uri = URIRef(f"http://dsgraph.org/ds/{course_id}")
    g.add((course_uri, RDF.type, DS.Course))
    g.add((course_uri, DS.courseName, Literal(course_name)))
    g.add((course_uri, RDFS.label, Literal(course_name)))

    # The 'orcid' column holds a Python-literal list of ORCIDs as text.
    try:
        orcid_list = ast.literal_eval(row['orcid'])
    except (ValueError, SyntaxError, TypeError) as e:
        print(f"⚠️ Errore parsing ORCID per '{course_name}': {e}")
        continue

    for orcid in orcid_list:
        orcid = orcid.strip().replace('"', '').replace("'", "")
        author_uri = URIRef(f"http://dsgraph.org/ds/{orcid}")
        # Teachers missing from the authors table still get type + ORCID.
        _add_author_details(g, author_uri, orcid, author_info.get(orcid))
        g.add((author_uri, DS.teaches, course_uri))

# === Add the authors who do not teach any course ===
for orcid, info in author_info.items():
    author_uri = URIRef(f"http://dsgraph.org/ds/{orcid}")
    if (author_uri, DS.teaches, None) not in g:
        _add_author_details(g, author_uri, orcid, info)

# === Add papers and link them to topics and authors ===
for _, row in df_papers.iterrows():
    # The DOI is the paper's identifier; rows without one cannot be added.
    if pd.isna(row['doi']):
        continue

    doi = row['doi'].strip()
    paper_uri = URIRef(f"http://dsgraph.org/ds/paper/{doi.replace('/', '_')}")
    g.add((paper_uri, RDF.type, DS.Paper))
    g.add((paper_uri, DS.hasDOI, Literal(doi)))
    # Optional metadata: a missing value is skipped instead of being stored
    # as a NaN literal (title/type) or crashing in int() (year).
    if pd.notna(row['title']):
        g.add((paper_uri, DS.hasTitle, Literal(row['title'])))
    if pd.notna(row['year']):
        g.add((paper_uri, DS.hasYear, Literal(int(row['year']), datatype=XSD.gYear)))
    if pd.notna(row['type']):
        g.add((paper_uri, DS.hasType, Literal(row['type'])))

    # --- Topics: either a stringified list or a single topic name ---
    topic_field = row['topics']
    if pd.isna(topic_field):
        topic_list = []
    elif topic_field.startswith('['):
        try:
            topic_list = ast.literal_eval(topic_field)
        except (ValueError, SyntaxError) as e:
            # Report the bad cell rather than silently dropping the topics.
            print(f"⚠️ Errore parsing topic in paper {doi}: {e}")
            topic_list = []
    else:
        topic_list = [topic_field.strip()]

    for topic in topic_list:
        if not isinstance(topic, str) or not topic.strip():
            continue
        topic_clean = topic.strip()
        topic_uri = topic_map.get(topic_clean)
        if topic_uri is None:
            # Topic not present in the topics table: create a local node
            # without a Wikidata link.
            topic_uri = URIRef(f"http://dsgraph.org/ds/topic/{topic_clean.replace(' ', '_')}")
            g.add((topic_uri, RDF.type, DS.Topic))
            g.add((topic_uri, RDFS.label, Literal(topic_clean)))
        g.add((paper_uri, DS.hasTopic, topic_uri))

    # --- Authors: 'author_orcids' holds a stringified list of ORCIDs ---
    try:
        orcid_list = ast.literal_eval(row['author_orcids'])
    except (ValueError, SyntaxError, TypeError) as e:
        print(f"⚠️ Errore parsing ORCID in paper {doi}: {e}")
        continue

    for orcid in orcid_list:
        # Entries may be None/empty for authors without an ORCID.
        if isinstance(orcid, str) and orcid.strip():
            orcid = orcid.strip()
            author_uri = URIRef(f"http://dsgraph.org/ds/{orcid}")
            g.add((paper_uri, DS.hasAuthor, author_uri))
            g.add((author_uri, DS.authored, paper_uri))

# === Add institutions from df_wiki ===
# Rows without an institution id are dropped: str(NaN) is the string "nan",
# which would create a bogus <nan> institution node.
institution_info = {
    str(row['ins_id']).strip(): row
    for _, row in df_wiki.iterrows()
    if pd.notna(row['ins_id']) and str(row['ins_id']).strip()
}

for ins_id, row in institution_info.items():
    inst_uri = URIRef(ins_id)
    g.add((inst_uri, RDF.type, DS.Institution))
    # Descriptive fields are optional; NaN cells are not written as literals.
    if pd.notna(row['ins_name']):
        g.add((inst_uri, DS.institutionName, Literal(row['ins_name'])))
        g.add((inst_uri, RDFS.label, Literal(row['ins_name'])))
    if pd.notna(row['ins_type']):
        g.add((inst_uri, DS.institutionType, Literal(row['ins_type'])))
    if pd.notna(row['ins_country']):
        g.add((inst_uri, DS.institutionCountry, Literal(row['ins_country'])))
    wikidata_id = row['wikidata_id']
    if pd.notna(wikidata_id) and wikidata_id.strip():
        g.add((inst_uri, OWL.sameAs, URIRef(wikidata_id.strip())))

# === Link authors to their institution ===
for orcid, info in author_info.items():
    # An author without an institution id must not match a "nan" key.
    if pd.isna(info['ins_id']):
        continue
    ins_id = str(info['ins_id']).strip()
    if ins_id in institution_info:
        author_uri = URIRef(f"http://dsgraph.org/ds/{orcid}")
        g.add((author_uri, DS.hasInstitution, URIRef(ins_id)))

# === Basic schema-level axioms (class hierarchy, domains, ranges, inverse) ===
ontology_axioms = (
    (DS.Author, RDFS.subClassOf, DS.Person),
    (DS.authored, RDFS.domain, DS.Author),
    (DS.authored, RDFS.range, DS.Paper),
    (DS.hasAuthor, RDFS.domain, DS.Paper),
    (DS.hasAuthor, RDFS.range, DS.Author),
    (DS.authored, OWL.inverseOf, DS.hasAuthor),
    (DS.hasTopic, RDFS.domain, DS.Paper),
    (DS.hasTopic, RDFS.range, DS.Topic),
)
for axiom in ontology_axioms:
    g.add(axiom)

# === Serialise the graph to Turtle: preview on screen, then write to disk ===
preview_length = 2000
ttl_output = g.serialize(format="turtle")
print(ttl_output[:preview_length])  # preview only

ttl_path = "../data/DSkg.ttl"
g.serialize(format="turtle", destination=ttl_path)
print(f"✅ Grafo serializzato in: {ttl_path}")


@prefix ds: <http://dsgraph.org/ds#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ds:authored rdfs:domain ds:Author ;
    rdfs:range ds:Paper ;
    owl:inverseOf ds:hasAuthor .

ds:hasTopic rdfs:domain ds:Paper ;
    rdfs:range ds:Topic .

<http://dsgraph.org/ds/0000-0001-9601-0403> a ds:Author ;
    rdfs:label "Mirko Cesarini" ;
    ds:fullName "Mirko Cesarini" ;
    ds:hasDSDepartment 1 ;
    ds:hasHIndex 15 ;
    ds:hasInstitution <https://openalex.org/I66752286> ;
    ds:hasInstitutionID "https://openalex.org/I66752286" ;
    ds:hasORCID "0000-0001-9601-0403" ;
    ds:hasOpenAlexID "https://openalex.org/A5049259722" ;
    ds:hasSSD "ING-INF/05" ;
    ds:pastInstitutionsID "['https://openalex.org/I4210135780', 'https://openalex.org/I30771326', 'https://openalex.org/I189158943', 'https://openalex.org/I93860229']" ;
    ds:teaches <http://dsgraph.org/ds/Service_Science> .



In [31]:
# Write the current graph to a Turtle file.
# NOTE(review): the reasoner call below is disabled, so no inferred triples are
# materialised before saving; DeductiveClosure / OWLRL_Semantics are not
# imported in this cell (presumably from the owlrl package — confirm).
#DeductiveClosure(OWLRL_Semantics).expand(g)
ttl_path = "../data/DSkg.ttl"
g.serialize(destination=ttl_path, format="turtle")

# Bare expression: shows the output path as the cell result.
ttl_path

'../data/DSkg.ttl'

### Example queries

In [21]:
# Names of everyone who teaches a course and is a ds:Person.
# Instances are only typed ds:Author when the graph is built; ds:Person is
# reachable through the axiom ds:Author rdfs:subClassOf ds:Person. The property
# path follows that hierarchy, so the query also works when no reasoner has
# materialised the ds:Person type.
results = g.query("""
PREFIX ds: <http://dsgraph.org/ds#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?personName
WHERE {
  ?person rdf:type/rdfs:subClassOf* ds:Person ;
          ds:teaches ?course ;
          ds:fullName ?personName .
}
""")

for row in results:
    print(row.personName)


Enrico Moretto
Marco Paganoni
Mirko Cesarini
Fulvia Pennoni
Paolo Napoletano
Davide Chicco
Gianluca Della Vedova
Gianna Monti
Matteo Palmonari
Simone Bianco
Luca Presotto
Enza Messina
Marco Fattore
Davide Paolo Bernasconi
Andrea Maurino
Matteo Pelagatti
Dario Pescini
Fabio Antonio Stella
Marco Guerzoni
Pier Giovanni Bissiri
Gabriele Gianini
Fabio Mercorio
Gabriella Pasi
Marco Viviani
Claudio Ferretti
Gianfranco Forte
Elisabetta Fersini
Michele Ciavotta
Paola Rebora


In [24]:
# Ten (paper URI, author name) pairs for authors that have a full name.
paper_author_query = """
PREFIX ds: <http://dsgraph.org/ds#>
SELECT DISTINCT ?paper ?authorName
WHERE {
  ?paper a ds:Paper ;
         ds:hasAuthor ?author .
  ?author ds:fullName ?authorName .
}
LIMIT 10
"""
results = g.query(paper_author_query)

# Result rows unpack in SELECT order: ?paper, ?authorName.
for paper, author_name in results:
    print(f"{paper} — {author_name}")


http://dsgraph.org/ds/paper/10.1016_j.jhep.2023.09.008 — Riccardo De Carlis
http://dsgraph.org/ds/paper/10.1016_j.jhep.2023.09.008 — Andrea Lauterio
http://dsgraph.org/ds/paper/10.1016_j.jhep.2023.09.008 — Davide Paolo Bernasconi
http://dsgraph.org/ds/paper/10.1016_j.jhep.2023.09.008 — C. Burcin Taner
http://dsgraph.org/ds/paper/10.3390_curroncol28060391 — Nicolò Tamini
http://dsgraph.org/ds/paper/10.3390_curroncol28060391 — Luca Gianotti
http://dsgraph.org/ds/paper/10.3390_curroncol28060391 — Davide Paolo Bernasconi
http://dsgraph.org/ds/paper/10.3390_cancers13071745 — Nicolò Tamini
http://dsgraph.org/ds/paper/10.3390_cancers13071745 — Davide Paolo Bernasconi
http://dsgraph.org/ds/paper/10.3390_cancers13071745 — Lorenzo Ripamonti


In [25]:

# Teacher/course pairs, ordered by course name.
teaching_query = """
PREFIX ds: <http://dsgraph.org/ds#>

SELECT ?authorName ?courseName
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:teaches ?course .
  ?course a ds:Course ;
          ds:courseName ?courseName .
}
ORDER BY ?courseName
"""
results = g.query(teaching_query)

# Print the results (rows unpack in SELECT order: ?authorName, ?courseName)
for author_name, course_name in results:
    print(f"{author_name} → {course_name}")


Paola Rebora → Big Data in Public Health
Fabio Mercorio → Business Intelligence
Claudio Ferretti → Cybersecurity for Data Science
Andrea Maurino → Data Management
Marco Fattore → Data Science Lab
Dario Pescini → Data Science Lab in Biosciences
Enrico Moretto → Data Science Lab in Business and Marketing
Marco Paganoni → Data Science Lab in Environment and Physics
Davide Paolo Bernasconi → Data Science Lab in Medicine
Luca Presotto → Data Science Lab in Medicine
Matteo Palmonari → Data Semantics
Enza Messina → Decision Models
Simone Bianco → Digital Signal and Image Management
Marco Guerzoni → Economics for Data Science
Gianfranco Forte → Financial Markets Analytics
Gianluca Della Vedova → Foundations of Computer Science
Paolo Napoletano → Foundations of Deep Learning
Pier Giovanni Bissiri → Foundations of Probability and Statistics
Davide Chicco → Green Computing
Gabriele Gianini → Green Computing
Gianna Monti → High Dimensional Data Analysis
Fabio Antonio Stella → Machine Learning
Elis

In [26]:
# Teachers of the course named exactly "Text Mining and Search".
course_teachers_query = """
PREFIX ds: <http://dsgraph.org/ds#>

SELECT ?authorName
WHERE {
  ?author a ds:Author ;
          ds:fullName ?authorName ;
          ds:teaches ?course .
  ?course a ds:Course ;
          ds:courseName "Text Mining and Search" .
}
"""
results = g.query(course_teachers_query)

# Each row holds a single value: ?authorName.
for (author_name,) in results:
    print(author_name)


Gabriella Pasi
Marco Viviani


In [27]:
# Every (paper title, author name) pair, ordered by title.
title_author_query = """
PREFIX ds: <http://dsgraph.org/ds#>

SELECT ?paperTitle ?authorName
WHERE {
  ?paper a ds:Paper ;
         ds:hasTitle ?paperTitle ;
         ds:hasAuthor ?author .
  ?author ds:fullName ?authorName .
}
ORDER BY ?paperTitle
"""
results = g.query(title_author_query)

# Rows unpack in SELECT order: ?paperTitle, ?authorName.
for paper_title, author_name in results:
    print(f"{paper_title} ← {author_name}")


A Hodrick–Prescott filter with automatically selected breaks ← Matteo Pelagatti
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Riccardo Berto
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Paolo Napoletano
A LoRa-Based Mesh Network for Peer-to-Peer Long-Range Communication ← Marco Savi
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Marzia Di Filippo
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Alessandro Avellone
A Mobile App Leveraging Citizenship Engagement to Perform Anonymized Longitudinal Studies in the Context of COVID-19 Adverse Drug Reaction Monitoring: Development and Usability Study ← Michael Belingheri
A Mobile App Leveraging Citizenship Engagement to Perfo

In [40]:
from rdflib import Graph, Namespace
from rdflib.namespace import RDFS, OWL

# Namespace of the local vocabulary
DS = Namespace("http://dsgraph.org/ds#")

# Papers tagged with the topic labelled "Computer Vision and Pattern Recognition",
# plus the topic's Wikidata link when one exists.
query = """
PREFIX ds: <http://dsgraph.org/ds#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?paper ?title ?wikidataTopic
WHERE {
  ?paper a ds:Paper ;
         ds:hasTitle ?title ;
         ds:hasTopic ?topic .
  ?topic rdfs:label "Computer Vision and Pattern Recognition" .
  OPTIONAL { ?topic owl:sameAs ?wikidataTopic }
}
"""

# Run the query against the in-memory graph
results = g.query(query)

# Print one record per row (SELECT order: ?paper, ?title, ?wikidataTopic)
for paper, title, wikidata_topic in results:
    print(f"📄 Paper: {title}")
    print(f"🔗 URI: {paper}")
    # The OPTIONAL variable is unbound for topics without a Wikidata link.
    if wikidata_topic:
        print(f"🌐 Wikidata: {wikidata_topic}")
    print("---")


📄 Paper: Shallow Camera Pipeline for Night Photography Enhancement
🔗 URI: http://dsgraph.org/ds/paper/10.1007_978-3-031-43148-7_5
🌐 Wikidata: https://www.wikidata.org/wiki/Q844240
---
📄 Paper: Evaluation of 3D Reconstruction Pipelines Under Varying Imaging Conditions
🔗 URI: http://dsgraph.org/ds/paper/10.1007_978-3-031-43153-1_8
🌐 Wikidata: https://www.wikidata.org/wiki/Q844240
---
📄 Paper: Scalable Residual Laplacian Network for HEVC-compressed Video Restoration
🔗 URI: http://dsgraph.org/ds/paper/10.1145_3727147
🌐 Wikidata: https://www.wikidata.org/wiki/Q844240
---
📄 Paper: Learning Parametric Functions for Color Image Enhancement
🔗 URI: http://dsgraph.org/ds/paper/10.1007_978-3-030-13940-7_16
🌐 Wikidata: https://www.wikidata.org/wiki/Q844240
---
📄 Paper: Deep Residual Autoencoder for Blind Universal JPEG Restoration
🔗 URI: http://dsgraph.org/ds/paper/10.1109_access.2020.2984387
🌐 Wikidata: https://www.wikidata.org/wiki/Q844240
---
📄 Paper: Video Analytics for Understanding Pedestrian

## Institution

In [13]:
import requests

# OpenAlex ID of the University of Milano-Bicocca
openalex_id = "https://openalex.org/I66752286"

# Step 1: fetch the OpenAlex record. The API path is derived from openalex_id
# so the identifier is written in one place only.
openalex_resp = requests.get(
    f"https://api.openalex.org/institutions/{openalex_id.rsplit('/', 1)[-1]}",
    timeout=30,
)
if openalex_resp.status_code != 200:
    print("Errore nella chiamata OpenAlex:", openalex_resp.status_code)
    ror_uri = None
else:
    data = openalex_resp.json()
    # Step 2: take the ROR identifier from the OpenAlex record
    ror_uri = data.get("ror")

if not ror_uri:
    print("Nessun ROR ID trovato.")
else:
    ror_id = ror_uri.strip().split("/")[-1]
    ror_api_url = f"https://api.ror.org/organizations/{ror_id}"

    # Step 3: call the ROR API
    ror_resp = requests.get(ror_api_url, timeout=30)
    if ror_resp.status_code != 200:
        print("Errore nella chiamata ROR:", ror_resp.status_code)
    else:
        ror_data = ror_resp.json()
        wikidata_ids = ror_data.get("external_ids", {}).get("Wikidata", {}).get("all", [])
        if wikidata_ids:
            wikidata_id = wikidata_ids[0]
            print(f"Wikidata ID: {wikidata_id}")
            print("Tripla:")
            # The object IRI needs its closing '>' to be a well-formed triple.
            print(f"<{openalex_id}> owl:sameAs <http://www.wikidata.org/entity/{wikidata_id}> .")
        else:
            print("Nessun Wikidata ID trovato.")


Wikidata ID: Q1073674
Tripla:
<https://openalex.org/I66752286> owl:sameAs <http://www.wikidata.org/entity/Q1073674 .


In [18]:
import pandas as pd
import requests


df = pd.read_csv('../data/institution/institutions.csv')


# Resolve a Wikidata entity URI through the chain OpenAlex -> ROR -> Wikidata
def _wikidata_ids_from_ror(ror_data):
    """Return the Wikidata Q-ids listed in a ROR organisation record, or [].

    Two layouts of ``external_ids`` are accepted: a dict keyed by source name
    (``{"Wikidata": {"all": [...]}}``) and a list of entries tagged with a
    ``type`` field (``[{"type": "wikidata", "all": [...]}]``).
    NOTE(review): the list layout is what the ROR v2 schema is understood to
    return — confirm against the API version actually served.
    """
    external_ids = ror_data.get("external_ids") or {}
    if isinstance(external_ids, dict):
        entries = [external_ids.get("Wikidata") or {}]
    else:
        entries = [
            entry for entry in external_ids
            if isinstance(entry, dict) and str(entry.get("type", "")).lower() == "wikidata"
        ]
    for entry in entries:
        all_ids = entry.get("all") or []
        if isinstance(all_ids, str):
            all_ids = [all_ids]
        if all_ids:
            return list(all_ids)
    return []


def get_wikidata_id_from_openalex(openalex_url, timeout=30):
    """Return the Wikidata entity URI for an OpenAlex institution URL.

    Args:
        openalex_url: OpenAlex institution URL (its last path segment is the
            id sent to the OpenAlex API). Non-string or blank values, such as
            the NaN pandas yields for empty cells, give None.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        ``http://www.wikidata.org/entity/<Q-id>`` for the first Wikidata id in
        the institution's ROR record, or None when any step of the lookup
        fails or yields nothing (best-effort: no exception is propagated for
        network, HTTP-status or response-format problems).
    """
    if not isinstance(openalex_url, str) or not openalex_url.strip():
        return None
    # Extract the short OpenAlex id
    openalex_id = openalex_url.strip().split("/")[-1]
    try:
        openalex_resp = requests.get(
            f"https://api.openalex.org/institutions/{openalex_id}", timeout=timeout
        )
        if openalex_resp.status_code != 200:
            return None
        ror_uri = openalex_resp.json().get("ror")
        if not ror_uri:
            return None
        ror_id = ror_uri.strip().split("/")[-1]
        ror_resp = requests.get(
            f"https://api.ror.org/organizations/{ror_id}", timeout=timeout
        )
        if ror_resp.status_code != 200:
            return None
        wikidata_ids = _wikidata_ids_from_ror(ror_resp.json())
    except (requests.RequestException, ValueError, AttributeError, TypeError):
        # Network failure, invalid JSON, or an unexpected response shape.
        return None
    if wikidata_ids:
        return f"http://www.wikidata.org/entity/{wikidata_ids[0]}"
    return None

# Apply the lookup to every row's ins_id and store the result in a new
# wikidata_id column (the function returns None when nothing is resolved).
df["wikidata_id"] = df["ins_id"].apply(get_wikidata_id_from_openalex)



In [21]:
# Save without the DataFrame index: writing it would add one more unnamed
# index column each time the file is saved and read back.
df.to_csv('../data/institution/institutions_wiki.csv', index=False)

In [20]:
# Show the institutions for which no Wikidata URI was resolved.
df[df["wikidata_id"].isna()]


Unnamed: 0,ins_id,ins_name,ins_type,ins_country,wikidata_id
3,https://openalex.org/I4210110840,Azienda Ospedaliera San Gerardo,healthcare,IT,
5,https://openalex.org/I4210153126,Istituti di Ricovero e Cura a Carattere Scient...,healthcare,IT,
7,https://openalex.org/I4210151645,Policlinico San Matteo Fondazione,healthcare,IT,
9,https://openalex.org/I2277624104,Fondazione Bruno Kessler,funder,IT,
13,https://openalex.org/I4210139705,Ingegneria dei Sistemi (Italy),company,IT,
21,https://openalex.org/I4210105192,United Institute of Informatics Problems,facility,BY,
22,https://openalex.org/I4210117802,Institute of Electronics,nonprofit,BG,
24,,,,,
37,https://openalex.org/I4210125301,Health Awareness (United States),company,US,
62,https://openalex.org/I4210095629,Institute of Molecular Bioimaging and Physiology,facility,IT,


In [22]:
# Display the institutions table, including the new wikidata_id column.
df

Unnamed: 0,ins_id,ins_name,ins_type,ins_country,wikidata_id
0,https://openalex.org/I138689650,University of Padua,funder,IT,http://www.wikidata.org/entity/Q193510
1,https://openalex.org/I4210094195,Azienda Socio Sanitaria Territoriale Grande Os...,healthcare,IT,http://www.wikidata.org/entity/Q3886620
2,https://openalex.org/I4210146710,Mayo Clinic in Florida,healthcare,US,http://www.wikidata.org/entity/Q6797499
3,https://openalex.org/I4210110840,Azienda Ospedaliera San Gerardo,healthcare,IT,
4,https://openalex.org/I66752286,University of Milano-Bicocca,funder,IT,http://www.wikidata.org/entity/Q1073674
...,...,...,...,...,...
122,https://openalex.org/I108290504,University of Pisa,funder,IT,http://www.wikidata.org/entity/Q645663
123,https://openalex.org/I4210156583,Laboratoire d'Informatique de Paris-Nord,facility,FR,http://www.wikidata.org/entity/Q3214424
124,https://openalex.org/I135117807,Université de Sherbrooke,funder,CA,http://www.wikidata.org/entity/Q2579532
125,https://openalex.org/I186771145,Covenant University,funder,NG,http://www.wikidata.org/entity/Q742241


add to graph

In [37]:
from rdflib import Graph, URIRef
from rdflib.namespace import OWL
import pandas as pd

# Load the enriched institutions table (adjust the path if needed)
df = pd.read_csv("../data/institution/institutions_wiki.csv")

# Bare reference to the graph built earlier in the notebook; no new graph is
# created here.
g 

# Add an owl:sameAs link only for rows that carry a non-blank Wikidata URI
for _, record in df.iterrows():
    wikidata_id = record.get("wikidata_id")
    if pd.isna(wikidata_id) or wikidata_id.strip() == "":
        continue
    institution_ref = URIRef(record["ins_id"].strip())
    wikidata_ref = URIRef(wikidata_id.strip())
    g.add((institution_ref, OWL.sameAs, wikidata_ref))


In [31]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Ranking statements of wd:Q1073674 whose ranking-method label contains "QS",
# fetched from the public Wikidata endpoint and ordered by year.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?rank ?year ?rankingLabel
WHERE {
  wd:Q1073674 p:P1352 ?rankStatement .
  ?rankStatement ps:P1352 ?rank .
  ?rankStatement pq:P585 ?year .
  ?rankStatement pq:P459 ?rankingMethod .
  
  ?rankingMethod rdfs:label ?rankingLabel .
  FILTER(LANG(?rankingLabel) = "en")
  FILTER(CONTAINS(?rankingLabel, "QS"))
}
ORDER BY ?year
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Each binding maps a variable name to a dict whose "value" holds the text.
for binding in results["results"]["bindings"]:
    year = binding["year"]["value"]
    rank = binding["rank"]["value"]
    method = binding["rankingLabel"]["value"]
    print(f"Year: {year}, Rank: {rank}, Method: {method}")


Year: 2022-01-01T00:00:00Z, Rank: 450, Method: QS World University Rankings
Year: 2024-01-01T00:00:00Z, Rank: 481, Method: QS World University Rankings
Year: 2025-01-01T00:00:00Z, Rank: 513, Method: QS World University Rankings


In [35]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Location details of wd:Q1073674 from Wikidata; every property is OPTIONAL,
# so a missing one is reported as "N/A" instead of dropping the row.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT ?countryLabel ?adminLabel ?coord ?address ?cap
WHERE {
  OPTIONAL { wd:Q1073674 wdt:P17 ?country . }
  OPTIONAL { wd:Q1073674 wdt:P131 ?admin . }
  OPTIONAL { wd:Q1073674 wdt:P625 ?coord . }
  OPTIONAL { wd:Q1073674 wdt:P6375 ?address . }
  OPTIONAL { wd:Q1073674 wdt:P281 ?cap . }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    # Unbound variables are absent from the binding, hence the "N/A" default.
    country, admin, coord, address, cap = (
        result.get(variable, {}).get("value", "N/A")
        for variable in ("countryLabel", "adminLabel", "coord", "address", "cap")
    )

    print(f"Country: {country}")
    print(f"Administrative area: {admin}")
    print(f"Coordinates: {coord}")
    print(f"Address: {address}")
    print(f"Postal code: {cap}")


Country: Italy
Administrative area: Milan
Coordinates: Point(9.213344 45.518406)
Address: Piazza dell'Ateneo Nuovo 1
Postal code: N/A


## Query con Aggiunta di wikidata

#### Not Federated

In [28]:
from rdflib.namespace import OWL

# Local SPARQL lookup: Wikidata URI of the institution whose
# ds:institutionName contains "tu wien" (case-insensitive match)
query_eth = """
PREFIX ds: <http://dsgraph.org/ds#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT ?wikidata
WHERE {
  ?inst a ds:Institution ;
        ds:institutionName ?name ;
        owl:sameAs ?wikidata .
  FILTER(CONTAINS(LCASE(STR(?name)), "tu wien"))
}
"""

# Only the first match is used; wikidata_uri stays None when nothing matches.
wikidata_uri = None
first_match = next(iter(g.query(query_eth)), None)
if first_match is not None:
    wikidata_uri = str(first_match.wikidata)
    print(f"🔗 Wikidata URI: {wikidata_uri}")




🔗 Wikidata URI: http://www.wikidata.org/entity/Q689400


In [29]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Fetch the ranking statements (p:P1352) of the institution found by the local
# lookup. The messages name the Q-id that is actually queried instead of a
# fixed institution. This query has no pq:P459 filter, so results are not
# limited to QS.
if wikidata_uri:
    wikidata_id = wikidata_uri.split("/")[-1]
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setReturnFormat(JSON)

    sparql.setQuery(f"""
    SELECT ?rank ?year ?rankingLabel
    WHERE {{
      wd:{wikidata_id} p:P1352 ?rankStatement .
      ?rankStatement ps:P1352 ?rank .
      ?rankStatement pq:P585 ?year .

    }}
    ORDER BY DESC(?year)
    """)

    print(f"📡 Interrogazione di Wikidata per {wikidata_id} (ranking)...")
    results = sparql.query().convert()

    print(f"📊 Ranking – {wikidata_id}:")
    for res in results["results"]["bindings"]:
        year = res["year"]["value"][:4]  # keep only the year of the timestamp
        rank = res["rank"]["value"]

        print(f"🗓 {year} → 🏆 Rank: {rank}")
else:
    print("❌ Nessun URI Wikidata trovato per l'istituzione cercata")


📡 Interrogazione di Wikidata per Q689400 (QS Ranking)...
📊 QS World University Rankings – Bicocca:
🗓 2025 → 🏆 Rank: 190
🗓 2024 → 🏆 Rank: 184
🗓 2023 → 🏆 Rank: 179
🗓 2022 → 🏆 Rank: 180
🗓 2021 → 🏆 Rank: 191
🗓 2020 → 🏆 Rank: 192
🗓 2019 → 🏆 Rank: 199
🗓 2018 → 🏆 Rank: 182
🗓 2017 → 🏆 Rank: 183
🗓 2016 → 🏆 Rank: 197
🗓 2015 → 🏆 Rank: 246
🗓 2014 → 🏆 Rank: 264
🗓 2012 → 🏆 Rank: 274


#### Federated ON RDF4J

In [None]:
# Federated query meant to be run on RDF4J (kept here as text, not executed).
# The ds: prefix must be the namespace the graph was built with
# (http://dsgraph.org/ds#), otherwise the local pattern matches nothing.
'''
PREFIX ds: <http://dsgraph.org/ds#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX bd: <http://www.bigdata.com/rdf#>

SELECT ?rank ?year ?rankingLabel WHERE {
  # Trova l'istituzione nel grafo locale
  ?inst a ds:Institution ;
        ds:institutionName ?name ;
        owl:sameAs ?wikidata .
  FILTER(CONTAINS(LCASE(STR(?name)), "tu wien"))

  # Federata su Wikidata
  SERVICE <https://query.wikidata.org/sparql> {
    ?wikidata p:P1352 ?rankStatement .
    ?rankStatement ps:P1352 ?rank .
    ?rankStatement pq:P585 ?year .
    ?rankStatement pq:P459 ?rankingMethod .
    
    ?rankingMethod rdfs:label ?rankingLabel .
    FILTER(LANG(?rankingLabel) = "en")
    FILTER(CONTAINS(LCASE(STR(?rankingLabel)), "qs"))
  }
}
ORDER BY DESC(?year)


'''


## Add Inference

In [89]:
from collections import defaultdict

# === Property axioms: inverses and symmetry ===
g.add((DS.teaches, OWL.inverseOf, DS.taughtBy))
g.add((DS.hasInstitution, OWL.inverseOf, DS.hasMember))
g.add((DS.coAuthorWith, RDF.type, OWL.SymmetricProperty))

# Every lookup below is copied into a list before triples are added, so the
# graph is never modified while one of its own iterators is still running.
# NOTE(review): whether the store tolerates writes during iteration depends
# on the rdflib store implementation — the snapshots make that irrelevant.
paper_authors = {
    paper: list(g.objects(paper, DS.hasAuthor))
    for paper in list(g.subjects(RDF.type, DS.Paper))
}

# === Authors inherit the topics of their papers ===
for paper, authors in paper_authors.items():
    topics = list(g.objects(paper, DS.hasTopic))
    for author in authors:
        for topic in topics:
            g.add((author, DS.hasTopic, topic))

# === Co-authorship: every pair of authors of the same paper, both directions ===
coauthorships = set()
for authors in paper_authors.values():
    for position, a1 in enumerate(authors):
        for a2 in authors[position + 1:]:
            coauthorships.add((a1, a2))
            coauthorships.add((a2, a1))

for a1, a2 in coauthorships:
    g.add((a1, DS.coAuthorWith, a2))

# === Institutions inherit the topics of their affiliated authors ===
affiliations = list(g.triples((None, DS.hasInstitution, None)))
inst_topics = defaultdict(set)
for author, _, inst in affiliations:
    inst_topics[inst].update(g.objects(author, DS.hasTopic))

for inst, topics in inst_topics.items():
    for topic in topics:
        g.add((inst, DS.researchFocus, topic))

# === Courses inherit the topics of their teachers ===
for author, _, course in list(g.triples((None, DS.teaches, None))):
    for topic in list(g.objects(author, DS.hasTopic)):
        g.add((course, DS.relatedToTopic, topic))

# === Authors inherit the country of their institution ===
for author, _, inst in affiliations:
    for country in list(g.objects(inst, DS.institutionCountry)):
        g.add((author, DS.worksInCountry, country))
