In [None]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, and pin the version this notebook was last executed with.
%pip install SPARQLWrapper==2.0.0

Collecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting rdflib>=6.1.1 (from SPARQLWrapper)
  Downloading rdflib-7.0.0-py3-none-any.whl.metadata (11 kB)
Collecting isodate<0.7.0,>=0.6.0 (from rdflib>=6.1.1->SPARQLWrapper)
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0 isodate-0.6.1 rdflib-7.0.0


In [None]:
import sys
import json
from SPARQLWrapper import SPARQLWrapper, JSON

# URL of the Wikidata SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# SPARQL query text, sent verbatim to the endpoint (the `#` comments inside the
# string are part of the query and are therefore left untouched).
#
# Shape of the query:
#   * ?software    - every item that is an instance (P31) of wd:Q17155032.
#   * ?subInstance - every item that is an instance (P31) of one of those
#                    ?software items AND whose developer (P178) is one of the
#                    two entities listed in VALUES (annotated in the query as
#                    Microsoft and Adobe).
#   * Optional attributes of ?subInstance: P279 -> ?subclassOf, P361 -> ?partOf,
#     P577 -> ?publicationDate, P750 -> ?distributedBy, P306 -> ?operatingSystem,
#     P277 -> ?programmedIn, P275 -> ?copyrightLicense.
#   * Optional edition block: P747 -> ?edition, with its own optional
#     P577 / P155 / P361 / P277 / P275 values. The rdfs:label triple and the
#     English-language FILTER sit inside this OPTIONAL, so an edition only
#     contributes to a row when it has an English label.
#   * The wikibase:label SERVICE is configured for English ("en"); the other
#     selected *Label variables are not bound explicitly in the WHERE clause.
#   * Rows are ordered by ?editionLabel.
query = """
SELECT DISTINCT ?softwareLabel
                ?subInstanceLabel
                ?developerLabel
                ?subclassOfLabel
                ?partOfLabel
                ?publicationDate
                ?distributedByLabel
                ?operatingSystemLabel
                ?programmedInLabel
                ?copyrightLicenseLabel
                ?editionLabel
                ?editionPartOfLabel
                ?editionPublicationDate
                ?followsLabel
                ?editionProgrammedInLabel
                ?editionCopyrightLicenseLabel
WHERE {
  # Trova tutte le istanze di software di produttività
  ?software wdt:P31 wd:Q17155032.

  VALUES ?developer { wd:Q2283 wd:Q11463 } # Microsoft e Adobe

  # Trova le sotto-istanze di ciascun software di produttività e sviluppatore
  ?subInstance wdt:P31 ?software;
               wdt:P178 ?developer.

  # Ottieni gli attributi aggiuntivi
  OPTIONAL { ?subInstance wdt:P279 ?subclassOf. }
  OPTIONAL { ?subInstance wdt:P361 ?partOf. }
  OPTIONAL { ?subInstance wdt:P577 ?publicationDate. }
  OPTIONAL { ?subInstance wdt:P750 ?distributedBy. }
  OPTIONAL { ?subInstance wdt:P306 ?operatingSystem. }
  OPTIONAL { ?subInstance wdt:P277 ?programmedIn. }
  OPTIONAL { ?subInstance wdt:P275 ?copyrightLicense. }

  # Per ogni edizione o traduzione ottieni data di pubblicazione e follows
  OPTIONAL {
    ?subInstance wdt:P747 ?edition.
    OPTIONAL { ?edition wdt:P577 ?editionPublicationDate. }
    OPTIONAL { ?edition wdt:P155 ?follows. }
    OPTIONAL { ?edition wdt:P361 ?editionPartOf. }
    OPTIONAL { ?edition wdt:P277 ?editionProgrammedIn. }
    OPTIONAL { ?edition wdt:P275 ?editionCopyrightLicense. }
    ?edition rdfs:label ?editionLabel.
    FILTER(LANG(?editionLabel) = "en")
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?editionLabel
"""

def get_results(endpoint_url, query):
    """Run a SPARQL query against an endpoint and return the converted response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to contact.
        query: SPARQL query text to execute.

    Returns:
        The value produced by ``convert()`` on the query response, with the
        wrapper's return format set to ``JSON``.
    """
    # Advertise the interpreter's major.minor version in the User-Agent string.
    agent_string = "WDQS-example Python/%s.%s" % sys.version_info[:2]

    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()

# Run the query against the endpoint.
results = get_results(endpoint_url, query)

# Variables projected by the SELECT clause, in the order they should appear
# in each saved record.
RESULT_FIELDS = [
    "softwareLabel",
    "subInstanceLabel",
    "developerLabel",
    "subclassOfLabel",
    "partOfLabel",
    "publicationDate",
    "distributedByLabel",
    "operatingSystemLabel",
    "programmedInLabel",
    "copyrightLicenseLabel",
    "editionLabel",
    "editionPartOfLabel",
    "editionPublicationDate",
    "followsLabel",
    "editionProgrammedInLabel",
    "editionCopyrightLicenseLabel",
]

# File the flattened records are written to; the confirmation message below
# reports this same path so the two can no longer drift apart.
OUTPUT_FILE = 'software_kb.json'

# Extract the main information from the results: flatten every binding to
# {variable: value}.
extracted_results = []
for result in results["results"]["bindings"]:
    extracted_result = {
        field: result.get(field, {}).get("value", None)
        for field in RESULT_FIELDS
    }
    # Drop the keys whose value is None (variables unbound in this row).
    filtered_result = {k: v for k, v in extracted_result.items() if v is not None}
    extracted_results.append(filtered_result)

with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    json.dump(extracted_results, f, ensure_ascii=False, indent=4)

print(f"I risultati filtrati sono stati salvati nel file '{OUTPUT_FILE}'.")


I risultati filtrati sono stati salvati nel file 'software_kb_filtered.json'.


In [None]:
import json

def create_node(nodes, identity_counter, label, properties):
    """Return the identity of the node matching ``label``/``properties``, adding it if absent.

    Args:
        nodes: list of node dicts; a new node is appended to it in place.
        identity_counter: identity given to the node if one has to be created.
        label: node label to match or assign.
        properties: dict of node properties to match or assign.

    Returns:
        The identity of the first existing node with an equal label and equal
        properties, otherwise ``identity_counter`` (the newly added node).
    """
    # Look for the first node that already carries the same label and properties.
    existing = next(
        (
            candidate
            for candidate in nodes
            if candidate['label'] == label and candidate['properties'] == properties
        ),
        None,
    )
    if existing is not None:
        return existing['identity']

    # No match: register a new node under the supplied identity.
    nodes.append({
        "jtype": "node",
        "identity": identity_counter,
        "label": label,
        "properties": properties,
    })
    return identity_counter

def create_relationship(relationships, subject, object, name):
    """Append a relationship to ``relationships`` unless an identical one is present.

    Args:
        relationships: list of relationship dicts, extended in place.
        subject: identity of the source node.
        object: identity of the target node.
        name: relationship name.

    Returns:
        None. Two relationships count as identical when subject, object and
        name are all equal.
    """
    # Skip the insert when the same (subject, object, name) triple already exists.
    already_present = any(
        known['subject'] == subject and known['object'] == object and known['name'] == name
        for known in relationships
    )
    if already_present:
        return

    relationships.append({
        "jtype": "relationship",
        "subject": subject,
        "object": object,
        "name": name,
        "properties": {},
    })

def convert_kb(input_file, output_file):
    """Convert the flat software knowledge base into a node/relationship file.

    The input is a JSON array of dicts keyed by the query's variable names
    (``subInstanceLabel``, ``developerLabel``, ``editionLabel``, ...). Each
    entry yields a "Software" node plus, for every optional key present,
    a related node and a relationship to it. The output holds one JSON object
    per line: all nodes first, then all relationships.

    Args:
        input_file: path of the JSON file to read (decoded as UTF-8).
        output_file: path of the line-delimited JSON file to write; an
            existing file is overwritten.
    """
    # (entry key, label of the related node, relationship name) for links that
    # start from the main software node. Order matters: it fixes the order in
    # which identities are handed out.
    # NOTE(review): distributors are labelled "Software" here - confirm that
    # this is intended.
    software_links = (
        ("developerLabel", "Stakeholder", "developedBy"),
        ("partOfLabel", "Software", "partOf"),
        ("distributedByLabel", "Software", "distributedBy"),
        ("operatingSystemLabel", "OperatingSystem", "executableOn"),
        ("programmedInLabel", "ProgrammingLanguage", "writtenIn"),
    )
    # Same triple layout, for links that start from the edition node.
    edition_links = (
        ("editionPartOfLabel", "Software", "partOf"),
        ("followsLabel", "Software", "follows"),
        ("editionProgrammedInLabel", "ProgrammingLanguage", "writtenIn"),
    )

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which is locale-dependent and may fail on non-ASCII labels.
    with open(input_file, 'r', encoding='utf-8') as f:
        software_kb = json.load(f)

    nodes = []
    relationships = []
    identity_counter = 0

    def add_node(label, properties):
        """Create or reuse a node and return its identity."""
        nonlocal identity_counter
        identity = create_node(nodes, identity_counter, label, properties)
        # The counter advances on every call, even when an existing node was
        # reused, so identities are unique but not necessarily contiguous.
        identity_counter += 1
        return identity

    def link_named_nodes(source_identity, entry, links):
        """Attach one name-only node per link whose key is present in ``entry``."""
        for key, label, relation in links:
            if key in entry:
                target_identity = add_node(label, {"name": entry[key]})
                create_relationship(relationships, source_identity, target_identity, relation)

    def date_part(entry, key):
        """Return the text before the first 'T' of ``entry[key]``, or '' if missing or null."""
        return (entry.get(key) or "").split("T")[0]

    for entry in software_kb:
        software_identity = add_node("Software", {
            "name": entry.get("subInstanceLabel"),
            "license": entry.get("copyrightLicenseLabel"),
            "softwareType": entry.get("softwareLabel"),
            "softwareCategory": entry.get("subclassOfLabel"),
            "presentationDate": date_part(entry, "publicationDate"),
        })
        link_named_nodes(software_identity, entry, software_links)

        if "editionLabel" in entry:
            edition_identity = add_node("Software", {
                "name": entry["editionLabel"],
                "presentationDate": date_part(entry, "editionPublicationDate"),
            })
            create_relationship(relationships, software_identity, edition_identity, "hasEdition")
            link_named_nodes(edition_identity, entry, edition_links)

    # json.dump escapes non-ASCII by default, so the explicit encoding only
    # makes the output independent of the platform default.
    with open(output_file, 'w', encoding='utf-8') as f:
        for record in nodes + relationships:
            json.dump(record, f)
            f.write("\n")

In [None]:
# Turn the flat query results into the line-delimited node/relationship file.
convert_kb(
    input_file='software_kb.json',
    output_file='kb_software_pinto.json',
)