# 2. Units

## Fetch prefixes name list

In [1]:
import requests
from pprint import pprint
import json
import uuid

# Fetch the SI prefix definitions from the SI Digital Framework.
# A timeout keeps the cell from hanging forever on a stalled connection.
sidf_prefixes_res_de = requests.get(
    url="https://si-digital-framework.org/SI/prefixes",
    params={"lang": "de"},
    timeout=30,
)
# Fail here with a clear HTTP error instead of a confusing JSON decode error.
sidf_prefixes_res_de.raise_for_status()

# Decode the JSON body (a list with one dict per prefix)
sidf_prefixes_de = sidf_prefixes_res_de.json()
print(sidf_prefixes_de)
# Keep only the prefix names, e.g. "quecto", "ronto", ...
prefix_name_list = [item["label"] for item in sidf_prefixes_de]
print(prefix_name_list)

[{'label': 'quecto', 'symbol': 'q', 'scalingFactor': 1e-30, 'prefixId': 'quecto', 'scalingFactorStr': '1e-30', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/quecto'}, {'label': 'ronto', 'symbol': 'r', 'scalingFactor': 1e-27, 'prefixId': 'ronto', 'scalingFactorStr': '1e-27', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/ronto'}, {'label': 'yocto', 'symbol': 'y', 'scalingFactor': 1e-24, 'prefixId': 'yocto', 'scalingFactorStr': '1e-24', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/yocto'}, {'label': 'zepto', 'symbol': 'z', 'scalingFactor': 1e-21, 'prefixId': 'zepto', 'scalingFactorStr': '1e-21', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://

## Fetch all quantity kinds (SI) from QUDT together with their units

In [2]:
from utils import sparql_wrapper
from pprint import pprint

# Run the quantity-kind SPARQL query against the QUDT endpoint and store the
# raw JSON result on disk.
# NOTE(review): per the cell output the relative paths are resolved against
# the ``utils`` package directory, not the notebook's working directory.
sparql_qudt_qk = sparql_wrapper.Sparql(
    endpoint="https://www.qudt.org/fuseki/qudt/sparql",
    src_filepath="../ontology/qudt/sparql/quantitykind.sparql",
    tgt_filepath="../ontology/qudt/data/quantitykind.json",
    debug=False,
)
qudt_qk = sparql_qudt_qk.execQuery()
sparql_qudt_qk.writeJsonFile(data=qudt_qk)
# Each entry of results.bindings describes one quantity kind.
print("num of quantitykind: ", len(qudt_qk["results"]["bindings"]))
# NOTE(review): this dumps the complete result set into the cell output.
pprint(qudt_qk)

Reading SPARQL Query from c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/sparql/quantitykind.sparql
Writing JSON data to c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/data/quantitykind.json
num of quantitykind:  770
{'head': {'vars': ['quantity',
                   'dimension',
                   'broader',
                   'dbpediaMatch',
                   'siExactMatch',
                   'labels',
                   'descriptions',
                   'plainTextDescriptions',
                   'applicableUnits',
                   'ucumCodes']},
 'results': {'bindings': [{'applicableUnits': {'type': 'literal',
                                               'value': 'http://qudt.org/vocab/unit/CentiM2-PER-V-SEC, '
                                                        'http://qudt.org/vocab/unit/M2-PER-V-SEC'},
                           'd

## Split applicableUnits into prefixedUnits and nonPrefixedUnits

In [3]:
from utils import sparql_wrapper
from pprint import pprint
import json
import requests

# Load the quantity-kind bindings from the JSON file on disk.
# The context manager closes the file handle deterministically.
with open("ontology/qudt/data/quantitykind.json", "r") as quantitykind_file:
    quant_kind_list = json.load(quantitykind_file)["results"]["bindings"]

# Peek at one entry to see the shape of the ``applicableUnits`` literal.
pprint(quant_kind_list[5]["applicableUnits"])


def get_prefix_list():
    """Fetch the SI prefix names from the SI Digital Framework.

    Returns:
        list: The ``label`` of every prefix entry returned by the service,
        in the order the service delivers them.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer within 30 seconds.
    """
    sidf_prefixes_res_de = requests.get(
        url="https://si-digital-framework.org/SI/prefixes",
        params={"lang": "de"},
        timeout=30,  # never block the notebook indefinitely
    )
    # Surface HTTP errors here instead of failing later while decoding JSON.
    sidf_prefixes_res_de.raise_for_status()

    # Decode the JSON body (a list with one dict per prefix)
    sidf_prefixes_de = sidf_prefixes_res_de.json()
    print(sidf_prefixes_de)
    prefix_name_list = [item["label"] for item in sidf_prefixes_de]
    print(prefix_name_list)
    return prefix_name_list


# Module-level prefix list; get_unit_prefix() reads this global directly.
prefix_name_list = get_prefix_list()


def get_unit_prefix(unit_str):
    """Return the first known prefix contained in ``unit_str``, else ``None``.

    The lookup is a case-insensitive substring test against the global
    ``prefix_name_list``; prefixes are tried in list order.
    """
    return next(
        (
            candidate
            for candidate in prefix_name_list
            if candidate in unit_str.lower()
        ),
        None,
    )


def split_prefixed_applicable_units(applicable_units_str, debug=False):
    """Partition a ", "-separated unit string by prefix presence.

    Parameters:
    applicable_units_str (str): Unit URIs joined with ", ".
    debug (bool): Print every unit and the prefix detected for it.

    Returns:
    tuple: ``(non_prefixed_units, prefixed_units)``, both lists of URIs in
    input order.
    """
    without_prefix, with_prefix = [], []
    for unit_uri in applicable_units_str.split(", "):
        found_prefix = get_unit_prefix(unit_uri)
        if debug:
            print(unit_uri)
            print(found_prefix)

        if found_prefix is not None:
            if debug:
                print(
                    "prefix: ",
                    found_prefix,
                    "found in ",
                    unit_uri,
                )
            with_prefix.append(unit_uri)
            continue

        if debug:
            print("no prefix: ", unit_uri)
        without_prefix.append(unit_uri)

    return without_prefix, with_prefix


# Collect the distinct prefixed / non-prefixed unit URIs over all quantity
# kinds. Sets avoid the quadratic cost of repeated list concatenation.
prefixed_unit_set = set()
non_prefixed_unit_set = set()
for quant_kind in quant_kind_list:
    non_prefixed, prefixed = split_prefixed_applicable_units(
        quant_kind["applicableUnits"]["value"]
    )
    prefixed_unit_set.update(prefixed)
    non_prefixed_unit_set.update(non_prefixed)

# Sort so the lists (and everything derived from them, e.g. the exported
# JSON) come out in the same order on every run.
all_prefixed_units = sorted(prefixed_unit_set)
all_non_prefixed_units = sorted(non_prefixed_unit_set)

print(len(all_prefixed_units))
print(len(all_non_prefixed_units))


import re


def remove_kilo(uri=None, debug=False):
    """Strip every case-insensitive occurrence of "kilo" from ``uri``.

    With ``debug`` set, the stripped string is printed before it is returned.
    """
    stripped = re.sub(r"kilo", "", uri, flags=re.IGNORECASE)
    if debug:
        print(stripped)
    return stripped


# Manual check of remove_kilo(); ``unit_str`` is reused by the
# remove_prefix() demo further down.
unit_str = "http://qudt.org/vocab/unit/KiloGM-PER-M3"
print("Remove kilo test:")
# debug=False, so nothing is printed here; the result is only kept in ``a``.
a = remove_kilo(unit_str, debug=False)


def remove_kilo_list(uri_list, debug=False):
    """Apply ``remove_kilo`` to each URI and return the results in order.

    With ``debug`` set, each cleaned URI and then the full list are printed.
    """
    cleaned_uris = []
    for uri in uri_list:
        cleaned_uris.append(remove_kilo(uri, debug))
    if debug:
        print(cleaned_uris)
    return cleaned_uris


# Sample URIs for the manual checks: one with a single prefix and one with
# two prefixes ("Centi" and "Kilo").
kilo_list = [
    "http://qudt.org/vocab/unit/KiloGM-PER-M3",
    "http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM",
]
print("Remove kilo list test:")
# Only "kilo" is stripped here, so "Centi" stays in the second URI.
b = remove_kilo_list(kilo_list, debug=True)


def remove_prefix(uri=None, prefix_list=None, debug=False):
    """Remove every occurrence of any listed prefix from a URI.

    Matching is case-insensitive and treats each prefix as literal text.

    Parameters:
    uri (str): The URI to clean.
    prefix_list (list of str): Prefix names to strip. ``None`` or an empty
        list leaves ``uri`` untouched.
    debug (bool): Print the cleaned URI before returning it.

    Returns:
    str: ``uri`` with all prefix occurrences removed.
    """
    literal_prefixes = [prefix for prefix in (prefix_list or []) if prefix]
    if not literal_prefixes:
        cleaned = uri
    else:
        # Escape so regex metacharacters in a prefix are matched literally,
        # and try longer prefixes first so a short prefix that is the start
        # of a longer one cannot shadow it.
        alternatives = sorted(literal_prefixes, key=len, reverse=True)
        pattern = re.compile(
            "|".join(re.escape(prefix) for prefix in alternatives),
            re.IGNORECASE,
        )
        cleaned = pattern.sub("", uri)
    if debug:
        print(cleaned)
    return cleaned


# Manual check of remove_prefix() with the prefix list fetched above;
# debug=True prints the cleaned URI.
print("Remove any prefix test:")
c = remove_prefix(uri=unit_str, prefix_list=prefix_name_list, debug=True)


def remove_prefix_list(uri_list=None, prefix_list=None, debug=False):
    """Strip all prefixes from each URI and drop duplicate results.

    With ``debug`` set, the cleaned list is printed before de-duplication.
    The order of the returned list is not defined (it goes through a set).
    """
    cleaned_uris = []
    for uri in uri_list:
        cleaned_uris.append(remove_prefix(uri, prefix_list, debug))
    if debug:
        print(cleaned_uris)
    unique_uris = set(cleaned_uris)
    return list(unique_uris)


# Manual check of remove_prefix_list() on the two sample URIs.
print("Remove any prefix list test:")
d = remove_prefix_list(kilo_list, prefix_name_list, debug=True)

# Helper for OSW Domain Check by unit_path
import uuid


def get_qudt_osl_item(
    qudt_uri="http://qudt.org/vocab/unit/GM-PER-M3",
    osl_domain="https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW",
):
    """Return ``osl_domain`` followed by the dash-less UUIDv5 of ``qudt_uri``.

    The UUID is derived in the URL namespace, so the same URI always yields
    the same item path.
    """
    item_uuid = uuid.uuid5(uuid.NAMESPACE_URL, qudt_uri)
    # ``hex`` is the 32-character form of the UUID without dashes.
    return osl_domain + item_uuid.hex


# Smoke call with the default arguments; the return value is not stored.
get_qudt_osl_item()


def get_deterministic_url_uuid(prefix="", uri=None) -> uuid.UUID:
    """Return the URL-namespace UUIDv5 of ``prefix`` concatenated with ``uri``.

    Identical ``prefix``/``uri`` pairs always map to the same UUID.
    """
    seed_name = f"{prefix}{uri}"
    return uuid.uuid5(uuid.NAMESPACE_URL, seed_name)


def get_osw_uuid_str(namespace="", _uuid=None) -> str:
    """Format a UUID as ``<namespace>OSW<uuid without dashes>``."""
    compact_id = str(_uuid).replace("-", "")
    return f"{namespace}OSW{compact_id}"


# TEST: the helpers must reproduce hand-computed UUIDv5 values for the QUDT
# quantity kind "Speed", both without and with the "characteristic:" prefix.
quantity_qudt_speed_uri = "https://qudt.org/vocab/quantitykind/Speed"
prefixed_quantity_qudt_speed_uri = f"characteristic:{quantity_qudt_speed_uri}"
# Reference values computed directly with uuid5
uuid_speed = uuid.uuid5(
    namespace=uuid.NAMESPACE_URL, name=quantity_qudt_speed_uri
)
prefixed_uuid_speed = uuid.uuid5(
    namespace=uuid.NAMESPACE_URL, name=prefixed_quantity_qudt_speed_uri
)
# Helper without prefix must equal the plain reference UUID
print(uuid_speed)
print(get_deterministic_url_uuid(uri=quantity_qudt_speed_uri))
assert uuid_speed == get_deterministic_url_uuid(uri=quantity_qudt_speed_uri)
# Helper with prefix must equal the prefixed reference UUID
print(prefixed_uuid_speed)
print(
    get_deterministic_url_uuid(
        uri=quantity_qudt_speed_uri, prefix="characteristic:"
    )
)
assert prefixed_uuid_speed == get_deterministic_url_uuid(
    uri=quantity_qudt_speed_uri, prefix="characteristic:"
)
# Show the resulting OSW identifiers, without and with a namespace
print(get_osw_uuid_str(_uuid=uuid_speed))
print(get_osw_uuid_str(namespace="Category:", _uuid=prefixed_uuid_speed))

import os


def export_osw_obj_json(osw_obj_list=None, ontology_name=None, file_name=None):
    """Export a list of OSW objects to a JSON file.

    The file is written to ``<cwd>/ontology/<ontology_name>/data/<file_name>``;
    missing directories are created.

    Parameters:
    osw_obj_list (list): Objects exposing a ``json()`` method that returns a
        JSON string.
    ontology_name (str): Name of the ontology sub-directory.
    file_name (str): Target file name; ".json" is appended if missing.

    Raises:
    ValueError: If any of the three parameters is ``None``.
    """
    if osw_obj_list is None or ontology_name is None or file_name is None:
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError("Please provide all the parameters.")

    # Round-trip through json() so the objects become plain JSON data
    osw_obj_json_dumpable = [
        json.loads(osw_obj.json()) for osw_obj in osw_obj_list
    ]
    osw_obj_json_dump = json.dumps(osw_obj_json_dumpable, indent=4)

    if not file_name.endswith(".json"):
        file_name += ".json"

    target_dir = os.path.join(
        os.path.abspath(""),  # https://stackoverflow.com/a/54376484
        "ontology",
        ontology_name,
        "data",
    )
    # Without this a fresh checkout fails with FileNotFoundError on open().
    os.makedirs(target_dir, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(
        os.path.join(target_dir, file_name), "w", encoding="utf-8"
    ) as f:
        f.write(osw_obj_json_dump)

{'type': 'literal', 'value': 'http://qudt.org/vocab/unit/RAD-PER-SEC2'}
[{'label': 'quecto', 'symbol': 'q', 'scalingFactor': 1e-30, 'prefixId': 'quecto', 'scalingFactorStr': '1e-30', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/quecto'}, {'label': 'ronto', 'symbol': 'r', 'scalingFactor': 1e-27, 'prefixId': 'ronto', 'scalingFactorStr': '1e-27', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/ronto'}, {'label': 'yocto', 'symbol': 'y', 'scalingFactor': 1e-24, 'prefixId': 'yocto', 'scalingFactorStr': '1e-24', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/yocto'}, {'label': 'zepto', 'symbol': 'z', 'scalingFactor': 1e-21, 'prefixId': 'zepto', 'scalingFactorStr': '1e-21

In [4]:
# Functions to extract the path of a given URL
def get_path(url):
    """Return the part of ``url`` after its last "/" (all of it if none)."""
    _head, _slash, last_segment = url.rpartition("/")
    return last_segment


# def get_path_list(url_list):
#     return [get_path(url) for url in url_list]

# def get_iri_without_path(iri):
#     return iri.split("/")[:-1]


def get_main_string(unit_str, prefix_name_list):
    """Strip the first matching capitalized prefix from ``unit_str``.

    Prefixes are tried in list order in their ``str.capitalize()`` form
    (e.g. "kilo" -> "Kilo"). All occurrences of the first one found are
    removed; if none matches, ``unit_str`` is returned unchanged.
    """
    capitalized = (name.capitalize() for name in prefix_name_list)
    hit = next((cap for cap in capitalized if cap in unit_str), None)
    return unit_str if hit is None else unit_str.replace(hit, "")


def merge_prefixed_and_non_prefixed_units(
    all_non_prefixed_units, all_prefixed_units, prefix_name_list
):
    """Attach to every non-prefixed unit the prefixed units derived from it.

    A prefixed unit belongs to a non-prefixed unit when its last URI segment,
    with the prefix stripped by ``get_main_string``, equals the last URI
    segment of the non-prefixed unit.

    Returns:
    dict: ``{non_prefixed_uri: {"prefixed_units": [prefixed_uri, ...]}}``,
    with the prefixed URIs in the order of ``all_prefixed_units``.
    """
    # Strip each prefixed unit once up front instead of once per
    # (non-prefixed, prefixed) pair.
    prefixed_by_main_string = {}
    for prefixed_unit in all_prefixed_units:
        main_string = get_main_string(
            get_path(prefixed_unit), prefix_name_list
        )
        prefixed_by_main_string.setdefault(main_string, []).append(
            prefixed_unit
        )

    unit_dict = {}
    for non_prefixed_unit in all_non_prefixed_units:
        matches = prefixed_by_main_string.get(get_path(non_prefixed_unit), [])
        # Copy so entries that share a path end do not share one list object.
        unit_dict[non_prefixed_unit] = {"prefixed_units": list(matches)}

    return unit_dict


# Build the lookup {non-prefixed unit URI: {"prefixed_units": [...]}} that
# the later cells iterate over.
unit_dict = merge_prefixed_and_non_prefixed_units(
    all_non_prefixed_units, all_prefixed_units, prefix_name_list
)
pprint(unit_dict)

{'http://qudt.org/vocab/unit/A': {'prefixed_units': ['http://qudt.org/vocab/unit/MegaA',
                                                     'http://qudt.org/vocab/unit/MilliA',
                                                     'http://qudt.org/vocab/unit/PicoA',
                                                     'http://qudt.org/vocab/unit/KiloA',
                                                     'http://qudt.org/vocab/unit/MicroA']},
 'http://qudt.org/vocab/unit/A-HR': {'prefixed_units': ['http://qudt.org/vocab/unit/KiloA-HR',
                                                        'http://qudt.org/vocab/unit/MilliA-HR']},
 'http://qudt.org/vocab/unit/A-M2': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-M2-PER-J-SEC': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-DEG_C': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-J': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-M': {'prefixed_units': ['http://qudt.org/vocab/unit/MilliA-PER-

## Fetch OSL Schema to Update Local Model

In [5]:
from osw.core import OSW
from osw.express import OswExpress
import osw.model.entity as model


# Schema pages passed to fetch_schema() to update the local ``osw.model``.
required_schemas = [
    "Category:OSW99e0f46a40ca4129a420b4bb89c4cc45",  # Unit prefix
    "Category:OSWd2520fa016844e01af0097a85bb25b25",  # Quantity Unit
    "Category:OSW00fbd6feecb5408997ca18d4e681a131",  # Quantity Kind
    "Category:OSW268cc84d3dff4a7ba5fd489d53254cb0",  # Composed Quantity Unit with Unit Prefix (outlier)
    "Category:OSWffe74f291d354037b318c422591c5023",  # Characteristic Type
    "Category:OSW4082937906634af992cf9a1b18d772cf",  # Quantity Value
    "Category:OSWc7f9aec4f71f4346b6031f96d7e46bd7",  # Fundamental Quantity Value Type
]


def update_local_osw(osw_obj):
    """Fetch every schema in ``required_schemas`` through ``osw_obj``.

    The schemas are requested with mode "replace".
    """
    print("fetch schemas")
    fetch_param = OSW.FetchSchemaParam(
        schema_title=required_schemas,
        mode="replace",
    )
    osw_obj.fetch_schema(fetch_param)


# NOTE: inside a notebook kernel ``__name__`` is "__main__", so this guard
# runs whenever the cell is executed. ``osw_obj`` is created as a global here
# and is used again by later cells.
if __name__ == "__main__":
    # Authentication
    osw_obj = OswExpress(
        domain="wiki-dev.open-semantic-lab.org",  # cred_filepath=pwd_file_path
    )
    update_local_osw(osw_obj)

  self.pattern = re.compile(self.pattern)
'module 'osw.model.entity' has no attribute 'File''You will be now have to connect to an OSW instance to fetch the dependencies from!
  warn(


Fetch Category:OSW3e3f5dd4f71842fbb8f270e511af8031
Fetch Category:OSWff333fd349af4f65a69100405a9e60c7
Fetch Category:OSW2ac4493f8635481eaf1db961b63c8325
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d
Fetch Category:OSW05b244d0a669436e96fe4e1631d5a171
Fetch Category:OSWff333fd349af4f65a69100405a9e60c7
Fetch Category:OSW2ac4493f8635481eaf1db961b63c8325
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Entity
ObjectStatement
DataStatement
QuantityStatement
Item
Data
File
LocalFile


{'LocalFile': 'Category:OSW3e3f5dd4f71842fbb8f270e511af8031', 'WikiFile': 'Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d'}
Its recommended to restart the kernel, to apply all changes seamlessly.
  warn(


fetch schemas
Fetch Category:OSW99e0f46a40ca4129a420b4bb89c4cc45
Fetch Category:OSWd02741381aaa4709ae0753a0edc341ce
Fetch Category:OSWcbb09a36336740c6a2cd62db9bf647ec
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSWd2520fa016844e01af0097a85bb25b25
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Characteristic
OntologyRelated
Entit

## Instantiate Example QuantityUnit (formerly Unit of Measure)

In [6]:
from pprint import pprint

# Load one existing unit entity from the wiki as a sanity check of the
# freshly fetched schemas. Requires ``osw_obj`` from the schema-fetch cell.
title = "Item:OSWc73852e7049f42e7b5282866f3839f5e"  # Meter


meter_u = osw_obj.load_entity(title)


pprint(meter_u)

Fetch Category:OSW7199b9568d96476cacfe30060c64f371
Fetch Category:OSWd2520fa016844e01af0097a85bb25b25
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Characteristic
OntologyRelated
Entity
ObjectStatement
DataStatement
QuantityStatement
Item
IntangibleItem
Enumeration
PrefixUnit
ComposedQuantityUnitWithUnitPrefix
PrimitiveProperty
ComplexProperty
Category
GeneratedQuantityValue
QuantityValue
FundamentalQuantityValueType
BaseUnitOfMeasure(type=['Category:OSW7199b9568d96476cacfe30060c64f371'], uuid=UUID('c73852e7-049f-42e7-b528-2866f3839f5e'), exact_ontology_match=['https://qudt.org/vocab/

## Fetch Units from Ontology and complete properties of unit dictionary

In [7]:
from utils import sparql_wrapper
from pprint import pprint

# Run the unit SPARQL query against the QUDT endpoint and store the raw JSON
# result on disk. ``qudt_units_param_res`` is read as a global by
# get_osw_quantity_unit_list() further down.
sparql_qudt_unit_params = sparql_wrapper.Sparql(
    endpoint="https://www.qudt.org/fuseki/qudt/sparql",
    src_filepath="../ontology/qudt/sparql/units.sparql",
    tgt_filepath="../ontology/qudt/data/units.json",
    debug=False,
)
qudt_units_param_res = sparql_qudt_unit_params.execQuery()
sparql_qudt_unit_params.writeJsonFile(data=qudt_units_param_res)
# Each entry of results.bindings describes one unit.
print(
    f"num results.bindings: {len(qudt_units_param_res['results']['bindings'])}"
)
# NOTE(review): this dumps the complete result set into the cell output.
pprint(qudt_units_param_res)

Reading SPARQL Query from c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/sparql/units.sparql
Writing JSON data to c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/data/units.json
num results.bindings: 950
{'head': {'vars': ['applicableUnit',
                   'dimensionVector',
                   'symbol',
                   'conversionMultiplierSN',
                   'dbpediaMatch',
                   'siExactMatch',
                   'plainTextDescription',
                   'description',
                   'ucumCodes',
                   'applicableSystems',
                   'qlabels']},
 'results': {'bindings': [{'applicableSystems': {'type': 'literal',
                                                 'value': 'http://qudt.org/vocab/sou/SI, '
                                                          'http://qudt.org/vocab/sou/CGS, '
      

In [8]:
# from jsonpath_ng import jsonpath, parse

# # use json path to extract all another info of same level in results.bindings where the identifier is results.bindings[*].applicableUnit.value and the value is "http://qudt.org/vocab/unit#Meter"
# jsonpath_expr = parse(
#     "$.results.bindings[?(@.applicableUnit.value == 'http://qudt.org/vocab/unit#Meter')]"
# )
# meter_unit_info = [match.value for match in jsonpath_expr.find(qudt_units_param_res)]

from jsonpath_ng.ext import parse
import uuid

# Use a JSONPath filter to select the complete binding (all sibling keys) in
# results.bindings whose applicableUnit.value equals the given unit IRI, here
# "http://qudt.org/vocab/unit/MilliPA-SEC".
jsonpath_expr = parse(
    '$.results.bindings[?(@.applicableUnit.value = "http://qudt.org/vocab/unit/MilliPA-SEC")]'
)
# meter_unit_info = [match.value for match in jsonpath_expr.find(qudt_units_param_res)]
# pprint(meter_unit_info)

# Show the first matching binding; raises IndexError if nothing matches.
pprint(jsonpath_expr.find(qudt_units_param_res)[0].value)


def match_json_path_key(qudt_units_param_res, identifier="", key=""):
    """Return ``<key>.value`` of the first binding for unit ``identifier``.

    The binding is selected by ``applicableUnit.value == identifier``.
    Raises IndexError if the JSONPath query yields no match.
    """
    query = f'$.results.bindings[?(@.applicableUnit.value = "{identifier}")].{key}.value'
    matches = parse(query).find(qudt_units_param_res)
    return matches[0].value


def match_object_json_path(qudt_units_param_res=None, identifier=""):
    """Return the first complete binding for unit ``identifier``.

    The binding is selected by ``applicableUnit.value == identifier``.
    Raises IndexError if the JSONPath query yields no match.
    """
    # print(f"IDENTIFIER: {identifier}")
    query = f'$.results.bindings[?(@.applicableUnit.value = "{identifier}")]'
    matches = parse(query).find(qudt_units_param_res)
    return matches[0].value


def get_prefix_uuid(data=[], prefix=""):
    """Return the URL-namespace UUIDv5 of the ``pid`` of prefix ``prefix``.

    ``data`` is searched for the first entry whose ``label`` equals
    ``prefix``. Raises IndexError if no entry matches.
    """
    query = f'$[?(@.label = "{prefix}")].pid'
    prefix_pid = parse(query).find(data)[0].value
    return uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=prefix_pid)


# test
# pprint(
#     match_json_path_key(
#         qudt_units_param_res,
#         identifier="http://qudt.org/vocab/unit/KiloBYTE",
#         key="ucumCodes",
#     )
# )

# Manual check: show the complete binding of one prefixed unit.
pprint(
    match_object_json_path(
        qudt_units_param_res=qudt_units_param_res,
        identifier="http://qudt.org/vocab/unit/KiloBYTE",
    )
)

# Manual check: UUID derived from the "kilo" prefix entry. Requires
# ``sidf_prefixes_de`` from the first cell of the notebook.
pprint(get_prefix_uuid(sidf_prefixes_de, "kilo"))

{'applicableSystems': {'type': 'literal',
                       'value': 'http://qudt.org/vocab/sou/SI, '
                                'http://qudt.org/vocab/sou/CGS, '
                                'http://qudt.org/vocab/sou/CGS-EMU, '
                                'http://qudt.org/vocab/sou/CGS-GAUSS'},
 'applicableUnit': {'type': 'uri',
                    'value': 'http://qudt.org/vocab/unit/MilliPA-SEC'},
 'conversionMultiplierSN': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
                            'type': 'literal',
                            'value': '0.001e0'},
 'description': {'datatype': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML',
                 'type': 'literal',
                 'value': '0.001-fold of the product of the SI derived unit '
                          'pascal and the SI base unit second'},
 'dimensionVector': {'type': 'uri',
                     'value': 'http://qudt.org/vocab/dimensionvector/A0E0L-1I0M1H0T-1D0'},
 'plainTextDe

## Map Attributes from Ontology Data to OSL Schema

In [9]:
import uuid


def dict_from_comma_separated_list(qlabel):
    """Turn ``"text@lang, text@lang"`` into ``{lang: text}``.

    Each ", "-separated part is split at its LAST "@", so label text that
    itself contains "@" stays intact. A part without any "@" is stored under
    the empty language key "", the same key a trailing "@" produces. Empty
    parts are skipped. A later part with the same language overwrites an
    earlier one.

    Parameters:
    qlabel (str): The concatenated label string.

    Returns:
    dict: Mapping of language tag to label text.
    """
    labels_by_lang = {}
    for part in qlabel.split(", "):
        if not part:
            # e.g. an empty input string: nothing to record
            continue
        text, separator, lang = part.rpartition("@")
        if not separator:
            # No language tag at all: the whole part is the label text.
            text, lang = part, ""
        labels_by_lang[lang] = text
    return labels_by_lang


def get_osw_prefix_unit(
    qudt_units_param_res=None, prefixes_list=None, url=None, parent_uuid=None
):
    """Build the ``model.PrefixUnit`` for one prefixed QUDT unit.

    Parameters:
    qudt_units_param_res (dict): SPARQL result holding the unit bindings.
    prefixes_list (list): Prefix entries searched by ``get_prefix_uuid``.
    url (str): IRI of the prefixed unit.
    parent_uuid: UUID of the non-prefixed unit this prefixed unit belongs to.

    Returns:
    model.PrefixUnit: Carries a fixed placeholder description ("Description").
    """
    binding = match_object_json_path(
        qudt_units_param_res=qudt_units_param_res,
        identifier=url,
    )

    # The QUDT IRI always comes first, followed by the optional matches.
    ontology_matches = [binding["applicableUnit"]["value"]]
    for optional_key in ("dbpediaMatch", "siExactMatch"):
        if optional_key in binding:
            ontology_matches.append(binding[optional_key]["value"])

    if "conversionMultiplierSN" in binding:
        multiplier = binding["conversionMultiplierSN"]["value"]
    else:
        multiplier = None

    # Deterministic UUID of the prefixed unit, derived from its IRI
    unit_uuid = str(uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=url))
    compact_unit_id = unit_uuid.replace("-", "")
    compact_parent_id = str(parent_uuid).replace("-", "")

    prefix_uuid = get_prefix_uuid(prefixes_list, get_unit_prefix(url))
    compact_prefix_id = str(prefix_uuid).replace("-", "")

    symbol = match_json_path_key(
        qudt_units_param_res,
        identifier=url,
        key="symbol",
    )

    return model.PrefixUnit(
        uuid=unit_uuid,
        osw_id=f"Item:OSW{compact_parent_id}#OSW{compact_unit_id}",
        prefix=f"Item:OSW{compact_prefix_id}",
        # prefix_symbol="",  # Causes edge case error
        main_symbol=symbol,
        exact_ontology_match=ontology_matches,
        conversion_factor_from_si=multiplier,
        description=[{"text": "Description", "lang": "en"}],
    )


def get_osw_quantity_unit_list(unit_dict=None):
    """Create one ``model.QuantityUnit`` per non-prefixed unit in ``unit_dict``.

    Unit details are looked up in the global ``qudt_units_param_res``; the
    prefix entries for the nested prefix units come from the global
    ``sidf_prefixes_de``.

    Parameters:
    unit_dict (dict): ``{non_prefixed_iri: {"prefixed_units": [iri, ...]}}``.

    Returns:
    list: The ``model.QuantityUnit`` objects in ``unit_dict`` order.
    """
    units = []
    for non_prefixed_unit_iri, unit_property_dict in unit_dict.items():
        name = non_prefixed_unit_iri.split("/")[-1]

        match_unit_dict = match_object_json_path(
            qudt_units_param_res=qudt_units_param_res,
            identifier=non_prefixed_unit_iri,
        )

        # The QUDT IRI always comes first, followed by the optional matches.
        ontology_match_list = [match_unit_dict["applicableUnit"]["value"]]
        for optional_key in ("dbpediaMatch", "siExactMatch"):
            if optional_key in match_unit_dict:
                ontology_match_list.append(
                    match_unit_dict[optional_key]["value"]
                )

        conversion_multiplier = None
        if "conversionMultiplierSN" in match_unit_dict:
            conversion_multiplier = match_unit_dict["conversionMultiplierSN"][
                "value"
            ]

        # plainTextDescription takes precedence over description; the unit
        # stays without a description when neither is present.
        description_list = None
        for description_key in ("plainTextDescription", "description"):
            if description_key in match_unit_dict:
                description_list = [
                    model.Description(
                        text=match_unit_dict[description_key]["value"],
                        lang="en",
                    )
                ]
                break

        qlabels = match_json_path_key(
            qudt_units_param_res,
            identifier=non_prefixed_unit_iri,
            key="qlabels",
        )
        label_dict = dict_from_comma_separated_list(qlabels)
        # Labels without a language tag end up under "" and count as English.
        if "" in label_dict:
            label_dict["en"] = label_dict.pop("")
        osw_label_list = [
            model.Label(text=value, lang=key)
            for key, value in label_dict.items()
        ]

        symbol = match_json_path_key(
            qudt_units_param_res,
            identifier=non_prefixed_unit_iri,
            key="symbol",
        )
        # Deterministic UUID derived from the unit IRI
        _uuid = uuid.uuid5(
            namespace=uuid.NAMESPACE_URL, name=non_prefixed_unit_iri
        )
        prefix_unit_list = [
            get_osw_prefix_unit(
                qudt_units_param_res=qudt_units_param_res,
                prefixes_list=sidf_prefixes_de,
                url=url,
                parent_uuid=_uuid,
            )
            for url in unit_property_dict["prefixed_units"]
        ]

        units.append(
            model.QuantityUnit(
                uuid=_uuid,
                exact_ontology_match=ontology_match_list,
                name=name,
                label=osw_label_list,
                main_symbol=symbol,
                prefix_units=prefix_unit_list,
                description=description_list,
                conversion_factor_from_si=conversion_multiplier,
            )
        )

    return units

    # pprint(units)
    # print("not described units couter: ", i)
    # print("num of non prfixed units: ", len(unit_dict.items()))


# NOTE(review): ``quanityunits`` (one "t") is never used in the visible code;
# the list that is filled and exported is ``quanittyunits`` (double "t").
# Both spellings look like typos of "quantityunits" - confirm no other cell
# depends on them before renaming.
quanityunits = []
quanittyunits = get_osw_quantity_unit_list(unit_dict=unit_dict)

# Write the generated units to ontology/osl/data/quantity_units.json
export_osw_obj_json(
    osw_obj_list=quanittyunits,
    ontology_name="osl",
    file_name="quantity_units.json",
)

## Upload Units to DEV WIKI

In [10]:
# # Test of single unit
# # test_unit = units[6]
# # pprint(test_unit.dict())

# from osw.core import OSW

# # Upload single unit to OSW
# # osw_obj.delete_entity(test_unit)
# # osw_obj.store_entity(test_unit)

# # Upload all units to OSW
# # osw_obj.delete_entity(units)
# # osw_obj.store_entity(units)
# osw_obj.store_entity(OSW.StoreEntityParam(entities=units, overwrite=True))

# Quantity Kind

## Check uploaded units with quantities for unit reference

In [11]:
import re

# Helper functions


# Function to lookup existing non prefixed units in a given list of units
def lookup_existing_non_prefixed_units(
    all_non_prefixed_units=None, lookup_list=None
):
    """Return the entries of ``lookup_list`` found in ``all_non_prefixed_units``.

    Order and duplicates of ``lookup_list`` are preserved.
    """
    return [
        candidate
        for candidate in lookup_list
        if candidate in all_non_prefixed_units
    ]


# Function to check if any unit has multiple prefixes
def has_multiple_prefixes(unit_uri_list, prefix_list):
    """
    Split the unit URIs by whether they contain more than one prefix.

    Prefixes are matched case-insensitively as literal text. With an empty
    prefix list no unit can contain a prefix, so every unit is returned in
    the second list.

    Parameters:
    unit_uri_list (list of str): List of unit URIs to check.
    prefix_list (list of str): List of prefixes to check against.

    Returns:
    tuple: A tuple containing two lists:
        - units_with_multiple_prefixes: List of units that have more than one prefix.
        - units_with_no_or_single_prefix: List of units that have no or a single prefix.
    """
    units_with_multiple_prefixes = []
    units_with_no_or_single_prefix = []

    # Build the pattern once. An empty pattern would match at every position
    # of every string and wrongly flag all units as multi-prefixed, so that
    # case is handled without a regex.
    escaped_prefixes = [re.escape(prefix) for prefix in prefix_list if prefix]
    pattern = (
        re.compile("|".join(escaped_prefixes), re.IGNORECASE)
        if escaped_prefixes
        else None
    )

    for unit_uri in unit_uri_list:
        prefix_count = len(pattern.findall(unit_uri)) if pattern else 0
        if prefix_count > 1:
            units_with_multiple_prefixes.append(unit_uri)
        else:
            units_with_no_or_single_prefix.append(unit_uri)

    return units_with_multiple_prefixes, units_with_no_or_single_prefix


# Function to check if path_end of a single unit is in another list of units using regex
def check_path_end_in_list(unit_uri=None, check_unit_list=None, get_bool=True):
    """Check whether the last path segment of ``unit_uri`` occurs in other units.

    The segment is compared as a plain substring, so characters that are
    special in regular expressions (".", "+", ...) are matched literally.

    Parameters:
    unit_uri (str): URI whose last "/"-separated segment is searched for.
    check_unit_list (list of str): Unit URIs to search in; ``None`` is
        treated as an empty list.
    get_bool (bool): Select the return type, see below.

    Returns:
    bool: If ``get_bool`` is true, whether at least one unit contains the
        segment.
    list: If ``get_bool`` is false, all units containing the segment, in
        input order.
    """
    path_end = unit_uri.split("/")[-1]
    candidates = check_unit_list or []
    if get_bool:
        # any() stops at the first hit, like the early return it replaces.
        return any(path_end in check_unit for check_unit in candidates)
    return [check_unit for check_unit in candidates if path_end in check_unit]


# def check_path_end_in_list(unit_uri=None, check_unit_list=None):
#     """Function to check if the path end of a unit URI is in another list of units using regex."""
#     path_end = unit_uri.split("/")[-1]
#     for check_unit in check_unit_list:
#         print(f"check_unit: {check_unit}")
#         if re.search(path_end, check_unit):
#             print(f"Path end: {path_end} found in {check_unit} on unit {unit_uri}")
#             return True
#     return False


# Function to categorize units based on prefixes
def categorize_units(unit_uri_list=None, prefix_list=None):
    """
    Group unit URIs into base units and the prefixed variants that contain them.

    The URIs are first split by has_multiple_prefixes(...). Every unit with no
    or a single prefix is a candidate base unit; the multi-prefix units whose
    URI contains the candidate's path end become its prefixed variants.

    Parameters:
    unit_uri_list (list of str): List of unit URIs to be categorized.
    prefix_list (list of str): List of prefixes to be used for categorization.

    Returns:
    tuple: A tuple containing two lists:
        - compound_prefix_unit_tuple_list: Tuples of (base unit, list of
          prefixed units that contain the base unit's path end).
        - not_determinable_unit_list: Tuples of (unit, None) for units that
          couldn't be categorized.
    """
    multi_prefix_units, base_candidates = has_multiple_prefixes(
        unit_uri_list, prefix_list
    )

    # Without any candidate base unit nothing can be grouped: every
    # multi-prefix unit is reported as not determinable.
    if base_candidates == []:
        return [], [(unit, None) for unit in multi_prefix_units]

    compound_prefix_unit_tuple_list = []
    not_determinable_unit_list = []

    for candidate in base_candidates:
        # One lookup returns all multi-prefix units containing the candidate's
        # path end; an empty result means the candidate is no base unit.
        prefixed_variants = check_path_end_in_list(
            unit_uri=candidate,
            check_unit_list=multi_prefix_units,
            get_bool=False,
        )
        if prefixed_variants:
            compound_prefix_unit_tuple_list.append(
                (candidate, prefixed_variants)
            )
        else:
            not_determinable_unit_list.append((candidate, None))

    return compound_prefix_unit_tuple_list, not_determinable_unit_list


# Function to unify the composed unit tuple lists
def merge_unify_tuples_to_dict(tuple_list=None):
    """Merge (key, values) tuples into one dict with de-duplicated value lists.

    Tuples sharing the same key are combined. Duplicate values are dropped and
    the remaining values keep the order in which they were first seen, so the
    result (and any JSON dumped from it) is identical on every run.

    Parameters:
    tuple_list (list of tuple): Tuples of (key, values) where values is an
        iterable of hashable items, an empty iterable or None.

    Returns:
    dict: Maps every key to the list of its unique values, or to None if no
        tuple of that key carried any value. An empty dict if tuple_list is None.
    """
    if tuple_list is None:
        return {}

    merged = {}
    for key, values in tuple_list:
        # A dict serves as an ordered set: insertion order is preserved and
        # repeated values collapse onto the existing entry.
        bucket = merged.setdefault(key, {})
        if values:
            bucket.update(dict.fromkeys(values))

    return {
        key: (list(bucket) if bucket else None)
        for key, bucket in merged.items()
    }


# Example usage
# Sample unit URIs of one quantity kind. Note that
# ".../KiloMOL-PER-KiloGM" is listed twice, so it also shows up twice in the
# printed results below.
unit_uri_list = [
    "http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/FemtoMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/MOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/MicroMOL-PER-GM",
    "http://qudt.org/vocab/unit/MicroMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/MilliMOL-PER-GM",
    "http://qudt.org/vocab/unit/MilliMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/NanoMOL-PER-KiloGM",
    "http://qudt.org/vocab/unit/PicoMOL-PER-KiloGM",
]

# Hand-picked prefixes for this example (not the full fetched prefix list)
prefix_list = ["Centi", "Femto", "Kilo", "Micro", "Milli", "Nano", "Pico"]

# Test the function: has_multiple_prefixes(...)
units_with_multiple_prefixes, units_with_no_or_single_prefix = (
    has_multiple_prefixes(unit_uri_list, prefix_list)
)
print(f"Units with multiple prefixes: {units_with_multiple_prefixes}")
print(f"Units with no or single prefix: {units_with_no_or_single_prefix}")

# Test the function: categorize_units(...)
# It returns the (base unit, prefixed units) tuples and the (unit, None) tuples
# of the units that could not be assigned to a base unit.
compound_prefix_unit_tuple_list, not_determinable_unit_list = categorize_units(
    unit_uri_list=unit_uri_list, prefix_list=prefix_list
)
print(f"Compound prefix unit tuple list: {compound_prefix_unit_tuple_list}")
print(f"Not determinable unit list: {not_determinable_unit_list}")
# Print each (base unit, prefixed units) tuple on its own lines
for (
    compound_prefix_unit,
    prefixed_compound_prefix_units,
) in compound_prefix_unit_tuple_list:
    print(f"compound_prefix_unit: {compound_prefix_unit}")
    print(f"prefixed_compound_prefix_units: {prefixed_compound_prefix_units}")

# Test the function: merge_unify_tuples_to_dict(...)
# Example usage
# The hand-written input overwrites the result of categorize_units(...) above.
# It repeats the key ".../MOL-PER-KiloGM" with partly overlapping value lists
# and contains keys without values (None) to exercise the merge.
compound_prefix_unit_tuple_list = [
    (
        "http://qudt.org/vocab/unit/MOL-PER-KiloGM",
        [
            "http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM",
            "http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM",
            "http://qudt.org/vocab/unit/MilliMOL-PER-KiloGM",
        ],
    ),
    ("http://qudt.org/vocab/unit/MicroMOL-PER-GM", None),
    (
        "http://qudt.org/vocab/unit/MOL-PER-KiloGM",
        [
            "http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM",
            "http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM",
            "http://qudt.org/vocab/unit/PicoMOL-PER-KiloGM",
        ],
    ),
    ("http://qudt.org/vocab/unit/MilliMOL-PER-GM", None),
]

# One dict entry per key; the value lists of repeated keys are merged
to_be_uploaded_key_value_dict = merge_unify_tuples_to_dict(
    compound_prefix_unit_tuple_list
)
print(
    f"merge_prefixed_and_non_prefixed_units: {to_be_uploaded_key_value_dict}"
)
# Print to_be_uploaded_key_value_dict as json
print(json.dumps(to_be_uploaded_key_value_dict, indent=4))


# Test case 2
# Uses the prefix names fetched at the top of the notebook (prefix_name_list)
# instead of the hand-picked prefix_list of the first example.
print("TEST 2")
test_list = [
    "http://qudt.org/vocab/unit/CentiM3-PER-GM",
    "http://qudt.org/vocab/unit/DeciL-PER-GM",
    "http://qudt.org/vocab/unit/L-PER-KiloGM",
    "http://qudt.org/vocab/unit/M3-PER-KiloGM",
    "http://qudt.org/vocab/unit/MilliL-PER-GM",
    "http://qudt.org/vocab/unit/MilliL-PER-KiloGM",
    "http://qudt.org/vocab/unit/MilliM3-PER-GM",
    "http://qudt.org/vocab/unit/MilliM3-PER-KiloGM",
]
compound_prefix_unit_tuple_list, not_determinable_unit_list = categorize_units(
    unit_uri_list=test_list, prefix_list=prefix_name_list
)
# Expected grouping (author's sketch, not asserted):
# compound_prefix_unit_tuple_list = [
#   ("http://qudt.org/vocab/unit/L-PER-KiloGM",["http://qudt.org/vocab/unit/MilliL-PER-KiloGM"]),
#   ("http://qudt.org/vocab/unit/M3-PER-KiloGM",["http://qudt.org/vocab/unit/MilliM3-PER-KiloGM"])
# ]

# The sketch lists plain URIs; categorize_units(...) actually returns each of
# them wrapped in a (unit, None) tuple.
# not_determinable_unit_list = [
#   "http://qudt.org/vocab/unit/CentiM3-PER-GM",
#   "http://qudt.org/vocab/unit/DeciL-PER-GM",
#   "http://qudt.org/vocab/unit/MilliL-PER-GM",
#   "http://qudt.org/vocab/unit/MilliM3-PER-GM",
# ]
print(f"compound_prefix_unit_tuple_list: {compound_prefix_unit_tuple_list}")
print(f"not_determinable_unit_list: {not_determinable_unit_list}")

Units with multiple prefixes: ['http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/FemtoMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/MicroMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/MilliMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/NanoMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/PicoMOL-PER-KiloGM']
Units with no or single prefix: ['http://qudt.org/vocab/unit/MOL-PER-KiloGM', 'http://qudt.org/vocab/unit/MicroMOL-PER-GM', 'http://qudt.org/vocab/unit/MilliMOL-PER-GM']
Compound prefix unit tuple list: [('http://qudt.org/vocab/unit/MOL-PER-KiloGM', ['http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/FemtoMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/KiloMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/MicroMOL-PER-KiloGM', 'http://qudt.org/vocab/unit/MilliMOL-PER-KiloGM', 'http://qudt.org/vocab/u

In [12]:
# Reuse the quantity kind bindings of the QUDT SPARQL result that was already
# fetched into `qudt_qk`; no new request is made here.
qudt_quantitykind_bindings = qudt_qk["results"]["bindings"]
# print("num of quantities: ", len(qudt_quantitykind_bindings))
# pprint(qudt_quantitykind_bindings[0])


def get_composed_quantitiy_unit_dict(
    qudt_quantitykind_bindings=None, prefix_name_list=None
):
    """Collect the composed units of all quantity kinds that have no known unit.

    A quantity kind is skipped if one of its non prefixed applicable units, or
    one of its prefixed applicable units with the prefixes removed, is found
    among the already existing non prefixed units. For the remaining quantity
    kinds the prefixed applicable units are categorized with
    categorize_units(...) and the resulting tuples are merged into one dict.

    Parameters:
    qudt_quantitykind_bindings (list of dict): Quantity kind bindings; each
        binding is read at ["applicableUnits"]["value"] and
        ["quantity"]["value"].
    prefix_name_list (list of str): Prefix names used to detect and remove
        prefixes.

    Returns:
    dict: Result of merge_unify_tuples_to_dict(...) for all collected
        (unit, prefixed units or None) tuples.
    """
    collected_unit_tuples = []

    for binding in qudt_quantitykind_bindings:
        # Split the applicable units of the quantity kind
        plain_units, units_with_prefix = split_prefixed_applicable_units(
            binding["applicableUnits"]["value"]
        )

        # Step 1 - Remove prefixes from the prefixed applicable units
        stripped_units = remove_prefix_list(
            uri_list=units_with_prefix, prefix_list=prefix_name_list
        )

        # Step 2 - A non prefixed unit of this quantity kind already exists
        existing_units = lookup_existing_non_prefixed_units(
            all_non_prefixed_units=all_non_prefixed_units,
            lookup_list=plain_units,
        )
        if existing_units != []:
            continue

        # Step 3 - A prefix-stripped unit of this quantity kind already exists
        referenceable_units = lookup_existing_non_prefixed_units(
            all_non_prefixed_units=all_non_prefixed_units,
            lookup_list=stripped_units,
        )
        if referenceable_units != []:
            continue

        # Step 4 - Only composed units are left; group them by base unit
        compound_tuples, undetermined_tuples = categorize_units(
            unit_uri_list=units_with_prefix, prefix_list=prefix_name_list
        )
        quantity_tuples = compound_tuples + undetermined_tuples
        collected_unit_tuples += quantity_tuples

        # Report quantity kinds for which not a single unit could be derived
        if len(quantity_tuples) == 0:
            print(
                f"Quantity without any unit: {binding['quantity']['value']}"
            )
            print(f"prefixed_units: {units_with_prefix}")
            print(f"non_prefixed_units: {plain_units}")

            print(f"Compound prefix unit tuple list: {compound_tuples}")
            print(
                f"Not determinable unit tuple list: {undetermined_tuples}\n"
            )

    # Merge the aggregated tuple list to a unified dictionary
    return merge_unify_tuples_to_dict(collected_unit_tuples)


# Test the function: get_composed_quantitiy_unit_dict(...)
# The result is kept in the module-level name `composed_quantity_unit_dict`,
# which the following cells pass on to get_osw_quantity_unit_list(...).
composed_quantity_unit_dict = get_composed_quantitiy_unit_dict(
    qudt_quantitykind_bindings=qudt_quantitykind_bindings,
    prefix_name_list=prefix_name_list,
)

# Show the dict raw, as indented JSON, and its number of keys
print(f"Composed quantity unit dict: {composed_quantity_unit_dict}")
print(json.dumps(composed_quantity_unit_dict, indent=4))
print(f"num of composed units: {len(composed_quantity_unit_dict)}")

Composed quantity unit dict: {'http://qudt.org/vocab/unit/KiloGM-M2': None, 'http://qudt.org/vocab/unit/C-PER-KiloGM-SEC': None, 'http://qudt.org/vocab/unit/L-PER-KiloGM': ['http://qudt.org/vocab/unit/MilliL-PER-KiloGM'], 'http://qudt.org/vocab/unit/M3-PER-KiloGM': ['http://qudt.org/vocab/unit/MilliM3-PER-KiloGM'], 'http://qudt.org/vocab/unit/CentiM3-PER-GM': None, 'http://qudt.org/vocab/unit/DeciL-PER-GM': None, 'http://qudt.org/vocab/unit/MilliL-PER-GM': None, 'http://qudt.org/vocab/unit/MilliM3-PER-GM': None, 'http://qudt.org/vocab/unit/BQ-PER-KiloGM': ['http://qudt.org/vocab/unit/MilliBQ-PER-KiloGM', 'http://qudt.org/vocab/unit/MicroBQ-PER-KiloGM'], 'http://qudt.org/vocab/unit/RAD-M2-PER-KiloGM': None, 'http://qudt.org/vocab/unit/MicroM-PER-N': None, 'http://qudt.org/vocab/unit/J-M2-PER-KiloGM': None, 'http://qudt.org/vocab/unit/PER-KiloGM2': None, 'http://qudt.org/vocab/unit/KiloGM-PER-PA-SEC-M': None, 'http://qudt.org/vocab/unit/KiloGM-PER-J': None, 'http://qudt.org/vocab/unit/MO

### Get OSW List of Composed Prefix Quantity Units

In [13]:
def get_osw_quantity_unit_list(composed_unit_dict=None, debug=False):
    """Build the OSW composed unit objects for a dict of composed units.

    For every key of ``composed_unit_dict`` the matching QUDT unit entry is
    looked up in ``qudt_units_param_res`` and converted into a
    model.ComposedQuantityUnitWithUnitPrefix. The UUID of each object is a
    UUIDv5 derived from the unit IRI, so it is stable between runs.

    Parameters:
    composed_unit_dict (dict): Maps the IRI of a composed unit to the list of
        IRIs of its prefixed units, or to None if there are none. None is
        treated as an empty dict.
    debug (bool): If True, pretty-print the created objects and print how many
        units have no description.

    Returns:
    list: One model.ComposedQuantityUnitWithUnitPrefix per key of
        ``composed_unit_dict``, in the dict's iteration order.
    """
    if composed_unit_dict is None:
        composed_unit_dict = {}

    units = []
    missing_description_count = 0

    for unit_iri, prefixed_unit_iris in composed_unit_dict.items():
        name = unit_iri.split("/")[-1]

        match_unit_dict = match_object_json_path(
            qudt_units_param_res=qudt_units_param_res,
            identifier=unit_iri,
        )

        # The QUDT unit itself always matches; further matches are optional
        ontology_match_list = [match_unit_dict["applicableUnit"]["value"]]
        for match_key in ("dbpediaMatch", "siExactMatch"):
            if match_key in match_unit_dict:
                ontology_match_list.append(match_unit_dict[match_key]["value"])

        conversion_multiplier = None
        if "conversionMultiplierSN" in match_unit_dict:
            conversion_multiplier = match_unit_dict["conversionMultiplierSN"][
                "value"
            ]

        # "plainTextDescription" takes precedence over "description"
        description_list = None
        for description_key in ("plainTextDescription", "description"):
            if description_key in match_unit_dict:
                description_list = [
                    model.Description(
                        text=match_unit_dict[description_key]["value"],
                        lang="en",
                    )
                ]
                break
        if description_list is None:
            missing_description_count += 1

        qlabels = match_json_path_key(
            qudt_units_param_res,
            identifier=unit_iri,
            key="qlabels",
        )
        label_dict = dict_from_comma_separated_list(qlabels)
        # A label without language tag is stored under "en"
        if "" in label_dict:
            label_dict["en"] = label_dict[""]
            del label_dict[""]

        osw_label_list = [
            model.Label(text=value, lang=key)
            for key, value in label_dict.items()
        ]
        symbol = match_json_path_key(
            qudt_units_param_res,
            identifier=unit_iri,
            key="symbol",
        )
        # Deterministic UUID: the same IRI always yields the same object id
        _uuid = uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=unit_iri)

        if prefixed_unit_iris is not None:
            prefix_unit_list = [
                get_osw_prefix_unit(
                    qudt_units_param_res=qudt_units_param_res,
                    prefixes_list=sidf_prefixes_de,
                    url=url,
                    parent_uuid=_uuid,
                )
                for url in prefixed_unit_iris
            ]
        else:
            prefix_unit_list = None

        units.append(
            model.ComposedQuantityUnitWithUnitPrefix(
                uuid=_uuid,
                exact_ontology_match=ontology_match_list,
                name=name,
                label=osw_label_list,
                main_symbol=symbol,
                prefix_units=prefix_unit_list,
                description=description_list,
                conversion_factor_from_si=conversion_multiplier,
            )
        )

    if debug:
        pprint(units)
        print("not described units couter: ", missing_description_count)
        print("num of non prefixed units: ", len(composed_unit_dict.items()))

    return units


# Build the OSW objects for all composed units found above
# (debug=False suppresses the pretty-printed object dump).
osw_composed_prefix_quantity_unit_obj_list = get_osw_quantity_unit_list(
    composed_unit_dict=composed_quantity_unit_dict,
    debug=False,
)

# Hand the objects to the export helper under the "osl" ontology name.
# NOTE(review): presumably this writes composed_prefix_quantity_units.json
# into the ontology folder - confirm against export_osw_obj_json.
export_osw_obj_json(
    osw_obj_list=osw_composed_prefix_quantity_unit_obj_list,
    ontology_name="osl",
    file_name="composed_prefix_quantity_units.json",
)

## Upload Composed Prefix Quantity Units to DEV WIKI

In [14]:
# # Upload all composed prefix quantity units to DEV WIKI
# osw_obj.store_entity(
#     OSW.StoreEntityParam(
#         entities=osw_composed_prefix_quantity_unit_obj_list,
#         overwrite=True,
#     )
# )

## Define Quantity Kind

In [15]:
# Sanity check: count the quantity kinds with and without a "broader" entry
# and compare both numbers with the counts expected from the QUDT result.

expected_is_broader_count = 329
expected_has_broader_count = 441

qudt_quantitykind_bindings = qudt_qk["results"]["bindings"]

has_broader_counter = 0
for quantity_binding in qudt_quantitykind_bindings:
    if "broader" in quantity_binding:
        has_broader_counter += 1
# Every binding without "broader" is itself a broader quantity kind
is_broader_counter = len(qudt_quantitykind_bindings) - has_broader_counter

print(f"is_broader: {is_broader_counter}")
print(f"has_broader: {has_broader_counter}")
assert is_broader_counter == expected_is_broader_count
assert has_broader_counter == expected_has_broader_count

is_broader: 329
has_broader: 441


In [None]:
import uuid
from osw.utils.wiki import get_full_title
from osw.utils.strings import pascal_case

# Reuse the quantity kind bindings of the QUDT SPARQL result that was already
# fetched into `qudt_qk`; no new request is made here.
# NOTE(review): the empty-list assignment is overwritten by the line after it.
qudt_quantitykind_bindings = []
qudt_quantitykind_bindings = qudt_qk["results"]["bindings"]
# print("num of quantities: ", len(qudt_quantitykind_bindings))
# pprint(qudt_quantitykind_bindings[0])


def _osw_unit_item_title(unit_uri):
    """Return the "Item:OSW<uuid>" title derived deterministically from a unit URI."""
    return f"Item:OSW{str(uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=unit_uri)).replace('-', '')}"


def _resolve_quantity_title(
    quantity_uri, quantity_title_by_uri, broader_uri_by_uri
):
    """Follow the "broader" chain upwards until a quantity with a known title is reached.

    Returns the title, or None if the chain ends at an unknown URI or loops.
    """
    visited = set()
    while quantity_uri not in quantity_title_by_uri:
        if quantity_uri in visited or quantity_uri not in broader_uri_by_uri:
            return None
        visited.add(quantity_uri)
        quantity_uri = broader_uri_by_uri[quantity_uri]
    return quantity_title_by_uri[quantity_uri]


def get_osw_quantity_kind_obj_list(
    qudt_quantitykind_bindings=None, debug=False
):
    """Create the OSW QuantityKind and characteristic objects from QUDT bindings.

    A binding without a "broader" entry yields a model.QuantityKind and a
    model.FundamentalQuantityValueType (characteristic) that refers to it.
    A binding with a "broader" entry yields only a characteristic. It
    subclasses the characteristic of its broader quantity kind and refers to
    the QuantityKind of its nearest ancestor for which one was created. If no
    such ancestor exists in the bindings, a message is printed and ``quantity``
    is passed as None.

    Parameters:
    qudt_quantitykind_bindings (list of dict): Quantity kind bindings. Each
        binding is read at "quantity", "applicableUnits" and "labels" and
        optionally at "broader", "dbpediaMatch", "siExactMatch",
        "descriptions" and "plainTextDescriptions".
    debug (bool): Not used; kept so that existing calls keep working.

    Returns:
    tuple: A tuple containing two lists:
        - osw_quantitiy_list: The created model.QuantityKind objects.
        - osw_characteristic_list: The created characteristics, one per
          binding and in the order of the bindings.
    """
    osw_quantitiy_list = []
    osw_characteristic_list = []
    is_broader_counter = 0
    has_broader_counter = 0
    # Title of the created QuantityKind per quantity URI
    quantity_title_by_uri = {}
    # Broader quantity URI per quantity URI, used to walk up the hierarchy
    broader_uri_by_uri = {}
    # Characteristics with a broader quantity are created after the loop,
    # because their QuantityKind may only be created by a later binding:
    # (slot in osw_characteristic_list, broader URI, keyword arguments)
    deferred_characteristics = []
    # Identical for every binding, therefore computed at most once
    composed_quantity_unit_dict = None

    for quantity_binding in qudt_quantitykind_bindings:
        quantity_uri = quantity_binding["quantity"]["value"]

        # Close Ontology Match
        quantity_close_ontology_match_list = []
        characteristic_close_ontology_match_list = [quantity_uri]
        for match_key in ("dbpediaMatch", "siExactMatch"):
            if match_key in quantity_binding:
                match_value = quantity_binding[match_key]["value"]
                quantity_close_ontology_match_list.append(match_value)
                characteristic_close_ontology_match_list.append(match_value)

        # Get all the prefixed and non prefixed units of the quantity kind
        non_prefixed_units, prefixed_units = split_prefixed_applicable_units(
            quantity_binding["applicableUnits"]["value"]
        )

        # "plainTextDescriptions" takes precedence over "descriptions"; of
        # several " #,# " separated descriptions only the first one is used.
        description_list = None
        for description_key in ("plainTextDescriptions", "descriptions"):
            if description_key in quantity_binding:
                first_description = quantity_binding[description_key][
                    "value"
                ].split(" #,# ")[0]
                description_list = [
                    model.Description(
                        text=first_description.strip(),
                        lang="en",
                    )
                ]
                break

        qlabels = quantity_binding["labels"]["value"]
        label_dict = dict_from_comma_separated_list(qlabels)
        clean_label_dict = label_dict.copy()
        for lang, text in label_dict.items():
            # Remove item if "en-US" and "en" are present
            if lang == "en-US" and "en" in label_dict.keys():
                del clean_label_dict["en-US"]
            # Rename "en-US" to "en" if "en" is not present
            if lang == "en-US" and "en" not in label_dict.keys():
                clean_label_dict["en"] = text
                del clean_label_dict["en-US"]
            # Set default language to "en" if key is empty and "en" is not present
            if lang == "" and "en" not in label_dict.keys():
                clean_label_dict["en"] = clean_label_dict[""]
                del clean_label_dict[""]
            # Remove empty item if "en" is present
            if lang == "" and "en" in label_dict.keys():
                del clean_label_dict[""]

        osw_label_list = [
            model.Label(text=value, lang=key)
            for key, value in clean_label_dict.items()
        ]

        # Differentiate between is_broader and has_broader quantities/characteristics
        if "broader" not in quantity_binding:
            # This quantity is a broader quantity/characteristic
            is_broader_counter += 1
            # Algorithm to identify uploaded, referenceable or composed units
            osw_unit_uuids = []
            # Step 1 - Remove prefixes from the applicable units
            removed_prefixes_applicable_units = remove_prefix_list(
                uri_list=prefixed_units, prefix_list=prefix_name_list
            )
            # Step 2 - Lookup existing non prefixed units
            uploaded_units = lookup_existing_non_prefixed_units(
                all_non_prefixed_units=all_non_prefixed_units,
                lookup_list=non_prefixed_units,
            )
            # Step 3 - Check if any uploaded, referenceable or to be uploaded units are found
            if uploaded_units != []:
                # Set deterministic UUIDs for the non prefixed units
                osw_unit_uuids = [
                    _osw_unit_item_title(unit) for unit in non_prefixed_units
                ]
            else:
                # Set deterministic UUIDs for the referencable applicable non prefixed units
                referenceable_uploaded_units = (
                    lookup_existing_non_prefixed_units(
                        all_non_prefixed_units=all_non_prefixed_units,
                        lookup_list=removed_prefixes_applicable_units,
                    )
                )
                if referenceable_uploaded_units != []:
                    osw_unit_uuids = [
                        _osw_unit_item_title(unit)
                        for unit in referenceable_uploaded_units
                    ]
                else:
                    # Set deterministic UUIDs for the composed units. The dict
                    # covers all bindings, so it is built on first use only
                    # instead of once per quantity kind.
                    if composed_quantity_unit_dict is None:
                        composed_quantity_unit_dict = get_composed_quantitiy_unit_dict(
                            qudt_quantitykind_bindings=qudt_quantitykind_bindings,
                            prefix_name_list=prefix_name_list,
                        )
                    # Step 4 - Keep the composed units that apply to this quantity kind
                    for unit in composed_quantity_unit_dict.keys():
                        if unit in prefixed_units:
                            osw_unit_uuids.append(_osw_unit_item_title(unit))

            osw_quantity = model.QuantityKind(
                uuid=get_deterministic_url_uuid(uri=quantity_uri),
                label=osw_label_list,
                description=description_list,
                exact_ontology_match=[quantity_uri],
                close_ontology_match=quantity_close_ontology_match_list,
                units=osw_unit_uuids,
            )
            osw_quantitiy_list.append(osw_quantity)

            quantity_title = get_full_title(osw_quantity)
            quantity_title_by_uri[quantity_uri] = quantity_title

            characteristic = model.FundamentalQuantityValueType(
                characteristics=None,  # only used for existing references
                subclass_of=[
                    "Category:OSW4082937906634af992cf9a1b18d772cf"
                ],  # QuantityValue
                quantity=quantity_title,
                uuid=get_deterministic_url_uuid(
                    prefix="characteristic:",
                    uri=quantity_uri,
                ),
                description=description_list,
                name=pascal_case(osw_label_list[0].text),
                label=osw_label_list,
                close_ontology_match=characteristic_close_ontology_match_list,
            )
            osw_characteristic_list.append(characteristic)

        else:
            # This characteristic has a broader characteristic/quantity
            has_broader_counter += 1
            broader_uri = quantity_binding["broader"]["value"]
            broader_uri_by_uri.setdefault(quantity_uri, broader_uri)
            broader_characteristic = get_osw_uuid_str(
                namespace="Category:",
                _uuid=get_deterministic_url_uuid(
                    prefix="characteristic:",
                    uri=broader_uri,
                ),
            )
            # Reserve the slot now so that the characteristics keep the order
            # of the bindings; the object itself is created after the loop.
            osw_characteristic_list.append(None)
            deferred_characteristics.append(
                (
                    len(osw_characteristic_list) - 1,
                    broader_uri,
                    dict(
                        characteristics=None,  # only used for existing references
                        subclass_of=[
                            broader_characteristic
                        ],  # Broader Characteristic
                        uuid=get_deterministic_url_uuid(
                            prefix="characteristic:",
                            uri=quantity_uri,
                        ),
                        description=description_list,
                        name=pascal_case(osw_label_list[0].text),
                        label=osw_label_list,
                        close_ontology_match=characteristic_close_ontology_match_list,
                    ),
                )
            )

    # All QuantityKind objects exist now: link every deferred characteristic
    # to the QuantityKind of its own hierarchy instead of to whichever
    # QuantityKind happened to be created last.
    for slot, broader_uri, characteristic_kwargs in deferred_characteristics:
        quantity_title = _resolve_quantity_title(
            broader_uri, quantity_title_by_uri, broader_uri_by_uri
        )
        if quantity_title is None:
            print(f"No QuantityKind found for broader quantity: {broader_uri}")
        osw_characteristic_list[slot] = model.FundamentalQuantityValueType(
            quantity=quantity_title, **characteristic_kwargs
        )

    print(f"NumOfQuantities: {len(osw_quantitiy_list)}")
    print(f"NumOfCharacteristics: {len(osw_characteristic_list)}")
    assert len(osw_quantitiy_list) == is_broader_counter
    assert (
        len(osw_characteristic_list)
        == has_broader_counter + is_broader_counter
    )
    return osw_quantitiy_list, osw_characteristic_list


# Get the OSW QuantityKind objects and their characteristics
osw_quantity_kind_obj_list, osw_characteristic_obj_list = (
    get_osw_quantity_kind_obj_list(
        qudt_quantitykind_bindings=qudt_quantitykind_bindings, debug=False
    )
)

# Round-trip through the objects' own JSON serialization to get plain
# dicts/lists that json.dumps can handle.
osw_quantity_json_dumpable = [
    json.loads(quantity.json()) for quantity in osw_quantity_kind_obj_list
]

osw_characteristic_json_dumpable = [
    json.loads(characteristic.json())
    for characteristic in osw_characteristic_obj_list
]

osw_quantity_json_dump = json.dumps(osw_quantity_json_dumpable, indent=4)
osw_characteristic_json_dump = json.dumps(
    osw_characteristic_json_dumpable, indent=4
)
# Write both dumps to the current working directory
with open("osw_quantitykind.json", "w") as f:
    f.write(osw_quantity_json_dump)

with open("osw_characteristic.json", "w") as f:
    f.write(osw_characteristic_json_dump)


export_osw_obj_json(
    osw_obj_list=osw_quantity_kind_obj_list,
    ontology_name="osl",
    file_name="quantitykind.json",
)
# characteristics.json must contain the characteristic objects; the quantity
# kind list was exported here a second time by mistake.
export_osw_obj_json(
    osw_obj_list=osw_characteristic_obj_list,
    ontology_name="osl",
    file_name="characteristics.json",
)

### Upload Quantity Kind to DEV WIKI

In [23]:
# osw_obj.store_entity(
#     OSW.StoreEntityParam(
#         entities=osw_quantity_kind_obj_list,
#         overwrite=True,
#     )
# )

Entities to be uploaded have been sorted according to their type.
Now uploading entities of class type 'Category:OSW00fbd6feecb5408997ca18d4e681a131' (QuantityKind). No class specific overwrite setting found. Using fallback option 'OverwriteOptions.true' for all entities of this class.
Performing parallel execution of store_entity_ (329 tasks).
[########################################] | 100% Completed | 360.88 s


### Upload Characteristic objects to DEV WIKI

In [25]:
# Single Characteristic Test
# --------------------------
# osw_obj.store_entity(
#     OSW.StoreEntityParam(
#         entities=osw_characteristic_obj_list[0],
#         overwrite=True,
#     )
# )

# All Characteristics
# -------------------
# Live write: passes every object of osw_characteristic_obj_list to
# osw_obj.store_entity with overwrite=True.
# NOTE(review): unlike the other upload/delete cells this call is not
# commented out, so it runs on "Run All" - confirm that this is intended.
osw_obj.store_entity(
    OSW.StoreEntityParam(
        entities=osw_characteristic_obj_list,
        overwrite=True,
    )
)

### Delete Quantity objects from DEV WIKI

In [19]:
# All Quantities
# -------------------
# osw_obj.delete_entity(
#     OSW.DeleteEntityParam(
#         entities=osw_quantity_kind_obj_list,
#         overwrite=True,
#     )
# )

### Delete Characteristic objects from DEV WIKI

In [24]:
# Single Characteristic Test
# --------------------------
# osw_obj.delete_entity(
#     OSW.DeleteEntityParam(
#         entities=osw_characteristic_obj_list[0],
#         overwrite=True,
#     )
# )

# All Characteristics
# -------------------
# osw_obj.delete_entity(
#     OSW.DeleteEntityParam(
# entities=osw_characteristic_obj_list,
# overwrite=True,
#     )
# )

APIError: ('badtoken', 'Invalid CSRF token.', 'See https://wiki-dev.open-semantic-lab.org/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/&gt; for notice of API deprecations and breaking changes.')