# 2. Units

## Fetch prefixes name list

In [1]:
import requests
from pprint import pprint
import json
import uuid

sidf_prefixes_res_de = requests.get(
    url="https://si-digital-framework.org/SI/prefixes",
    params={"lang": "de"},
)

# Convert result to dict
sidf_prefixes_de = sidf_prefixes_res_de.json()
print(sidf_prefixes_de)
prefix_name_list = [item["label"] for item in sidf_prefixes_de]
print(prefix_name_list)

[{'label': 'quecto', 'symbol': 'q', 'scalingFactor': 1e-30, 'prefixId': 'quecto', 'scalingFactorStr': '1e-30', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/quecto'}, {'label': 'ronto', 'symbol': 'r', 'scalingFactor': 1e-27, 'prefixId': 'ronto', 'scalingFactorStr': '1e-27', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/ronto'}, {'label': 'yocto', 'symbol': 'y', 'scalingFactor': 1e-24, 'prefixId': 'yocto', 'scalingFactorStr': '1e-24', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/yocto'}, {'label': 'zepto', 'symbol': 'z', 'scalingFactor': 1e-21, 'prefixId': 'zepto', 'scalingFactorStr': '1e-21', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://

## Fetch all quantities (SI) from Qudt with its units

In [2]:
from utils import sparql_wrapper
from pprint import pprint

sparql_qudt_qk = sparql_wrapper.Sparql(
    endpoint="https://www.qudt.org/fuseki/qudt/sparql",
    src_filepath="../ontology/qudt/sparql/quantitykind.sparql",
    tgt_filepath="../ontology/qudt/data/quantitykind.json",
    debug=False,
)
qudt_qk = sparql_qudt_qk.execQuery()
sparql_qudt_qk.writeJsonFile(data=qudt_qk)
print("num of quantitykind: ", len(qudt_qk["results"]["bindings"]))
pprint(qudt_qk)

Reading SPARQL Query from c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/sparql/quantitykind.sparql
Writing JSON data to c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/data/quantitykind.json
num of quantitykind:  329
{'head': {'vars': ['quantity',
                   'dimension',
                   'labels',
                   'descriptions',
                   'plainTextDescriptions',
                   'applicableUnits',
                   'ucumCodes']},
 'results': {'bindings': [{'applicableUnits': {'type': 'literal',
                                               'value': 'http://qudt.org/vocab/unit/J, '
                                                        'http://qudt.org/vocab/unit/MegaEV, '
                                                        'http://qudt.org/vocab/unit/EV, '
                                                        'http

## Split applicableUnits into prefixedUnits and nonPrefixedUnits

In [3]:
from utils import sparql_wrapper
from pprint import pprint
import json
import requests

quant_kind_list = json.load(open("ontology/qudt/data/quantitykind.json", "r"))[
    "results"
]["bindings"]

pprint(quant_kind_list[5]["applicableUnits"])


def get_prefix_list():

    sidf_prefixes_res_de = requests.get(
        url="https://si-digital-framework.org/SI/prefixes",
        params={"lang": "de"},
    )

    # Convert result to dict
    sidf_prefixes_de = sidf_prefixes_res_de.json()
    print(sidf_prefixes_de)
    prefix_name_list = [item["label"] for item in sidf_prefixes_de]
    print(prefix_name_list)
    return prefix_name_list


prefix_name_list = get_prefix_list()


def get_unit_prefix(unit_str):
    for prefix in prefix_name_list:
        if prefix in unit_str.lower():
            return prefix
    return None


def split_prefixed_applicable_units(applicable_units_str, debug=False):
    non_prefixed_units = []
    prefixed_units = []
    for unit_str in applicable_units_str.split(", "):

        if debug:
            print(unit_str)
            print(get_unit_prefix(unit_str))
        if get_unit_prefix(unit_str) == None:
            if debug:
                print("no prefix: ", unit_str)
            non_prefixed_units.append(unit_str)
        else:
            if debug:
                print(
                    "prefix: ",
                    get_unit_prefix(unit_str),
                    "found in ",
                    unit_str,
                )
            prefixed_units.append(unit_str)
    return non_prefixed_units, prefixed_units


all_prefixed_units = []
all_non_prefixed_units = []
for quant_kind in quant_kind_list:
    non_prefixed, prefixed = split_prefixed_applicable_units(
        quant_kind["applicableUnits"]["value"]
    )
    all_prefixed_units = all_prefixed_units + prefixed
    all_non_prefixed_units = all_non_prefixed_units + non_prefixed

all_prefixed_units = list(set(all_prefixed_units))
all_non_prefixed_units = list(set(all_non_prefixed_units))

print(len(all_prefixed_units))
print(len(all_non_prefixed_units))


import re


def remove_kilo(uri=None, debug=False):
    pattern = re.compile(r"kilo", re.IGNORECASE)
    if debug:
        print(pattern.sub("", uri))
    return pattern.sub("", uri)


unit_str = "http://qudt.org/vocab/unit/KiloGM-PER-M3"

a = remove_kilo(unit_str, debug=False)


def remove_kilo_list(uri_list, debug=False):
    _list = [remove_kilo(uri, debug) for uri in uri_list]
    if debug:
        print(_list)
    return _list


kilo_list = [
    "http://qudt.org/vocab/unit/KiloGM-PER-M3",
    "http://qudt.org/vocab/unit/CentiMOL-PER-KiloGM",
]

b = remove_kilo_list(kilo_list, debug=True)

{'type': 'literal', 'value': 'http://qudt.org/vocab/unit/M2-PER-SEC'}
[{'label': 'quecto', 'symbol': 'q', 'scalingFactor': 1e-30, 'prefixId': 'quecto', 'scalingFactorStr': '1e-30', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/quecto'}, {'label': 'ronto', 'symbol': 'r', 'scalingFactor': 1e-27, 'prefixId': 'ronto', 'scalingFactorStr': '1e-27', 'doi': 'https://doi.org/10.59161/cgpm2022res3e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/ronto'}, {'label': 'yocto', 'symbol': 'y', 'scalingFactor': 1e-24, 'prefixId': 'yocto', 'scalingFactorStr': '1e-24', 'doi': 'https://doi.org/10.59161/cgpm1991res4e', 'dataType': 'http://www.w3.org/2001/XMLSchema#float', 'pid': 'https://si-digital-framework.org/SI/prefixes/yocto'}, {'label': 'zepto', 'symbol': 'z', 'scalingFactor': 1e-21, 'prefixId': 'zepto', 'scalingFactorStr': '1e-21',

In [4]:
# Functions to extract the path of a given URL
def get_path(url):
    return url.split("/")[-1]


# def get_path_list(url_list):
#     return [get_path(url) for url in url_list]

# def get_iri_without_path(iri):
#     return iri.split("/")[:-1]


def get_main_string(unit_str, prefix_name_list):
    for prefix in prefix_name_list:
        capitalized_prefix = prefix.capitalize()
        if capitalized_prefix in unit_str:
            return unit_str.replace(capitalized_prefix, "")
    return unit_str


def merge_prefixed_and_non_prefixed_units(
    all_non_prefixed_units, all_prefixed_units, prefix_name_list
):
    unit_dict = {}
    for non_prefixed_unit in all_non_prefixed_units:
        prefixed_units = []
        # Match the non prefixed unit with all the prefixed units
        for prefixed_unit in all_prefixed_units:
            if get_path(non_prefixed_unit) == get_main_string(
                get_path(prefixed_unit), prefix_name_list
            ):
                prefixed_units.append(prefixed_unit)
        unit_dict[non_prefixed_unit] = {"prefixed_units": prefixed_units}

    return unit_dict


unit_dict = merge_prefixed_and_non_prefixed_units(
    all_non_prefixed_units, all_prefixed_units, prefix_name_list
)
pprint(unit_dict)

{'http://qudt.org/vocab/unit/A': {'prefixed_units': ['http://qudt.org/vocab/unit/MegaA',
                                                     'http://qudt.org/vocab/unit/MilliA',
                                                     'http://qudt.org/vocab/unit/PicoA',
                                                     'http://qudt.org/vocab/unit/KiloA',
                                                     'http://qudt.org/vocab/unit/MicroA']},
 'http://qudt.org/vocab/unit/A-HR': {'prefixed_units': ['http://qudt.org/vocab/unit/KiloA-HR',
                                                        'http://qudt.org/vocab/unit/MilliA-HR']},
 'http://qudt.org/vocab/unit/A-M2': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-M2-PER-J-SEC': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-DEG_C': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-J': {'prefixed_units': []},
 'http://qudt.org/vocab/unit/A-PER-M': {'prefixed_units': ['http://qudt.org/vocab/unit/KiloA-PER-M

## Fetch OSL Schema for UnitPrefix, QuantityUnit, QuantityUnit

In [5]:
from osw.core import OSW
from osw.express import OswExpress
import osw.model.entity as model


required_schemas = [
    "Category:OSW99e0f46a40ca4129a420b4bb89c4cc45",  # Unit prefix
    "Category:OSWd2520fa016844e01af0097a85bb25b25",  # Quantity Unit
    "Category:OSW00fbd6feecb5408997ca18d4e681a131",  # Quantity Kind
    "Category:OSW08271a9eb60248a3a5cb7a13346255eb",  # Composed Quantity Unit with Unit Prefix (Ausreiser)
]


def update_local_osw(osw_obj):
    print("fetch schemas")
    osw_obj.fetch_schema(
        OSW.FetchSchemaParam(
            schema_title=required_schemas,
            mode="replace",
        )
    )


if __name__ == "__main__":
    # Authentication
    osw_obj = OswExpress(
        domain="wiki-dev.open-semantic-lab.org",  # cred_filepath=pwd_file_path
    )
    update_local_osw(osw_obj)

  self.pattern = re.compile(self.pattern)
'module 'osw.model.entity' has no attribute 'File''You will be now have to connect to an OSW instance to fetch the dependencies from!
  warn(


Fetch Category:OSW3e3f5dd4f71842fbb8f270e511af8031
Fetch Category:OSWff333fd349af4f65a69100405a9e60c7
Fetch Category:OSW2ac4493f8635481eaf1db961b63c8325
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d
Fetch Category:OSW05b244d0a669436e96fe4e1631d5a171
Fetch Category:OSWff333fd349af4f65a69100405a9e60c7
Fetch Category:OSW2ac4493f8635481eaf1db961b63c8325
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Entity
ObjectStatement
DataStatement
QuantityStatement
Item
Data
File
LocalFile


{'LocalFile': 'Category:OSW3e3f5dd4f71842fbb8f270e511af8031', 'WikiFile': 'Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d'}
Its recommended to restart the kernel, to apply all changes seamlessly.
  warn(


fetch schemas
Fetch Category:OSW99e0f46a40ca4129a420b4bb89c4cc45
Fetch Category:OSWd02741381aaa4709ae0753a0edc341ce
Fetch Category:OSWcbb09a36336740c6a2cd62db9bf647ec
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSWd2520fa016844e01af0097a85bb25b25
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Characteristic
OntologyRelated
Entit

## Instanciate Example QuantityUnit (former Unit of Measure)

In [6]:
from pprint import pprint

title = "Item:OSWc73852e7049f42e7b5282866f3839f5e"  # Meter


meter_u = osw_obj.load_entity(title)


pprint(meter_u)

Fetch Category:OSW7199b9568d96476cacfe30060c64f371
Fetch Category:OSWd2520fa016844e01af0097a85bb25b25
Fetch Category:Item
Fetch Category:Entity
Fetch JsonSchema:Label
Fetch JsonSchema:Label
Fetch JsonSchema:Description
Fetch JsonSchema:Statement
Fetch JsonSchema:Label
Fetch JsonSchema:Meta
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
Fetch Category:OSW6ef70c808fb54abbbacb059c285713d4
Fetch Category:OSW93ccae36243542ceac6c951450a81d47
ReadAccess
AccessRestrictions
Label
Description
WikiPage
Meta
Characteristic
OntologyRelated
Entity
ObjectStatement
DataStatement
QuantityStatement
Item
IntangibleItem
Enumeration
PrefixUnit
Items
BaseUnitOfMeasure(type=['Category:OSW7199b9568d96476cacfe30060c64f371'], uuid=UUID('c73852e7-049f-42e7-b528-2866f3839f5e'), exact_ontology_match=['https://qudt.org/vocab/unit/M', 'https://si-digital-framework.org/SI/units/metre', ' http://dbpedia.org/resource/Metre ', 'http://www.ontology-of-units-of-measur

## Fetch Units from Ontology and complete properties of unit dictionary

In [7]:
from utils import sparql_wrapper
from pprint import pprint

sparql_qudt_unit_params = sparql_wrapper.Sparql(
    endpoint="https://www.qudt.org/fuseki/qudt/sparql",
    src_filepath="../ontology/qudt/sparql/units.sparql",
    tgt_filepath="../ontology/qudt/data/units.json",
    debug=False,
)
qudt_units_param_res = sparql_qudt_unit_params.execQuery()
sparql_qudt_unit_params.writeJsonFile(data=qudt_units_param_res)
print(
    f"num results.bindings: {len(qudt_units_param_res['results']['bindings'])}"
)
pprint(qudt_units_param_res)

Reading SPARQL Query from c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/sparql/units.sparql
Writing JSON data to c:\Users\raeder\Desktop\dev\Gitlab\osw\apps\python-lvl2\quantities-units\src\quantities-units\utils\../ontology/qudt/data/units.json
num results.bindings: 950
{'head': {'vars': ['applicableUnit',
                   'dimensionVector',
                   'symbol',
                   'conversionMultiplierSN',
                   'dbpediaMatch',
                   'siExactMatch',
                   'plainTextDescription',
                   'description',
                   'ucumCodes',
                   'applicableSystems',
                   'qlabels']},
 'results': {'bindings': [{'applicableSystems': {'type': 'literal',
                                                 'value': 'http://qudt.org/vocab/sou/SI, '
                                                          'http://qudt.org/vocab/sou/CGS, '
      

In [8]:
# from jsonpath_ng import jsonpath, parse

# # use json path to extract all another info of same level in results.bindings where the identifier is results.bindings[*].applicableUnit.value and the value is "http://qudt.org/vocab/unit#Meter"
# jsonpath_expr = parse(
#     "$.results.bindings[?(@.applicableUnit.value == 'http://qudt.org/vocab/unit#Meter')]"
# )
# meter_unit_info = [match.value for match in jsonpath_expr.find(qudt_units_param_res)]

from jsonpath_ng.ext import parse
import uuid

# Use json path to extract all another info of same level in results.bindings where the identifier is results.bindings[*].applicableUnit.value and the value is "http://qudt.org/vocab/unit#Meter"
jsonpath_expr = parse(
    '$.results.bindings[?(@.applicableUnit.value = "http://qudt.org/vocab/unit/MilliPA-SEC")]'
)
# meter_unit_info = [match.value for match in jsonpath_expr.find(qudt_units_param_res)]
# pprint(meter_unit_info)

pprint(jsonpath_expr.find(qudt_units_param_res)[0].value)


def match_json_path_key(qudt_units_param_res, identifier="", key=""):
    jsonpath_expr = parse(
        f'$.results.bindings[?(@.applicableUnit.value = "{identifier}")].{key}.value'
    )
    return jsonpath_expr.find(qudt_units_param_res)[0].value


def match_object_json_path(qudt_units_param_res=None, identifier=""):
    # print(f"IDENTIFIER: {identifier}")
    jsonpath_expr = parse(
        f'$.results.bindings[?(@.applicableUnit.value = "{identifier}")]'
    )

    return jsonpath_expr.find(qudt_units_param_res)[0].value


def get_prefix_uuid(data=[], prefix=""):
    jsonpath_expr = parse(f'$[?(@.label = "{prefix}")].pid')
    return uuid.uuid5(
        namespace=uuid.NAMESPACE_URL, name=jsonpath_expr.find(data)[0].value
    )


# test
# pprint(
#     match_json_path_key(
#         qudt_units_param_res,
#         identifier="http://qudt.org/vocab/unit/KiloBYTE",
#         key="ucumCodes",
#     )
# )

pprint(
    match_object_json_path(
        qudt_units_param_res=qudt_units_param_res,
        identifier="http://qudt.org/vocab/unit/KiloBYTE",
    )
)

pprint(get_prefix_uuid(sidf_prefixes_de, "kilo"))

{'applicableSystems': {'type': 'literal',
                       'value': 'http://qudt.org/vocab/sou/SI, '
                                'http://qudt.org/vocab/sou/CGS, '
                                'http://qudt.org/vocab/sou/CGS-EMU, '
                                'http://qudt.org/vocab/sou/CGS-GAUSS'},
 'applicableUnit': {'type': 'uri',
                    'value': 'http://qudt.org/vocab/unit/MilliPA-SEC'},
 'conversionMultiplierSN': {'datatype': 'http://www.w3.org/2001/XMLSchema#double',
                            'type': 'literal',
                            'value': '0.001e0'},
 'description': {'datatype': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML',
                 'type': 'literal',
                 'value': '0.001-fold of the product of the SI derived unit '
                          'pascal and the SI base unit second'},
 'dimensionVector': {'type': 'uri',
                     'value': 'http://qudt.org/vocab/dimensionvector/A0E0L-1I0M1H0T-1D0'},
 'plainTextDe

## Map Attributes from Ontology Data to OSL Schema

In [9]:
import uuid

units = []


def dict_from_comma_separated_list(qlabel):
    parts = qlabel.split(", ")
    ret = {}
    for part in parts:
        value, key = part.split("@")
        ret[key] = value
    return ret


def get_osw_prefix_unit(
    qudt_units_param_res=None, prefixes_list=None, url=None, parent_uuid=None
):

    prefix_unit_dict = match_object_json_path(
        qudt_units_param_res=qudt_units_param_res,
        identifier=url,
    )
    # print(prefix_unit_dict)
    ontology_match_list = [prefix_unit_dict["applicableUnit"]["value"]]
    # print("dbpediaMatch" in prefix_unit_dict.keys())
    # print(prefix_unit_dict.keys())
    if "dbpediaMatch" in prefix_unit_dict.keys():
        ontology_match_list.append(prefix_unit_dict["dbpediaMatch"]["value"])
        # print(prefix_unit_dict["dbpediaMatch"]["value"])
    if "siExactMatch" in prefix_unit_dict:
        ontology_match_list.append(prefix_unit_dict["siExactMatch"]["value"])
    conversion_multiplier = None
    if "conversionMultiplierSN" in prefix_unit_dict:

        conversion_multiplier = prefix_unit_dict["conversionMultiplierSN"][
            "value"
        ]
        # print(conversion_multiplier)

    _uuid = str(uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=url))
    # print(_uuid)
    prefix_unit = model.PrefixUnit(
        uuid=_uuid,
        osw_id="Item:OSW"
        + str(parent_uuid).replace("-", "")
        + "#OSW"
        + _uuid.replace("-", ""),
        prefix="Item:OSW"
        + str(get_prefix_uuid(prefixes_list, get_unit_prefix(url))).replace(
            "-", ""
        ),
        # prefix_symbol="",  # Causes edge case error
        main_symbol=match_json_path_key(
            qudt_units_param_res,
            identifier=url,
            key="symbol",
        ),
        exact_ontology_match=ontology_match_list,
        conversion_factor_from_si=conversion_multiplier,
        description=[{"text": "Description", "lang": "en"}],
    )

    return prefix_unit


i = 0
# Iteration over the unit_dict to create the QuantityUnit objects
for non_prefixed_unit_iri, unit_property_dict in unit_dict.items():
    name = non_prefixed_unit_iri.split("/")[-1]

    match_unit_dict = match_object_json_path(
        qudt_units_param_res=qudt_units_param_res,
        identifier=non_prefixed_unit_iri,
    )
    ontology_match_list = [match_unit_dict["applicableUnit"]["value"]]
    if "dbpediaMatch" in match_unit_dict.keys():
        ontology_match_list.append(match_unit_dict["dbpediaMatch"]["value"])
    if "siExactMatch" in match_unit_dict:
        ontology_match_list.append(match_unit_dict["siExactMatch"]["value"])
    conversion_multiplier = None
    if "conversionMultiplierSN" in match_unit_dict:
        conversion_multiplier = match_unit_dict["conversionMultiplierSN"][
            "value"
        ]
    # sequence of description before plainTextDescription is essential for overwriting
    description_list = None
    plainTextDescription = None
    if "description" in match_unit_dict:
        plainTextDescription = match_unit_dict["description"]["value"]
        description_list = [
            model.Description(
                text=plainTextDescription,
                lang="en",
            )
        ]
        # print(description_list)
    # overwrite description if plainTextDescription is present
    if "plainTextDescription" in match_unit_dict:
        plainTextDescription = match_unit_dict["plainTextDescription"]["value"]
        description_list = [
            model.Description(
                text=plainTextDescription,
                lang="en",
            )
        ]
        # print(description_list)
    if description_list == None:
        i += 1
        # print("No description found for ", name)

    qlabels = match_json_path_key(
        qudt_units_param_res,
        identifier=non_prefixed_unit_iri,
        key="qlabels",
    )

    label_dict = dict_from_comma_separated_list(qlabels)
    ### clean missing "en"
    if "" in label_dict.keys():
        label_dict["en"] = label_dict[""]
        del label_dict[""]

    osw_label_list = [
        model.Label(text=value, lang=key) for key, value in label_dict.items()
    ]
    symbol = match_json_path_key(
        qudt_units_param_res,
        identifier=non_prefixed_unit_iri,
        key="symbol",
    )
    _uuid = uuid.uuid5(
        namespace=uuid.NAMESPACE_URL, name=non_prefixed_unit_iri
    )
    # print(_uuid)
    prefix_unit_list = [
        get_osw_prefix_unit(
            qudt_units_param_res=qudt_units_param_res,
            prefixes_list=sidf_prefixes_de,
            url=url,
            parent_uuid=_uuid,
        )
        for url in unit_property_dict["prefixed_units"]
    ]

    unit = model.QuantityUnit(
        uuid=_uuid,
        exact_ontology_match=ontology_match_list,
        name=name,
        label=osw_label_list,
        main_symbol=symbol,
        prefix_units=prefix_unit_list,
        description=description_list,
        conversion_factor_from_si=conversion_multiplier,
    )
    units.append(unit)

pprint(units)
print("not described units couter: ", i)
print("num of non prfixed units: ", len(unit_dict.items()))

[QuantityUnit(type=['Category:OSWd2520fa016844e01af0097a85bb25b25'], uuid=UUID('7f1fb9d6-ddeb-5fb7-a8a3-9067553a6dc1'), exact_ontology_match=['http://qudt.org/vocab/unit/WB', 'http://dbpedia.org/resource/Weber', 'https://si-digital-framework.org/SI/units/weber'], close_ontology_match=None, rdf_type=None, iri=None, name='WB', label=[Label(text='Weber', lang='de'), Label(text='weber', lang='en')], short_name=None, query_label=None, description=[Description(text='The SI unit of magnetic flux. "Flux" is the rate (per unit of time) at which something crosses a surface perpendicular to the flow. The weber is a large unit, equal to $10^{8}$ maxwells, and practical fluxes are usually fractions of one weber. The weber is the magnetic flux which, linking a circuit of one turn, would produce in it an electromotive force of 1 volt if it were reduced to zero at a uniform rate in 1 second. In SI base units, the dimensions of the weber are $(kg \\cdot m^2)/(s^2 \\cdot A)$. The weber is commonly expre

## HANDLE Kilogram and Gram

In [10]:
# Step 1 - Create the QuantityUnit object of Kilogram
from pprint import pprint

# check if http://qudt.org/vocab/unit/KiloGM is in the list of all_prefixed_units
if "http://qudt.org/vocab/unit/KiloGM" in all_prefixed_units:
    print("http://qudt.org/vocab/unit/KiloGM is in all_prefixed_units")


unit_kg = model.PrefixUnit

unit = model.QuantityUnit(
    uuid=_uuid,
    exact_ontology_match=ontology_match_list,
    name=name,
    label=osw_label_list,
    main_symbol=symbol,
    # prefix_units=prefix_unit_list, # must be empty
    description=description_list,
    conversion_factor_from_si=conversion_multiplier,
)

http://qudt.org/vocab/unit/KiloGM is in all_prefixed_units


## Upload Units to DEV WIKI

In [11]:
# # Test of single unit
# # test_unit = units[6]
# # pprint(test_unit.dict())

# from osw.core import OSW

# # Upload single unit to OSW
# # osw_obj.delete_entity(test_unit)
# # osw_obj.store_entity(test_unit)

# # Upload all units to OSW
# # osw_obj.delete_entity(units)
# # osw_obj.store_entity(units)
# osw_obj.store_entity(OSW.StoreEntityParam(entities=units, overwrite=True))

# Quantity Kind

## Check uploaded units with quantities for unit reference

In [17]:
import uuid


# Already fetched all Quantity Kind from Qudt
qudt_quantitykind_bindings = qudt_qk["results"]["bindings"]
print("num of quantities: ", len(qudt_quantitykind_bindings))
# pprint(qudt_quantitykind_bindings[0])

match_uploaded_units_counter = 0
match_not_uploaded_units_counter = 0
undefined_non_prefixed_units_counter = 0
undefined_descriptions_counter = 0

for quantity_binding in qudt_quantitykind_bindings:

    # Set deterministic UUID for the quantity kind
    _uuid_quantity = uuid.uuid5(
        namespace=uuid.NAMESPACE_URL,
        name=quantity_binding["quantity"]["value"],
    )

    # Get all the prefixed and non prefixed units of the quantity kind
    non_prefixed_units, prefixed_units = split_prefixed_applicable_units(
        quantity_binding["applicableUnits"]["value"]
    )

    # Set deterministic UUIDs for the non prefixed units
    osw_item_uuids_non_prefixed_units = [
        f"Item:OSW{str(uuid.uuid5(namespace=uuid.NAMESPACE_URL, name=unit)).replace('-', '')}"
        for unit in non_prefixed_units
    ]

    # sequence of "description" before "plainTextDescription" is essential for overwriting
    description_list = None
    if "descriptions" in quantity_binding:
        description_split_list = quantity_binding["descriptions"][
            "value"
        ].split(" #,# ")
        # if len(description_split_list) > 1:
        #     # This is just one case, set default to use the first description and english
        #     print(description_split_list)
        description_list = [
            model.Description(
                text=description_split_list[0].strip(),
                lang="en",
            )
        ]
    # Overwrite description if plainTextDescription is present
    if "plainTextDescriptions" in quantity_binding:
        plain_description_split_list = quantity_binding[
            "plainTextDescriptions"
        ]["value"].split(" #,# ")
        # if len(plain_description_split_list) > 1:
        #     # This is just one case, set default to use the first description and englishSW
        #     print(plain_description_split_list)
        description_list = [
            model.Description(
                text=plain_description_split_list[0].strip(),
                lang="en",
            )
        ]

    qlabels = quantity_binding["labels"]["value"]
    label_dict = dict_from_comma_separated_list(qlabels)
    # print(label_dict)
    clean_label_dict = label_dict.copy()
    for lang, text in label_dict.items():
        # print(lang, text)
        # Remove item if "en-US" and "en" are present
        if lang == "en-US" and "en" in label_dict.keys():
            del clean_label_dict["en-US"]
        # Rename "en-US" to "en" if "en" is not present
        if lang == "en-US" and "en" not in label_dict.keys():
            clean_label_dict["en"] = text
            del clean_label_dict["en-US"]
        # Set default language to "en" if key is empty and "en" is not present
        if lang == "" and "en" not in label_dict.keys():
            clean_label_dict["en"] = clean_label_dict[""]
            del clean_label_dict[""]
        # Remove empty item if "en" is present
        if lang == "" and "en" in label_dict.keys():
            del clean_label_dict[""]

        # print(f"clean_label_dict: {clean_label_dict}")

    osw_label_list = [
        model.Label(text=value, lang=key)
        for key, value in clean_label_dict.items()
    ]

    # if "Polarizability" in quantity_binding["quantity"]["value"]:
    #     print(description_list)
    #     print(osw_label_list)
    #     print(
    #         f"osw_item_uuid_non_prefixed_units: {osw_item_uuids_non_prefixed_units}"
    #     )

    # Match uploaded Unit with applicable units of the Quantity Kind
    gramm_units = []
    matched_uploaded_units = []
    mateched_not_uploaded_units = []
    if non_prefixed_units != []:
        match_uploaded_units_counter += 1
    else:
        #
        gramm_units = remove_kilo_list(prefixed_units)
        # print(f"Gramm units: {gramm_units}")
        # print(f"Try to match with uploaded units")
        for gramm_unit in gramm_units:
            if gramm_unit in all_non_prefixed_units:
                print(
                    f"Quantity exists: {quantity_binding['quantity']['value']}"
                )
                print(f"Matched with uploaded unit: {gramm_unit}")
                # match_uploaded_units_counter += 1
                matched_uploaded_units.append(gramm_unit)

            else:
                # print(
                #     f"Non prefixed units empty for {quantity_binding['quantity']['value']}"
                # )
                # print(f"No match found for: {gramm_unit}")
                mateched_not_uploaded_units = prefixed_units

        if matched_uploaded_units != []:
            match_uploaded_units_counter += 1
        else:
            match_not_uploaded_units_counter += 1
            print(
                f"QuantityKind of not uploaded units: {quantity_binding['quantity']['value']}"
            )
            print(
                f"Mateched not uploaded units: {mateched_not_uploaded_units}"
            )

    # osw_quantity = model.QuantityKind(
    #     uuid=_uuid_quantity,
    #     label=osw_label_list,
    #     description=description_list,
    #     exact_ontology_match=[quantity_binding["quantity"]["value"]],
    #     units=osw_item_uuids_non_prefixed_units,
    # )

    # print(f"OSW Quantity Kind: {osw_quantity.dict()}")
    # throw error if osw_item_uuids_non_prefixed_units is empty

    if len(osw_item_uuids_non_prefixed_units) == 0:
        undefined_non_prefixed_units_counter += 1
        # print(f"Empty units for {osw_quantity.exact_ontology_match}")
        # print(f"prefixed_units: {prefixed_units}")

    if description_list == None:
        undefined_descriptions_counter += 1
        # print(f"Empty description for {osw_quantity.exact_ontology_match}")


print(f"match_uploaded_units_counter: {match_uploaded_units_counter}")
print(f"match_not_uploaded_units_counter: {match_not_uploaded_units_counter}")
print(
    f"undefined_non_prefixed_units_counter: {undefined_non_prefixed_units_counter}"
)
print(f"undefined_descriptions_counter: {undefined_descriptions_counter}")

num of quantities:  329
Quantity exists: http://qudt.org/vocab/quantitykind/MassConcentrationOfWaterVapour
Matched with uploaded unit: http://qudt.org/vocab/unit/GM-PER-M3
QuantityKind of not uploaded units: http://qudt.org/vocab/quantitykind/SpecificVolume
Mateched not uploaded units: ['http://qudt.org/vocab/unit/CentiM3-PER-GM', 'http://qudt.org/vocab/unit/DeciL-PER-GM', 'http://qudt.org/vocab/unit/L-PER-KiloGM', 'http://qudt.org/vocab/unit/M3-PER-KiloGM', 'http://qudt.org/vocab/unit/MilliL-PER-GM', 'http://qudt.org/vocab/unit/MilliL-PER-KiloGM', 'http://qudt.org/vocab/unit/MilliM3-PER-GM', 'http://qudt.org/vocab/unit/MilliM3-PER-KiloGM']
QuantityKind of not uploaded units: http://qudt.org/vocab/quantitykind/ExposureRate
Mateched not uploaded units: ['http://qudt.org/vocab/unit/C-PER-KiloGM-SEC']
QuantityKind of not uploaded units: http://qudt.org/vocab/quantitykind/InverseEnergy
Mateched not uploaded units: ['http://qudt.org/vocab/unit/PER-KiloV-A-HR']
QuantityKind of not uploaded u

### Helper for OSW Domain Check by unit_path

In [13]:
import uuid


def get_qudt_osl_item(
    qudt_uri="http://qudt.org/vocab/unit/GM-PER-M3",
    osl_domain="https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW",
):

    return osl_domain + str(
        uuid.uuid5(
            namespace=uuid.NAMESPACE_URL,
            name=qudt_uri,
        )

    ).replace("-", "")


get_qudt_osl_item()

'https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW9161ae8b61ca56a687f8db32bdf2ddd3'

### Manually update missing Qudt non-prefixed-units

In [14]:
import requests
import re

qudt_unit_domain = "http://qudt.org/vocab/unit/"
missing_non_prefixed_units_GM = [
    "M3-PER-GM",
    "GM-PER-M3",
    "L-PER-GM",
    "GM-PER-L",
    "C-PER-GM-SEC",
    "GM-M2",
    "MOL-PER-GM",
    "J-PER-GM-K-M3",
    "GM-K",
    "GM-M",
    "GM-PER-PA-SEC-M",
    "BQ-PER-GM",
    "RAD-M2-PER-GM",
]

# TODO: handle others (prefixes must be deleted, just to show where prefix is used)
missing_non_prefixed_units_others = [
    "PER-KiloV-A-HR",
    "CentiM-SEC-DEG_C",
    "MilliL-PER-CentiM2-MIN",
    "MilliMOL-PER-L",
    "MegaEV-FemtoM",
    "NanoH",
]

# For each missing non prefixed unit, check if URI exists
for unit in missing_non_prefixed_units_GM:
    res = requests.get(qudt_unit_domain + unit)
    print(f"{qudt_unit_domain + unit}: {res.status_code}")
    # If exists show the url of the osw item
    if res.status_code == 200:
        print(
            f"osw url: {get_qudt_osl_item(qudt_uri=qudt_unit_domain + unit)}\n"
        )
    # If not exists check if prefixed unit with "Kilo added to GM" exists
    if res.status_code == 404:
        # Use Regex to replace "GM" with "KiloGM"
        print(
            "above unit not found in QUDT ontology, try to replace GM with KiloGM"
        )
        unit = re.sub(r"GM", "KiloGM", unit)
        res = requests.get(qudt_unit_domain + unit)
        print(f"{qudt_unit_domain + unit}: {res.status_code}\n")

http://qudt.org/vocab/unit/M3-PER-GM: 404
above unit not found in QUDT ontology, try to replace GM with KiloGM
http://qudt.org/vocab/unit/M3-PER-KiloGM: 200

http://qudt.org/vocab/unit/GM-PER-M3: 200
osw url: https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW9161ae8b61ca56a687f8db32bdf2ddd3

http://qudt.org/vocab/unit/L-PER-GM: 404
above unit not found in QUDT ontology, try to replace GM with KiloGM
http://qudt.org/vocab/unit/L-PER-KiloGM: 200

http://qudt.org/vocab/unit/GM-PER-L: 200
osw url: https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW754b1a3564725113ac583f91ae2ea959

http://qudt.org/vocab/unit/C-PER-GM-SEC: 404
above unit not found in QUDT ontology, try to replace GM with KiloGM
http://qudt.org/vocab/unit/C-PER-KiloGM-SEC: 200

http://qudt.org/vocab/unit/GM-M2: 404
above unit not found in QUDT ontology, try to replace GM with KiloGM
http://qudt.org/vocab/unit/KiloGM-M2: 200

http://qudt.org/vocab/unit/MOL-PER-GM: 404
above unit not found in QUDT ontology, try to replace GM

In [15]:
print(get_qudt_osl_item(qudt_uri="http://qudt.org/vocab/unit/GM"))

https://wiki-dev.open-semantic-lab.org/wiki/Item:OSW3c9bf4c3682f5a52b6e99f7ad7949903
