In [None]:
import pandas as pd
from wbmaker import WikibaseConnection
import time

In [None]:
# Open the Wikibase connection used by every cell below (lookups, datatypes,
# item creation and SPARQL queries all go through this object).
# NOTE(review): 'GEOKB_CLOUD' presumably names a stored configuration or
# credential set resolved by wbmaker — confirm against the wbmaker docs.
geokb = WikibaseConnection('GEOKB_CLOUD')

In [None]:
# Shared reference block attached to every claim written in this notebook.
# It holds a single "knowledge source" statement pointing at the CMMI
# classification scheme entry found in geokb.ref_lookup.
references = geokb.models.References()
knowledge_source = geokb.datatypes.Item(
    prop_nr=geokb.prop_lookup['knowledge source'],
    value=geokb.ref_lookup['Mineral Deposit Classification Scheme - CMMI, Hofstra et. al.'],
)
references.add(knowledge_source)

In [None]:
# Load the CMMI classification spreadsheet. Later cells read the columns
# "Deposit Environment", "Deposit Group", "Deposit Type" and "Synonyms"
# from this frame.
cmmi_classification = pd.read_excel("data/cmmi_classification.xlsx")

In [None]:
# Create one Wikibase item per distinct deposit environment in the spreadsheet
# and record the QID assigned to each so it can be joined back onto the frame.
dep_env_items = []

# dropna() keeps blank spreadsheet cells from producing an item labelled
# "nan mineral deposit environment".
for dep_env in cmmi_classification['Deposit Environment'].dropna().unique():
    item = geokb.wbi.item.new()
    item.labels.set('en', f'{dep_env} mineral deposit environment')
    item.descriptions.set('en', 'a mineral deposit environment classification value developed through the Critical Minerals Mapping Initiative')
    # The bare environment name is kept as an alias alongside the longer label.
    item.aliases.set('en', dep_env)

    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup['instance of'],
            value=geokb.class_lookup['mineral deposit environment'],
            references=references
        )
    )

    response = item.write(summary="Added new mineral deposit environment item")
    dep_env_items.append({
        "Deposit Environment": dep_env,
        "dep_env_qid": response.id
    })

In [None]:
# Join the newly created environment QIDs back onto the classification table.
# - Explicit columns let an empty dep_env_items list still merge (the QID
#   column is then simply all-missing) instead of raising a KeyError.
# - Any dep_env_qid column left by a previous run of this cell is dropped
#   first, so re-running does not create dep_env_qid_x / dep_env_qid_y.
dep_env_lookup = pd.DataFrame(dep_env_items, columns=["Deposit Environment", "dep_env_qid"])
cmmi_classification = pd.merge(
    left=cmmi_classification.drop(columns=["dep_env_qid"], errors="ignore"),
    right=dep_env_lookup,
    how="left",
    on="Deposit Environment"
)

In [None]:
# Create one Wikibase item per distinct (deposit group, deposit environment)
# pair that does not yet have a QID, linking each group to its environment.
#
# The cell is resumable and no longer depends on state from earlier runs:
# when the frame already carries a dep_group_qid column, the known QIDs seed
# dep_group_items and only rows still missing a QID are processed. On a fresh
# kernel the column does not exist yet, so every row is processed.
if "dep_group_qid" in cmmi_classification.columns:
    dep_group_items = (
        cmmi_classification
        .dropna(subset=["dep_group_qid"])[["Deposit Group", "dep_group_qid"]]
        .drop_duplicates()
        .to_dict("records")
    )
    dep_groups_pending = cmmi_classification[cmmi_classification["dep_group_qid"].isnull()]
else:
    dep_group_items = []
    dep_groups_pending = cmmi_classification

for index, row in dep_groups_pending[["Deposit Group", "dep_env_qid"]].drop_duplicates().iterrows():
    item = geokb.wbi.item.new()
    item.labels.set('en', f'{row["Deposit Group"]} mineral deposit group')
    item.aliases.set('en', row['Deposit Group'])
    item.descriptions.set('en', 'a mineral deposit group classification name developed through the Critical Minerals Mapping Initiative')

    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup['instance of'],
            value=geokb.class_lookup['mineral deposit group'],
            references=references
        )
    )

    # Link the group to the environment item created earlier.
    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup['mineral deposit environment'],
            value=row.dep_env_qid,
            references=references
        )
    )

    try:
        response = item.write(summary="Added new mineral deposit group item")
    except Exception as e:
        # Best effort: report the failed write and continue with the next
        # group. The row keeps a missing QID, so re-running retries it.
        print(f'{row["Deposit Group"]}: {e}')
    else:
        # Recorded outside the try so a bookkeeping error cannot be silently
        # swallowed after the item has already been written.
        dep_group_items.append({
            "Deposit Group": row["Deposit Group"],
            "dep_group_qid": response.id
        })

In [None]:
# Join the deposit group QIDs back onto the classification table.
# - errors="ignore" lets this run on a frame that has no dep_group_qid column
#   yet (first pass) as well as on one that does (re-run).
# - Explicit columns let an empty dep_group_items list still merge instead of
#   raising a KeyError on the missing "Deposit Group" key.
dep_group_lookup = pd.DataFrame(dep_group_items, columns=["Deposit Group", "dep_group_qid"])
cmmi_classification = pd.merge(
    left=cmmi_classification.drop(columns=["dep_group_qid"], errors="ignore"),
    right=dep_group_lookup,
    how="left",
    on="Deposit Group"
)

In [None]:
# Create one Wikibase item per row (deposit type), linked to both its deposit
# environment and its deposit group through the QIDs merged in above.
for index, row in cmmi_classification.iterrows():
    # Aliases: the bare deposit type name plus any comma-separated synonyms.
    aliases = [row["Deposit Type"]]
    if isinstance(row.Synonyms, str):
        # Drop blank entries (e.g. from a trailing or doubled comma) so no
        # empty alias is submitted.
        aliases.extend(s.strip() for s in row.Synonyms.split(",") if s.strip())

    # Building the item happens inside the try as well: a row whose
    # environment or group QID is missing would otherwise raise while the
    # claim is constructed and abort the whole loop.
    try:
        item = geokb.wbi.item.new()
        item.labels.set('en', f'{row["Deposit Type"]} mineral deposit type')
        item.aliases.set('en', aliases)
        item.descriptions.set('en', 'a mineral deposit type classification name developed through the Critical Minerals Mapping Initiative')

        item.claims.add(
            geokb.datatypes.Item(
                prop_nr=geokb.prop_lookup['instance of'],
                value=geokb.class_lookup['mineral deposit type'],
                references=references
            )
        )

        item.claims.add(
            geokb.datatypes.Item(
                prop_nr=geokb.prop_lookup['mineral deposit environment'],
                value=row.dep_env_qid,
                references=references
            )
        )

        item.claims.add(
            geokb.datatypes.Item(
                prop_nr=geokb.prop_lookup['mineral deposit group'],
                value=row.dep_group_qid,
                references=references
            )
        )

        response = item.write(summary="Added new mineral deposit type item from source")
        print(row["Deposit Type"], response.id)
    except Exception as e:
        # Name the failing row so the error can be traced to the spreadsheet.
        print(f'{row["Deposit Type"]}: {e}')

I needed to go back and add the property indicating that all of these items (deposit environment, deposit group, and deposit type) are classifiers for mineral deposits. There is probably a more elegant way to handle this: an upsert-style approach that checks all of the claims on a given item and adds anything that is missing. **TODO:** benchmark the time it takes to find items based on their labels and "instance of" classification.

I ran into a problem here with the Xentity Wikibase instance: about 50 item write operations in a row are enough to overwhelm it. The services all reboot eventually, but we need to get past that limitation. For now, I'm experimenting with slowing down an already slow serial process.

In [None]:
# Backfill a "classifies -> mineral deposit" statement on every classification
# item that does not already have one.

# Pause between writes, in seconds, to go easy on the Wikibase instance.
WRITE_DELAY_SECONDS = 1

# Decoded query: SELECT ?item WHERE { ?item wdt:P2 ?classes .
#                                     VALUES ?classes { wd:Q185 wd:Q186 wd:Q187 } }
# NOTE(review): P2 / Q185-Q187 are presumably "instance of" and the three
# classification classes (environment, group, type) — confirm on the instance.
items = geokb.url_sparql_query(sparql_url="https://sparql.demo5280.com/proxy/wdqs/bigdata/namespace/wdq/sparql?query=SELECT%20%3Fitem%0AWHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP2%20%3Fclasses%20.%0A%20%20VALUES%20%3Fclasses%20%7B%20wd%3AQ185%20wd%3AQ186%20wd%3AQ187%20%7D%0A%7D")
item_ids = [i['item']['value'].split('/')[-1] for i in items["results"]["bindings"]]

# Decoded query: SELECT ?item WHERE { ?item wdt:P61 wd:Q445 . }
# NOTE(review): presumably the items that already carry the
# "classifies mineral deposit" statement — confirm P61 / Q445.
classfying_items = geokb.url_sparql_query(sparql_url="https://sparql.demo5280.com/proxy/wdqs/bigdata/namespace/wdq/sparql?query=SELECT%20%3Fitem%0AWHERE%20%7B%0A%20%20%3Fitem%20wdt%3AP61%20wd%3AQ445%20.%0A%7D")
classifying_item_ids = [i['item']['value'].split('/')[-1] for i in classfying_items["results"]["bindings"]]

# A set gives constant-time membership checks; dict.fromkeys() removes
# duplicate QIDs while preserving query order, so an item returned more than
# once does not receive the statement twice.
already_classified = set(classifying_item_ids)

for qid in dict.fromkeys(item_ids):
    if qid in already_classified:
        continue

    item = geokb.wbi.item.get(qid)
    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup['classifies'],
            value=geokb.class_lookup['mineral deposit'],
            references=references
        )
    )
    item.write(summary="Added classifies mineral deposit statement")
    print(qid)
    time.sleep(WRITE_DELAY_SECONDS)