This notebook took care of a little bit of cleanup on the GeoKB items representing chemical elements, bringing the items into alignment with the classification scheme used in the Geoscience Ontology and adding same as links to GSO IRIs (along with existing links for USGS Thesaurus terms, Mindat links, and Wikidata links).

I found that there were some elements missing from the GSO GSEL module along with at least one mistake (IRI for gsel_argon has triples for arsenic with no subclass for gsel:arsenic). I will submit a pull request to correct the mistakes if I find that the project is still active. Otherwise, we may need to fork the GSO for our own purposes.

This work dropped the classifications for minerals and mineral commodities, so those will all be added separately when we add in all minerals from the GSO mineral material module.

In [31]:
import pandas as pd
from rdflib import Graph, URIRef
from rdflib.namespace import RDF, RDFS
from wbmaker import WikibaseConnection

geokb = WikibaseConnection('GEOKB_CLOUD')


In [3]:
gso_gsel = Graph()
gso_gsel.parse(
    'https://raw.githubusercontent.com/Loop3D/GKM/master/Loop3D-GSO/Modules/GSO-Element.ttl',
    format='turtle'
)

<Graph identifier=N569926bec19d440b9145b6cca52dfd05 (<class 'rdflib.graph.Graph'>)>

In [34]:
query_elements = """
SELECT *
WHERE {
    ?iri rdfs:subClassOf gsoc:Element ;
         rdfs:label ?label ;
         gsel:atomicNumber ?atomicNumber ;
         gsel:symbol ?symbol ;
         gsel:wikidataid ?wikidataid .
}
"""

gso_elements = geokb.df_from_graph(gso_gsel.query(query_elements))
gso_elements.head()


Unnamed: 0,iri,label,atomicNumber,symbol,wikidataid
0,https://w3id.org/gso/element/actinium,actinium,89,Ac,https://www.wikidata.org/wiki/Q1121
1,https://w3id.org/gso/element/aluminium,aluminium,13,Al,https://www.wikidata.org/wiki/Q663
2,https://w3id.org/gso/element/americium,americium,95,Am,https://www.wikidata.org/wiki/Q663
3,https://w3id.org/gso/element/antimony,antimony,51,Sb,http://www.wikidata.org/entity/Q1099
4,https://w3id.org/gso/element/argon,arsenic,18,As,http://www.wikidata.org/entity/Q871


In [39]:
q_geokb_elements = """
PREFIX wd: <https://geokb.wikibase.cloud/entity/>
PREFIX wdt: <https://geokb.wikibase.cloud/prop/direct/>

SELECT ?item ?itemLabel ?element_symbol ?atomic_number ?same_as
WHERE {
  ?item wdt:P1 wd:Q280 ;
        wdt:P17 ?element_symbol ;
        wdt:P101 ?atomic_number .
  OPTIONAL {
    ?item wdt:P84 ?same_as .
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
"""

geokb_elements = geokb.sparql_query(q_geokb_elements)
geokb_elements['qid'] = geokb_elements['item'].apply(lambda x: x.split('/')[-1])

geokb_elements = pd.merge(
    left=geokb_elements[['qid','itemLabel','element_symbol','atomic_number']].drop_duplicates(),
    right=geokb_elements[['qid','same_as']].groupby('qid', as_index=False).agg(list),
    how='left',
    on='qid'
)

In [43]:
geokb_element_updates = pd.merge(
    left=geokb_elements,
    right=gso_elements[['iri','wikidataid','symbol']].rename(columns={'symbol':'element_symbol'}),
    how='left',
    on='element_symbol'
)
geokb_element_updates['same_as_links'] = geokb_element_updates.apply(lambda x: x['same_as'] + [x['iri']] + [x['wikidataid']] if pd.notnull(x['iri']) else x['same_as'], axis=1)

In [45]:
gso_ref = geokb.datatypes.Item(
    prop_nr=geokb.prop_lookup['knowledge source'],
    value='Q161225'
)

for _, row in geokb_element_updates.iterrows():
    item = geokb.wbi.item.get(row['qid'])

    item.claims.remove(geokb.prop_lookup['instance of'])
    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup['subclass of'],
            value='Q280',
            references=[gso_ref] if pd.notnull(row['iri']) else []
        ),
        action_if_exists=geokb.action_if_exists.REPLACE_ALL
    )

    same_as_claims = []
    for l in row['same_as_links']:
        same_as_claims.append(
            geokb.datatypes.URL(
                prop_nr=geokb.prop_lookup['same as'],
                value=l
            )
        )
    item.claims.add(same_as_claims, action_if_exists=geokb.action_if_exists.REPLACE_ALL)

    response = item.write(
        summary='Updating element data from GSO'
    )
    print(response.id)

Q392
Q393
Q394
Q395
Q396
Q397
Q398
Q399
Q400
Q401
Q402
Q403
Q404
Q389
Q390
Q391
Q292
Q293
Q294
Q295
Q296
Q297
Q298
Q299
Q300
Q301
Q302
Q303
Q304
Q305
Q306
Q307
Q308
Q309
Q310
Q311
Q311
Q312
Q313
Q314
Q315
Q316
Q317
Q318
Q319
Q320
Q321
Q322
Q323
Q324
Q325
Q326
Q327
Q328
Q329
Q330
Q331
Q332
Q333
Q334
Q335
Q336
Q337
Q338
Q339
Q340
Q341
Q342
Q343
Q344
Q345
Q346
Q347
Q348
Q349
Q350
Q351
Q352
Q353
Q354
Q355
Q356
Q357
Q358
Q359
Q360
Q361
Q362
Q363
Q364
Q365
Q366
Q367
Q368
Q369
Q370
Q371
Q372
Q373
Q374
Q375
Q376
Q377
Q378
Q379
Q380
Q381
Q382
Q383
Q384
Q385
Q386
Q387
Q388
