This notebook retrieves the mining districts found in the mineral deposits [website](https://mrdata.usgs.gov/deposit/). It first pulls the WGS layers found in the API endpoint and retrieves the polygon layer which has the mining districts of interest.

In [13]:
import geopandas as gpd
import requests
import fiona
import os
from fiona import BytesCollection
from wbmaker import WikibaseConnection
from dotenv import load_dotenv

load_dotenv()

fiona.drvsupport.supported_drivers['WFS'] = 'r'

In [14]:
def wfs2gp_df(layer_name, url, bbox=None, wfs_version="2.0.0", outputFormat='application/gml+xml; version=3.2'):
    params = dict(service='WFS', version=wfs_version,request='GetFeature', typeName=layer_name, outputFormat=outputFormat)
    with BytesCollection(requests.get(url,params=params).content) as f:
        df = gpd.GeoDataFrame.from_features(f)
    return df

mining_districts = wfs2gp_df('polygons', 'https://mrdata.usgs.gov/services/wfs/deposit?request=GetCapabilities&service=WFS&version=1.1.0', '1.1.0', 'json')

ConnectionError: HTTPSConnectionPool(host='mrdata.usgs.gov', port=443): Max retries exceeded with url: /services/wfs/deposit?request=GetCapabilities&service=WFS&version=1.1.0&service=WFS&version=json&request=GetFeature&typeName=polygons&outputFormat=application%2Fgml%2Bxml%3B+version%3D3.2 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd7c4ca4640>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

In [None]:
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks
0,"POLYGON ((46.746 -87.885, 46.746 -87.885, 46.7...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,
1,"POLYGON ((47.829 -91.677, 47.830 -91.680, 47.8...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,
2,"POLYGON ((46.749 -87.897, 46.749 -87.897, 46.7...",polygons.21,MI00001,Mo00362,Eagle,2017-08-31,10.5066/P9V74HIU,,,
3,"POLYGON ((47.819 -91.722, 47.819 -91.721, 47.8...",polygons.22,MN00003,Mo00721,Maturi,2018-04-10,10.5066/P9V74HIU,,,
4,"POLYGON ((47.719 -91.819, 47.719 -91.820, 47.7...",polygons.23,MN00003,Mo00719,Birch Lake,2018-04-10,10.5066/P9V74HIU,,,


In [None]:
name = 'GEOKB_CLOUD'
geokb = WikibaseConnection(name)

In [None]:
def item_search(label: str, instance_of: str, bot_name: str):
  sparql_endpoint = os.environ[f'WB_SPARQL_{bot_name}']
  query = f'''PREFIX wdt: <https://geokb.wikibase.cloud/prop/direct/>
  PREFIX wd:  <https://geokb.wikibase.cloud/entity/>
  SELECT ?item
  WHERE {{
    ?item rdfs:label ?label ;
       wdt:P1 wd:{instance_of} .
    FILTER CONTAINS( LCASE(?label), "{label.lower()}") .

    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
  }}
  '''

  params = {
      'query': query,
      'format': 'json'
  }

  res = requests.get(sparql_endpoint, params=params, timeout=100)
  json_res =res.json()
  item_result = (json_res['results']['bindings'][0]['item']['value']
                  if 'results' in json_res
                  and len(json_res['results']['bindings']) > 0
                  and 'item' in json_res['results']['bindings'][0]
                  else None)
  return item_result.split('/')[-1] if item_result is not None else None

In [None]:
districts_ref = geokb.datatypes.URL(
    prop_nr=geokb.prop_lookup['reference URL'],
    value='https://mrdata.usgs.gov/deposit/'
)

In [12]:
instance_of_val = 'Q55213'
for i in range(2,len(mining_districts)):
    fields = dict(mining_districts.iloc[i])
    qid = item_search(fields['ftr_name'], instance_of_val, name)
    if qid is None:
        item = geokb.wbi.item.new()
        references = geokb.models.References()
        updt_ref = geokb.datatypes.Time(
            prop_nr=geokb.prop_lookup['Last Update'],
            time=fields['last_updt']+'T00:00:00Z'
        )
        references.add(districts_ref)
        references.add(updt_ref)
        item.labels.set('en', fields['ftr_name'])
        item.descriptions.set('en', '')
        item.claims.add(
            geokb.datatypes.Item(
                    prop_nr=geokb.prop_lookup["instance of"],
                    value=instance_of_val
            )
        )
        item.claims.add(
            geokb.datatypes.ExternalID(
                prop_nr=geokb.prop_lookup["Feature ID"],
                value=fields['ftr_id']
            )
        )
        item.claims.add(
            geokb.datatypes.ExternalID(
                prop_nr=geokb.prop_lookup["Site ID"],
                value=fields['site_id']
            )
        )
        item.claims.add(
            geokb.datatypes.ExternalID(
                prop_nr=geokb.prop_lookup["DOI"],
                value=fields['doi']
            )
        )
        # response = item.write(
        #     summary="Added mining districts"
        # )
        # print(fields['ftr_name'], response.id)
    else:
        print(f'{fields["ftr_name"]} found in the GeoKB, ID: {qid}')
    break