This notebook retrieves the mining districts found in the mineral deposits [website](https://mrdata.usgs.gov/deposit/). It first pulls the WFS layers found in the API endpoint and retrieves the polygon layer which has the mining districts of interest.

In [50]:
import geopandas as gpd
import pandas as pd
import requests
import fiona
import os
from fiona import BytesCollection
import shapely
from shapely.geometry import Polygon
from wbmaker import WikibaseConnection
from dotenv import load_dotenv
from arcgis.gis import GIS

load_dotenv()

fiona.drvsupport.supported_drivers['WFS'] = 'r'

In [51]:
def wfs2gp_df(TYPENAMES, url, bbox=None, wfs_version="2.0.0", outputFormat='application/gml+xml; version=3.2'):
    params = dict(service='WFS', version=wfs_version,request='GetCapabilities', TYPENAMES=TYPENAMES, outputFormat=outputFormat)
    with BytesCollection(requests.get(url,params=params).content) as f:
        df = gpd.GeoDataFrame.from_features(f, crs='EPSG:4326')
    return df

mining_districts = wfs2gp_df('polygons', 'https://mrdata.usgs.gov/services/wfs/deposit',None, '1.1.0', 'json')

In [52]:
def read_wfs(url, params):
    # params = dict(service='WFS', version=wfs_version,request='GetFeature', TYPENAMES=TYPENAMES, outputFormat=outputFormat)
    with BytesCollection(requests.get(url,params=params).content) as f:
        df = gpd.GeoDataFrame.from_features(f, crs='EPSG:4326')
    return df
#request=GetFeature&service=WFS&version=1.1.0&TYPENAMES=polygons
u = 'https://mrdata.usgs.gov/services/wfs/deposit'
p = {
    'request':'GetFeature',
    'service':'WFS',
    'version':'1.1.0',
    'typeName':'polygons',
    'srsName': 'urn:ogc:def:crs:EPSG::4326'
}
df = read_wfs(u, p)

In [53]:
df

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,area_sqkm,area_acres,remarks
0,"POLYGON ((57.62461 -136.02944, 57.62042 -136.0...",polygons.2,AK00206,Mr00144,Chichagof District,2018-04-13,,,
1,"POLYGON ((57.98463 -136.43539, 57.98468 -136.4...",polygons.5,AK00205,Mo00736,South Takanis Body,2018-04-13,,,
2,"POLYGON ((57.97468 -136.42604, 57.97468 -136.4...",polygons.6,AK00205,Mo00731,Tunnel Body,2018-04-13,,,
3,"POLYGON ((57.97661 -136.42628, 57.97658 -136.4...",polygons.7,AK00205,Mo00729,West Tripod Body,2018-04-13,,,
4,"POLYGON ((57.97542 -136.42074, 57.97550 -136.4...",polygons.8,AK00205,Mo00734,South Muskeg Body,2018-04-13,,,
...,...,...,...,...,...,...,...,...,...
476,"POLYGON ((34.84018 -118.73031, 34.84018 -118.7...",polygons.601,CA00068,Mo00727,Upper Butler,2018-04-13,,,
477,"POLYGON ((34.83695 -118.74219, 34.83695 -118.7...",polygons.602,CA00067,Mo00726,"Meeke, East gossan",2018-04-13,,,
478,"POLYGON ((34.92931 -113.90943, 34.92856 -113.9...",polygons.626,AZ00002,Mr00131,Boriana Mineral District,2020-05-01,7.31962822544,1808.71952485,
479,"POLYGON ((35.78144 -117.32381, 35.77790 -117.3...",polygons.628,CA00060,Mo00593,"Searles Lake, Searles Lake brine deposit",2018-12-13,105.021966073,25951.4929888,


In [54]:
mining_districts

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks
0,"POLYGON ((46.74615 -87.88469, 46.74619 -87.884...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,
1,"POLYGON ((47.82946 -91.67692, 47.82990 -91.679...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,
2,"POLYGON ((46.74872 -87.89662, 46.74872 -87.896...",polygons.21,MI00001,Mo00362,Eagle,2017-08-31,10.5066/P9V74HIU,,,
3,"POLYGON ((47.81905 -91.72215, 47.81936 -91.721...",polygons.22,MN00003,Mo00721,Maturi,2018-04-10,10.5066/P9V74HIU,,,
4,"POLYGON ((47.71916 -91.81861, 47.71935 -91.820...",polygons.23,MN00003,Mo00719,Birch Lake,2018-04-10,10.5066/P9V74HIU,,,
...,...,...,...,...,...,...,...,...,...,...
708,"POLYGON ((34.84020 -118.72938, 34.84020 -118.7...",polygons.601,CA00068,Mo00727,Upper Butler,2018-04-13,10.5066/P97JYNJL,,,
709,"POLYGON ((34.83697 -118.74126, 34.83697 -118.7...",polygons.602,CA00067,Mo00726,"Meeke, East gossan",2018-04-13,10.5066/P97JYNJL,,,
710,"POLYGON ((34.92930 -113.90865, 34.92856 -113.9...",polygons.626,AZ00002,Mr00131,Boriana Mineral District,2020-05-01,10.5066/P9XA8MJ4,7.31962822544,1808.71952485,
711,"POLYGON ((35.78148 -117.32292, 35.77795 -117.3...",polygons.628,CA00060,Mo00593,"Searles Lake, Searles Lake brine deposit",2018-12-13,10.5066/P9XA8MJ4,105.021966073,25951.4929888,


In [55]:
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks
0,"POLYGON ((46.74615 -87.88469, 46.74619 -87.884...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,
1,"POLYGON ((47.82946 -91.67692, 47.82990 -91.679...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,
2,"POLYGON ((46.74872 -87.89662, 46.74872 -87.896...",polygons.21,MI00001,Mo00362,Eagle,2017-08-31,10.5066/P9V74HIU,,,
3,"POLYGON ((47.81905 -91.72215, 47.81936 -91.721...",polygons.22,MN00003,Mo00721,Maturi,2018-04-10,10.5066/P9V74HIU,,,
4,"POLYGON ((47.71916 -91.81861, 47.71935 -91.820...",polygons.23,MN00003,Mo00719,Birch Lake,2018-04-10,10.5066/P9V74HIU,,,


In [56]:
def reverse_coords(geom: Polygon) -> Polygon:
    coords = geom.exterior.coords.xy
    reverse = tuple(zip(coords[1], coords[0]))
    return Polygon(reverse)

In [57]:
mining_districts['geometry'] = [reverse_coords(x) for x in mining_districts['geometry']]
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks
0,"POLYGON ((-87.88469 46.74615, -87.88474 46.746...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,
1,"POLYGON ((-91.67692 47.82946, -91.67956 47.829...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,
2,"POLYGON ((-87.89662 46.74872, -87.89657 46.748...",polygons.21,MI00001,Mo00362,Eagle,2017-08-31,10.5066/P9V74HIU,,,
3,"POLYGON ((-91.72215 47.81905, -91.72119 47.819...",polygons.22,MN00003,Mo00721,Maturi,2018-04-10,10.5066/P9V74HIU,,,
4,"POLYGON ((-91.81861 47.71916, -91.82029 47.719...",polygons.23,MN00003,Mo00719,Birch Lake,2018-04-10,10.5066/P9V74HIU,,,


In [58]:
mining_districts = mining_districts.drop_duplicates(subset=['site_id'])


In [59]:
gis = GIS()
counties = gis.content.get('14c5450526a8430298b2fa74da12c2f4')
counties


  max_retries=Retry(
  numViews = locale.format("%d", self.numViews, grouping=True)


In [60]:
layer = counties.layers[0]
q = layer.query(where="1=1", out_fields=['*'], as_df=True, out_sr=4326)

In [61]:
q.head()

Unnamed: 0,COUNTY_FIPS,FIPS,NAME,OBJECTID,POPULATION,POP_SQMI,SHAPE,SQMI,STATE_ABBR,STATE_FIPS,STATE_NAME,Shape__Area,Shape__Length
0,1,1001,Autauga County,1,58805,97.3,"{""rings"": [[[-86.413120727, 32.7073921370001],...",604.37,AL,1,Alabama,0.150256,2.066033
1,3,1003,Baldwin County,2,231767,141.9,"{""rings"": [[[-87.5649079999999, 30.281622], [-...",1633.14,AL,1,Alabama,0.398404,9.305629
2,5,1005,Barbour County,3,25223,27.9,"{""rings"": [[[-85.257838372, 32.147937056], [-8...",904.52,AL,1,Alabama,0.22327,2.69526
3,7,1007,Bibb County,4,22293,35.6,"{""rings"": [[[-87.06574294, 33.2469132270001], ...",626.17,AL,1,Alabama,0.156473,1.887519
4,9,1009,Blount County,5,59134,90.9,"{""rings"": [[[-86.453024823, 34.259323463], [-8...",650.63,AL,1,Alabama,0.164405,2.423466


In [62]:
from shapely import wkt
mining_districts['centroid'] = [wkt.loads(str(x)).centroid for x in mining_districts['geometry']]
mining_districts.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks,centroid
0,"POLYGON ((-87.88469 46.74615, -87.88474 46.746...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,,POINT (-87.8802497576474 46.746424629721176)
1,"POLYGON ((-91.67692 47.82946, -91.67956 47.829...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,,POINT (-91.67306264135848 47.83455981687981)
5,"POLYGON ((-91.95769 47.62485, -91.95477 47.625...",polygons.24,MN00002,Mo00746,NorthMet,2018-04-16,10.5066/P9V74HIU,,,,POINT (-91.97153846066311 47.61665047292834)
7,"POLYGON ((-90.11476 37.61021, -90.11857 37.528...",polygons.27,MO00001,Mr00036,Fredericktown District,2018-06-07,10.5066/P9V74HIU,,,,POINT (-90.2061544546558 37.57042461027146)
9,"POLYGON ((-76.41196 40.26707, -76.41213 40.267...",polygons.41,PA00001,Mo00749,Cornwall,2018-04-18,10.5066/P9V74HIU,,,,POINT (-76.4122036035501 40.26710794585181)


In [63]:
def fix_label(label: str) -> str:
    if 'Mining District' not in label:
        return label  + ' Mining District'
    if 'District' in label and not 'Mining District' in label:
        return label.replace('District','Mining District')
    return label

labels = [ fix_label(x) for x in mining_districts['ftr_name'] ]
aliases = [ x if 'mining district' not in x.lower() else '' for x in mining_districts['ftr_name'] ]

In [64]:
mining_districts['label'] = labels
mining_districts['aliases'] = aliases

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [65]:
# make sure all shapes are valid before comparing
q['SHAPE'] = [shapely.make_valid(x.as_shapely) if not shapely.is_valid(x.as_shapely) else x.as_shapely for x in q['SHAPE']]
mining_districts['geometry'] = [shapely.make_valid(x) if not shapely.is_valid(x) else x for x in mining_districts['geometry']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [66]:
def match_shapes(poly: Polygon, rows: pd.DataFrame) -> tuple:
    if not shapely.is_valid(poly):
        poly = shapely.make_valid(poly)
    for i in range(len(rows)):
        row = rows.iloc[i]
        multipoly = row['SHAPE']
        if poly.intersects(multipoly):
            return row['STATE_NAME'], row['NAME']
    return '', ''

In [67]:
county_state = []
for i in range(len(mining_districts)):
    district = mining_districts.iloc[i]
    ct_st_match = match_shapes(district['geometry'], q)
    # if either state or county is '', then match not found
    if '' in ct_st_match:
        print('Shape Match not found for provided Polygon\n',f'District not matched: {district["ftr_name"]}')
    county_state.append(ct_st_match)

In [68]:
mining_districts['state'] = [x[0] for x in county_state]
mining_districts['county'] = [x[1] for x in county_state]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [69]:
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks,centroid,label,aliases,state,county
0,"POLYGON ((-87.88469 46.74615, -87.88474 46.746...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,,POINT (-87.8802497576474 46.746424629721176),Eagle East Mining District,Eagle East,Michigan,Marquette County
1,"POLYGON ((-91.67692 47.82946, -91.67956 47.829...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,,POINT (-91.67306264135848 47.83455981687981),Spruce Road Mining District,Spruce Road,Minnesota,Lake County
5,"POLYGON ((-91.95769 47.62485, -91.95477 47.625...",polygons.24,MN00002,Mo00746,NorthMet,2018-04-16,10.5066/P9V74HIU,,,,POINT (-91.97153846066311 47.61665047292834),NorthMet Mining District,NorthMet,Minnesota,St. Louis County
7,"POLYGON ((-90.11476 37.61021, -90.11857 37.528...",polygons.27,MO00001,Mr00036,Fredericktown District,2018-06-07,10.5066/P9V74HIU,,,,POINT (-90.2061544546558 37.57042461027146),Fredericktown District Mining District,Fredericktown District,Missouri,Bollinger County
9,"POLYGON ((-76.41196 40.26707, -76.41213 40.267...",polygons.41,PA00001,Mo00749,Cornwall,2018-04-18,10.5066/P9V74HIU,,,,POINT (-76.4122036035501 40.26710794585181),Cornwall Mining District,Cornwall,Pennsylvania,Lebanon County


In [70]:
import re
def check_for_format_error(districts_df: gpd.GeoDataFrame) -> list:
    # return any dates that are not in YYYY-MM-DD format
    return [x for x in districts_df['last_updt'] if not re.search(r'\d\d\d\d-\d\d-\d\d', x)]

check_for_format_error(mining_districts)

['2018-3-16']

In [71]:
import datetime
def fix_time(t:str, t_format: str) -> str:
    # converts to YYYY-MM-DD format
    d = datetime.datetime.strptime(t, t_format)
    return d.strftime('%Y-%m-%d')

In [72]:
new_time = [fix_time(x, '%Y-%m-%d') if not re.search(r'\d\d\d\d-\d\d-\d\d', x) else x for x in mining_districts['last_updt']]
mining_districts['last_updt'] = new_time
# verify that date format in last_updt column has been fixed
check_for_format_error(mining_districts)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


[]

In [73]:
today = datetime.date.today()
mining_districts['date_processed'] = today.isoformat()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [74]:
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks,centroid,label,aliases,state,county,date_processed
0,"POLYGON ((-87.88469 46.74615, -87.88474 46.746...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,,POINT (-87.8802497576474 46.746424629721176),Eagle East Mining District,Eagle East,Michigan,Marquette County,2023-08-22
1,"POLYGON ((-91.67692 47.82946, -91.67956 47.829...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,,POINT (-91.67306264135848 47.83455981687981),Spruce Road Mining District,Spruce Road,Minnesota,Lake County,2023-08-22
5,"POLYGON ((-91.95769 47.62485, -91.95477 47.625...",polygons.24,MN00002,Mo00746,NorthMet,2018-04-16,10.5066/P9V74HIU,,,,POINT (-91.97153846066311 47.61665047292834),NorthMet Mining District,NorthMet,Minnesota,St. Louis County,2023-08-22
7,"POLYGON ((-90.11476 37.61021, -90.11857 37.528...",polygons.27,MO00001,Mr00036,Fredericktown District,2018-06-07,10.5066/P9V74HIU,,,,POINT (-90.2061544546558 37.57042461027146),Fredericktown District Mining District,Fredericktown District,Missouri,Bollinger County,2023-08-22
9,"POLYGON ((-76.41196 40.26707, -76.41213 40.267...",polygons.41,PA00001,Mo00749,Cornwall,2018-04-18,10.5066/P9V74HIU,,,,POINT (-76.4122036035501 40.26710794585181),Cornwall Mining District,Cornwall,Pennsylvania,Lebanon County,2023-08-22


In [75]:
name = 'GEOKB_CLOUD'
geokb = WikibaseConnection(name)

In [76]:
def item_search(label: str, instance_of: str, bot_name: str) -> str:
  sparql_endpoint = os.environ[f'WB_SPARQL_{bot_name}']
  query = f'''PREFIX wdt: <https://geokb.wikibase.cloud/prop/direct/>
  PREFIX wd:  <https://geokb.wikibase.cloud/entity/>
  SELECT ?item
  WHERE {{
    ?item rdfs:label ?label ;
       wdt:P1 wd:{instance_of} .
    FILTER CONTAINS( LCASE(?label), "{label.lower()}") .

    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
  }}
  '''

  params = {
      'query': query,
      'format': 'json'
  }

  res = requests.get(sparql_endpoint, params=params, timeout=100)
  json_res =res.json()
  item_result = (json_res['results']['bindings'][0]['item']['value']
                  if 'results' in json_res
                  and len(json_res['results']['bindings']) > 0
                  and 'item' in json_res['results']['bindings'][0]
                  else None)
  return item_result.split('/')[-1] if item_result is not None else None

In [241]:
def add_new_item(cols:dict, instance_of_val: str) -> None:
    keep = geokb.action_if_exists.KEEP
    aor = geokb.action_if_exists.APPEND_OR_REPLACE
    replace = geokb.action_if_exists.REPLACE_ALL
    
    references = geokb.models.References()
    districts_ref = geokb.datatypes.URL(
        prop_nr=geokb.prop_lookup['reference URL'],
        value='https://mrdata.usgs.gov/services/wfs/deposit?request=GetCapabilities&service=WFS&version=1.1.0',
    )
    updt_ref = geokb.datatypes.Time(
        prop_nr=geokb.prop_lookup['last update'],
        time=cols['last_updt']+'T00:00:00Z',
    )
    references.add(districts_ref)
    references.add(updt_ref)

    item = geokb.wbi.item.new()
    item.labels.set('en', cols['label'], action_if_exists=replace)
    item.descriptions.set(
        'en', 
        f'Mining district found in {cols["county"]}, {cols["state"]}',
        action_if_exists=replace
    )
    item.claims.add(
        geokb.datatypes.Item(
                prop_nr=geokb.prop_lookup["instance of"],
                value=instance_of_val,
                references=references
        )
    )
    item.claims.add(
        geokb.datatypes.ExternalID(
            prop_nr=geokb.prop_lookup["Feature ID"],
            value=cols['ftr_id'],
            references=references
        )
    )
    item.claims.add(
        geokb.datatypes.ExternalID(
            prop_nr=geokb.prop_lookup["Site ID"],
            value=cols['site_id'],
            references=references
        )
    )
    item.claims.add(
        geokb.datatypes.ExternalID(
            prop_nr=geokb.prop_lookup["DOI"],
            value=cols['doi'],
            references=references
        )
    )
    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup["U.S. county"],
            value=cols['county'],
            references=references
        )
    )
    item.claims.add(
        geokb.datatypes.Item(
            prop_nr=geokb.prop_lookup["U.S. state"],
            value=cols['state'],
            references=references
        )
    )
    item.claims.add( geokb.datatypes.Time(
            prop_nr=geokb.prop_lookup['retrieved'],
            time=cols['date_processed']+'T00:00:00Z',
        )
    )
    response = item.write(
        summary="Added mining districts"
    )
    print(cols['ftr_name'], response.id)

In [127]:
min_copy = mining_districts.copy().reset_index(drop=True)

In [129]:
county_instance_of = 'Q481'
mining_districts['county'].replace(r'St\.', 'Saint',regex=True, inplace=True)
county_items = [
    item_search(f'{county}, {state}', county_instance_of, name) 
    for (county,state) 
    in zip(mining_districts['county'], min_copy['state'])
]

In [130]:

mining_districts['county_qid'] = county_items
mining_districts.head()

Unnamed: 0,geometry,gml_id,site_id,ftr_id,ftr_name,last_updt,doi,area_sqkm,area_acres,remarks,centroid,label,aliases,state,county,date_processed,county_qid
0,"POLYGON ((-87.88469 46.74615, -87.88474 46.746...",polygons.20,MI00001,Mo00363,Eagle East,2017-08-31,10.5066/P9V74HIU,,,,POINT (-87.8802497576474 46.746424629721176),Eagle East Mining District,Eagle East,Michigan,Marquette County,2023-08-22,Q51002
1,"POLYGON ((-91.67692 47.82946, -91.67956 47.829...",polygons.25,MN00003,Mo00722,Spruce Road,2018-04-10,10.5066/P9V74HIU,,,,POINT (-91.67306264135848 47.83455981687981),Spruce Road Mining District,Spruce Road,Minnesota,Lake County,2023-08-22,Q51956
2,"POLYGON ((-91.95769 47.62485, -91.95477 47.625...",polygons.24,MN00002,Mo00746,NorthMet,2018-04-16,10.5066/P9V74HIU,,,,POINT (-91.97153846066311 47.61665047292834),NorthMet Mining District,NorthMet,Minnesota,Saint Louis County,2023-08-22,Q51975
3,"POLYGON ((-90.11476 37.61021, -90.11857 37.528...",polygons.27,MO00001,Mr00036,Fredericktown District,2018-06-07,10.5066/P9V74HIU,,,,POINT (-90.2061544546558 37.57042461027146),Fredericktown District Mining District,Fredericktown District,Missouri,Bollinger County,2023-08-22,Q52056
4,"POLYGON ((-76.41196 40.26707, -76.41213 40.267...",polygons.41,PA00001,Mo00749,Cornwall,2018-04-18,10.5066/P9V74HIU,,,,POINT (-76.4122036035501 40.26710794585181),Cornwall Mining District,Cornwall,Pennsylvania,Lebanon County,2023-08-22,Q53874


In [84]:
min_dist_instance_of = 'Q55213'

for i in range(len(mining_districts)):
    fields = dict(mining_districts.iloc[i])
    qid = item_search(fields['ftr_name'], min_dist_instance_of, name)
    add_new_item(fields, min_dist_instance_of)
    break

[]


In [85]:
geokb.class_lookup

{'person': 'Q3',
 'document': 'Q5',
 'publication': 'Q6',
 'dataset': 'Q12',
 'scientific model': 'Q13',
 'project': 'Q14',
 'phenomenon': 'Q16',
 'geographic region': 'Q26',
 'property': 'Q25354',
 'spatio-temporal entity': 'Q25355',
 'location': 'Q25356',
 'object': 'Q25360',
 'mineral deposit environment': 'Q26093',
 'mineral deposit type': 'Q26097',
 'mineral deposit group': 'Q26111',
 'knowledgebase source': 'Q26267',
 'identifier': 'Q41264',
 'system': 'Q41254',
 'role': 'Q44322',
 'government organization': 'Q50861',
 'report': 'Q8',
 'scholarly article': 'Q7',
 'book': 'Q138410',
 'chapter': 'Q138411',
 'conference paper': 'Q138412',
 'government report': 'Q9',
 'NI 43-101 Technical Report': 'Q10',
 'thesis': 'Q138413',
 'newsletter': 'Q138414',
 'USGS Numbered Series': 'Q11',
 'USGS Open-File Report': 'Q50820',
 'USGS Scientific Investigations Report': 'Q50819',
 'USGS Fact Sheet': 'Q50821',
 'USGS Professional Paper': 'Q50822',
 'USGS Scientific Investigations Map': 'Q50823',