In [1]:
import pandas as pd
from wbmaker import WikibaseConnection
from wikibaseintegrator import models, datatypes

In [2]:
# Create the WikibaseConnection named 'eew'; every later cell in this notebook
# goes through this `eew` object.
eew = WikibaseConnection('eew')

In [3]:
# Fetch the property reference table (one row per property, with its label,
# `pid` and `p_type`) and preview the first rows.
properties = eew.properties()
properties.head()

Unnamed: 0,property,propertyLabel,property_type,pid,p_type
0,https://eew-edgi.wikibase.cloud/entity/P1,instance of,http://wikiba.se/ontology#WikibaseItem,P1,WikibaseItem
1,https://eew-edgi.wikibase.cloud/entity/P2,subclass of,http://wikiba.se/ontology#WikibaseItem,P2,WikibaseItem
2,https://eew-edgi.wikibase.cloud/entity/P3,related wikidata property,http://wikiba.se/ontology#ExternalId,P3,ExternalId
3,https://eew-edgi.wikibase.cloud/entity/P4,related wikidata item,http://wikiba.se/ontology#ExternalId,P4,ExternalId
4,https://eew-edgi.wikibase.cloud/entity/P5,formatter URL,http://wikiba.se/ontology#String,P5,String


In [19]:
def _build_claim(claim_config, property_type, row, refs):
    """Build the claim described by one entry of a datasource ``claims`` config.

    Args:
        claim_config: dict with ``pid`` (target property ID) and ``source_prop``
            (source column name; for GlobeCoordinate properties, the latitude
            and longitude column names separated by a comma, in that order).
        property_type: the ``p_type`` of the target property, e.g. ``'String'``.
        row: the source record; values are read with ``row[column_name]``.
        refs: references attached to the claim.

    Returns:
        The claim object, or ``None`` when ``property_type`` is not handled.

    Raises:
        ValueError: a GlobeCoordinate ``source_prop`` does not name two columns.
    """
    pid = claim_config['pid']
    source_prop = claim_config['source_prop']

    if property_type == 'GlobeCoordinate':
        coord_props = [name.strip() for name in source_prop.split(',')]
        if len(coord_props) < 2:
            raise ValueError(
                f"GlobeCoordinate claim {pid} needs 'LAT_COLUMN,LON_COLUMN' "
                f"as source_prop, got {source_prop!r}"
            )
        lat_prop, lon_prop = coord_props[:2]
        return datatypes.GlobeCoordinate(
            prop_nr=pid,
            latitude=float(row[lat_prop]),
            # Longitude comes from the SECOND configured column
            longitude=float(row[lon_prop]),
            references=refs
        )

    if property_type == 'Quantity':
        # Quantity receives its number through `amount`, not `value`
        return datatypes.Quantity(
            prop_nr=pid,
            amount=row[source_prop],
            references=refs
        )

    # Property types whose value is taken verbatim from a single source column
    value_datatypes = {
        'WikibaseItem': datatypes.Item,
        'ExternalId': datatypes.ExternalID,
        'String': datatypes.String,
        'Url': datatypes.URL,
    }
    claim_class = value_datatypes.get(property_type)
    if claim_class is None:
        return None
    return claim_class(
        prop_nr=pid,
        value=row[source_prop],
        references=refs
    )


def process_table_source(ds_qid, prov_statement):
    """Build (and print) one new item per row of an HTML-table datasource.

    The datasource configuration is read with ``eew.datasource`` and drives
    everything: which column supplies the label, description and alias, which
    class the items are an instance of, and which claims are built from which
    source columns. Every claim carries a reference pointing at ``ds_qid``.

    This is currently a dry run: items are printed, not written.

    Args:
        ds_qid: QID of the datasource item holding the configuration.
        prov_statement: currently unused; kept so existing calls keep working.

    Returns:
        None.
    """
    # Get datasource config
    ds_config = eew.datasource(ds_qid, output='dict')
    c = ds_config[ds_qid]

    # Retrieve source data
    source_df = eew.get_html_table(c['interface_url'])

    # Pull in property reference
    properties = eew.properties()
    property_lookup = properties.set_index('propertyLabel')['pid'].to_dict()
    property_types = properties.set_index('pid')['p_type'].to_dict()

    # Build reference to source; the same references object is shared by all claims
    refs = models.References()
    refs.add(
        datatypes.Item(
            prop_nr=property_lookup['data source'],
            value=ds_qid
        )
    )

    # Optional parts of the config: a missing key is treated like None
    description_prop = c.get('description_prop')
    alias_prop = c.get('alias_prop')
    claim_configs = c.get('claims') or []

    # Iterate through each record in source table
    for index, row in source_df.iterrows():
        # Assume new for right now
        item = eew.wbi.item.new()

        item.labels.set('en', row[c['label_prop']])

        if description_prop is not None:
            desc = row[description_prop]
        else:
            desc = f'an item representing a {c["instance_of_label"]}'
        item.descriptions.set('en', desc)

        if alias_prop is not None:
            item.aliases.set('en', row[alias_prop])

        # Add instance of claim
        item.claims.add(
            datatypes.Item(
                prop_nr=property_lookup['instance of'],
                value=c['instance_of_qid'],
                references=refs
            )
        )

        # Add individual claims indicated in the config; property types
        # without a handler are skipped, as before
        for claim_config in claim_configs:
            claim = _build_claim(
                claim_config,
                property_types[claim_config['pid']],
                row,
                refs
            )
            if claim is not None:
                item.claims.add(claim)

        # Dry run: the item is only printed. Uncomment to write it to the Wikibase.
        print(item)
        # item_response = item.write(summary=f"added new {c['instance_of_label']} item from source {ds_qid}")
        # print("PROCESSED:", row[c['label_prop']], item_response.id)

In [20]:
# Dry run against datasource Q321: process_table_source only prints each item it
# builds (its write call is commented out). The second argument is not used by
# the function.
process_table_source('Q321', 'Maybe this will work?')

<ItemEntity @371e50 _BaseEntity__api=<wikibaseintegrator.wikibaseintegrator.WikibaseIntegrator object at 0x7f2138371210>
	 _BaseEntity__title=None
	 _BaseEntity__pageid=None
	 _BaseEntity__lastrevid=None
	 _BaseEntity__type='item'
	 _BaseEntity__id=None
	 _BaseEntity__claims=<Claims @37e950 _Claims__claims={'P1': [<Item @37f810 _Claim__mainsnak=<Snak @37e390 _Snak__snaktype=<WikibaseSnakType.KNOWN_VALUE: 'value'> _Snak__property_number='P1' _Snak__hash=None _Snak__datavalue={'value': {'entity-type': 'item', 'numeric-id': 323, 'id': 'Q323'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-item'> _Claim__type='statement' _Claim__qualifiers=<Qualifiers @37c350 _Qualifiers__qualifiers={}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank=<WikibaseRank.NORMAL: 'normal'> _Claim__removed=False _Claim__references=<References @4826d0 _References__references=[<Reference @373ad0 _Reference__hash=None _Reference__snaks=<Snaks @48cb50 snaks={'P6': [<Snak @49ae90 _Snak__snaktype=<Wikib

# Alaska Native Regional Corporations

In [4]:
# Retrieve the datasource configuration for Q321 as a dict keyed by the QID,
# then display it for inspection.
ds_qid = 'Q321'
ds_wb_source = eew.datasource(ds_qid, output='dict')
ds_wb_source

{'Q321': {'label_prop': 'NAME',
  'description_prop': None,
  'alias_prop': 'BASENAME',
  'claims': [{'pid': 'P25', 'label': 'GNIS ID', 'source_prop': 'ANRCNS'},
   {'pid': 'P11',
    'label': 'coordinate location',
    'source_prop': 'CENTLAT,CENTLON'},
   {'pid': 'P36', 'label': 'TIGER GEOID', 'source_prop': 'GEOID'}],
  'interface_type': 'html table',
  'interface_url': 'https://tigerweb.geo.census.gov/tigerwebmain/Files/bas23/tigerweb_bas23_anrc_us.html',
  'instance_of_qid': 'Q323',
  'instance_of_label': 'Alaska Native Regional Corporation'}}

In [None]:


# Load the ANRC source table from the URL recorded in the datasource config
# retrieved in the previous cell (`ds_wb_source`, keyed by `ds_qid`). The config
# stores the table URL under 'interface_url'.
anrc_data = eew.get_html_table(url=ds_wb_source[ds_qid]['interface_url'])

In [None]:
# refs = models.References()
# refs.add(
#     datatypes.Item(
#         prop_nr='P6',
#         value='Q321'
#     )
# )

# for index, row in anrc_data.iterrows():
#     item = eew.wbi.item.new()
#     item.labels.set('en', row.NAME)
#     item.descriptions.set('en', 'an Alaska Native Regional Corporation identified in TIGER source data')
#     item.aliases.set('en', row.BASENAME)

#     item.claims.add(
#         datatypes.Item(
#             prop_nr='P1',
#             value='Q323',
#             references=refs
#         )
#     )

#     item.claims.add(
#         datatypes.ExternalID(
#             prop_nr='P25',
#             value=row.ANRCNS.lstrip('0'),
#             references=refs
#         )
#     )

#     item.claims.add(
#         datatypes.ExternalID(
#             prop_nr='P36',
#             value=row.GEOID,
#             references=refs
#         )
#     )

#     item.claims.add(
#         datatypes.GlobeCoordinate(
#             prop_nr='P11',
#             latitude=float(row.CENTLAT),
#             longitude=float(row.CENTLON),
#             references=refs
#         )
#     )

#     item_response = item.write(summary="added new ANRC item from TIGER source")    
#     print("PROCESSED:", row.NAME, item_response.id)

# Federal American Indian Reservations

In [None]:
# Datasource item describing the Federal American Indian Reservations table
indian_res_source_qid = 'Q337'

# Read its configuration the same way as the other datasources in this notebook:
# eew.datasource() returns a dict keyed by the QID
indian_res_source = eew.datasource(indian_res_source_qid, output='dict')[indian_res_source_qid]

# The config stores the table URL under 'interface_url'
indian_res_data = eew.get_html_table(url=indian_res_source['interface_url'])

In [None]:
# Preview the first rows of the reservation source table.
indian_res_data.head()

In [None]:
# `properties` is the DataFrame returned by eew.properties(), so property IDs
# have to be looked up through a label -> pid mapping rather than by indexing
# the frame with a label.
property_lookup = properties.set_index('propertyLabel')['pid'].to_dict()

# Datasource configuration for this source (dict keyed by the QID); it supplies
# the QID of the class the new items are an instance of.
indian_res_config = eew.datasource(indian_res_source_qid, output='dict')[indian_res_source_qid]

# Reference pointing at the datasource item; shared by every claim below
refs = models.References()
refs.add(
    datatypes.Item(
        prop_nr=property_lookup['data source'],
        value=indian_res_source_qid
    )
)

# NOTE: this loop writes one new item per source row to the Wikibase.
for index, row in indian_res_data.iterrows():
    item = eew.wbi.item.new()
    item.labels.set('en', row.NAME)
    item.descriptions.set('en', 'a Federal American Indian Reservation identified in TIGER source data')
    item.aliases.set('en', row.BASENAME)

    item.claims.add(
        datatypes.Item(
            prop_nr=property_lookup['instance of'],
            value=indian_res_config['instance_of_qid'],
            references=refs
        )
    )

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=property_lookup['GNIS ID'],
            # Leading zeros are dropped from the source identifier
            value=row.AIANNHNS.lstrip('0'),
            references=refs
        )
    )

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=property_lookup['TIGER GEOID'],
            value=row.GEOID,
            references=refs
        )
    )

    item.claims.add(
        datatypes.GlobeCoordinate(
            prop_nr=property_lookup['coordinate location'],
            latitude=float(row.CENTLAT),
            longitude=float(row.CENTLON),
            references=refs
        )
    )

    item_response = item.write(summary="added new American Indian Reservation item from TIGER source")
    print("PROCESSED:", row.NAME, item_response.id)

# Hawaiian Home Lands

In [None]:
# Datasource item for the Hawaiian Home Lands table
hawaiian_home_land_source_id = 'Q522'

# Read its configuration the same way as the other datasources in this notebook:
# eew.datasource() returns a dict keyed by the QID
hawaiian_home_land_config = eew.datasource(
    hawaiian_home_land_source_id, output='dict'
)[hawaiian_home_land_source_id]
hawaiian_home_land_config

In [None]:
# Load the source table from the URL stored in the datasource config; the
# datasource configuration displayed earlier in this notebook exposes the table
# URL under 'interface_url'.
hawaiian_home_land_data = eew.get_html_table(url=hawaiian_home_land_config['interface_url'])
hawaiian_home_land_data.head()

In [None]:
# NOTE(review): `model` is not referenced by any other cell in this notebook —
# looks like a leftover experiment; confirm before removing.
model = datatypes.Item()

In [None]:
# Fetch the existing entity Q522 (the same ID assigned to
# hawaiian_home_land_source_id above). This rebinds the global name `item`.
item = eew.wbi.item.get('Q522')

In [None]:
# Display the fetched entity's JSON representation for inspection.
item.get_json()