# Use pywikibot

[pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot) is a python library based on the Mediawiki API.
In this notebook we will see how to use the API using Python with pywikibot and lay the groundwork to later develop a bot or tool for Wikidata.

Use pywikibot for Wikidata:

- https://www.mediawiki.org/wiki/Manual:Pywikibot/Wikidata
- https://www.mediawiki.org/wiki/Manual:Pywikibot/Scripts#Wikidata
- https://www.wikidata.org/wiki/Wikidata:Creating_a_bot

If you want to setup pywikibot on your computer, check this tutorial: https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Setting_up_Shop

Quick steps:

1. Create a new directory for your project
1. Clone `pywikibot` in this directory: `git clone --recursive https://gerrit.wikimedia.org/r/pywikibot/core.git pywikibot`
1. Run `python generate_user_files.py` to create user-config.py
1. Run `python pwb.py login` to login with your account

In [17]:
import pywikibot
import requests
import csv

def load_item_from_repo(repo, item_id):
    item = pywikibot.ItemPage(repo, item_id)
    item.get()
    return item

def existing_claim_from_year(item, year):
    try:
        claims = item.claims['P1082']
        time_str = pywikibot.WbTime(year=year).toTimestr()
        for claim in claims:
            for qualifier_value in claim.qualifiers['P585']:
                if (qualifier_value.getTarget().toTimestr() == time_str):
                    return claim
    except KeyError:
        pass
    return None

In [4]:
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

In [8]:
zurich = pywikibot.ItemPage(repo, 'Q72').get()
zurich

{u'aliases': {u'bho': [u'\u091c\u094d\u092f\u0941\u0930\u093f\u0916',
   u'\u091c\u0942\u0930\u093f\u0916',
   u'\u091c\u093c\u094d\u092f\u0942\u0930\u093f\u0916\u093c',
   u'\u091c\u094d\u092f\u0942\u0930\u093f\u0937'],
  u'en': [u'City of Zurich', u'Zurich', u'ZH'],
  u'es': [u'Zurich'],
  u'fr': [u'Z\xfcrich', u'Zuerich'],
  u'gl': [u'Z\xfcrich'],
  u'kk-cyrl': [u'\u0426\u044e\u0440\u0438\u0445 \u049b\u0430\u043b\u0430\u0441\u044b'],
  u'kn': [u'\u0c9c\u0ccd\u0caf\u0cc2\u0cb0\u0cbf\u200d\u0c95\u0ccd'],
  u'ms': [u'Zurich'],
  u'nl': [u'Zurich'],
  u'pl': [u'Miasto Zurychu', u'ZH'],
  u'sq': [u'Zyrih', u'Zyrihu'],
  u'sr': [u'\u0413\u0440\u0430\u0434 \u0426\u0438\u0440\u0438\u0445',
   u'\u0417\u0443\u0440\u0438\u0445',
   u'ZH'],
  u'ta': [u'\u0b9c\u0bc2\u0bb0\u0bbf\u0b95\u0bcd'],
  u'th': [u'\u0e0b\u0e39\u0e23\u0e34\u0e01'],
  u'zh': [u'\u8607\u9ece\u4e16']},
 u'claims': {u'P1036': [Claim.fromJSON(DataSite("wikidata", "wikidata"), {u'type': u'statement', u'mainsnak': {u'datatype': 

## Let's create a bot to keep the population counts up to date

The data is available from Open Data Zurich, in the dataset [Bevölkerung nach Stadtquartier, seit 1970](https://data.stadt-zuerich.ch/dataset/bev_bestand_jahr_quartier_seit1970_od3240).

In [31]:
result = requests.get(
    'https://data.stadt-zuerich.ch/api/3/action/package_show?id=bev_bestand_jahr_quartier_seit1970_od3240'
)
dataset = result.json()['result']
population_url = dataset['resources'][0]['url']
data = requests.get(population_url)
cr = csv.reader(data.content.splitlines(), delimiter=',')
rows = list(cr)
for row in rows:
    print(row)

['\xef\xbb\xbf"StichtagDatJahr"', 'QuarSort', 'QuarLang', 'AnzBestWir']
['2018', '123', 'Hirzenbach', '12801']
['2017', '123', 'Hirzenbach', '12627']
['2016', '123', 'Hirzenbach', '12463']
['2015', '123', 'Hirzenbach', '11930']
['2014', '123', 'Hirzenbach', '11679']
['2013', '123', 'Hirzenbach', '11153']
['2012', '123', 'Hirzenbach', '11404']
['2011', '123', 'Hirzenbach', '11516']
['2010', '123', 'Hirzenbach', '11459']
['2009', '123', 'Hirzenbach', '11610']
['2008', '123', 'Hirzenbach', '11478']
['2007', '123', 'Hirzenbach', '11343']
['2006', '123', 'Hirzenbach', '11205']
['2005', '123', 'Hirzenbach', '11265']
['2004', '123', 'Hirzenbach', '11336']
['2003', '123', 'Hirzenbach', '11432']
['2002', '123', 'Hirzenbach', '11434']
['2001', '123', 'Hirzenbach', '11302']
['2000', '123', 'Hirzenbach', '11281']
['1999', '123', 'Hirzenbach', '11119']
['1998', '123', 'Hirzenbach', '11015']
['1997', '123', 'Hirzenbach', '11013']
['1996', '123', 'Hirzenbach', '10990']
['1995', '123', 'Hirzenbach', '

In [35]:
# get rid of header
rows.pop(0)

['2018', '123', 'Hirzenbach', '12801']

In [36]:
# mapping between quarter id and wikidata item
zurich_quarters = {
    '11': "Q692511",
    '12': "Q39240",
    '13': "Q10987378",
    '14': "Q1093831",
    '21': "Q642353",
    '23': "Q691367",
    '24': "Q648218",
    '33': "Q693357",
    '31': "Q433012",
    '34': "Q370104",
    '41': "Q531899",
    '42': "Q1805410",
    '44': "Q870084",
    '51': "Q693413",
    '52': "Q687052",
    '61': "Q656446",
    '63': "Q693483",
    '71': "Q693269",
    '72': "Q693454",
    '73': "Q476940",
    '74': "Q392079",
    '81': "Q692773",
    '82': "Q693397",
    '83': "Q693321",
    '91': "Q80797",
    '92': "Q445711",
    '101': "Q455496",
    '102': "Q678030",
    '111': "Q382903",
    '115': "Q167179",
    '119': "Q276792",
    '121': "Q652455",
    '122': "Q692728",
    '123': "Q693374",
}

In [None]:
population_prop_id = 'P1082'
time_prop_id = 'P585'
url_prop_id = 'P854'

# Loop over CSV file
for row in rows:
    year = row[0]
    qnr = row[1]
    quarter = row[2]
    population_value = row[3]
    
    
    # load item
    item_id = zurich_districts[qnr]
    item = load_item_from_repo(repo, item_id)

    # check if we already have an existing claim
    population_claim = existing_claim_from_year(item, year)
    if (population_claim is None):
        # population claim
        population_claim = pywikibot.Claim(repo, population_prop_id)
        population_claim.setTarget(
            pywikibot.WbQuantity(amount=population_value))
        item.addClaim(population_claim)

        # time qualifier
        qualifier = pywikibot.Claim(repo, time_prop_id)
        yearObj = pywikibot.WbTime(year=year)
        qualifier.setTarget(yearObj)
        population_claim.addQualifier(qualifier)

        # source
        source = pywikibot.Claim(repo, url_prop_id)
        source.setTarget(CSV_FILE_URL)
        population_claim.addSource(source)
        print ("Added population claim "
               "to %s for year %d") % (item_id, year)

        # when adding a new claim wait some time to make the API happy
        time.sleep(15)
    else:
        print ("Population claim already exists "
               "on %s for year %d, skipping") % (item_id, year)
    except pywikibot.data.api.APIError as e:
        print("API Error: %s" % (e))
        break

In [None]:
population_prop_id = 'P1082'
time_prop_id = 'P585'
url_prop_id = 'P854'

# Loop over CSV file
for row in rows:
    # load item
    item_id = zurich_districts[district['qnr']]
    item = load_item_from_repo(repo, item_id)

    year_list = range(1970, 2018)
    try:
        for year in year_list:
            population_claim = existing_claim_from_year(item, year)
            if (population_claim is None):
                # population claim
                population_value = district['wbev_%d' % year]
                population_claim = pywikibot.Claim(repo, population_prop_id)
                population_claim.setTarget(
                    pywikibot.WbQuantity(amount=population_value))
                item.addClaim(population_claim)

                # time qualifier
                qualifier = pywikibot.Claim(repo, time_prop_id)
                yearObj = pywikibot.WbTime(year=year)
                qualifier.setTarget(yearObj)
                population_claim.addQualifier(qualifier)

                # source
                source = pywikibot.Claim(repo, url_prop_id)
                source.setTarget(CSV_FILE_URL)
                population_claim.addSource(source)
                print ("Added population claim "
                       "to %s for year %d") % (item_id, year)

                # when adding a new claim wait some time to make the API happy
                time.sleep(15)
            else:
                print ("Population claim already exists "
                       "on %s for year %d, skipping") % (item_id, year)
    except pywikibot.data.api.APIError as e:
        print("API Error: %s" % (e))
        break