# Extract data from Wikipedia Infobox Templates into csv file
## Overview
* list_template_usage: takes Site object and Template name and returns a Generator
* extract_template_data: takes a Generator, a Template name, a Field name and returns a List

In [1]:
import pywikibot
import re

site = pywikibot.Site("en", 'wikipedia')

In [2]:
def template_name_replace_underscores(template_name):
    return template_name.replace('_', ' ')

In [3]:
from pywikibot import pagegenerators as pg

def list_template_usage(site_obj, template_name):
    """
    Takes Site object and template name and returns a generator.

    The function expects a Site object (pywikibot.Site()) and
    a template name (String). It creates a list of all
    pages using that template and returns them as a generator.
    The generator will load 50 pages at a time for iteration.
    """
    template_name = template_name_replace_underscores(template_name)
    name = "{}:{}".format(site_obj.namespace(10), template_name)
    tmpl_page = pywikibot.Page(site_obj, name)
    ref_gen = tmpl_page.getReferences(follow_redirects=False)
    filter_gen = pg.NamespaceFilterPageGenerator(ref_gen, namespaces=[0])
    generator = site_obj.preloadpages(filter_gen, pageprops=True)
    return generator

In [4]:
def convert_field(field):
    #convert template in feet
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|(ft|feet)(\||})", field)
    if match:
        ft = re.search(r"\d+\.?\d*", match[0])[0]
        return(ft,'ft')
    #convert template in metre
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|m(\||})", field)
    if match:
        metre = re.search(r"\d+\.?\d*", match[0])[0]
        return(metre,'m')
    #convert template in mm
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|mm(\||})", field)
    if match:
        mm = re.search(r"\d+\.?\d*", match[0])[0]
        return(mm,'mm')
    #convert template in kg
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|kg(\||})", field)
    if match:
        kg = re.search(r"\d+\.?\d*", match[0])[0]
        return(kg,'kg')
    #convert template in lb
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|lb(\||})", field)
    if match:
        lb = re.search(r"\d+\.?\d*", match[0])[0]
        return(lb,'lb')
    #convert template in sqft
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|sqft(\||})", field)
    if match:
        sqft = re.search(r"\d+\.?\d*", match[0])[0]
        return(sqft,'sqft')    
    #convert template in m2
    match = re.search(r"{{(c|C)onvert\|\d+\.?\d*\|m2(\||})", field)
    if match:
        m2 = re.search(r"\d+\.?\d*", match[0])[0]
        return(m2,'m2')  
    #field in metre
    match = re.search(r"\d+\.?\d*m|\d+\.?\d*\sm", field)
    if match:
        metre = re.search(r"\d+\.?\d*", match[0])[0]
        return(metre,'m')
    #field in ft
    match = re.search(r"\d+\.?\d*ft|\d+\.?\d*\sft", field)
    if match:
        ft = re.search(r"\d+\.?\d*", match[0])[0]
        return(ft,'ft')
    #field wikipedia entry -> wikidata qid
    match = re.search(r"(\[\[.*\||\[\[.*\]\])", field)
    if match:
        article = match[0].replace("[[","").replace("]]","")
        page = pywikibot.Page(site, article)
        qid = pywikibot.ItemPage.fromPage(page).id
        return("",qid)
    else:
        return(field,"")


print(convert_field("{{convert|9.15|m|abbr=on}}"))
print(convert_field("{{Convert|36|ft|m|abbr=on}}"))
print(convert_field("5.79m"))
print(convert_field("{{convert|4915|mm|abbr=on}}"))
print(convert_field("60ft"))
print(convert_field("LOA is several meters"))
print(convert_field("{{convert|58|ft}}"))
print(convert_field("{{convert|50|feet|abbr=on}}"))
print(convert_field("[[Bruce Farr]]"))
print(convert_field("[[Bruce Farr|BF]]"))
print(convert_field("{{convert|225|lb|kg|0|abbr=on}}"))
print(convert_field("{{convert|180|kg|lb|abbr=on}}"))


('9.15', 'm')
('36', 'ft')
('5.79', 'm')
('4915', 'mm')
('60', 'ft')
('LOA is several meters', '')
('58', 'ft')
('50', 'ft')
('', 'Q709233')
('', 'Q709233')
('225', 'lb')
('180', 'kg')


In [5]:
def extract_template_data(generator, template_name, field_name):
    """
    Takes a Generator, a Template name, a Field name and returns a List

    The function can also take a header (list of strings) that will be
    the headers of the table (Needs to be the same dimension as the table).
    The first column needs to be a link to property (It will be made into
    a link. In this example the second column is a list of links to Q-items.
    """
    template_name = template_name_replace_underscores(template_name)
    template_data = []
    header = ["qid", "url", field_name, "unit"]
    template_data.append(header)
 
    for page in generator:
        try:
            qid = pywikibot.ItemPage.fromPage(page).id
        except:
            print('no qid')
            qid = 'no qid'
        page_url = page.full_url()
        tmpl_list = page.raw_extracted_templates
        for tmpl in tmpl_list:
            if template_name in tmpl:
                try:
                    dico=tmpl[1]
                    (field,unit)=convert_field(dico[field_name])
                    print("qid: %s, url: %s | %s: %s %s" % (qid, page_url, field_name, field, unit))
                    template_data.append([qid, page_url, field, unit])
                except:
                    print('no such field')
                    
    return template_data

In [6]:
import csv

def list2csv(filename, list_name):
    """
    Takes a List and write a csv file
    """
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(list_name)

In [7]:
template_name = "Infobox_sailboat_specifications"
field_name = "ballast"

#template_name = "Infobox sailing yacht"
#field_name = "length"

filename = template_name + "." + field_name + ".csv"
tmpl_gen = list_template_usage(site, template_name)
template_data = extract_template_data(tmpl_gen, template_name, field_name)

Retrieving 50 pages from wikipedia:en.


qid: Q442120, url: https://en.wikipedia.org/wiki/Knarr_%28keelboat%29 | ballast: 2822 lb
no such field
no such field
no such field
no such field
no such field
no such field
no such field
qid: Q5463514, url: https://en.wikipedia.org/wiki/Flying_Scot_%28dinghy%29 | ballast:  
qid: Q970466, url: https://en.wikipedia.org/wiki/Nordic_Folkboat | ballast: 2205 lb
qid: Q930962, url: https://en.wikipedia.org/wiki/Etchells | ballast: 2175 lb
no such field
no such field
no such field
no such field
no such field
qid: Q3183125, url: https://en.wikipedia.org/wiki/Jolie_Brise | ballast:  
qid: Q1684207, url: https://en.wikipedia.org/wiki/Javelin_dinghy | ballast: 49 lb
qid: Q594817, url: https://en.wikipedia.org/wiki/Yngling_%28keelboat%29 | ballast: 683 lb
no such field
qid: Q2277217, url: https://en.wikipedia.org/wiki/Shark_24 | ballast: 675 lb
qid: Q7786478, url: https://en.wikipedia.org/wiki/Thistle_%28dinghy%29 | ballast:  
qid: Q1752674, url: https://en.wikipedia.org/wiki/Albin_Vega | ballast: 

Retrieving 50 pages from wikipedia:en.


no such field
no such field
no such field
qid: Q1639639, url: https://en.wikipedia.org/wiki/SB20 | ballast: 327 kg
qid: Q4982397, url: https://en.wikipedia.org/wiki/Buccaneer_18 | ballast:  
no such field
qid: Q1418841, url: https://en.wikipedia.org/wiki/Fireball_%28dinghy%29 | ballast:  
no such field
qid: Q5151583, url: https://en.wikipedia.org/wiki/Comet_%28dinghy%29 | ballast:  
qid: Q7414553, url: https://en.wikipedia.org/wiki/San_Juan_24 | ballast: 1650 lb
no such field
qid: Q7018767, url: https://en.wikipedia.org/wiki/Newport_30 | ballast: 2500 lb
qid: Q7496198, url: https://en.wikipedia.org/wiki/Shields_%28keelboat%29 | ballast: 3080 lb
qid: Q6188785, url: https://en.wikipedia.org/wiki/Jet_14 | ballast:  
no such field
no such field
qid: Q1253532, url: https://en.wikipedia.org/wiki/Formula_18 | ballast:  
no such field
qid: Q7440009, url: https://en.wikipedia.org/wiki/Sea_Sprite_34 | ballast: 5000 lb
qid: Q14524820, url: https://en.wikipedia.org/wiki/IMOCA_60 | ballast:  
qid: 

Retrieving 50 pages from wikipedia:en.


qid: Q2975943, url: https://en.wikipedia.org/wiki/Mini_Transat_6.50 | ballast:  
qid: Q6715083, url: https://en.wikipedia.org/wiki/MC_Scow | ballast:  
qid: Q5005914, url: https://en.wikipedia.org/wiki/C_Scow | ballast:  
qid: Q6720526, url: https://en.wikipedia.org/wiki/M_Scow | ballast:  
qid: Q5321452, url: https://en.wikipedia.org/wiki/E_Scow | ballast:  
no such field
no such field
qid: Q2165098, url: https://en.wikipedia.org/wiki/Tempest_%28keelboat%29 | ballast: 440 lb
no such field
qid: Q225126, url: https://en.wikipedia.org/wiki/Flying_Fifteen | ballast: 372 lb
qid: Q7686817, url: https://en.wikipedia.org/wiki/Tartan_Ten | ballast: 3340 lb
qid: Q8046193, url: https://en.wikipedia.org/wiki/Y_Flyer | ballast:  
no such field
qid: Q7414554, url: https://en.wikipedia.org/wiki/San_Juan_21 | ballast: 400 lb
no such field
no such field
qid: Q5944238, url: https://en.wikipedia.org/wiki/Hunter_140 | ballast: none 
no such field
qid: Q5944245, url: https://en.wikipedia.org/wiki/Hunter_2

Retrieving 50 pages from wikipedia:en.


qid: Q4673736, url: https://en.wikipedia.org/wiki/Achilles_24 | ballast: 1314 lb
qid: Q7292753, url: https://en.wikipedia.org/wiki/Ranger_29 | ballast: 3130 lb
qid: Q3471881, url: https://en.wikipedia.org/wiki/Sandpiper_565 | ballast: 300 lb
qid: Q5051252, url: https://en.wikipedia.org/wiki/Catalina_22 | ballast: 800 lb
no such field
no such field
qid: Q7420036, url: https://en.wikipedia.org/wiki/Santana_22 | ballast: 1230 lb
qid: Q5051254, url: https://en.wikipedia.org/wiki/Catalina_38 | ballast: 6850 lb
qid: Q85766930, url: https://en.wikipedia.org/wiki/Hobie_33 | ballast: 1900 lb
qid: Q16983431, url: https://en.wikipedia.org/wiki/Pearson_Ensign | ballast: 1200 lb
no such field
qid: Q2856497, url: https://en.wikipedia.org/wiki/Halcyon_23 | ballast: {{convert|1155|lbs|kg}} 
qid: Q8024278, url: https://en.wikipedia.org/wiki/Windmill_%28sailing_dinghy%29 | ballast:  
qid: Q8042778, url: https://en.wikipedia.org/wiki/X_Boat | ballast:  
qid: Q4857124, url: https://en.wikipedia.org/wiki/B

Retrieving 50 pages from wikipedia:en.


no such field
qid: Q106153513, url: https://en.wikipedia.org/wiki/Ericson_29 | ballast: 3900 lb
qid: Q5150437, url: https://en.wikipedia.org/wiki/Com-Pac_16 | ballast: 450 lb
no such field
qid: Q85765857, url: https://en.wikipedia.org/wiki/Hartley_TS16 | ballast:  
qid: Q16964788, url: https://en.wikipedia.org/wiki/Pearson_Vanguard | ballast: 4250 lb
qid: Q1806539, url: https://en.wikipedia.org/wiki/Laser_Vago | ballast:  
no such field
qid: Q7420035, url: https://en.wikipedia.org/wiki/Santana_20 | ballast: 550 lb
no such field
no such field
no such field
no such field
qid: Q5421656, url: https://en.wikipedia.org/wiki/Express_37 | ballast: 4600 lb
qid: Q5289608, url: https://en.wikipedia.org/wiki/Dolphin_24 | ballast: 1650 lb


Retrieving 50 pages from wikipedia:en.


no such field
no such field
no such field
qid: Q6418114, url: https://en.wikipedia.org/wiki/Kite_%28sailboat%29 | ballast:  
no such field
no such field
no such field
qid: Q4940483, url: https://en.wikipedia.org/wiki/Bombardier_Invitation | ballast:  
no such field
no such field
no such field
qid: Q5172154, url: https://en.wikipedia.org/wiki/Coronado_15 | ballast:  
no such field
no such field
no such field
no such field
no such field
qid: Q683513, url: https://en.wikipedia.org/wiki/Mistral_One_Design | ballast:  
qid: Q239383, url: https://en.wikipedia.org/wiki/Windglider | ballast:  
qid: Q4405110, url: https://en.wikipedia.org/wiki/Division_II_%28windsurf_board%29 | ballast:  
qid: Q2376809, url: https://en.wikipedia.org/wiki/Lechner_A-390 | ballast:  
no such field
qid: Q2530562, url: https://en.wikipedia.org/wiki/RS%3AX | ballast:  
qid: Q5421655, url: https://en.wikipedia.org/wiki/Express_34 | ballast: 3700 lb
qid: Q6007963, url: https://en.wikipedia.org/wiki/Impulse_%28dinghy%29

Retrieving 50 pages from wikipedia:en.


qid: Q3978582, url: https://en.wikipedia.org/wiki/Swan_45 | ballast: 3910 kg 
no such field
qid: Q8041416, url: https://en.wikipedia.org/wiki/X-41_%28yacht%29 | ballast: 2730 kg
no such field
no such field
no such field
qid: Q11118594, url: https://en.wikipedia.org/wiki/Ultimate_20 | ballast: 450 lb
no such field
no such field
qid: Q3334777, url: https://en.wikipedia.org/wiki/Nacra_17 | ballast:  
qid: Q6957337, url: https://en.wikipedia.org/wiki/Nacra_F18_Infusion | ballast:  
no such field
no such field
no such field
no such field
qid: Q15616199, url: https://en.wikipedia.org/wiki/Swan_65 | ballast: 10400 kg


Retrieving 50 pages from wikipedia:en.


qid: Q20642510, url: https://en.wikipedia.org/wiki/RS_Aero | ballast: none 
qid: Q18125152, url: https://en.wikipedia.org/wiki/AC50 | ballast: forbidden (solid or liquid) 
qid: Q19577060, url: https://en.wikipedia.org/wiki/Farrier_F-22 | ballast: none 
qid: Q20642622, url: https://en.wikipedia.org/wiki/West_Wight_Potter_15 | ballast: 165 lb
no such field
no such field


Retrieving 50 pages from wikipedia:en.


no such field
no such field
no such field
no such field
qid: Q10599229, url: https://en.wikipedia.org/wiki/Neptunkryssare | ballast: 540 kg
no such field
no such field
no such field
no such field
no such field
no such field
qid: Q795492, url: https://en.wikipedia.org/wiki/BB_10_%28keelboat%29 | ballast: 2700 lb
no such field
no such field
no such field
qid: Q22570313, url: https://en.wikipedia.org/wiki/J%2F70 | ballast: 628 lb
no such field
qid: Q10404349, url: https://en.wikipedia.org/wiki/Albin_Express | ballast: 1764 lb
no such field
no such field
no such field
no such field
qid: Q22570351, url: https://en.wikipedia.org/wiki/Swan_60_OD | ballast: 7700 kg
no such field
no such field
no such field
no such field
no such field
no such field
qid: Q3272530, url: https://en.wikipedia.org/wiki/M34_%28keelboat%29 | ballast: 2315 lb
no such field
no such field
no such field
qid: Q24964515, url: https://en.wikipedia.org/wiki/Swan_37 | ballast: 7300 lb
qid: Q24883742, url: https://en.wikipedia.

Retrieving 50 pages from wikipedia:en.


no such field
no such field
no such field
no such field
no such field
no such field
qid: Q28162923, url: https://en.wikipedia.org/wiki/Swan_36_Frers | ballast: 2540 kg
qid: Q28162924, url: https://en.wikipedia.org/wiki/Swan_44_Frers | ballast: 3500 kg
qid: Q28233870, url: https://en.wikipedia.org/wiki/Swan_40 | ballast: 3600 kg
qid: Q28233874, url: https://en.wikipedia.org/wiki/Swan_38_%28yacht%29 | ballast: 3200 kg
qid: Q28162951, url: https://en.wikipedia.org/wiki/ClubSwan_42 | ballast: 3135 kg
qid: Q28233879, url: https://en.wikipedia.org/wiki/Swan_43 | ballast: 4700 kg
qid: Q28182562, url: https://en.wikipedia.org/wiki/Swan_391 | ballast: 3084 kg
qid: Q28233883, url: https://en.wikipedia.org/wiki/Swan_55 | ballast: 7700 kg
qid: Q28233887, url: https://en.wikipedia.org/wiki/Swan_411 | ballast: 5200 kg
qid: Q28162925, url: https://en.wikipedia.org/wiki/Swan_46_Mk_III | ballast: 5500 kg
qid: Q28162926, url: https://en.wikipedia.org/wiki/Swan_53_Mk_II | ballast: 8200 kg
qid: Q28162927,

Retrieving 50 pages from wikipedia:en.


qid: Q28162936, url: https://en.wikipedia.org/wiki/Swan_61 | ballast: 9200 kg
qid: Q28162937, url: https://en.wikipedia.org/wiki/Swan_651 | ballast: 14400 kg
qid: Q28162938, url: https://en.wikipedia.org/wiki/Swan_112_RS | ballast: 28000 kg
qid: Q28162939, url: https://en.wikipedia.org/wiki/Swan_66 | ballast: 9400 kg
qid: Q28162941, url: https://en.wikipedia.org/wiki/Swan_100 | ballast: 29000 kg
qid: Q28182636, url: https://en.wikipedia.org/wiki/Nautor_105 | ballast:  
qid: Q28233931, url: https://en.wikipedia.org/wiki/Nautor_43 | ballast: 4100 kg
qid: Q28162942, url: https://en.wikipedia.org/wiki/Swan_54 | ballast: 8200 kg
qid: Q28162943, url: https://en.wikipedia.org/wiki/Swan_131 | ballast: 36000 kg
qid: Q28162945, url: https://en.wikipedia.org/wiki/Swan_115 | ballast: 32000 kg
qid: Q28162946, url: https://en.wikipedia.org/wiki/Swan_80_Mk_II | ballast:  
qid: Q28162947, url: https://en.wikipedia.org/wiki/Swan_77 | ballast: 18000 kg 
no such field
no such field
qid: Q28446349, url: h

Retrieving 50 pages from wikipedia:en.


qid: Q28448309, url: https://en.wikipedia.org/wiki/Sonata_26 | ballast: 500 lb
qid: Q28448316, url: https://en.wikipedia.org/wiki/Sonata_6.7 | ballast: 952 lb
qid: Q28448315, url: https://en.wikipedia.org/wiki/Sonata_8 | ballast: 500 lb
qid: Q3539702, url: https://en.wikipedia.org/wiki/Triton_25 | ballast: 1250 lb
qid: Q28448320, url: https://en.wikipedia.org/wiki/Wilderness_38 | ballast: 4500 lb
qid: Q28448322, url: https://en.wikipedia.org/wiki/Wilderness_40 | ballast: 3969 lb
qid: Q30597885, url: https://en.wikipedia.org/wiki/Tally_Ho_%28yacht%29 | ballast:  
qid: Q28448380, url: https://en.wikipedia.org/wiki/Prospect_900 | ballast: 3086 lb
qid: Q56273948, url: https://en.wikipedia.org/wiki/Nonsuch_26 | ballast: 2750 lb
qid: Q28448394, url: https://en.wikipedia.org/wiki/Catalina_270 | ballast: 2000 lb
qid: Q28448411, url: https://en.wikipedia.org/wiki/Catalina_34 | ballast: 5000 lb
qid: Q28448551, url: https://en.wikipedia.org/wiki/Catalina_309 | ballast: 4000 lb
qid: Q28448557, url

Retrieving 50 pages from wikipedia:en.


qid: Q29934652, url: https://en.wikipedia.org/wiki/CS_50 | ballast: 12125 lb
qid: Q30596114, url: https://en.wikipedia.org/wiki/CS_34 | ballast:  
qid: Q30596138, url: https://en.wikipedia.org/wiki/MG_335 | ballast: 3800 lb
qid: Q30632693, url: https://en.wikipedia.org/wiki/Mirage_24 | ballast: 1500 lb
qid: Q30632698, url: https://en.wikipedia.org/wiki/Northern_1%2F4_Ton | ballast: 1450 lb
qid: Q30632706, url: https://en.wikipedia.org/wiki/Mirage_26 | ballast: 2050 lb
qid: Q30633284, url: https://en.wikipedia.org/wiki/Kirby_30 | ballast: 2300 lb
qid: Q30633521, url: https://en.wikipedia.org/wiki/Mirage_33 | ballast: 3500 lb
qid: Q30633523, url: https://en.wikipedia.org/wiki/Mirage_35 | ballast: 3500 lb
qid: Q30634234, url: https://en.wikipedia.org/wiki/Mirage_30_SX | ballast:  
qid: Q30635239, url: https://en.wikipedia.org/wiki/Mirage_29 | ballast: 2300 lb
qid: Q30635377, url: https://en.wikipedia.org/wiki/Thames_Marine_Mirage_29 | ballast:  
qid: Q30636403, url: https://en.wikipedia.o

Retrieving 50 pages from wikipedia:en.


qid: Q39053664, url: https://en.wikipedia.org/wiki/Hunter_34 | ballast: 5000 lb
qid: Q39054375, url: https://en.wikipedia.org/wiki/Hunter_36-2 | ballast: 5023 lb
qid: Q39055549, url: https://en.wikipedia.org/wiki/Hunter_410 | ballast: 7400 lb
qid: Q39057163, url: https://en.wikipedia.org/wiki/Hunter_25 | ballast: 1800 lb
qid: Q38619188, url: https://en.wikipedia.org/wiki/C%26C_37%2F40 | ballast: 6750 lb
qid: Q39058630, url: https://en.wikipedia.org/wiki/Paceship_PY_23 | ballast: 945 lb
qid: Q39060595, url: https://en.wikipedia.org/wiki/Hobie_Getaway | ballast: none 
qid: Q39061055, url: https://en.wikipedia.org/wiki/Chance_32%2F28 | ballast: 4680 lb
qid: Q39061162, url: https://en.wikipedia.org/wiki/Bluejacket_23 | ballast: 900 lb
qid: Q39075976, url: https://en.wikipedia.org/wiki/M32_%28catamaran%29 | ballast:  
qid: Q42897027, url: https://en.wikipedia.org/wiki/Corvette_31 | ballast: 4000 lb
qid: Q41065312, url: https://en.wikipedia.org/wiki/Catalina_16.5 | ballast:  
qid: Q42897029,

Retrieving 50 pages from wikipedia:en.


qid: Q48854854, url: https://en.wikipedia.org/wiki/Cal_3-27 | ballast: 2000 lb
qid: Q48855209, url: https://en.wikipedia.org/wiki/Crown_28 | ballast: 2900 lb
qid: Q48861666, url: https://en.wikipedia.org/wiki/Cal_20 | ballast: 900 lb
no such field
no such field
no such field
qid: Q48862688, url: https://en.wikipedia.org/wiki/Cal_28 | ballast: 2200 lb
qid: Q55071956, url: https://en.wikipedia.org/wiki/Cal_29 | ballast: 3350 lb
qid: Q55071957, url: https://en.wikipedia.org/wiki/Crown_23 | ballast: 1550 lb
qid: Q55071958, url: https://en.wikipedia.org/wiki/Crown_34 | ballast: 4800 lb
qid: Q55071960, url: https://en.wikipedia.org/wiki/San_Juan_34 | ballast: 4800 lb
qid: Q55071961, url: https://en.wikipedia.org/wiki/C%26C_32 | ballast: 3900 lb
qid: Q55071962, url: https://en.wikipedia.org/wiki/C%26C_26 | ballast: 2040 lb
qid: Q55071963, url: https://en.wikipedia.org/wiki/C%26C_26_Wave | ballast: 1700 lb
qid: Q55071964, url: https://en.wikipedia.org/wiki/C%26C_34 | ballast: 4100 lb
qid: Q550

Retrieving 50 pages from wikipedia:en.


qid: Q55614076, url: https://en.wikipedia.org/wiki/Hunter_33-2 | ballast: 3578 lb
no such field
qid: Q56274383, url: https://en.wikipedia.org/wiki/Bayfield_30%2F32 | ballast: 4000 lb
qid: Q56274616, url: https://en.wikipedia.org/wiki/Express_35 | ballast: 5300 lb
qid: Q56274632, url: https://en.wikipedia.org/wiki/Ontario_32 | ballast: 3977 lb
qid: Q56274712, url: https://en.wikipedia.org/wiki/Starwind_223 | ballast: 700 lb
qid: Q56274721, url: https://en.wikipedia.org/wiki/Alberg_22 | ballast: 1540 lb
qid: Q56274726, url: https://en.wikipedia.org/wiki/F-31_Sport_Cruiser | ballast:  
qid: Q56275152, url: https://en.wikipedia.org/wiki/Nonsuch_22 | ballast: 1800 lb
qid: Q56276824, url: https://en.wikipedia.org/wiki/Nonsuch_30 | ballast: 4500 lb
qid: Q56277720, url: https://en.wikipedia.org/wiki/Nonsuch_324 | ballast: 4240 lb
qid: Q56278079, url: https://en.wikipedia.org/wiki/Nonsuch_33 | ballast: 6050 lb
qid: Q56278084, url: https://en.wikipedia.org/wiki/Nonsuch_36 | ballast: 6500 lb
qid:

Retrieving 50 pages from wikipedia:en.


qid: Q60746830, url: https://en.wikipedia.org/wiki/C%26C_SR_33 | ballast:  
qid: Q60747190, url: https://en.wikipedia.org/wiki/Geary_18 | ballast:  
qid: Q60747909, url: https://en.wikipedia.org/wiki/MacGregor_24 | ballast: 575 lb
qid: Q60748352, url: https://en.wikipedia.org/wiki/Spindrift_22 | ballast: 600 lb
qid: Q60748357, url: https://en.wikipedia.org/wiki/Catalina_18 | ballast: 425 lb
qid: Q60521237, url: https://en.wikipedia.org/wiki/Hunter_26.5 | ballast: 1800 lb
qid: Q60521239, url: https://en.wikipedia.org/wiki/Hunter_27-2 | ballast: 2000 lb
qid: Q60521240, url: https://en.wikipedia.org/wiki/Hunter_27-3 | ballast: 3400 lb
qid: Q60521238, url: https://en.wikipedia.org/wiki/Hunter_27_Edge | ballast: 1600 lb
qid: Q60521248, url: https://en.wikipedia.org/wiki/Hunter_30 | ballast: 4100 lb
qid: Q60521249, url: https://en.wikipedia.org/wiki/Hunter_30T | ballast: 3800 lb
qid: Q60521222, url: https://en.wikipedia.org/wiki/Hunter_146 | ballast: none 
qid: Q60521223, url: https://en.wik

Retrieving 50 pages from wikipedia:en.


qid: Q60521268, url: https://en.wikipedia.org/wiki/Hunter_380 | ballast: 5900 lb
qid: Q60521266, url: https://en.wikipedia.org/wiki/Hunter_38 | ballast: 6130 lb
qid: Q60521269, url: https://en.wikipedia.org/wiki/Hunter_39 | ballast: 5603 lb
qid: Q60521270, url: https://en.wikipedia.org/wiki/Hunter_40 | ballast: 7900 lb
qid: Q60761885, url: https://en.wikipedia.org/wiki/Marlow-Hunter_40 | ballast: 6027 lb
qid: Q60521272, url: https://en.wikipedia.org/wiki/Hunter_40.5 | ballast: 7000 lb
qid: Q60521273, url: https://en.wikipedia.org/wiki/Hunter_41 | ballast: 6506 lb
qid: Q60762407, url: https://en.wikipedia.org/wiki/Hunter_Passage_42 | ballast: 7700 lb
qid: Q60521274, url: https://en.wikipedia.org/wiki/Hunter_420 | ballast: 6700 lb
qid: Q60521275, url: https://en.wikipedia.org/wiki/Hunter_426 | ballast: 7237 lb
qid: Q60521277, url: https://en.wikipedia.org/wiki/Hunter_43_Legend | ballast: 7600 lb
qid: Q60766059, url: https://en.wikipedia.org/wiki/Hunter_430 | ballast: 7600 lb
qid: Q607666

Retrieving 50 pages from wikipedia:en.


qid: Q65056792, url: https://en.wikipedia.org/wiki/Alberg_29 | ballast: 4000 lb
qid: Q65057065, url: https://en.wikipedia.org/wiki/Annie_30 | ballast:  
qid: Q65057554, url: https://en.wikipedia.org/wiki/Leigh_30 | ballast: 4400 lb
qid: Q65057956, url: https://en.wikipedia.org/wiki/Bahama_30 | ballast: 3130 lb
qid: Q65066039, url: https://en.wikipedia.org/wiki/Bristol_29.9 | ballast: 3600 lb
qid: Q65066674, url: https://en.wikipedia.org/wiki/O%27Day_30 | ballast: 4000 lb
qid: Q65070175, url: https://en.wikipedia.org/wiki/S2_9.2 | ballast: 4000 lb
qid: Q65072746, url: https://en.wikipedia.org/wiki/Santana_30%2F30 | ballast: 2310 lb
qid: Q65073093, url: https://en.wikipedia.org/wiki/Cal_9.2 | ballast: 2730 lb
qid: Q65040892, url: https://en.wikipedia.org/wiki/Seafarer_30 | ballast: 3450 lb
qid: Q65042277, url: https://en.wikipedia.org/wiki/Edel_820 | ballast: 2646 lb
qid: Q65043774, url: https://en.wikipedia.org/wiki/Island_Packet_27 | ballast: 3000 lb
qid: Q65049058, url: https://en.wik

Retrieving 50 pages from wikipedia:en.


qid: Q85744196, url: https://en.wikipedia.org/wiki/Austral_20_%28trailer_sailer%29 | ballast: 950 kg
qid: Q68951111, url: https://en.wikipedia.org/wiki/Cornish_Shrimper_19 | ballast:  
qid: Q69850721, url: https://en.wikipedia.org/wiki/Sirocco_15 | ballast: 75 lb
qid: Q85814655, url: https://en.wikipedia.org/wiki/Watkins_32 | ballast: 5500 lb
qid: Q85814656, url: https://en.wikipedia.org/wiki/Watkins_33 | ballast: 5500 lb
qid: Q85755722, url: https://en.wikipedia.org/wiki/Dark_Harbor_17_1%2F2 | ballast:  
qid: Q85753417, url: https://en.wikipedia.org/wiki/Columbia_32 | ballast: 4050 lb
qid: Q85814647, url: https://en.wikipedia.org/wiki/Watkins_23 | ballast: 900 lb
qid: Q70924160, url: https://en.wikipedia.org/wiki/Drascombe_Lugger | ballast: 100 lb
qid: Q73117433, url: https://en.wikipedia.org/wiki/Gulfstar_43 | ballast: 5000 lb
qid: Q85814644, url: https://en.wikipedia.org/wiki/Watkins_17 | ballast: 90 lb
qid: Q85814650, url: https://en.wikipedia.org/wiki/Watkins_27 | ballast: 3500 lb

Retrieving 50 pages from wikipedia:en.


qid: Q85763810, url: https://en.wikipedia.org/wiki/Goderich_35 | ballast: 6200 lb
no such field
qid: Q85793336, url: https://en.wikipedia.org/wiki/Pilot_35 | ballast: 4600 lb
qid: Q85798304, url: https://en.wikipedia.org/wiki/S2_11.0 | ballast: 6000 lb
qid: Q85812747, url: https://en.wikipedia.org/wiki/Vancouver_36_%28Harris%29 | ballast: 8000 lb
qid: Q85766638, url: https://en.wikipedia.org/wiki/Hinterhoeller_F3 | ballast: 5730 lb
qid: Q85800238, url: https://en.wikipedia.org/wiki/Seidelmann_37 | ballast: 5900 lb
qid: Q85754486, url: https://en.wikipedia.org/wiki/Crealock_37 | ballast: 6200 lb
no such field
qid: Q85740467, url: https://en.wikipedia.org/wiki/Alajuela_33 | ballast: 4700 lb
qid: Q85745098, url: https://en.wikipedia.org/wiki/Baltic_37 | ballast: 6120 lb
qid: Q85756826, url: https://en.wikipedia.org/wiki/Dickerson_37 | ballast: 5850 lb
no such field
qid: Q85757253, url: https://en.wikipedia.org/wiki/Dockrell_37 | ballast:  
qid: Q85740553, url: https://en.wikipedia.org/wik

Retrieving 50 pages from wikipedia:en.


qid: Q96382893, url: https://en.wikipedia.org/wiki/Irwin_41_Citation | ballast: 9500 lb
qid: Q96382889, url: https://en.wikipedia.org/wiki/Irwin_27 | ballast:  
qid: Q94158092, url: https://en.wikipedia.org/wiki/Tayana_37 | ballast: 8000 lb
qid: Q3071605, url: https://en.wikipedia.org/wiki/B%C3%A9n%C3%A9teau_Figaro | ballast:  
qid: Q96374279, url: https://en.wikipedia.org/wiki/Caliber_40 | ballast: 9500 lb
qid: Q94574694, url: https://en.wikipedia.org/wiki/Nauticat_44 | ballast:  
qid: Q96415561, url: https://en.wikipedia.org/wiki/Worldcruiser_44 | ballast: 8500 lb
qid: Q96371711, url: https://en.wikipedia.org/wiki/Alden_44 | ballast: 10000 lb
qid: Q96374857, url: https://en.wikipedia.org/wiki/Cherry_16 | ballast: 41 kg
qid: Q96373287, url: https://en.wikipedia.org/wiki/Bayfield_40 | ballast: 8200 lb
qid: Q96373286, url: https://en.wikipedia.org/wiki/Bayfield_36 | ballast: 6500 lb
qid: Q96373285, url: https://en.wikipedia.org/wiki/Bayfield_29 | ballast: 3000 lb
qid: Q96374389, url: ht

Retrieving 50 pages from wikipedia:en.


qid: Q98139858, url: https://en.wikipedia.org/wiki/Skipjack_15 | ballast:  
qid: Q104846768, url: https://en.wikipedia.org/wiki/Drascombe_Scaffie | ballast:  
qid: Q104847387, url: https://en.wikipedia.org/wiki/Designers_Choice | ballast:  
qid: Q104850551, url: https://en.wikipedia.org/wiki/US1 | ballast:  
qid: Q98541075, url: https://en.wikipedia.org/wiki/IQFoil | ballast:  
qid: Q98690649, url: https://en.wikipedia.org/wiki/Herreshoff_Bull%27s_Eye | ballast: 750 lb
qid: Q98642753, url: https://en.wikipedia.org/wiki/AMF_Apollo_16 | ballast:  
qid: Q98690544, url: https://en.wikipedia.org/wiki/Cygnus_20 | ballast: 140 lb
qid: Q104853563, url: https://en.wikipedia.org/wiki/Balboa_16 | ballast: 400 lb
qid: Q104854307, url: https://en.wikipedia.org/wiki/Laguna_16 | ballast: 400 lb
qid: Q104855089, url: https://en.wikipedia.org/wiki/Isotope_%28catamaran%29 | ballast:  
qid: Q104855244, url: https://en.wikipedia.org/wiki/Leeward_16 | ballast:  
qid: Q99130329, url: https://en.wikipedia.or

Retrieving 50 pages from wikipedia:en.


qid: Q104145305, url: https://en.wikipedia.org/wiki/Scampi_30 | ballast: 2778 lb
qid: Q104144429, url: https://en.wikipedia.org/wiki/Alerion_Express_19 | ballast: 300 lb
qid: Q104880418, url: https://en.wikipedia.org/wiki/Singoalla_34 | ballast: 3750 lb
qid: Q104188765, url: https://en.wikipedia.org/wiki/Viggen_23 | ballast: 1323 lb
qid: Q10667096, url: https://en.wikipedia.org/wiki/Shipman_28 | ballast: 2667 lb
qid: Q104880824, url: https://en.wikipedia.org/wiki/Albin_7.8 | ballast: 2094 lb
qid: Q104880832, url: https://en.wikipedia.org/wiki/Alpha_29 | ballast: 3042 lb
qid: Q104880847, url: https://en.wikipedia.org/wiki/Delta_31 | ballast: 3505 lb
qid: Q104881060, url: https://en.wikipedia.org/wiki/Allegra_24 | ballast: 2200 lb
qid: Q104881584, url: https://en.wikipedia.org/wiki/Greenwich_24 | ballast: 1500 lb
qid: Q104881672, url: https://en.wikipedia.org/wiki/Alegria_67 | ballast: none 
qid: Q104881845, url: https://en.wikipedia.org/wiki/AMF_2100 | ballast: 850 lb
qid: Q104290513, u

Retrieving 50 pages from wikipedia:en.


qid: Q105360264, url: https://en.wikipedia.org/wiki/Cal_21 | ballast: 360 lb
qid: Q105393819, url: https://en.wikipedia.org/wiki/Cal_22 | ballast: 775 lb
qid: Q105082255, url: https://en.wikipedia.org/wiki/Cal_24 | ballast: 1000 lb
qid: Q105438874, url: https://en.wikipedia.org/wiki/Cal_2-24 | ballast: 1400 lb
qid: Q105453339, url: https://en.wikipedia.org/wiki/Cal_3-24 | ballast: 1175 lb
qid: Q105474389, url: https://en.wikipedia.org/wiki/Cal_T%2F4 | ballast: 2000 lb
qid: Q105561953, url: https://en.wikipedia.org/wiki/Com-Pac_Legacy | ballast: 400 lb
qid: Q105580848, url: https://en.wikipedia.org/wiki/Marlin_23 | ballast: 1400 lb
qid: Q105333135, url: https://en.wikipedia.org/wiki/Cape_Dory_25 | ballast: 1700 lb
qid: Q105356576, url: https://en.wikipedia.org/wiki/Cape_Dory_25D | ballast: 2050 lb
qid: Q105626394, url: https://en.wikipedia.org/wiki/Typhoon_Senior | ballast: 1700 lb
qid: Q105638481, url: https://en.wikipedia.org/wiki/Capri_16 | ballast: 425 lb
qid: Q104233882, url: https

Retrieving 37 pages from wikipedia:en.


qid: Q106436039, url: https://en.wikipedia.org/wiki/ETAP_28i | ballast: 1808 lb
qid: Q106436037, url: https://en.wikipedia.org/wiki/ETAP_30 | ballast: 3032 lb
qid: Q106436046, url: https://en.wikipedia.org/wiki/ETAP_30i | ballast: 2425 lb
qid: Q106436042, url: https://en.wikipedia.org/wiki/ETAP_32i | ballast: 2866 lb
qid: Q106436048, url: https://en.wikipedia.org/wiki/ETAP_39s | ballast: 4960 lb
qid: Q106436031, url: https://en.wikipedia.org/wiki/ETAP_22 | ballast: 1213 lb
qid: Q106522259, url: https://en.wikipedia.org/wiki/ETAP_28 | ballast: 2557 lb
qid: Q106436035, url: https://en.wikipedia.org/wiki/ETAP_23i | ballast: 1036 lb
qid: Q106436036, url: https://en.wikipedia.org/wiki/ETAP_26 | ballast: 1568 lb
qid: Q106436041, url: https://en.wikipedia.org/wiki/ETAP_38i | ballast: 4850 lb
qid: Q106436043, url: https://en.wikipedia.org/wiki/ETAP_35i | ballast: 3836 lb
qid: Q106436047, url: https://en.wikipedia.org/wiki/ETAP_21i | ballast: 772 lb
qid: Q106436045, url: https://en.wikipedia.or

In [8]:
list2csv(filename, template_data)

# Push to Wikidata

In [9]:
#https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Setting_statements
#Setting statements
# -*- coding: utf-8  -*-
import pywikibot
"""
First check if P2373 already exists for Q15935. If not add claim P2373 with the string 'Kanye_west'.
This works for claims that only allow a single value.
"""
site_data = pywikibot.Site("wikidata", "wikidata")
repo_data = site_data.data_repository()
item_data = pywikibot.ItemPage(repo_data, "Q97179551")
claims = item_data.get('claims') #Get all the existing claims
item_data.get() #Fetch all page data, and cache it.

In [10]:
claims['claims']

<class 'pywikibot.page._collections.ClaimCollection'>({'P2671': [Claim.fromJSON(DataSite("wikidata", "wikidata"), {'mainsnak': {'snaktype': 'value', 'property': 'P2671', 'datatype': 'external-id', 'datavalue': {'value': '/g/11jbgj2p09', 'type': 'string'}}, 'type': 'statement', 'id': 'Q97179551$79DCF988-9F42-447C-9D9D-2F95BEB298AF', 'rank': 'normal'})], 'P31': [Claim.fromJSON(DataSite("wikidata", "wikidata"), {'mainsnak': {'snaktype': 'value', 'property': 'P31', 'datatype': 'wikibase-item', 'datavalue': {'value': {'entity-type': 'item', 'numeric-id': 106179098}, 'type': 'wikibase-entityid'}}, 'type': 'statement', 'id': 'Q97179551$11de689a-4d8e-9ea0-ff6b-2dc203fe3e21', 'rank': 'normal', 'references': [{'snaks': {'P854': [{'snaktype': 'value', 'property': 'P854', 'datatype': 'url', 'datavalue': {'value': 'https://jboats.com/j92s-tech-specs', 'type': 'string'}}]}, 'snaks-order': ['P854'], 'hash': '4a60fed570bf741fc8e5ea00015e6c537aab0773'}]})], 'P2043': [Claim.fromJSON(DataSite("wikidata",

In [11]:
statement_property = 'P2067'
statement_value = 2822
statement_unit = 'lb'


if statement_property in claims[u'claims']: #if a value is present already, print error.
    pywikibot.output(u'Error: statement already exists!')
else:
    stringclaim = pywikibot.Claim(repo_data, statement_property) #Else, add the value
    stringclaim.setTarget(statement_value)
    item_data.addClaim(stringclaim, summary=u'adding statement via bot')

Error: statement already exists!


In [14]:
claims['P2067']

KeyError: 'P2067'

In [78]:
# -*- coding: utf-8  -*-
import pywikibot
"""
Adding a qualifier to existing claims/statements
"""
site_data = pywikibot.Site("wikidata", "wikidata")
repo_data = site_data.data_repository()
item_data = pywikibot.ItemPage(repo_data, "Q97179551")
statement_property = 'P2067' # 'P2067' mass
statement_qualifier = 'P642' # 'P642' of
statement_qualifier_target = 'Q5636358' # 'Q5636358' displacement

statement_value = 2822
statement_unit = 'lb'


item_data.get() #Fetch all page data, and cache it.



for claim in item_data.claims[statement_property]: #Finds all statements with property (P)
    if statement_qualifier not in claim.qualifiers: #If not already exist
        print('statement_qualifier not found')
    else:
        for values in claim.qualifiers[statement_qualifier]:
            if statement_qualifier_target == values.target.id:
                print('statement_qualifier_target already defined')
            else:
                print('statement_qualifier_target not found')

        # Generate QUALIFIER FOR EXISTENCE STATEMENTS/CLAIMS
        #qualifier = pywikibot.Claim(repo, u'P642')
        #target = pywikibot.ItemPage(repo, "Q35409")
        #qualifier.setTarget(target)

        #claim.addQualifier(qualifier, summary=u'Adding a qualifier.') #Adding qualifier to all statements (P131)

AttributeError: 'list' object has no attribute 'target'

In [80]:
item_data.claims['P2067'][0].qualifiers['P642'][0].target

ItemPage('Q5636358')

In [89]:
import pprint

def prettyPrint(variable):
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(variable)

In [90]:
prettyPrint(item_data.claims['P2067'])

[   Claim.fromJSON(DataSite("wikidata", "wikidata"), {'mainsnak': {'snaktype': 'value', 'property': 'P2067', 'datatype': 'quantity', 'datavalue': {'value': {'amount': '+2550', 'upperBound': None, 'lowerBound': None, 'unit': 'http://www.wikidata.org/entity/Q11570'}, 'type': 'quantity'}}, 'type': 'statement', 'id': 'Q97179551$2cb0f681-4bcd-ede2-a147-7aa352b7ca64', 'rank': 'normal', 'qualifiers': {'P642': [{'snaktype': 'value', 'property': 'P642', 'datatype': 'wikibase-item', 'datavalue': {'value': {'entity-type': 'item', 'numeric-id': 5636358}, 'type': 'wikibase-entityid'}, 'hash': '9b35e18a0b409cbb243dcafd61faca309a7677bc'}]}, 'qualifiers-order': ['P642'], 'references': [{'snaks': {'P854': [{'snaktype': 'value', 'property': 'P854', 'datatype': 'url', 'datavalue': {'value': 'https://jboats.com/j92s-tech-specs', 'type': 'string'}}], 'P813': [{'snaktype': 'value', 'property': 'P813', 'datatype': 'time', 'datavalue': {'value': {'time': '+00000002021-03-30T00:00:00Z', 'precision': 11, 'after

# Alternative

In [None]:
def strip_dict_entries(dict_in):
    """Strip whitespace from all keys and (string) values in a dictionary."""
    dict_out = dict()
    if not isinstance(dict_in, dict):
        raise MyError('strip_dict_entries() expects a dictionary object'
                      'as input but found "%s"' % type(dict_in).__name__)
    for k, v in dict_in.items():
        if isinstance(v,str):
            v = v.strip()
        dict_out[k.strip()] = v
    return dict_out

In [None]:
def extract_template_data2(generator, template_name, field_name, header=None):
    """
    Takes a Generator, a Template name, a Field name and returns a List

    The function can also take a header (list of strings) that will be
    the headers of the table (Needs to be the same dimension as the table).
    The first column needs to be a link to property (It will be made into
    a link. In this example the second column is a list of links to Q-items.
    """
    template_data = []
    result = []

    if header != None:
        template_data.append(header)

    for page in generator:                    
        templates = pywikibot.textlib.extract_templates_and_params(page.text)
        for tp in templates:
            if tp[0] == template_name:
                #print(tp[1])
                result.append(strip_dict_entries(tp[1]))
            
    return result

In [None]:
result

In [None]:
def get_all_template_entries(wikitext, template_name):
    """Return a list of all arguments for instances of a given template."""
    templates = pywikibot.textlib.extract_templates_and_params(wikitext)
    result = []
    for tp in templates:
        if tp[0] == template_name:
            print(tp[1])
            result.append(strip_dict_entries(tp[1]))
    return result

In [None]:
toto = get_all_template_entries(page.text, template_name)
toto

In [None]:
import mwparserfromhell
import pywikibot

def parse(title):
    site = pywikibot.Site()
    page = pywikibot.Page(site, title)
    text = page.get()
    return mwparserfromhell.parse(text)



In [None]:
for page in tmpl_gen:
    item = pywikibot.ItemPage.fromPage(page)
    print(page.title(), item.getID())
    print(page)

In [None]:
type(page)

In [None]:
templates = page.templatesWithParams()
(template_page,item)=templates[0]
print(type(item))
item

In [None]:
templates_raw = page.raw_extracted_templates
(template_0, item_0)=templates_raw[0]
print(type(item_0))
item_0

In [None]:
item_0["loa"]

In [None]:
for (template, fielddict) in templates:
            # Clean up template
            try:
                template = pywikibot.Page(page.site, template,
                                          ns=10).title(with_ns=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error(
                    "Failed parsing template; '{}' should be "
                    'the template name.'.format(template))
                continue
            # We found the template we were looking for
            for field, value in fielddict.items():
                field = field.strip()

In [None]:
for (template, fielddict) in templates:
    print(template)
    print(fielddict)

In [None]:
fielddict

In [None]:
(template, fielddict)=templates[0]

In [None]:
fielddict

In [None]:
(field, value) = fielddict.items(2)

In [None]:
import pywikibot
from pywikibot import pagegenerators as pg

def list_template_usage(site_obj, template_name):
    """
    Takes Site object and template name and returns a generator.

    The function expects a Site object (pywikibot.Site()) and
    a template name (String). It creates a list of all
    pages using that template and returns them as a generator.
    It only returns pages in the 121-namespace (property-talk-pages).
    The generator will load 50 pages at a time for iteration.
    """
    name = "{}:{}".format(site.namespace(10), template_name)
    tmpl_page = pywikibot.Page(site, name)
    ref_gen = pg.ReferringPageGenerator(tmpl_page, onlyTemplateInclusion=True)
    filter_gen = pg.NamespaceFilterPageGenerator(ref_gen, namespaces=[10])
    generator = site.preloadpages(filter_gen, pageprops=True)
    return generator

In [None]:
site = pywikibot.Site("en", 'wikipedia')
tmpl_gen = list_template_usage(site, "Infobox_sailboat_specifications")

# code for single page

In [21]:
import pywikibot

#template_name = "Infobox_sailboat_specifications"
#field_name = "loa"
template_name = "Infobox sailing yacht"
field_name = "designer"
filename = template_name + "." + field_name + ".csv"

site = pywikibot.Site("en", "wikipedia")
#page = pywikibot.Page(site, "ETAP_32s")
#page = pywikibot.Page(site, "Finn (dinghy)")
#page = pywikibot.Page(site, "49er (dinghy)")
#page = pywikibot.Page(site, "Squib (keelboat)")
#page = pywikibot.Page(site, "Hobie 16")
page = pywikibot.Page(site, "Maiden_%28yacht%29")
page_url = page.full_url()
qid = pywikibot.ItemPage.fromPage(page).id
print(qid)

Q60755728


In [22]:
tmpl_list = page.raw_extracted_templates
template_name = template_name_replace_underscores(template_name)
for tmpl in tmpl_list:
    if template_name in tmpl:
        try:
            dico=tmpl[1]
            field=dico[field_name]
            print("qid: %s, page: %s | %s: %s" % (qid, page_url, field_name, field))
            template_data.append([qid, page_url, field])
        except:
            print('no such field')

qid: Q60755728, page: https://en.wikipedia.org/wiki/Maiden_%28yacht%29 | designer: [[Bruce Farr]]


In [16]:
print(convert_field("{{convert|58|ft}}"))

('{{convert|58|ft}}', '')


In [23]:
field

'[[Bruce Farr]]'

In [24]:
type(field)

str

In [28]:
page = pywikibot.Page(site, field)
item = pywikibot.ItemPage.fromPage(page)

print(item)

InvalidTitle: '[[Bruce Farr]]' contains illegal char(s) '['