-
Notifications
You must be signed in to change notification settings - Fork 9
/
wikidata_enwiki_import_id_bot.py
93 lines (85 loc) · 2.52 KB
/
wikidata_enwiki_import_id_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Import IDs from enwp
# 18 Jan 2022 Mike Peel Started
import pywikibot
from pywikibot import pagegenerators
# Language edition of Wikipedia that IDs are read from.
lang = 'en'

# Source wiki, and the data repository attached to it; new claims are
# created against `repo`.
wiki = pywikibot.Site(lang, 'wikipedia')
repo = wiki.data_repository()

# When True, progress lines prefixed with '#' are printed while scanning.
debug = True

# Only tracking sub-categories whose title contains one of these strings
# are processed; every other sub-category is skipped.
whitelist = [
    'Category:Australian Statistical Geography Standard 2011 ID not in Wikidata',
]
def editwikidata(wd_item, propertyid, value):
    """Interactively add a ``propertyid = value`` claim to a Wikidata item.

    Prints the item's URL and the proposed statement, then prompts with
    "Save? " on standard input. The claim is written only when the answer
    is exactly ``y``; any other answer leaves the item untouched.

    Args:
        wd_item: Item page that should receive the claim. It must provide
            ``title()``, ``get()`` and ``addClaim()``.
        propertyid: Property identifier string for the new claim.
        value: Claim target. It is joined into the preview line as text,
            so it must be a string.

    Returns:
        Always ``0``, whether or not the claim was saved.
    """
    print('http://www.wikidata.org/wiki/' + wd_item.title())
    print(' = '.join((propertyid, value)))

    # Load the item's content before attaching anything to it.
    wd_item.get()

    claim = pywikibot.Claim(repo, propertyid)
    claim.setTarget(value)

    # Nothing is saved without an explicit 'y' from the operator.
    answer = input("Save? ")
    if answer != 'y':
        return 0

    wd_item.addClaim(claim, summary='Importing ' + str(propertyid) + ' from enwiki')
    return 0
# Walk the whitelisted "... not in Wikidata" tracking sub-categories. For each
# member page that carries a local ID in the tracked template but whose
# Wikidata item has no claim for the tracked property, offer to import it.
cat = pywikibot.Category(wiki, 'Category:Wikipedia categories tracking data not in Wikidata')
for subcat in pagegenerators.SubCategoriesPageGenerator(cat, recurse=False):
    subcat_title = subcat.title()
    # Only categories that were explicitly whitelisted are processed.
    if not any(entry in subcat_title for entry in whitelist):
        continue
    if debug:
        print('# ' + str(subcat_title))

    # Read the property ID and the source template name from the category's
    # "Wikidata tracking category" banner template.
    propid = ''
    templatename = ''
    for tracker in subcat.templatesWithParams():
        if 'Wikidata tracking category' not in tracker[0].title():
            continue
        if debug:
            print('# ' + str(tracker))
        for param in tracker[1]:
            # Named parameters arrive as 'name=value'. Match on the name
            # only, so a value that merely contains the word 'property' or
            # 'template' is not mistaken for the setting itself, and
            # partition so that a value containing '=' is kept whole.
            name, sep, content = param.partition('=')
            if not sep:
                # Positional parameter: carries neither setting.
                continue
            if 'property' in name:
                propid = content.strip()
            if 'template' in name:
                templatename = content.strip()
    if debug:
        print('#' + propid)
        print('#' + templatename)

    # If we haven't got a propid or template name, skip this category.
    # An empty template name would match every template on every page
    # below, so this must test `templatename`, not the loop variable.
    if not propid or not templatename:
        continue

    for page in pagegenerators.CategorizedPageGenerator(subcat, recurse=False):
        if debug:
            print('# ' + str(page))

        # Take the first non-empty ID found in a matching template: either
        # a positional parameter or a named parameter containing 'id='.
        localid = ''
        for page_template in page.templatesWithParams():
            if templatename not in page_template[0].title():
                continue
            for param in page_template[1]:
                if localid != '':
                    break
                if '=' not in param:
                    localid = param.strip()
                elif 'id=' in param:
                    localid = param.split('=', 1)[1].strip()
        if debug:
            print('#' + str(localid))
        if localid == '':
            continue

        # We have a local ID, so check for an existing Wikidata value.
        try:
            wd_item = pywikibot.ItemPage.fromPage(page)
            item_dict = wd_item.get()
        except Exception:
            # Best effort: the page has no usable Wikidata item (or it could
            # not be loaded), so move on. Ctrl-C is no longer swallowed here.
            continue

        if propid in item_dict.get('claims', {}):
            # Wikidata already holds a value for this property.
            continue

        # Save to Wikidata? (editwikidata asks for confirmation.)
        editwikidata(wd_item, propid, localid)
        # NOTE(review): presumably touched so the page's tracking-category
        # membership is refreshed after the import - confirm. It runs even
        # when the operator declines the save.
        page.touch()