-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
const.py
226 lines (205 loc) · 5.4 KB
/
const.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import pywikibot as wp
from collections import defaultdict, namedtuple
# Wikidata site object, created through pywikibot when this module is imported.
WIKIDATA = wp.Site('wikidata', 'wikidata')
# Data repository obtained from the Wikidata site; it is the repository argument
# passed to the ItemPage and Claim constructors further down in this module.
WIKIDATA_DATASITE = WIKIDATA.data_repository()
# Pair of ids kept per entity type: one for Wikipedia links, one for Wikidata
# links. Field order is (wikipedia, wikidata).
LinkIDsTuple = namedtuple("LinkIDs", ["wikipedia", "wikidata"])
# Property id used for "instance of" claims.
PROPERTY_ID_INSTANCE_OF = "P31"

# Item ids to look for as "instance of" values; judging by the name, items
# matching one of these are skipped (confirm against the code using this tuple).
SKIP_INSTANCE_OF_ITEMS = (
    "Q4167410",  # disambiguation page
    "Q273057",  # discography
)
# Entity type name -> property id string.
# NOTE(review): presumably each value is the Wikidata "MusicBrainz <entity> ID"
# property for that entity type -- verify on Wikidata before relying on it.
PROPERTY_IDS = {
    "area": "P982",
    "artist": "P434",
    "event": "P6423",
    "genre": "P8052",
    "instrument": "P1330",
    "label": "P966",
    "place": "P1004",
    "release_group": "P436",
    "series": "P1407",
    "work": "P435",
}
# Entity type name -> (wikipedia, wikidata) id pair.
# "genre" has no Wikipedia id, so that slot is None.
LINK_IDS = {
    "area": LinkIDsTuple(wikipedia=355, wikidata=358),
    "artist": LinkIDsTuple(wikipedia=179, wikidata=352),
    "event": LinkIDsTuple(wikipedia=789, wikidata=790),
    "genre": LinkIDsTuple(wikipedia=None, wikidata=1087),
    "instrument": LinkIDsTuple(wikipedia=731, wikidata=733),
    "label": LinkIDsTuple(wikipedia=216, wikidata=354),
    "place": LinkIDsTuple(wikipedia=595, wikidata=594),
    "release_group": LinkIDsTuple(wikipedia=89, wikidata=353),
    "series": LinkIDsTuple(wikipedia=744, wikidata=749),
    "work": LinkIDsTuple(wikipedia=279, wikidata=351),
}
# Item page for Q14005 in the Wikidata repository
# (presumably the MusicBrainz item, going by the constant's name -- confirm).
MUSICBRAINZ_WIKIDATAPAGE = wp.ItemPage(WIKIDATA_DATASITE, "Q14005")
# Claim objects for properties P248, P813 and P1810.
# NOTE(review): judging by the names these are "stated in", "retrieved" and
# "named as" -- confirm on Wikidata. Each is a single instance created at
# import time and shared by every importer; check whether callers modify
# them in place.
MUSICBRAINZ_CLAIM = wp.Claim(WIKIDATA_DATASITE, "P248")
RETRIEVED_CLAIM = wp.Claim(WIKIDATA_DATASITE, "P813")
NAMED_AS_CLAIM = wp.Claim(WIKIDATA_DATASITE, "P1810")
# SQL template selecting entity/URL relationship rows for one entity type.
#
# Brace placeholders to fill before execution: {etype} (used both as the
# entity table name and inside the l_{etype}_url relationship table name),
# {wikipedia_linkid} and {wikidata_linkid} (the accepted link type ids).
# The trailing %s is left for the row limit.
#
# Selected columns, in order: entity gid, url gid, url text, relationship row
# id, link type id, entity name. Rows are kept only when neither the
# relationship nor the URL has pending edits and the link has not ended.
GENERIC_URL_MBID_QUERY = """
SELECT {etype}.gid, url.gid, url.url, l_table.id, lt.id, {etype}.name
FROM l_{etype}_url l_table
JOIN link AS l
ON l_table.link=l.id
JOIN link_type AS lt
ON lt.id=l.link_type
JOIN {etype}
ON entity0={etype}.id
JOIN url
ON l_table.entity1=url.id
WHERE
lt.id IN ({wikipedia_linkid}, {wikidata_linkid})
AND
l_table.edits_pending=0
AND
url.edits_pending=0
AND
l.ended=FALSE
LIMIT %s;
"""
# SQL template returning every gid stored in the per-entity-type
# bot_wikidata_{etype}_processed table; {etype} must be filled in first.
GENERIC_ALREADY_PROCESSED_QUERY = """
SELECT gid
FROM bot_wikidata_{etype}_processed;
"""
# SQL template that records a gid in bot_wikidata_{etype}_processed unless a
# row with that gid is already there. {etype} is a brace placeholder; the
# gid itself is supplied through the named %(mbid)s parameter.
GENERIC_DONE_QUERY = """
INSERT INTO bot_wikidata_{etype}_processed (GID)
SELECT (%(mbid)s)
WHERE NOT EXISTS (
SELECT 1
FROM bot_wikidata_{etype}_processed
WHERE gid = (%(mbid)s)
);
"""
# SQL template creating the bot_wikidata_{etype}_processed table if it does
# not exist yet: a uuid primary key `gid` plus a `processed` timestamp that
# defaults to now(). {etype} must be filled in before execution.
GENERIC_CREATE_PROCESSED_TABLE_QUERY = """
CREATE TABLE IF NOT EXISTS bot_wikidata_{etype}_processed (
gid uuid NOT NULL PRIMARY KEY,
processed timestamp with time zone DEFAULT now()
);
"""
# Entity-type-specific SQL queries, keyed by entity type name.
#
# Looking up a type that has no entry yields None; because this is a
# defaultdict, that lookup also stores the None under the requested key.
# NOTE(review): presumably callers fall back to a generic query when they get
# None -- confirm at the call site.
#
# Every query selects, in order: entity gid, url gid, url text, relationship
# row id, link type id, entity name, and leaves a %s placeholder for the limit.
QUERIES = defaultdict(lambda: None)

# Works: the relationship table is l_url_work, so the URL is entity0 and the
# work is entity1. Only link type 351 is selected.
QUERIES["work"] = """
SELECT w.gid, url.gid, url.url, lwu.id, lt.id, w.name
FROM l_url_work AS lwu
JOIN link AS l
ON lwu.link=l.id
JOIN link_type AS lt
ON lt.id=l.link_type
JOIN work AS w
ON entity1=w.id
JOIN url
ON lwu.entity0=url.id
WHERE
lt.id = 351
AND
lwu.edits_pending=0
AND
url.edits_pending=0
AND
l.ended=FALSE
LIMIT %s;
"""

# Genres: only link type 1087 is selected.
QUERIES["genre"] = """
SELECT g.gid, url.gid, url.url, lgu.id, lt.id, g.name
FROM l_genre_url AS lgu
JOIN link AS l
ON lgu.link=l.id
JOIN link_type AS lt
ON lt.id=l.link_type
JOIN genre AS g
ON entity0=g.id
JOIN url
ON lgu.entity1=url.id
WHERE
lt.id = 1087
AND
lgu.edits_pending=0
AND
url.edits_pending=0
AND
l.ended=FALSE
LIMIT %s;
"""

# Areas: link types 355 and 358, restricted to areas that are referenced by a
# place, a label, an artist (area, begin_area or end_area), a release country,
# or an area relationship to a recording, release or work.
QUERIES["area"] = """
SELECT area.gid, url.gid, url.url, l_area_url.id, lt.id, area.name
FROM l_area_url
JOIN link AS l
ON l_area_url.link=l.id
JOIN link_type AS lt
ON lt.id=l.link_type
JOIN area
ON entity0=area.id
JOIN url
ON l_area_url.entity1=url.id
WHERE
lt.id IN (355, 358)
AND
l_area_url.edits_pending=0
AND
url.edits_pending=0
AND
l.ended=FALSE
AND
area.id IN (
SELECT area
FROM place
UNION ALL
SELECT area
FROM label
UNION ALL
SELECT area
FROM artist
UNION ALL
SELECT begin_area
FROM artist
UNION ALL
SELECT end_area
FROM artist
UNION ALL
SELECT area
FROM country_area
JOIN release_country
ON release_country.country = country_area.area
UNION ALL
SELECT entity0
FROM l_area_recording
UNION ALL
SELECT entity0
FROM l_area_release
UNION ALL
SELECT entity0
FROM l_area_work
)
LIMIT %s;
"""

# Release groups: only link type 353 is selected.
QUERIES["release_group"] = """
SELECT rg.gid, url.gid, url.url, l_table.id, lt.id, rg.name
FROM l_release_group_url l_table
JOIN link AS l
ON l_table.link=l.id
JOIN link_type AS lt
ON lt.id=l.link_type
JOIN release_group rg
ON entity0=rg.id
JOIN url
ON l_table.entity1=url.id
WHERE
lt.id = 353
AND
l_table.edits_pending=0
AND
url.edits_pending=0
AND
l.ended=FALSE
LIMIT %s;
"""