/
utilities.py
491 lines (334 loc) · 12.6 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
import re
import os
import hashlib
from threading import Thread, Timer
import pickle
import json
import urllib
import datetime
import random
import itertools
from doreah import settings
from doreah import caching
from doreah.logging import log
from doreah.regular import yearly, daily
from external import api_request_track, api_request_artist
#####
## SERIALIZE
#####
def serialize(obj):
try:
return json.dumps(obj)
except:
if isinstance(obj,list) or isinstance(obj,tuple):
return "[" + ",".join(serialize(o) for o in obj) + "]"
elif isinstance(obj,dict):
return "{" + ",".join(serialize(o) + ":" + serialize(obj[o]) for o in obj) + "}"
return json.dumps(obj.hashable())
#if isinstance(obj,list) or if isinstance(obj,tuple):
# return "[" + ",".join(dumps(o) for o in obj) + "]"
#if isinstance(obj,str)
#####
## RULESTATE VALIDATION
#####
def checksumTSV(folder):
sums = ""
for f in os.listdir(folder + "/"):
if (f.endswith(".tsv")):
f = open(folder + "/" + f,"rb")
sums += hashlib.md5(f.read()).hexdigest() + "\n"
f.close()
return sums
# returns whether checksums match and sets the checksum to invalid if they don't (or sets the new one if no previous one exists)
def combineChecksums(filename,checksums):
import os
if os.path.exists(filename + ".rulestate"):
f = open(filename + ".rulestate","r")
oldchecksums = f.read()
f.close()
if oldchecksums == checksums:
# the new checksum given by the calling db server represents the rule state that all current unsaved scrobbles were created under
# if this is the same as the existing one, we're all good
return True
elif (oldchecksums != "INVALID"):
#if not, the file is not consistent to any single rule state (some scrobbles were created with an old ruleset, some not)
f = open(filename + ".rulestate","w")
f.write("INVALID") # this will never match any sha256sum
f.close()
return False
else:
#if the file already says invalid, no need to open it and rewrite
return False
else:
f = open(filename + ".rulestate","w")
f.write(checksums)
f.close()
return True
# checks ALL files for their rule state. if they are all the same as the current loaded one, the entire database can be assumed to be consistent with the current ruleset
# in any other case, get out
def consistentRulestate(folder,checksums):
result = []
for scrobblefile in os.listdir(folder + "/"):
if (scrobblefile.endswith(".tsv")):
try:
with open(folder + "/" + scrobblefile + ".rulestate","r") as f:
if f.read() != checksums:
return False
except:
return False
return True
#####
## IMAGES
#####
### Caches
cacheage = settings.get_settings("CACHE_EXPIRE_POSITIVE") * 24 * 3600
cacheage_neg = settings.get_settings("CACHE_EXPIRE_NEGATIVE") * 24 * 3600
artist_cache = caching.Cache(name="imgcache_artists",maxage=cacheage,maxage_negative=cacheage_neg,persistent=True)
track_cache = caching.Cache(name="imgcache_tracks",maxage=cacheage,maxage_negative=cacheage_neg,persistent=True)
# removes emojis and weird shit from names
def clean(name):
return "".join(c for c in name if c.isalnum() or c in []).strip()
def local_files(artist=None,artists=None,title=None):
# check if we're dealing with a track or artist, then clean up names
# (only remove non-alphanumeric, allow korean and stuff)
if title is not None and artists is not None:
track = True
title, artists = clean(title), [clean(a) for a in artists]
elif artist is not None:
track = False
artist = clean(artist)
else: return []
superfolder = "images/tracks/" if track else "images/artists/"
filenames = []
if track:
#unsafeartists = [artist.translate(None,"-_./\\") for artist in artists]
safeartists = [re.sub("[^a-zA-Z0-9]","",artist) for artist in artists]
#unsafetitle = title.translate(None,"-_./\\")
safetitle = re.sub("[^a-zA-Z0-9]","",title)
if len(artists) < 4:
unsafeperms = itertools.permutations(artists)
safeperms = itertools.permutations(safeartists)
else:
unsafeperms = [sorted(artists)]
safeperms = [sorted(safeartists)]
for unsafeartistlist in unsafeperms:
filename = "-".join(unsafeartistlist) + "_" + title
if filename != "":
filenames.append(filename)
filenames.append(filename.lower())
for safeartistlist in safeperms:
filename = "-".join(safeartistlist) + "_" + safetitle
if filename != "":
filenames.append(filename)
filenames.append(filename.lower())
filenames = list(set(filenames))
if len(filenames) == 0: filenames.append(str(hash((frozenset(artists),title))))
else:
#unsafeartist = artist.translate(None,"-_./\\")
safeartist = re.sub("[^a-zA-Z0-9]","",artist)
filename = artist
if filename != "":
filenames.append(filename)
filenames.append(filename.lower())
filename = safeartist
if filename != "":
filenames.append(filename)
filenames.append(filename.lower())
filenames = list(set(filenames))
if len(filenames) == 0: filenames.append(str(hash(artist)))
images = []
for purename in filenames:
# direct files
for ext in ["png","jpg","jpeg","gif"]:
#for num in [""] + [str(n) for n in range(0,10)]:
if os.path.exists(superfolder + purename + "." + ext):
images.append("/" + superfolder + purename + "." + ext)
# folder
try:
for f in os.listdir(superfolder + purename + "/"):
if f.split(".")[-1] in ["png","jpg","jpeg","gif"]:
images.append("/" + superfolder + purename + "/" + f)
except:
pass
return images
# these caches are there so we don't check all files every time, but return the same one
local_cache_age = settings.get_settings("LOCAL_IMAGE_ROTATE")
local_artist_cache = caching.Cache(maxage=local_cache_age)
local_track_cache = caching.Cache(maxage=local_cache_age)
def getTrackImage(artists,title,fast=False):
# obj = (frozenset(artists),title)
# filename = "-".join([re.sub("[^a-zA-Z0-9]","",artist) for artist in sorted(artists)]) + "_" + re.sub("[^a-zA-Z0-9]","",title)
# if filename == "": filename = str(hash(obj))
# filepath = "images/tracks/" + filename
if settings.get_settings("USE_LOCAL_IMAGES"):
try:
return local_track_cache.get((frozenset(artists),title))
except:
images = local_files(artists=artists,title=title)
if len(images) != 0:
#return random.choice(images)
res = random.choice(images)
local_track_cache.add((frozenset(artists),title),res)
return urllib.parse.quote(res)
# check if custom image exists
# if os.path.exists(filepath + ".png"):
# imgurl = "/" + filepath + ".png"
# return imgurl
# elif os.path.exists(filepath + ".jpg"):
# imgurl = "/" + filepath + ".jpg"
# return imgurl
# elif os.path.exists(filepath + ".jpeg"):
# imgurl = "/" + filepath + ".jpeg"
# return imgurl
# elif os.path.exists(filepath + ".gif"):
# imgurl = "/" + filepath + ".gif"
# return imgurl
try:
# check our cache
# if we have cached the nonexistence of that image, we immediately return the redirect to the artist and let the resolver handle it
# (even if we're not in a fast lookup right now)
#result = cachedTracks[(frozenset(artists),title)]
result = track_cache.get((frozenset(artists),title)) #track_from_cache(artists,title)
if result is not None: return result
else:
for a in artists:
res = getArtistImage(artist=a,fast=True)
if res != "": return res
return ""
except:
pass
# do we have an api key?
# apikey = settings.get_settings("LASTFM_API_KEY")
# if apikey is None: return "" # DO NOT CACHE THAT
# fast request only retuns cached and local results, generates redirect link for rest
if fast: return "/image?title=" + urllib.parse.quote(title) + "&" + "&".join(["artist=" + urllib.parse.quote(a) for a in artists])
# non-fast lookup (esentially only the resolver lookup)
result = api_request_track((artists,title))
# cache results (even negative ones)
#cachedTracks[(frozenset(artists),title)] = result
track_cache.add((frozenset(artists),title),result) #cache_track(artists,title,result)
# return either result or redirect to artist
if result is not None: return result
else:
for a in artists:
res = getArtistImage(artist=a,fast=False)
if res != "": return res
return ""
def getArtistImage(artist,fast=False):
# obj = artist
# filename = re.sub("[^a-zA-Z0-9]","",artist)
# if filename == "": filename = str(hash(obj))
# filepath = "images/artists/" + filename
# #filepath_cache = "info/artists_cache/" + filename
if settings.get_settings("USE_LOCAL_IMAGES"):
try:
return local_artist_cache.get(artist)
except:
images = local_files(artist=artist)
if len(images) != 0:
#return random.choice(images)
res = random.choice(images)
local_artist_cache.add(artist,res)
return urllib.parse.quote(res)
# check if custom image exists
# if os.path.exists(filepath + ".png"):
# imgurl = "/" + filepath + ".png"
# return imgurl
# elif os.path.exists(filepath + ".jpg"):
# imgurl = "/" + filepath + ".jpg"
# return imgurl
# elif os.path.exists(filepath + ".jpeg"):
# imgurl = "/" + filepath + ".jpeg"
# return imgurl
# elif os.path.exists(filepath + ".gif"):
# imgurl = "/" + filepath + ".gif"
# return imgurl
try:
#result = cachedArtists[artist]
result = artist_cache.get(artist) #artist_from_cache(artist)
if result is not None: return result
else: return ""
except:
pass
# do we have an api key?
# apikey = settings.get_settings("LASTFM_API_KEY")
# if apikey is None: return "" # DO NOT CACHE THAT
# fast request only retuns cached and local results, generates redirect link for rest
if fast: return "/image?artist=" + urllib.parse.quote(artist)
# non-fast lookup (esentially only the resolver lookup)
result = api_request_artist(artist=artist)
# cache results (even negative ones)
#cachedArtists[artist] = result
artist_cache.add(artist,result) #cache_artist(artist,result)
if result is not None: return result
else: return ""
def getTrackImages(trackobjectlist,fast=False):
threads = []
for track in trackobjectlist:
t = Thread(target=getTrackImage,args=(track["artists"],track["title"],),kwargs={"fast":fast})
t.start()
threads.append(t)
for t in threads:
t.join()
return [getTrackImage(t["artists"],t["title"]) for t in trackobjectlist]
def getArtistImages(artistlist,fast=False):
threads = []
for artist in artistlist:
t = Thread(target=getArtistImage,args=(artist,),kwargs={"fast":fast})
t.start()
threads.append(t)
for t in threads:
t.join()
# async calls only cached results, now we need to get them
return [getArtistImage(a) for a in artistlist]
# new way of serving images
# instead always generate a link locally, but redirect that on the fly
# this way the page can load faster and images will trickle in without having to resort to XHTTP requests
def resolveImage(artist=None,track=None):
if track is not None:
return getTrackImage(track["artists"],track["title"])
elif artist is not None:
return getArtistImage(artist)
#####
## PULSE MAINTENANCE
#####
@yearly
def update_medals():
from database import MEDALS, MEDALS_TRACKS, STAMPS, get_charts_artists, get_charts_tracks
currentyear = datetime.datetime.utcnow().year
try:
firstyear = datetime.datetime.utcfromtimestamp(STAMPS[0]).year
except:
firstyear = currentyear
MEDALS.clear()
for year in range(firstyear,currentyear):
charts = get_charts_artists(within=[year])
for a in charts:
artist = a["artist"]
if a["rank"] == 1: MEDALS.setdefault(artist,{}).setdefault("gold",[]).append(year)
elif a["rank"] == 2: MEDALS.setdefault(artist,{}).setdefault("silver",[]).append(year)
elif a["rank"] == 3: MEDALS.setdefault(artist,{}).setdefault("bronze",[]).append(year)
else: break
MEDALS_TRACKS.clear()
for year in range(firstyear,currentyear):
charts = get_charts_tracks(within=[year])
for t in charts:
track = (frozenset(t["track"]["artists"]),t["track"]["title"])
if t["rank"] == 1: MEDALS_TRACKS.setdefault(track,{}).setdefault("gold",[]).append(year)
elif t["rank"] == 2: MEDALS_TRACKS.setdefault(track,{}).setdefault("silver",[]).append(year)
elif t["rank"] == 3: MEDALS_TRACKS.setdefault(track,{}).setdefault("bronze",[]).append(year)
else: break
@daily
def update_weekly():
from database import WEEKLY_TOPTRACKS, WEEKLY_TOPARTISTS, get_charts_artists, get_charts_tracks
from malojatime import ranges, thisweek
WEEKLY_TOPARTISTS.clear()
WEEKLY_TOPTRACKS.clear()
for week in ranges(step="week"):
if week == thisweek(): break
for a in get_charts_artists(timerange=week):
artist = a["artist"]
if a["rank"] == 1: WEEKLY_TOPARTISTS[artist] = WEEKLY_TOPARTISTS.setdefault(artist,0) + 1
for t in get_charts_tracks(timerange=week):
track = (frozenset(t["track"]["artists"]),t["track"]["title"])
if t["rank"] == 1: WEEKLY_TOPTRACKS[track] = WEEKLY_TOPTRACKS.setdefault(track,0) + 1