/
lastfm_to_madsonic_query.py
123 lines (101 loc) · 4.56 KB
/
lastfm_to_madsonic_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Fetches every track's play count and last-played time from Last.fm and builds a series of SQL UPDATE statements that
# can be used to update the Madsonic database. The statements are written to the generated temp.txt file.
import requests
import collections
import datetime
# Last.fm credentials: replace both placeholders before running the script.
API_KEY = "YOUR_API_KEY_HERE"
USER = "YOUR_USER_NAME_HERE"
# Maps track name -> [play count, last-played Unix timestamp]. Filled in by the
# script body further down and turned into UPDATE statements at the very end.
data = {}
# Output file that build_statement() writes the generated SQL into.
f = open('temp.txt', 'w')
# Parameters for the Last.fm API requests.
# Results per page (matches the limit of 200 passed to the API calls).
per_page = 200
# URL templates for the two API methods used. The four %s slots are filled, in
# order, with user, api_key, page and limit.
api_url_count = 'http://ws.audioscrobbler.com/2.0/?method=user.gettoptracks&user=%s&api_key=%s&format=json&page=%s&limit=%s'
api_url_recent = 'http://ws.audioscrobbler.com/2.0/?method=user.getrecenttracks&user=%s&api_key=%s&format=json&page=%s&limit=%s'
def top_tracks(user, api_key, page, limit):
    """Fetch one page of `user`'s top tracks (the play-count listing).

    Fills the `api_url_count` template (Last.fm `user.gettoptracks`) and
    issues a GET request for it.

    Args:
        user: Last.fm user name.
        api_key: Last.fm API key.
        page: Number of the page to fetch.
        limit: Maximum number of tracks per page.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    url = api_url_count % (user, api_key, page, limit)
    # Without an explicit timeout `requests` waits forever on a stalled
    # connection, which would hang the whole multi-page scrape.
    return requests.get(url, timeout=30).json()
def recent_tracks(user, api_key, page, limit):
    """Fetch one page of `user`'s most recently played tracks.

    Fills the `api_url_recent` template (Last.fm `user.getrecenttracks`) and
    issues a GET request for it.

    Args:
        user: Last.fm user name.
        api_key: Last.fm API key.
        page: Number of the page to fetch.
        limit: Maximum number of tracks per page.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    url = api_url_recent % (user, api_key, page, limit)
    # Without an explicit timeout `requests` waits forever on a stalled
    # connection, which would hang the whole multi-page scrape.
    return requests.get(url, timeout=30).json()
def flatten(d, parent_key=''):
    """Collapse a nested mapping into a single-level dict.

    Adapted from http://stackoverflow.com/a/6027615/254187, modified to strip
    `#` symbols from the resulting keys.

    Keys of nested mappings are joined to their parent's key with `_`, so
    ``{'date': {'uts': '1'}}`` becomes ``{'date_uts': '1'}``.

    Args:
        d: Mapping to flatten. It is not modified.
        parent_key: Key prefix for this nesting level; callers normally leave
            it at the default.

    Returns:
        A new flat dict. An empty nested mapping contributes no keys.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the ABC lives in
    # `collections.abc` on Python 3 and directly in `collections` on Python 2.
    try:
        from collections.abc import MutableMapping
    except ImportError:  # Python 2
        from collections import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + '_' + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key))
        else:
            # Strip pound symbols so the key is usable as a column name.
            flat[new_key.replace('#', '')] = v
    return flat
def process_track(track):
    """Flatten one track record and normalise its empty values.

    The `image` key, if present, is removed from `track` itself (the caller's
    dict is modified). The remainder is flattened with `flatten()` and every
    value equal to the empty string is replaced by None.

    Args:
        track: Track dict as found in an API response page.

    Returns:
        A new flat dict describing the track.
    """
    track.pop('image', None)
    # Build a fresh dict instead of assigning into one while iterating over it;
    # `.items()` also works on both Python 2 and 3, unlike `.iteritems()`.
    return {
        key: (None if val == '' else val)
        for key, val in flatten(track).items()
    }
def build_count_statement(title, count):
    """Print an UPDATE statement setting PLAY_COUNT for tracks named `title`.

    Titles are matched case-insensitively. Single quotes in `title` are doubled
    so the value is a valid SQL string literal, and `count` is coerced to an
    integer so nothing but a number can end up in the statement.

    Args:
        title: Track title.
        count: Play count, as an int or a numeric string.

    Returns:
        None. A track whose values cannot be rendered is skipped (best effort)
        and reported on stderr rather than aborting the run.
    """
    import sys

    try:
        statement = (
            "UPDATE media_file SET PLAY_COUNT=%d"
            " WHERE TYPE='MUSIC' AND UPPER(TITLE)=UPPER('%s');"
            % (int(count), title.replace("'", "''"))
        )
        print(statement)
    except (AttributeError, TypeError, ValueError) as exc:
        # AttributeError: title is not a string; TypeError/ValueError: count is
        # not numeric, or (Python 2) the title cannot be encoded for stdout.
        sys.stderr.write("Skipping track %r: %s\n" % (title, exc))
def build_date_statement(title, date):
    """Print an UPDATE statement setting LAST_PLAYED for tracks named `title`.

    Titles are matched case-insensitively. Single quotes in `title` are doubled
    so the value is a valid SQL string literal.

    Args:
        title: Track title.
        date: Unix timestamp (int or numeric string), formatted by `get_date()`.

    Returns:
        None. A track whose values cannot be rendered is skipped (best effort)
        and reported on stderr rather than aborting the run.
    """
    import sys

    try:
        statement = (
            "UPDATE media_file SET LAST_PLAYED='%s'"
            " WHERE TYPE='MUSIC' AND UPPER(TITLE)=UPPER('%s');"
            % (get_date(date), title.replace("'", "''"))
        )
        print(statement)
    except (AttributeError, TypeError, ValueError, OverflowError) as exc:
        # AttributeError: title is not a string; the others: the timestamp is
        # not numeric or out of range, or (Python 2) the title cannot be
        # encoded for stdout.
        sys.stderr.write("Skipping track %r: %s\n" % (title, exc))
def build_statement(title, playcount, date):
    """Append one UPDATE statement for tracks named `title` to the file `f`.

    The statement sets PLAY_COUNT and, when a play timestamp is known,
    LAST_PLAYED. Titles are matched case-insensitively and single quotes in
    `title` are doubled so the value is a valid SQL string literal.

    Args:
        title: Track title.
        playcount: Play count, as an int or a numeric string.
        date: Unix timestamp of the last play (int or numeric string). A falsy
            or zero value means "unknown" and leaves LAST_PLAYED untouched.

    Returns:
        None. A track whose values cannot be rendered is skipped (best effort)
        and reported on stderr rather than aborting the run.
    """
    import sys

    try:
        # int() accepts both the API's numeric strings and plain integers, and
        # guarantees nothing but a number is spliced into the SQL.
        assignments = "PLAY_COUNT=%d" % int(playcount)
        timestamp = int(date) if date else 0
        if timestamp > 0:
            # 0 is the "never seen in recent tracks" placeholder; formatting it
            # would record a bogus 1970 date.
            assignments += ", LAST_PLAYED='%s'" % get_date(timestamp)
        statement = (
            "UPDATE media_file SET %s WHERE TYPE='MUSIC' AND UPPER(TITLE)=UPPER('%s');\n"
            % (assignments, title.replace("'", "''"))
        )
        if not isinstance(statement, str):
            # Python 2 only: the statement is `unicode` whenever the title is.
            # Encode it explicitly, because the implicit ASCII encoding done by
            # file.write() fails on any non-ASCII title.
            statement = statement.encode('utf-8')
        f.write(statement)
    except (AttributeError, TypeError, ValueError, OverflowError) as exc:
        # AttributeError: title is not a string; the others: a count or
        # timestamp is not numeric or out of range, or the text cannot be
        # encoded for the output file.
        sys.stderr.write("Skipping track %r: %s\n" % (title, exc))
def get_date(time):
    """Format a Unix timestamp as a `YYYY-MM-DD HH:MM:SS` string in local time.

    `time` may be an int or anything else `int()` accepts, such as a numeric
    string.
    """
    seconds = int(time)
    moment = datetime.datetime.fromtimestamp(seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
# For playcount
# Fetch the first page up front: its metadata says how many pages the
# listening history spans.
resp = top_tracks(USER, API_KEY, 1, per_page)
total_pages = int(resp['toptracks']['@attr']['totalPages'])
for page_num in range(1, total_pages + 1):
    # print('Page %d of %d' % (page_num, total_pages))
    # Page 1 is already in hand from the request above; don't fetch it twice.
    page = resp if page_num == 1 else top_tracks(USER, API_KEY, page_num, per_page)
    tracks = page['toptracks']['track']
    if isinstance(tracks, dict):
        # NOTE(review): a page holding a single track may come back as one
        # object instead of a list -- confirm against the API. Wrapping it
        # keeps the loop below from iterating over the object's keys.
        tracks = [tracks]
    # Each page is processed as soon as it arrives, so the full history never
    # has to be held in memory at once.
    for track in tracks:
        track_data = process_track(track)
        # Entries are keyed by title only, so tracks sharing a title overwrite
        # each other. The 0 is a placeholder for "last played not known yet".
        data[track_data['name']] = [track_data['playcount'], 0]
# For recent played
# Fetch the first page up front: its metadata says how many pages the
# listening history spans.
resp = recent_tracks(USER, API_KEY, 1, per_page)
total_pages = int(resp['recenttracks']['@attr']['totalPages'])
for page_num in range(1, total_pages + 1):
    # print('Page %d of %d' % (page_num, total_pages))
    # Page 1 is already in hand from the request above; don't fetch it twice.
    page = resp if page_num == 1 else recent_tracks(USER, API_KEY, page_num, per_page)
    tracks = page['recenttracks']['track']
    if isinstance(tracks, dict):
        # NOTE(review): a page holding a single track may come back as one
        # object instead of a list -- confirm against the API.
        tracks = [tracks]
    # Each page is processed as soon as it arrives, so the full history never
    # has to be held in memory at once.
    for track in tracks:
        track_data = process_track(track)
        try:
            name = track_data['name']
            # Compare timestamps as integers: the stored placeholder is the
            # int 0, and int-vs-string comparison is a TypeError on Python 3.
            date = int(track_data['date_uts'])
        except (KeyError, TypeError, ValueError):
            # Entry without a usable name or play timestamp (presumably the
            # currently-playing track -- confirm). Show it and move on.
            print(track_data)
            continue
        if name in data:
            # Keep only the most recent play of each title.
            if data[name][1] < date:
                data[name][1] = date
        else:
            # Not in the top-tracks listing: record a single play. The count is
            # kept as a string, like the counts taken from the API.
            data[name] = ['1', date]
# Emit one UPDATE statement per collected track, then close the output file so
# everything buffered is flushed to temp.txt even if a statement fails.
try:
    for key, value in data.items():
        build_statement(key, value[0], value[1])
finally:
    f.close()