# Downloading LastFM Listening History

My last.fm page: https://www.last.fm/user/reuben_francis

API documentation: http://www.last.fm/api

This tool separately downloads your all-time most-played tracks and albums. Then it downloads all of your scrobbles in order of recency. Each of these three data sets is saved to a separate CSV file.

https://github.com/gboeing/data-visualization/blob/master/lastfm-listening-history/lastfm_downloader.ipynb

In [0]:
import requests, json, time, pandas as pd

In [0]:
# TODO: find a way to supply these values through a .config file
import config

# account whose listening history is downloaded
username = "reuben_francis"

# api credentials, read from the local config module
LASTFM_PASSWORD = config.LASTFM_PASSWORD
LASTFM_SHARED_SECRET = config.LASTFM_SHARED_SECRET
key = config.key

In [0]:
# api request settings
limit = 200  # api lets you retrieve up to 200 records per call
extended = 0  # api lets you retrieve extended data for each track, 0=no, 1=yes
page = 1  # results page to request
pause_duration = 0.2  # delay in seconds between successive api requests

# url template; placeholders are filled in this order:
# method, user, api key, limit, extended, page
url = (
    'https://ws.audioscrobbler.com/2.0/'
    '?method=user.get{}&user={}&api_key={}'
    '&limit={}&extended={}&page={}&format=json'
)

## LastFM Top Tracks CSV (limit=200)

In [0]:
# containers filled in when the response is parsed
artist_names, track_names, play_counts = [], [], []

# build the 'toptracks' request from the shared url template and fetch it as json
method = 'toptracks'
request_url = url.format(method, username, key, limit, extended, page)
response = requests.get(request_url).json()

In [0]:
# pull the artist, title and play count out of every track in the response
tracks = response[method]['track']
artist_names.extend(track['artist']['name'] for track in tracks)
track_names.extend(track['name'] for track in tracks)
play_counts.extend(track['playcount'] for track in tracks)

# assemble the columns into a dataframe, one row per track
top_tracks = pd.DataFrame()
for column, values in (
    ('artist', artist_names),
    ('track', track_names),
    ('play_count', play_counts),
):
    top_tracks[column] = values

# write the table to disk without the index column, then preview it
top_tracks.to_csv('lastfm_top_tracks.csv', index=None, encoding='utf-8')
top_tracks.head()

## LastFM Top Albums CSV (limit=200)

In [0]:
# containers filled in when the response is parsed
artist_names, album_names, play_counts = [], [], []

# build the 'topalbums' request from the shared url template and fetch it as json
method = 'topalbums'
request_url = url.format(method, username, key, limit, extended, page)
response = requests.get(request_url).json()

In [0]:

# pull the artist, title and play count out of every album in the response
albums = response[method]['album']
artist_names.extend(album['artist']['name'] for album in albums)
album_names.extend(album['name'] for album in albums)
play_counts.extend(album['playcount'] for album in albums)

# assemble the columns into a dataframe, one row per album
top_albums = pd.DataFrame()
for column, values in (
    ('artist', artist_names),
    ('album', album_names),
    ('play_count', play_counts),
):
    top_albums[column] = values

# write the table to disk without the index column, then preview it
top_albums.to_csv('lastfm_top_albums.csv', index=None, encoding='utf-8')
top_albums.head()

Unnamed: 0,artist,album,play_count
0,Richie Hawtin,"Decks, EFX & 909",213
1,ScHoolboy Q,Blank Face LP,182
2,Injury Reserve,Floss,153
3,Yves Tumor,Safe In The Hands Of Love,153
4,Sufjan Stevens,Illinois,141


## LastFM Scrobbles CSV 

Last.fm provides the 'recenttracks' API method to get 'all' scrobbles. The code below uses it to retrieve time-series data of every scrobble, like a log file.

In [0]:
def get_scrobbles(method='recenttracks', username=username, key=key, limit=200, extended=0, page=1, pages=0):
    """
    Download scrobbles from the api and return them as a dataframe.

    method: api method, appended to 'user.get' in the request url
    username/key: api credentials
    limit: api lets you retrieve up to 200 records per call
    extended: api lets you retrieve extended data for each track, 0=no, 1=yes
    page: page of results to start retrieving at
    pages: how many pages of results to retrieve. if 0, get as many as api can return.

    returns: a dataframe with columns artist, album, track, timestamp and
    datetime, holding one row per scrobble that carries a 'date' field.

    Sleeps for the module-level `pause_duration` before every request after
    the first one.
    """
    url = 'https://ws.audioscrobbler.com/2.0/?method=user.get{}&user={}&api_key={}&limit={}&extended={}&page={}&format=json'

    # the first request reports the total number of pages and is also kept as
    # the first page of data, so the starting page is not downloaded twice
    request_url = url.format(method, username, key, limit, extended, page)
    first_page = requests.get(request_url).json()
    total_pages = int(first_page[method]['@attr']['totalPages'])

    # last page to fetch: start at `page` and, when `pages` is given, stop
    # after that many pages (never going past what the api reports)
    last_page = total_pages
    if pages > 0:
        last_page = min(total_pages, page + pages - 1)
    page_count = max(last_page - page + 1, 0)

    print('{} total pages to retrieve'.format(page_count))

    # request each remaining page of data one at a time
    results = [first_page] if page_count > 0 else []
    for page_number in range(page + 1, last_page + 1):
        if page_number % 10 == 0:
            print(page_number, end=' ')
        time.sleep(pause_duration)
        request_url = url.format(method, username, key, limit, extended, page_number)
        results.append(requests.get(request_url).json())

    # parse the fields out of each scrobble in each page of results
    artist_names = []
    album_names = []
    track_names = []
    timestamps = []
    for result in results:
        for scrobble in result[method]['track']:
            # only retain completed scrobbles (aka, with timestamp and not 'now playing')
            if 'date' in scrobble:
                artist_names.append(scrobble['artist']['#text'])
                album_names.append(scrobble['album']['#text'])
                track_names.append(scrobble['name'])
                timestamps.append(scrobble['date']['uts'])

    # create and populate a dataframe to contain the data
    df = pd.DataFrame()
    df['artist'] = artist_names
    df['album'] = album_names
    df['track'] = track_names
    df['timestamp'] = timestamps
    # 'uts' values are converted as whole seconds
    df['datetime'] = pd.to_datetime(df['timestamp'].astype(int), unit='s')

    return df

In [0]:
# get all scrobbled tracks ever, in order of recency
# pages=0 sets no page cap, so as many pages as the api can return are retrieved
scrobbles = get_scrobbles(pages=0)

275 total pages to retrieve
10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 

In [0]:
# write the scrobbles to disk without the index column
scrobbles.to_csv('lastfm_scrobbles.csv', index=None, encoding='utf-8')

# report how many rows were saved, then preview the first few
row_count = len(scrobbles)
print('{:,} total rows'.format(row_count))
scrobbles.head()

54,806 total rows


Unnamed: 0,artist,album,track,timestamp,datetime
0,Xiu Xiu,Knife Play,Over Over,1591526477,2020-06-07 10:41:17
1,Xiu Xiu,Knife Play,Dr. Troll,1591526241,2020-06-07 10:37:21
2,Xiu Xiu,Knife Play,Hives Hives,1591526016,2020-06-07 10:33:36
3,Xiu Xiu,Knife Play,Luber,1591525772,2020-06-07 10:29:32
4,Xiu Xiu,Knife Play,I Broke Up (SJ),1591525630,2020-06-07 10:27:10
