# Last.fm API with pylast

[Official Website](https://www.last.fm/api/)

[PyLast Repo](https://github.com/pylast)

In [1]:
import json
import os
import sys
import time
from getpass import getpass

import numpy as np
import pandas as pd
import pylast
import requests

sys.path.append('../scripts/') 

from create_last_database import User
from create_last_database import Track
from create_last_database import Artist
from create_last_database import Album
from create_last_database import Tag
from create_last_database import Library

## Network with the API through PyLast

In [2]:
# Credentials must never be committed with the notebook. They are read from
# the environment (LASTFM_API_KEY / LASTFM_API_SECRET); when a variable is
# missing or empty, the value is asked for interactively with a hidden prompt
# so that it does not end up in the saved cell output.
# NOTE(review): the key/secret pair that used to be hard-coded in this cell
# has been published and should be revoked on the Last.fm side.
API_KEY = os.environ.get('LASTFM_API_KEY') or getpass('Last.fm API key: ')
API_SECRET = os.environ.get('LASTFM_API_SECRET') or getpass('Last.fm API secret: ')

# Single pylast network object shared by every cell that queries the API.
network = pylast.LastFMNetwork(api_key=API_KEY, api_secret=API_SECRET)

Let's get some random users built by `generate_lastfm_users.py`

In [3]:
def get_random_users(filepath: str, quantity: int = 1000, random_state: int = 200) -> pd.DataFrame:
    """Draw a reproducible random subset of rows from a users CSV file.

    Parameters
    ----------
    filepath : str
        Path of the CSV file, read with ``pd.read_csv``.
    quantity : int
        Number of rows to draw. Rows are drawn without replacement, so
        pandas raises ``ValueError`` when this exceeds the rows available.
    random_state : int
        Seed handed to ``DataFrame.sample`` so the same rows are drawn on
        every run.

    Returns
    -------
    pd.DataFrame
        The sampled rows, relabelled with the integers ``0 .. quantity - 1``.
    """
    all_users = pd.read_csv(filepath)
    sampled = all_users.sample(
        n=quantity,
        replace=False,
        random_state=random_state,
        axis='index',
    )
    # Drop the original row labels in favour of consecutive positions.
    sampled.index = list(range(len(sampled)))
    return sampled

# CSV with the candidate users; the same path is handed to `User` further down.
user_path = "../data/lastfm-api/users_lastfm.csv"

# Default sample size and seed are used, so the selection is reproducible.
users = get_random_users(filepath=user_path)
users.head(n=5)

Unnamed: 0,user_id,user_name
0,22330,elleciel
1,20893,Wolfet93
2,22057,drewogg
3,15821,Sharenge
4,14704,old_urach


# Creating the database 

It takes a long, long time. Some problems with malformed responses, the network and the connection are expected. In some special cases, I just rerun the cell.

We start with the users info. All the following information is saved in a json format.

In [None]:
users_class = User(network, user_path)

users_info_path = '../data/lastfm-api/1k_users_info_lastfm.json'

# First run: create the checkpoint file holding an empty JSON object.
if not os.path.exists(users_info_path):
    with open(users_info_path, 'w') as f:
        json.dump({}, f)
# Load whatever earlier runs already fetched so the loop resumes from there.
with open(users_info_path, 'r') as f:
    data = json.load(f)

# `.values` is available on old and new pandas alike (`as_matrix` was removed
# in pandas 1.0); the two columns are selected explicitly so the unpacking
# does not depend on the frame having exactly these two columns.
for user_id, user_name in users[['user_id', 'user_name']].values:
    # JSON object keys are always strings, so use the string form in memory
    # too: the resume check and the stored key then always agree.
    user_key = str(user_id)
    if user_key in data:
        continue
    data[user_key] = users_class.get_user_info(user_name)
    # Checkpoint after every user. The file is written next to the target and
    # then swapped in, so interrupting the cell cannot leave a half-written
    # JSON file behind.
    tmp_path = users_info_path + '.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    os.replace(tmp_path, users_info_path)
    if len(data) % 100 == 0:
        print('{} users - DONE'.format(len(data)))

With the `tracks.csv` file, I will build the tracks dataset. It may take a long time!

In [None]:
track_class = Track(network)

tracks_info_path = '../data/lastfm-api/tracks_lastfm_info.json'

# First run: create the checkpoint file holding an empty JSON object.
if not os.path.exists(tracks_info_path):
    with open(tracks_info_path, 'w') as f:
        json.dump({}, f)
# Load whatever earlier runs already fetched so the loop resumes from there.
with open(tracks_info_path, 'r') as f:
    data = json.load(f)

for track_id, data_track in track_class.tracks_df.iterrows():
    # JSON object keys are always strings, so use the string form in memory
    # too: the resume check and the stored key then always agree.
    track_key = str(track_id)
    if track_key in data:
        continue
    # Retry for as long as pylast reports a network error, pausing two
    # seconds between attempts; any other exception stops the cell.
    while True:
        try:
            track_info = track_class.get_track_info(data_track.track_name, data_track.artist_name)
        except pylast.NetworkError:
            time.sleep(2)
        else:
            break
    data[track_key] = track_info
    # Checkpoint after every track. `data` already holds everything, so the
    # file does not need to be re-read; it is written next to the target and
    # then swapped in, so interrupting the cell cannot corrupt it.
    tmp_path = tracks_info_path + '.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    os.replace(tmp_path, tracks_info_path)
    if len(data) % 100 == 0:
        print('{} tracks - DONE'.format(len(data)))

Building the artist database

In [33]:
artist_class = Artist(network)

artists_info_path = '../data/lastfm-api/artists_lastfm_info.json'

# First run: create the checkpoint file holding an empty JSON object.
if not os.path.exists(artists_info_path):
    with open(artists_info_path, 'w') as f:
        json.dump({}, f)
# Load whatever earlier runs already fetched so the loop resumes from there.
with open(artists_info_path, 'r') as f:
    data = json.load(f)

for artist_id, data_artist in artist_class.artists_df.iterrows():
    # JSON object keys are always strings, so use the string form in memory
    # too: the resume check and the stored key then always agree.
    artist_key = str(artist_id)
    if artist_key in data:
        continue
    # Retry for as long as pylast reports a network error, pausing two
    # seconds between attempts; any other exception stops the cell.
    while True:
        try:
            artist_info = artist_class.get_artist_info(data_artist.artist_name)
        except pylast.NetworkError:
            time.sleep(2)
        else:
            break
    data[artist_key] = artist_info
    # Checkpoint after every artist. `data` already holds everything, so the
    # file does not need to be re-read; it is written next to the target and
    # then swapped in, so interrupting the cell cannot corrupt it.
    tmp_path = artists_info_path + '.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    os.replace(tmp_path, artists_info_path)
    if len(data) % 100 == 0:
        print('{} artists - DONE'.format(len(data)))

KeyboardInterrupt: 

Building the database for the tags

In [None]:
tag_class = Tag(network)

tags_info_path = '../data/lastfm-api/tags_lastfm_info.json'

# First run: create the checkpoint file holding an empty JSON object.
if not os.path.exists(tags_info_path):
    with open(tags_info_path, 'w') as f:
        json.dump({}, f)
# Load whatever earlier runs already fetched so the loop resumes from there.
with open(tags_info_path, 'r') as f:
    data = json.load(f)

for tag_id, data_tag in tag_class.tags_df.iterrows():
    # JSON object keys are always strings, so use the string form in memory
    # too: the resume check and the stored key then always agree.
    tag_key = str(tag_id)
    if tag_key in data:
        continue
    # Retry for as long as pylast reports a network error, pausing two
    # seconds between attempts; any other exception stops the cell.
    while True:
        try:
            tag_info = tag_class.get_tag_info(data_tag.tag)
        except pylast.NetworkError:
            time.sleep(2)
        else:
            break
    data[tag_key] = tag_info
    # Checkpoint after every tag. `data` already holds everything, so the
    # file does not need to be re-read; it is written next to the target and
    # then swapped in, so interrupting the cell cannot corrupt it.
    tmp_path = tags_info_path + '.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    os.replace(tmp_path, tags_info_path)
    if len(data) % 100 == 0:
        print('{} tag - DONE'.format(len(data)))

Converting the similar tracks stored in each track's info into indices. I separated this step from the original code because it was slow!

In [9]:
track_class = Track(network)
artist_class = Artist(network)

with open('../data/lastfm-api/tracks_lastfm_info.json', 'r+') as source_file:
    data = json.load(source_file)

for position, track_key in enumerate(data):
    track_entry = data[track_key]
    # Empty entries carry no 'similar' list; they are skipped entirely,
    # progress message included.
    if len(track_entry) != 0:
        similar_tracks = track_entry['similar']
        for slot in range(len(similar_tracks)):
            similar = similar_tracks[slot]
            # Swap the (track name, artist name) pair for the id returned by
            # `get_id_by_name`; the third element is carried over unchanged.
            similar_id = track_class.get_id_by_name(similar[0], similar[1])
            similar_tracks[slot] = [similar_id, similar[2]]
        if position % 100 == 0:
            print("{} - DONE".format(position))

# The converted data goes to a separate file; the input file is left as it was.
with open('../data/lastfm-api/tracks_lastfm_info1.json', 'w') as target_file:
    json.dump(data, target_file)

 - DONE
8180 - DONE
8181 - DONE
8182 - DONE
8183 - DONE
8184 - DONE
8185 - DONE
8186 - DONE
8187 - DONE
8188 - DONE
8189 - DONE
8190 - DONE
8191 - DONE
8192 - DONE
8193 - DONE
8194 - DONE
8195 - DONE
8196 - DONE
8197 - DONE
8198 - DONE
8199 - DONE
8200 - DONE
8201 - DONE
8202 - DONE
8203 - DONE
8204 - DONE
8205 - DONE
8206 - DONE
8207 - DONE
8208 - DONE
8209 - DONE
8210 - DONE
8211 - DONE
8212 - DONE
8213 - DONE
8214 - DONE
8215 - DONE
8216 - DONE
8217 - DONE
8218 - DONE
8219 - DONE
8220 - DONE
8221 - DONE
8222 - DONE
8223 - DONE
8224 - DONE
8225 - DONE
8226 - DONE
8227 - DONE
8228 - DONE
8229 - DONE
8230 - DONE
8231 - DONE
8232 - DONE
8233 - DONE
8234 - DONE
8235 - DONE
8236 - DONE
8237 - DONE
8238 - DONE
8239 - DONE
8240 - DONE
8241 - DONE
8242 - DONE
8243 - DONE
8244 - DONE
8245 - DONE
8246 - DONE
8247 - DONE
8248 - DONE
8250 - DONE
8251 - DONE
8252 - DONE
8253 - DONE
8256 - DONE
8257 - DONE
8258 - DONE
8259 - DONE
8260 - DONE
8261 - DONE
8262 - DONE
8263 - DONE
8264 - DONE
8266 - D

In [10]:
with open('../data/lastfm-api/artists_lastfm_info.json', 'r+') as source_file:
    data = json.load(source_file)

for artist_key in data:
    artist_entry = data[artist_key]
    # Empty entries carry no 'similar' list, so there is nothing to convert.
    if len(artist_entry) != 0:
        similar_artists = artist_entry['similar']
        for slot in range(len(similar_artists)):
            similar = similar_artists[slot]
            # Swap the artist name for the id returned by `get_id_by_name`;
            # the second element is carried over unchanged.
            similar_id = artist_class.get_id_by_name(similar[0])
            similar_artists[slot] = [similar_id, similar[1]]

# The converted data goes to a separate file; the input file is left as it was.
with open('../data/lastfm-api/artists_lastfm_info1.json', 'w') as target_file:
    json.dump(data, target_file)

Writing the new artists and tracks

In [11]:
# Persist the artist and track tables held by the helper objects.
# NOTE(review): presumably this writes the entries added by the
# `get_id_by_name` calls in the conversion cells back to the CSV files -
# confirm against `create_last_database`.
artist_class.write_to_csv()
track_class.write_to_csv()

## Getting a Library for 50 users

In [19]:
artist_library = Library(network)

users50_library_path = '../data/lastfm-api/users50_library.json'

# `users50_library` has to exist on both paths: it is loaded from the file
# when a previous run saved one, and starts out empty (with a matching file
# on disk) on the very first run.
if os.path.exists(users50_library_path):
    with open(users50_library_path, 'r') as f:
        users50_library = json.load(f)
else:
    users50_library = {}
    with open(users50_library_path, 'w') as f:
        json.dump(users50_library, f)

In [23]:
# Fetch the library of the first 50 sampled users, skipping those already
# present in `users50_library`.
for i, user in users.iterrows():
    if i >= 50:
        break
    # Entries loaded from the JSON file are keyed by strings, so store new
    # entries under the string form of the id as well. With a raw id as key,
    # the skip check below would miss users fetched earlier in this session
    # and request them again when the cell is rerun.
    user_key = str(user['user_id'])
    if user_key in users50_library:
        continue
    print(user['user_id'])
    user_library = artist_library.get_library(user['user_name'], printing=False)
    users50_library[user_key] = user_library
    print('{} - DONE'.format(user['user_id']))

17331
There are 24 pages.
17331 - DONE
4901
There are 132 pages.
dict_keys(['error', 'message'])
dict_keys(['error', 'message'])
dict_keys(['error', 'message'])
4901 - DONE
7373
There are 8 pages.
7373 - DONE
8847
There are 12 pages.
8847 - DONE
21267
There are 83 pages.
21267 - DONE
8910
There are 45 pages.
8910 - DONE
12116
There are 6 pages.
12116 - DONE
8552
There are 11 pages.
8552 - DONE
20169
There are 8 pages.
20169 - DONE
2418
There are 45 pages.
dict_keys(['error', 'message'])
2418 - DONE
32159
There are 38 pages.
32159 - DONE
28602
There are 77 pages.
28602 - DONE
8882
There are 42 pages.
8882 - DONE
22183
There are 24 pages.
22183 - DONE
166
There are 9 pages.
166 - DONE
10895
There are 5 pages.
10895 - DONE
14268
There are 40 pages.
14268 - DONE


In [24]:
# Save the collected libraries, overwriting the previous contents of the file.
with open('../data/lastfm-api/users50_library.json', 'w') as library_file:
    json.dump(users50_library, library_file)