In [5]:
import spotipy
import random
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

from spotify_config import config

# Authenticate with the client-credentials manager; the id/secret pair is
# read from the local spotify_config module.
client_credentials_manager = SpotifyClientCredentials(
    client_id=config['client_id'],
    client_secret=config['client_secret'],
)
# API client used by the search and audio-features cells below.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [6]:
def get_random_query():
    """Build the parameters for one randomised track search.

    Returns:
        tuple: ``(random_search, random_market, offset)`` where

        * ``random_search`` is a single lowercase ASCII letter combined with
          ``%`` wildcards, either ``'x%'`` or ``'%x%'`` (each form is picked
          with equal probability);
        * ``random_market`` is one of the two-letter country codes listed in
          ``country_codes``;
        * ``offset`` is an integer in the inclusive range ``[0, 500]``.
    """
    # Pool of characters a query can be built from.
    characters = 'abcdefghijklmnopqrstuvwxyz'
    # Alternative alphabets (Greek, Cyrillic) that can be swapped in:
    # characters = 'αβγδεζηθικλμνξοπρστυφχψως'
    # characters = 'йцукенгшщзфывапролдячсмитьбюэжхъ'

    country_codes = ['AD', 'AR', 'AU', 'AT', 'BE', 'BO', 'BR', 'BG', 'CA', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DK', 'DO', 'EC',
        'SV', 'EE', 'FI', 'FR', 'DE', 'GR', 'GT', 'HN', 'HK', 'HU', 'IS', 'ID', 'IE', 'IT', 'JP', 'LV', 'LI', 'LT', 'LU', 'MY', 'MT', 'MX', 'MC', 'NL',
        'NZ', 'NI', 'NO', 'PA', 'PY', 'PE', 'PH', 'PL', 'PT', 'SG', 'ES', 'SK', 'SE', 'CH', 'TW', 'TR', 'GB', 'US', 'UY']

    # The draws are made in a fixed order (character, market, offset, form)
    # so that a seeded `random` module reproduces the same sequence.
    random_character = random.choice(characters)
    random_market = random.choice(country_codes)
    offset = random.randint(0, 500)

    # Coin flip between the two query shapes: wildcard after the character
    # only, or wildcards on both sides of it.
    if random.randint(0, 1) == 0:
        random_search = random_character + '%'
    else:
        random_search = '%' + random_character + '%'
    return random_search, random_market, offset


In [7]:
# Number of distinct entries to collect before the search loop stops.
SONGS_LIMIT = 15_000
# Accumulator for the collected tracks; a set drops exact repeats.
songs = set()

In [8]:
# Keep issuing random searches until enough distinct tracks are collected.
while len(songs) < SONGS_LIMIT:
    # A single call yields the query string, market and offset together.
    random_search, random_market, offset = get_random_query()
    print(f"Searching for {random_search} {offset} in market {random_market}, current songs length {len(songs)}")

    result = sp.search(q=random_search, type="track", limit=50, offset=offset, market=random_market)
    items = result["tracks"]["items"]
    # Store (name, track id, first-listed artist name) for each hit. Null
    # entries and tracks without artists are skipped defensively.
    # Updating in place avoids copying the whole set on every iteration.
    songs.update(
        (song["name"], song["id"], song["artists"][0]["name"])
        for song in items
        if song and song["artists"]
    )

Searching for m% 289 in market SK, current songs length 0
Searching for u% 2 in market CA, current songs length 50
Searching for %c% 187 in market CH, current songs length 98
Searching for q% 18 in market FI, current songs length 146
Searching for %d% 171 in market LU, current songs length 178
Searching for v% 400 in market SE, current songs length 226
Searching for z% 452 in market CY, current songs length 268
Searching for %c% 14 in market SG, current songs length 314
Searching for %x% 481 in market NZ, current songs length 357
Searching for %z% 451 in market PL, current songs length 400
Searching for j% 32 in market TR, current songs length 440
Searching for %z% 412 in market NL, current songs length 470
Searching for z% 485 in market IT, current songs length 513
Searching for k% 169 in market ES, current songs length 544
Searching for %i% 98 in market EC, current songs length 568
Searching for g% 438 in market CZ, current songs length 601
Searching for %g% 15 in market PH, current 

In [9]:
# Display the collected (name, id, first-artist name) tuples.
songs

{('Ferma A Guardare (feat. Pinguini Tattici Nucleari)',
  '1eDC4NiUYgQSKpKDIvXxi4',
  'Ernia'),
 ('Ignition', '4S45yPQU7d7TNX9sO9pr3y', 'Lokolil'),
 ('Beautiful In White', '43wROOsAEK0F3Fu46Vjn7W', 'Shane Filan'),
 ('Leave Em Alone (Layton Greene, Lil Baby feat. City Girls, PnB Rock)',
  '6Vdyu21iT8qScU1haSjWUU',
  'Quality Control'),
 ('Who Wants To Live Forever - Remastered 2011',
  '3SGP8It5WDnCONyApJKRTJ',
  'Queen'),
 ('Me Estoy Enamorando', '4COvULULVKLsMKMRKIiXUa', 'Pedro Suárez-Vértiz'),
 ('Chcę uwierzyć snom', '6MMXHGeHzp6m3mZf5BFv9K', 'Katarzyna Łaska'),
 ('Risko', '7hj318e237PHNa6RoacNrr', 'Bloody Hawk'),
 ('Orbán, Verd Ki A Ferinek', '78YRVISlzTHb894Hwu6LOx', 'Krúbi'),
 ('Cooler Than Me', '525GY0mXYnxajolwgS8Eaz', 'Lucky Luke'),
 ('Quite Miss Home', '2deFH5zveEBEUuURpqTN3C', 'James Arthur'),
 ('What I Got', '3B4q6KbHbGV51HO3GznBFF', 'Sublime'),
 ('Noche De Entierro', '6ksWMnPcHNyNoNf3y1BQCO', 'Daddy Yankee'),
 ('Pluie Musique, Pt. 104', '5ZXCZFC7RZgmSR7srKqZA1', 'Brain Soun

In [10]:
# Number of distinct (name, id, first-artist name) tuples gathered.
len(songs)

15011

In [11]:
# One row per track, columns ordered id / title / main_artist.
# `songs` is deduplicated on the whole (name, id, artist) tuple, so the same
# track id can still occur more than once with differing metadata. Keep a
# single row per id so that later joins on the id stay one-to-one.
df = (
    pd.DataFrame(list(songs), columns=["title", "id", "main_artist"])
    .loc[:, ["id", "title", "main_artist"]]
    .drop_duplicates(subset="id")
    .reset_index(drop=True)
)

In [12]:
# Maps track id -> audio-feature record; filled batch by batch later on.
features = dict()
# Plain Python list of the ids so it can be sliced into request-sized batches.
all_track_ids = df["id"].tolist()

In [13]:
# Batch size per request; the Spotify audio-features endpoint accepts at most
# 100 ids in one call.
num_tracks = 100
for start in range(0, len(all_track_ids), num_tracks):
    tracks_batch = all_track_ids[start:start + num_tracks]
    # Report the real end of the slice; the final batch may be shorter.
    print(f'getting from {start} to {start + len(tracks_batch)}')
    features_batch = sp.audio_features(tracks_batch)
    # Ids and returned records are paired positionally.
    # NOTE(review): assumes the API returns results in request order - confirm.
    features.update(zip(tracks_batch, features_batch))

getting from 0 to 100
getting from 100 to 200
getting from 200 to 300
getting from 300 to 400
getting from 400 to 500
getting from 500 to 600
getting from 600 to 700
getting from 700 to 800
getting from 800 to 900
getting from 900 to 1000
getting from 1000 to 1100
getting from 1100 to 1200
getting from 1200 to 1300
getting from 1300 to 1400
getting from 1400 to 1500
getting from 1500 to 1600
getting from 1600 to 1700
getting from 1700 to 1800
getting from 1800 to 1900
getting from 1900 to 2000
getting from 2000 to 2100
getting from 2100 to 2200
getting from 2200 to 2300
getting from 2300 to 2400
getting from 2400 to 2500
getting from 2500 to 2600
getting from 2600 to 2700
getting from 2700 to 2800
getting from 2800 to 2900
getting from 2900 to 3000
getting from 3000 to 3100
getting from 3100 to 3200
getting from 3200 to 3300
getting from 3300 to 3400
getting from 3400 to 3500
getting from 3500 to 3600
getting from 3600 to 3700
getting from 3700 to 3800
getting from 3800 to 3900
getting

In [14]:
# `features` maps track id -> feature record, so from_dict yields one column
# per track; transposing turns that into one row per track.
tracks = (
    pd.DataFrame.from_dict(features)
    .T
    # After the transpose every column is object dtype; let pandas restore
    # numeric dtypes where the values allow it.
    .infer_objects()
    # Move the track id out of the index into a regular column.
    .reset_index()
    .rename(columns={'index': 'song_id'})
)
tracks

Unnamed: 0,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1eDC4NiUYgQSKpKDIvXxi4,0.585,0.724,9,-5.611,0,0.0967,0.445,0,0.1,0.547,162.05,audio_features,1eDC4NiUYgQSKpKDIvXxi4,spotify:track:1eDC4NiUYgQSKpKDIvXxi4,https://api.spotify.com/v1/tracks/1eDC4NiUYgQS...,https://api.spotify.com/v1/audio-analysis/1eDC...,213543,4
1,4S45yPQU7d7TNX9sO9pr3y,0.763,0.546,1,-7.569,1,0.348,0.0996,0,0.115,0.823,73.564,audio_features,4S45yPQU7d7TNX9sO9pr3y,spotify:track:4S45yPQU7d7TNX9sO9pr3y,https://api.spotify.com/v1/tracks/4S45yPQU7d7T...,https://api.spotify.com/v1/audio-analysis/4S45...,88398,4
2,43wROOsAEK0F3Fu46Vjn7W,0.554,0.398,7,-5.598,1,0.0245,0.359,0,0.123,0.259,76.006,audio_features,43wROOsAEK0F3Fu46Vjn7W,spotify:track:43wROOsAEK0F3Fu46Vjn7W,https://api.spotify.com/v1/tracks/43wROOsAEK0F...,https://api.spotify.com/v1/audio-analysis/43wR...,232040,4
3,6Vdyu21iT8qScU1haSjWUU,0.76,0.755,6,-4.735,0,0.19,0.0163,0.000003,0.0909,0.517,101.993,audio_features,6Vdyu21iT8qScU1haSjWUU,spotify:track:6Vdyu21iT8qScU1haSjWUU,https://api.spotify.com/v1/tracks/6Vdyu21iT8qS...,https://api.spotify.com/v1/audio-analysis/6Vdy...,171647,4
4,3SGP8It5WDnCONyApJKRTJ,0.182,0.388,4,-8.672,0,0.0324,0.544,0.00134,0.114,0.0392,139.276,audio_features,3SGP8It5WDnCONyApJKRTJ,spotify:track:3SGP8It5WDnCONyApJKRTJ,https://api.spotify.com/v1/tracks/3SGP8It5WDnC...,https://api.spotify.com/v1/audio-analysis/3SGP...,295333,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15006,13yGyeDS59tJaTQq97uw8z,0.732,0.6,8,-9.451,1,0.273,0.256,0.000168,0.0809,0.486,159.948,audio_features,13yGyeDS59tJaTQq97uw8z,spotify:track:13yGyeDS59tJaTQq97uw8z,https://api.spotify.com/v1/tracks/13yGyeDS59tJ...,https://api.spotify.com/v1/audio-analysis/13yG...,207000,4
15007,55IzVZdFOu4gS5U5RDO4g8,0.483,0.845,2,-7.908,0,0.0291,0.323,0.000129,0.135,0.535,154.196,audio_features,55IzVZdFOu4gS5U5RDO4g8,spotify:track:55IzVZdFOu4gS5U5RDO4g8,https://api.spotify.com/v1/tracks/55IzVZdFOu4g...,https://api.spotify.com/v1/audio-analysis/55Iz...,240080,4
15008,6aDsgHPZsMztSbZernzlF8,0.576,0.679,0,-7.237,1,0.034,0.0311,0,0.323,0.213,121.005,audio_features,6aDsgHPZsMztSbZernzlF8,spotify:track:6aDsgHPZsMztSbZernzlF8,https://api.spotify.com/v1/tracks/6aDsgHPZsMzt...,https://api.spotify.com/v1/audio-analysis/6aDs...,187889,4
15009,5zjW4RC0BCWBgtrtMYPc9y,0.547,0.48,11,-8.95,0,0.285,0.266,0.000031,0.167,0.28,104.758,audio_features,5zjW4RC0BCWBgtrtMYPc9y,spotify:track:5zjW4RC0BCWBgtrtMYPc9y,https://api.spotify.com/v1/tracks/5zjW4RC0BCWB...,https://api.spotify.com/v1/audio-analysis/5zjW...,189714,4


In [17]:
# Attach title / main_artist to each feature row by matching song_id to df's id.
# Both frames carry an "id" column, so pandas suffixes them as id_x / id_y.
tracks = pd.merge(tracks, df, left_on="song_id", right_on="id")


In [18]:
# Preview the merged frame (audio features plus title and main artist).
tracks

Unnamed: 0,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,...,type,id_x,uri,track_href,analysis_url,duration_ms,time_signature,id_y,title,main_artist
0,1eDC4NiUYgQSKpKDIvXxi4,0.585,0.724,9,-5.611,0,0.0967,0.445,0,0.1,...,audio_features,1eDC4NiUYgQSKpKDIvXxi4,spotify:track:1eDC4NiUYgQSKpKDIvXxi4,https://api.spotify.com/v1/tracks/1eDC4NiUYgQS...,https://api.spotify.com/v1/audio-analysis/1eDC...,213543,4,1eDC4NiUYgQSKpKDIvXxi4,Ferma A Guardare (feat. Pinguini Tattici Nucle...,Ernia
1,4S45yPQU7d7TNX9sO9pr3y,0.763,0.546,1,-7.569,1,0.348,0.0996,0,0.115,...,audio_features,4S45yPQU7d7TNX9sO9pr3y,spotify:track:4S45yPQU7d7TNX9sO9pr3y,https://api.spotify.com/v1/tracks/4S45yPQU7d7T...,https://api.spotify.com/v1/audio-analysis/4S45...,88398,4,4S45yPQU7d7TNX9sO9pr3y,Ignition,Lokolil
2,43wROOsAEK0F3Fu46Vjn7W,0.554,0.398,7,-5.598,1,0.0245,0.359,0,0.123,...,audio_features,43wROOsAEK0F3Fu46Vjn7W,spotify:track:43wROOsAEK0F3Fu46Vjn7W,https://api.spotify.com/v1/tracks/43wROOsAEK0F...,https://api.spotify.com/v1/audio-analysis/43wR...,232040,4,43wROOsAEK0F3Fu46Vjn7W,Beautiful In White,Shane Filan
3,6Vdyu21iT8qScU1haSjWUU,0.76,0.755,6,-4.735,0,0.19,0.0163,0.000003,0.0909,...,audio_features,6Vdyu21iT8qScU1haSjWUU,spotify:track:6Vdyu21iT8qScU1haSjWUU,https://api.spotify.com/v1/tracks/6Vdyu21iT8qS...,https://api.spotify.com/v1/audio-analysis/6Vdy...,171647,4,6Vdyu21iT8qScU1haSjWUU,"Leave Em Alone (Layton Greene, Lil Baby feat. ...",Quality Control
4,3SGP8It5WDnCONyApJKRTJ,0.182,0.388,4,-8.672,0,0.0324,0.544,0.00134,0.114,...,audio_features,3SGP8It5WDnCONyApJKRTJ,spotify:track:3SGP8It5WDnCONyApJKRTJ,https://api.spotify.com/v1/tracks/3SGP8It5WDnC...,https://api.spotify.com/v1/audio-analysis/3SGP...,295333,4,3SGP8It5WDnCONyApJKRTJ,Who Wants To Live Forever - Remastered 2011,Queen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15006,13yGyeDS59tJaTQq97uw8z,0.732,0.6,8,-9.451,1,0.273,0.256,0.000168,0.0809,...,audio_features,13yGyeDS59tJaTQq97uw8z,spotify:track:13yGyeDS59tJaTQq97uw8z,https://api.spotify.com/v1/tracks/13yGyeDS59tJ...,https://api.spotify.com/v1/audio-analysis/13yG...,207000,4,13yGyeDS59tJaTQq97uw8z,Off Season,CA$HANOVA BULHAR
15007,55IzVZdFOu4gS5U5RDO4g8,0.483,0.845,2,-7.908,0,0.0291,0.323,0.000129,0.135,...,audio_features,55IzVZdFOu4gS5U5RDO4g8,spotify:track:55IzVZdFOu4gS5U5RDO4g8,https://api.spotify.com/v1/tracks/55IzVZdFOu4g...,https://api.spotify.com/v1/audio-analysis/55Iz...,240080,4,55IzVZdFOu4gS5U5RDO4g8,Kryzysowa narzeczona,Lady Pank
15008,6aDsgHPZsMztSbZernzlF8,0.576,0.679,0,-7.237,1,0.034,0.0311,0,0.323,...,audio_features,6aDsgHPZsMztSbZernzlF8,spotify:track:6aDsgHPZsMztSbZernzlF8,https://api.spotify.com/v1/tracks/6aDsgHPZsMzt...,https://api.spotify.com/v1/audio-analysis/6aDs...,187889,4,6aDsgHPZsMztSbZernzlF8,Someday,OneRepublic
15009,5zjW4RC0BCWBgtrtMYPc9y,0.547,0.48,11,-8.95,0,0.285,0.266,0.000031,0.167,...,audio_features,5zjW4RC0BCWBgtrtMYPc9y,spotify:track:5zjW4RC0BCWBgtrtMYPc9y,https://api.spotify.com/v1/tracks/5zjW4RC0BCWB...,https://api.spotify.com/v1/audio-analysis/5zjW...,189714,4,5zjW4RC0BCWBgtrtMYPc9y,Kaip Tau Visgi Tinka,Lilas ir Innomine


In [20]:
# Persist the merged table; index=False leaves the row index out of the file.
tracks.to_csv("songs15000.csv", index=False)