In [1]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
import pprint
import pandas as pd
from bs4 import BeautifulSoup
import requests
pd.set_option('display.max_rows', None)
import random

In [2]:
# Authenticate with the client-credentials flow; the id/secret pair is read
# from the local `config` module so it never appears in the notebook itself.
credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
# Shared Spotify client used by every helper function below.
sp = spotipy.Spotify(auth_manager=credentials_manager)

## Understanding the JSON

## Searching a playlist

## List of SPOTIFY Playlists

### Source 1

In [3]:
# Scrape source 1: collect the playlist IDs linked from the audiohype article.
url_1 = "https://audiohype.io/resources/the-best-spotify-playlists/"
response_1 = requests.get(url_1)
print(response_1.status_code) # 200 status code means OK!
soup_1 = BeautifulSoup(response_1.content, "html.parser")

# Run the CSS selection once and iterate over the matched tags directly
# (previously the page was re-selected for every single element).
links_1 = soup_1.select(".rank-math-link")
# For each link: drop the query string, then keep the last path segment.
# The final match is discarded, as before -- presumably it is not a playlist
# link; TODO confirm against the page markup.
playlists_list_1 = [link["href"].split("?")[0].split("/")[-1] for link in links_1][:-1]
print(f"List of {len(playlists_list_1)} playlists has been created!")

200
List of 28 playlists has been created!


### Source 2

In [4]:
# Scrape source 2: collect the playlist IDs linked from the noteburner article.
url_2 = "https://www.noteburner.com/spotify-music-tips/top-10-spotify-playlist.html"
response_2 = requests.get(url_2)
print(response_2.status_code) # 200 status code means OK!
soup_2 = BeautifulSoup(response_2.content, "html.parser")

# Run the CSS selection once and iterate over the matched tags directly
# (previously the page was re-selected for every single element).
# NOTE(review): the "contaienr" spelling is kept exactly as it was; it
# presumably mirrors the class name used by the page itself -- confirm
# before "fixing" it.
links_2 = soup_2.select("div.contaienr > div > div > ul > li > a")
# For each link: drop the query string, then keep the last path segment.
# The final match is discarded, as before -- presumably it is not a playlist
# link; TODO confirm against the page markup.
playlists_list_2 = [link["href"].split("?")[0].split("/")[-1] for link in links_2][:-1]
print(f"List of {len(playlists_list_2)} playlists has been created!")

200
List of 9 playlists has been created!


### Combine both sources into a single list of playlist IDs

In [5]:
# Merge the IDs scraped from both sources into one new list (source 1 first).
playlists_list = [*playlists_list_1, *playlists_list_2]
print(f"Total number of playlist IDs is {len(playlists_list)}")

Total number of playlist IDs is 37


## Playlist functions

### Extracting `<TRACKS>`

In [6]:
def get_playlist_tracks(username, playlist_id):
    """Fetch every item of a playlist, following pagination to the end.

    Parameters
    ----------
    username : forwarded unchanged to ``sp.user_playlist_tracks``.
    playlist_id : forwarded unchanged to ``sp.user_playlist_tracks``.

    Returns
    -------
    list
        The ``items`` entries of the first page followed by those of every
        subsequent page. The request is made with ``market="DE"``.
    """
    page = sp.user_playlist_tracks(username, playlist_id, market="DE")
    # The first page's own list is the accumulator; later pages are appended to it.
    collected = page['items']
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    return collected

### Extracting song `<IDs>`

In [7]:
def get_playlist_track_ids(playlist):
    """Return the ``["track"]["id"]`` value of every playlist item, in order."""
    song_ids_list = []
    for entry in playlist:
        song_ids_list.append(entry["track"]["id"])
    return song_ids_list

#song_ids_list = get_playlist_track_ids(playlist)

### Extracting song `<NAMES>`

In [8]:
def get_playlist_track_names(playlist):
    """Return the ``["track"]["name"]`` value of every playlist item, in order."""
    return [entry["track"]["name"] for entry in playlist]

#song_names_list = get_playlist_track_names(playlist)

### Extracting song `<ARTISTS>`

In [9]:
def get_playlist_track_artists(playlist):
    """Return the name of the first listed artist of every playlist item.

    Only ``["track"]["artists"][0]`` is read; additional artists on a track
    are ignored.
    """
    song_artist_list = []
    for entry in playlist:
        first_artist = entry["track"]["artists"][0]
        song_artist_list.append(first_artist["name"])
    return song_artist_list

#song_artist_list = get_playlist_track_artists(playlist)

### Extracting song `<AUDIO FEATURES>`

In [10]:
def get_playlist_track_audiofeatures(id_list):
    """Fetch the audio features for the given track IDs.

    The IDs are sent to ``sp.audio_features`` in consecutive batches of at
    most 100, and the per-batch results are concatenated in request order.

    Parameters
    ----------
    id_list : iterable of track IDs. This argument is now actually used;
        the previous version ignored it and read the notebook-level
        ``song_ids_list`` instead.

    Returns
    -------
    list
        One entry per requested ID, as returned by ``sp.audio_features``.
        An empty input returns ``[]`` without calling the API.
    """
    batch_size = 100  # same chunk size the notebook has always used
    ids = list(id_list)

    audio_features_list = []
    # range() with a step yields the start offset of every batch; slicing
    # past the end of the list is safe, so the last batch may be shorter.
    for start in range(0, len(ids), batch_size):
        batch = ids[start:start + batch_size]
        audio_features_list.extend(sp.audio_features(batch))
    return audio_features_list

#song_audiofeatures_list = get_playlist_track_audiofeatures(song_ids_list)

## Create dataframe with song `<NAMEs>`, `<ARTISTs>`, `<IDs>`, `<AUDIO FEATUREs>`

In [11]:
def create_df_to_concat_to_base(song_names=None, song_artists=None, audio_features=None):
    """Build one dataframe of song names, artists and selected audio features.

    Parameters
    ----------
    song_names : optional sequence used for the ``song_name`` column.
    song_artists : optional sequence used for the ``song_artist`` column.
    audio_features : optional list of per-track feature records.

    Any argument left as ``None`` falls back to the notebook-level variable
    the original version read (``song_names_list``, ``song_artist_list``,
    ``song_audiofeatures_list``), so existing zero-argument calls still work.

    Returns
    -------
    pandas.DataFrame
        Columns ``song_name``, ``song_artist`` followed by ``id`` and the
        ten selected audio-feature columns, joined side by side by row
        position.
    """
    # Backward-compatible fallback to the notebook globals.
    if song_names is None:
        song_names = song_names_list
    if song_artists is None:
        song_artists = song_artist_list
    if audio_features is None:
        audio_features = song_audiofeatures_list

    df_1 = pd.DataFrame(data={
        "song_name": song_names,
        "song_artist": song_artists,
    })

    # Keep only the feature columns of interest; other keys are dropped.
    df_2 = pd.DataFrame(audio_features)
    df_2 = df_2[["id","danceability","energy","loudness","speechiness","acousticness",
               "instrumentalness","liveness","valence","tempo","duration_ms"]]

    combined = pd.concat([df_1, df_2], axis=1)
    print(f"Dataframe with {len(combined)} entries was created as `df_to_concat`")
    return combined

#df = create_df_to_concat_to_base()

## Extend the CSV with the new dataframe

In [12]:
def extend_collection(new_songs=None, csv_path="Song_Collection_Extended.csv"):
    """Append a dataframe of songs to the collection CSV and rewrite the file.

    Parameters
    ----------
    new_songs : optional pandas.DataFrame with the rows to append. When left
        as ``None`` the notebook-level ``df`` is used, which is what the
        original zero-argument version always did.
    csv_path : path of the collection CSV; defaults to the file name the
        notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The existing rows followed by the new rows, with a fresh 0..n-1 index.

    Notes
    -----
    The CSV must already exist; ``pd.read_csv`` raises otherwise. Rows are
    appended as-is, with no de-duplication, so calling this twice with the
    same data stores it twice.
    """
    if new_songs is None:
        new_songs = df  # backward-compatible fallback to the notebook global

    df_base = pd.read_csv(csv_path)
    df_extended = pd.concat([df_base, new_songs], axis=0, ignore_index=True)
    df_extended.to_csv(csv_path, index=False)
    print(f"Extended base dataframe (length = {len(df_base)}) by {len(new_songs)} songs!")
    return df_extended

#df_extended = extend_collection()

### Create very first dataframe to build upon

In [13]:
def create_base_dataframe(song_names=None, song_artists=None, audio_features=None,
                          csv_path="Song_Collection_Extended.csv"):
    """Create the initial song collection and write it to the collection CSV.

    Parameters
    ----------
    song_names : optional sequence used for the ``song_name`` column.
    song_artists : optional sequence used for the ``song_artist`` column.
    audio_features : optional list of per-track feature records.
    csv_path : destination file; defaults to the file name the notebook has
        always used. An existing file at that path is overwritten.

    Any of the first three arguments left as ``None`` falls back to the
    notebook-level variable the original version read (``song_names_list``,
    ``song_artist_list``, ``song_audiofeatures_list``), so existing
    zero-argument calls still work.

    Returns
    -------
    pandas.DataFrame
        Columns ``song_name``, ``song_artist`` followed by ``id`` and the
        ten selected audio-feature columns.
    """
    # Backward-compatible fallback to the notebook globals.
    if song_names is None:
        song_names = song_names_list
    if song_artists is None:
        song_artists = song_artist_list
    if audio_features is None:
        audio_features = song_audiofeatures_list

    df_1 = pd.DataFrame(data={
        "song_name": song_names,
        "song_artist": song_artists,
    })

    # Keep only the feature columns of interest; other keys are dropped.
    df_2 = pd.DataFrame(audio_features)
    df_2 = df_2[["id","danceability","energy","loudness","speechiness","acousticness",
               "instrumentalness","liveness","valence","tempo","duration_ms"]]

    base = pd.concat([df_1, df_2], axis=1)
    base.to_csv(csv_path, index=False)
    print(f"Dataframe with {len(base)} entries was created")
    return base

# One-off run of the whole pipeline for a single playlist.
# The statement order and the variable names matter: the two functions called
# at the end take no arguments here and read `song_names_list`,
# `song_artist_list`, `song_audiofeatures_list` and `df` from the notebook
# namespace.
playlist=get_playlist_tracks("dimoqxn", "4wNlX4GrLXSgnuGZpI5xVa")
song_ids_list = get_playlist_track_ids(playlist)
song_names_list = get_playlist_track_names(playlist)
song_artist_list = get_playlist_track_artists(playlist)
song_audiofeatures_list = get_playlist_track_audiofeatures(song_ids_list)
df = create_df_to_concat_to_base()
# Reads and rewrites "Song_Collection_Extended.csv"; the file must already exist.
df_extended = extend_collection()

## PIPELINE - Looping through list of playlists

In [11]:
# Replaces the scraped `playlists_list` built earlier with two hand-picked
# playlist IDs.
# NOTE(review): this cell's execution count is out of sequence, and the loop
# output recorded below lists more than two playlists, so the loop was
# presumably last run against a different list -- confirm which list is
# intended before re-running.
playlists_list = ["61hfbFZxRLYNWmqkfBSQw1", "4wNlX4GrLXSgnuGZpI5xVa"]

In [14]:
# Run the full pipeline for every playlist ID and append each result to the
# collection CSV. A failing playlist is skipped so the remaining ones still
# run, but the failure is now reported instead of being silently discarded.
for idx, plist in enumerate(playlists_list):
    try:
        playlist=get_playlist_tracks("spotify", plist)
        song_ids_list = get_playlist_track_ids(playlist)
        song_names_list = get_playlist_track_names(playlist)
        song_artist_list = get_playlist_track_artists(playlist)
        song_audiofeatures_list = get_playlist_track_audiofeatures(song_ids_list)
        df = create_df_to_concat_to_base()
        df_extended = extend_collection()
    except Exception as exc:
        # `Exception` (not a bare except) so that interrupting the kernel
        # still stops the loop.
        print(f"[{idx + 1}/{len(playlists_list)}] Skipped playlist {plist!r}: {exc!r}")

Dataframe with 50 entries was created as `df_to_concat`
Extended base dataframe (length = 278) by 50 songs!
Dataframe with 100 entries was created as `df_to_concat`
Extended base dataframe (length = 328) by 100 songs!
Dataframe with 50 entries was created as `df_to_concat`
Extended base dataframe (length = 428) by 50 songs!
Dataframe with 50 entries was created as `df_to_concat`
Extended base dataframe (length = 478) by 50 songs!
Dataframe with 129 entries was created as `df_to_concat`
Extended base dataframe (length = 528) by 129 songs!
Dataframe with 60 entries was created as `df_to_concat`
Extended base dataframe (length = 657) by 60 songs!
Dataframe with 39 entries was created as `df_to_concat`
Extended base dataframe (length = 717) by 39 songs!
Dataframe with 90 entries was created as `df_to_concat`
Extended base dataframe (length = 756) by 90 songs!
Dataframe with 60 entries was created as `df_to_concat`
Extended base dataframe (length = 846) by 60 songs!
Dataframe with 180 entri