# Installing Spotipy

In order to use the Spotify API (SpotiPy) we will have to create an account in Spotify and follow these steps. Once we have done it we will start initializing the API and look at the search method for which we can introduce a "query" q, in this example we will try it with Lady Gaga:

In [1]:
##!conda install -c conda-forge spotipy

## Loading credentials from another config file

In [1]:
import config

## Starting with Spotify API

In [27]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [7]:
#Initialize SpotiPy with user credentias
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [13]:
# get json of a specific playlist
username = 'maka_97'
playlist_id = '6mtYuOxzl58vSGnEDtZ9uB' # get from url of the playplist ('https://open.spotify.com/playlist/6mtYuOxzl58vSGnEDtZ9uB')

playlist = sp.user_playlist_tracks(username, playlist_id, market="GB")

In [15]:
# define a function to extract tracks of a playlist
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username,playlist_id,market="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [16]:
# extract tracks from a specific playlist
tracks = get_playlist_tracks(username, playlist_id)

In [28]:
# extract audio features
list_of_audio_features=[]
for item in range(0,len(tracks)):
    # print (tracks[item]["track"]["id"])
    list_of_audio_features.append(sp.audio_features(tracks[item]["track"]["id"])[0])

In [37]:
sp.audio_features(tracks[item]["track"]["id"])[0]

{'danceability': 0.534,
 'energy': 0.87,
 'key': 11,
 'loudness': -3.078,
 'mode': 0,
 'speechiness': 0.0425,
 'acousticness': 0.000334,
 'instrumentalness': 0,
 'liveness': 0.241,
 'valence': 0.462,
 'tempo': 126.019,
 'type': 'audio_features',
 'id': '0JiY190vktuhSGN6aqJdrt',
 'uri': 'spotify:track:0JiY190vktuhSGN6aqJdrt',
 'track_href': 'https://api.spotify.com/v1/tracks/0JiY190vktuhSGN6aqJdrt',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0JiY190vktuhSGN6aqJdrt',
 'duration_ms': 215160,
 'time_signature': 4}

In [30]:
# create a dataframe for audio features
pop_feat_df = pd.DataFrame(list_of_audio_features)

# Select some features only

pop_df = pop_feat_df[["danceability","energy","loudness","speechiness","acousticness",
    "instrumentalness","liveness","valence","tempo","id","duration_ms"]]

pop_df

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms
0,0.623,0.734,-5.950,0.1070,0.016200,0.000002,0.1450,0.372,107.853,76FZM38RC8XaAjJ77CVTNe,244453
1,0.722,0.851,-3.873,0.0639,0.031900,0.000000,0.1080,0.452,126.885,5yDL13y5giogKs2fSNf7sj,197936
2,0.688,0.734,-4.569,0.0274,0.004620,0.000000,0.0756,0.460,140.004,2hns6Dv29Yrg68AVTJiAyA,208493
3,0.688,0.702,-4.792,0.0499,0.021500,0.000000,0.1280,0.740,94.006,3jomjC6H7YQBRr2CHPtc4y,192191
4,0.706,0.841,-3.956,0.0418,0.000079,0.000000,0.0688,0.861,132.032,0iGckQFyv6svOfAbAY9aWJ,220227
...,...,...,...,...,...,...,...,...,...,...,...
378,0.722,0.821,-3.856,0.1650,0.073000,0.000000,0.6310,0.721,127.944,2OsEJFTfzfjG4oC92cuP2c,210293
379,0.772,0.648,-6.222,0.0612,0.161000,0.000000,0.2690,0.741,129.966,0x4KuMldlnPMniT86Cmyep,225560
380,0.613,0.857,-5.492,0.0366,0.001200,0.000000,0.3580,0.664,124.987,56ZrTFkANjeAMiS14njg4E,182000
381,0.702,0.705,-6.666,0.0543,0.258000,0.000006,0.0879,0.743,154.911,5a7NdkF09AfD0H607eiOkX,213445


In [None]:
list_of_audio_features

In [46]:
# Check where to find artist names
pprint.pprint(tracks[0]['track']['artists'][0]['name'])

'Ariana Grande'


In [54]:
# define a function to get artists from tracks
def get_tracks_artists(username, playlist_id):
    tracks = get_playlist_tracks(username, playlist_id)
    artists = []
    for item in range(0, len(tracks)):
        artists.append(tracks[item]['track']['artists'][0]['name'])
    return artists

In [55]:
#get artists from tracks and include it to the dataframe
artists = get_playlist_tracks(username, playlist_id)

In [52]:
pop_df['artist'] = artists

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_df['artist'] = artists


In [63]:
# check where to find track names
pprint.pprint(tracks[0]['track']['name'])

'Into You'


In [66]:
# define a function to get track names
def get_tracks_names(username, playlist_id):
    tracks = get_playlist_tracks(username, playlist_id)
    track_names = []
    for item in range(0, len(tracks)):
        track_names.append(tracks[item]['track']['name'])
    return track_names

In [67]:
#get get track names and include it to the dataframe
track_names = get_tracks_names(username, playlist_id)
pop_df['name'] = track_names

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_df['name'] = track_names


In [71]:
# check where to get album names
pprint.pprint(tracks[0]['track']['album']['name'])

'Dangerous Woman'


In [72]:
# define a function to get track names
def get_tracks_albums(username, playlist_id):
    tracks = get_playlist_tracks(username, playlist_id)
    albums = []
    for item in range(0, len(tracks)):
        albums.append(tracks[item]['track']['album']['name'])
    return albums

In [73]:
#get get track names and include it to the dataframe
albums = get_tracks_albums(username, playlist_id)
pop_df['album'] = albums

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_df.loc[:, 'album'] = albums


In [75]:
# check where to get the popularity of tracks
pprint.pprint(tracks[0]['track']['popularity'])

74


In [76]:
# define a function to get track names
def get_tracks_popularity(username, playlist_id):
    tracks = get_playlist_tracks(username, playlist_id)
    popularity_li = []
    for item in range(0, len(tracks)):
        popularity_li.append(tracks[item]['track']['popularity'])
    return popularity_li

In [77]:
#get get track popularity rate and include it to the dataframe
popularity_li = get_tracks_popularity(username, playlist_id)
pop_df['popularity'] = popularity_li

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_df['popularity'] = popularity_li


In [79]:
pop_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 383 entries, 0 to 382
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      383 non-null    float64
 1   energy            383 non-null    float64
 2   loudness          383 non-null    float64
 3   speechiness       383 non-null    float64
 4   acousticness      383 non-null    float64
 5   instrumentalness  383 non-null    float64
 6   liveness          383 non-null    float64
 7   valence           383 non-null    float64
 8   tempo             383 non-null    float64
 9   id                383 non-null    object 
 10  duration_ms       383 non-null    int64  
 11  artist            383 non-null    object 
 12  name              383 non-null    object 
 13  album             383 non-null    object 
 14  popularity        383 non-null    int64  
dtypes: float64(9), int64(2), object(4)
memory usage: 45.0+ KB
