# Data Extraction using Spotify Web API

### Playlist Links

https://open.spotify.com/playlist/37i9dQZF1DWZKuerrwoAGz - 150 songs

https://open.spotify.com/playlist/4WloBZWLuV80F07SCPxs09 - 80 songs

https://open.spotify.com/playlist/3Kz5KBE3Ksupz9odBGwze6 - 54 songs

## Imports

In [1]:
import pandas as pd
import spotipy
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials

## Credentials

In [2]:
with open("spotify_credentials.txt") as f:
    credentials = f.readlines()
    cid = credentials[0][:-1]
    secret = credentials[1]

In [3]:
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Functions to extract data

In [4]:
def get_track_ids(user, playlist_id):
    
    track_ids = []
    
    playlist = sp.user_playlist(user, playlist_id)
    
    for item in playlist['tracks']['items']:
        track = item['track']
        track_ids.append(track['id'])
        
    return track_ids

In [5]:
def get_track_features(track_ids):
    
    meta = sp.track(track_ids)
    features = sp.audio_features(track_ids)

    track_id = meta['id']
    track_name = meta['name']
    album_name = meta['album']['name']
    artist_name = meta['album']['artists'][0]['name']
    popularity = meta['popularity']
    
    danceability = features[0]['danceability']
    loudness = features[0]['loudness']
    speechiness = features[0]['speechiness']
    acousticness = features[0]['acousticness']
    liveness = features[0]['liveness']
    instrumentalness = features[0]['instrumentalness']
    energy = features[0]['energy']
    valence = features[0]['valence']
    key = features[0]['key']
    mode = features[0]['mode']
    tempo = features[0]['tempo']
    time_signature = features[0]['time_signature']
    length = features[0]['duration_ms']
    
    art_result = sp.search(artist_name)
    art_track = art_result['tracks']['items'][0]
    artist = sp.artist(art_track["artists"][0]["external_urls"]["spotify"])
    genres = artist["genres"]

    track_data = [track_id,track_name,
                  album_name,artist_name,genres,popularity,
                  danceability,loudness,speechiness,
                  acousticness,liveness,instrumentalness,
                  energy,valence,mode,
                  key,tempo,time_signature,length
                 ]
    
    return track_data

In [6]:
def get_data(user, playlist_id):
    
    track_ids = get_track_ids(user, playlist_id)

    track_list = []

    for i in range(len(track_ids)):
        track_data = get_track_features(track_ids[i])
        track_list.append(track_data)

        df = pd.DataFrame(track_list, columns = ['track_id','track_name',
                                                 'album_name','artist_name','genres','popularity',
                                                 'danceability','loudness','speechiness',
                                                 'acousticness','liveness','instrumentalness',
                                                 'energy','valence','mode',
                                                 'key','tempo','time_signature','length'
                                                ])

    return df

In [7]:
df1 = get_data('spotify', '37i9dQZF1DWZKuerrwoAGz')

In [8]:
df2 = get_data('spotify', '4WloBZWLuV80F07SCPxs09')

In [9]:
df3 = get_data('spotify', '3Kz5KBE3Ksupz9odBGwze6')

## Saving the data to a file

In [10]:
frames = [df1, df2, df3]

In [11]:
df = pd.concat(frames)

In [12]:
df.index = pd.RangeIndex(start=0, stop=len(df.index), step=1)

In [13]:
df.head()

Unnamed: 0,track_id,track_name,album_name,artist_name,genres,popularity,danceability,loudness,speechiness,acousticness,liveness,instrumentalness,energy,valence,mode,key,tempo,time_signature,length
0,0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,After Hours,The Weeknd,[rap],87,0.514,-5.934,0.0598,0.00146,0.0897,9.5e-05,0.73,0.334,1,1,171.005,4,200040
1,6UelLqGlWMcVH1E5c4H7lY,Watermelon Sugar,Fine Line,Harry Styles,[pop],85,0.548,-4.209,0.0465,0.122,0.335,0.0,0.816,0.557,1,0,95.39,4,174000
2,7ef4DlsgrMEH11cDZd32M6,One Kiss (with Dua Lipa),One Kiss (with Dua Lipa),Calvin Harris,"[barbadian pop, pop, urban contemporary]",84,0.791,-3.24,0.11,0.037,0.0814,2.2e-05,0.862,0.592,0,9,123.994,4,214847
3,3w3y8KPTfNeOKPiqUTakBh,Locked out of Heaven,Unorthodox Jukebox,Bruno Mars,"[dance pop, pop]",83,0.726,-4.165,0.0431,0.049,0.309,0.0,0.698,0.867,1,5,143.994,4,233478
4,7qiZfU4dY1lWllzX7mPBI3,Shape of You,÷ (Deluxe),Ed Sheeran,"[pop, uk pop]",83,0.825,-3.183,0.0802,0.581,0.0931,0.0,0.652,0.931,0,1,95.977,4,233713


In [14]:
df.shape

(234, 19)

In [15]:
df.to_csv('data/spotify_data/spotify.csv', sep=',', index=False)

`Created spotify tracks data`