In [1]:
import json
import pprint
from pathlib import Path

import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials #To access authorised Spotify data

pp = pprint.PrettyPrinter(indent=4)

In [None]:
# Spotify API credentials for the Client Credentials flow.
# They are blank in the notebook; supply real values locally before running.
# NOTE(review): spotipy is documented to fall back to the SPOTIPY_CLIENT_ID /
# SPOTIPY_CLIENT_SECRET environment variables when no value is passed — confirm
# for the installed version before relying on it.
client_id= ""
client_secret= ""

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

# spotify object to access API
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Smoke test: look up a user profile to verify that the credentials work.
# The user id is blank here and has to be filled in as well.
try:
    user = sp.user('')
    print('Success!')
    pp.pprint(user)
except Exception as exc:
    # Catch only regular errors (not KeyboardInterrupt/SystemExit) and show the
    # reason, so that a failed connection can actually be diagnosed.
    print('Connection unsuccessfull')
    print('Reason:', repr(exc))

# Simple Song

In [3]:
# Audio features of a single track, stored with one column per track
# (features as rows) and displayed transposed back into a single labelled row.
track_1 = (
    pd.DataFrame(sp.audio_features('6L89mwZXSOwYl76YXfX13s'))
    .T
    .rename(columns={0: 'bask_case'})
)
track_1.T

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
bask_case,0.442,0.943,3,-3.205,1,0.0602,0.00293,9e-06,0.091,0.781,85.064,audio_features,6L89mwZXSOwYl76YXfX13s,spotify:track:6L89mwZXSOwYl76YXfX13s,https://api.spotify.com/v1/tracks/6L89mwZXSOwY...,https://api.spotify.com/v1/audio-analysis/6L89...,181533,4


## Functions

In [4]:
def get_playlist(playlist_id):
    """Collect basic metadata for every track of a Spotify playlist.

    Parameters
    ----------
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns ``track_name``, ``artist`` (name of
        the first listed artist), ``track_id``, ``track_duration`` (taken from
        ``duration_ms``) and ``track_popularity``. An empty playlist yields an
        empty frame that still has these columns.
    """
    column_names = [
        'track_name',
        'artist',
        'track_id',
        'track_duration',
        'track_popularity',
    ]

    rows = []

    # The API delivers playlist items in pages; follow the "next" links so that
    # playlists longer than a single page are not silently truncated.
    page = sp.playlist_tracks(playlist_id)
    while page:
        for item in page['items']:
            track = item.get('track')

            # Entries without a track object (e.g. tracks that are no longer
            # available) carry no metadata to extract, so skip them.
            if not track:
                continue

            rows.append({
                'track_name': track['name'],
                'artist': track['artists'][0]['name'],
                'track_id': track['id'],
                'track_duration': track['duration_ms'],
                'track_popularity': track['popularity'],
            })

        page = sp.next(page) if page.get('next') else None

    # Passing the column names explicitly keeps the schema for empty playlists.
    return pd.DataFrame(rows, columns=column_names)

In [5]:
def get_features(dataframe):
    """Add Spotify audio features to a frame of tracks.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``track_id`` column holding Spotify track IDs.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataframe`` (the input is not modified) extended by the
        columns ``acousticness``, ``danceability``, ``energy``,
        ``instrumentalness``, ``liveness``, ``loudness``, ``speechiness``,
        ``tempo`` and ``valence``. Tracks for which the API reports no audio
        features get missing values in these columns.
    """
    feature_names = [
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
    ]

    # The audio-features endpoint accepts several IDs per request (at most 100),
    # so query in batches instead of issuing one HTTP request per track.
    max_ids_per_request = 100

    track_ids = [str(track_id) for track_id in dataframe['track_id']]

    fetched = []
    for start in range(0, len(track_ids), max_ids_per_request):
        batch = track_ids[start:start + max_ids_per_request]
        fetched.extend(sp.audio_features(batch))

    # Append the extracted features to a copy of the input
    output = dataframe.copy()

    for name in feature_names:
        # An entry is None when no audio features exist for that track; keep
        # the row and mark the value as missing instead of failing.
        output[name] = [entry[name] if entry else None for entry in fetched]

    return output

### Recap

We created two functions, `get_playlist` and `get_features`, which allow us to extract all the information we need from a given Spotify playlist.

**Usage:**


1.   *get_playlist*:
      
      ```playlist = get_playlist('PLAYLIST_ID')```

2.   *get_features*:

      ```track_features = get_features(playlist)``` 



In [6]:
def get_track_informations(playlist_id):
    """Return the tracks of a playlist together with their audio features.

    Convenience wrapper: the playlist is read with ``get_playlist`` and the
    resulting frame is passed straight on to ``get_features``.
    """
    return get_features(get_playlist(playlist_id))

## Extract Data

Now that we have functions that make it easy to extract features from as many playlists as we want, we will try to build a dataset comparable to the one we used for our own songs.

Our own dataset consists of:

* 311 songs in total
    * 37 Classic songs
    * 60 Hardstyle songs
    * 80 house songs
    * 134 punk songs
    
The goal is to create a dataset with a comparable distribution.

* Hardstyle - [Link](spotify:playlist:3bGSAHGYFEDxyEj7uXe0qq)  
**50 Songs**
* Punk - [Link](spotify:playlist:37i9dQZF1DXd6tJtr4qeot)  
**150 Songs**
* Classic - [Link](spotify:playlist:37i9dQZF1DWWEJlAGA9gs0)  
**30 songs**
* House - [Link](spotify:playlist:2otQLmbi8QWHjDfq3eL0DC)  
**60 songs**

In [7]:
# Track metadata plus audio features for the hardstyle playlist
# (the ID matches the "Hardstyle" link in the playlist list above).
hardstyle = get_track_informations('3bGSAHGYFEDxyEj7uXe0qq')

In [8]:
# Preview the first two rows to check the extracted columns.
hardstyle.head(2)

Unnamed: 0,track_name,artist,track_id,track_duration,track_popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
0,My Beautiful Fantasy,Phuture Noize,1a3M9MU7mmhQerLzHSGD3w,192014,60,0.132,0.319,0.865,0.0,0.152,-2.528,0.0415,155.174,0.0845
1,Feel It!,D-Block & S-te-Fan,01xdiGuSuIf5qZulm1uqkY,236903,63,0.0191,0.459,0.941,0.0,0.371,-2.053,0.0777,155.023,0.399


In [12]:
# Punk playlist; the ID comes from the "Punk" link in the playlist list of the
# "Extract Data" section (an empty ID cannot be resolved by the API).
punk = get_track_informations('37i9dQZF1DXd6tJtr4qeot')

In [None]:
# Classic playlist; the ID comes from the "Classic" link in the playlist list of
# the "Extract Data" section (an empty ID cannot be resolved by the API).
classic = get_track_informations('37i9dQZF1DWWEJlAGA9gs0')

In [21]:
# House playlist; the ID comes from the "House" link in the playlist list of the
# "Extract Data" section (an empty ID cannot be resolved by the API).
house = get_track_informations('2otQLmbi8QWHjDfq3eL0DC')

In [25]:
# Integer-encode the genre of each frame:
# 0 = hardstyle, 1 = classic, 2 = punk, 3 = house.
for genre_code, genre_frame in enumerate((hardstyle, classic, punk, house)):
    genre_frame['genre'] = genre_code

In [26]:
# Per-genre frames to combine, listed in the order of their genre codes (0-3).
dfs = [hardstyle, classic, punk, house]

In [29]:
# Stack all genres into one frame. Every per-genre frame is numbered from 0, so
# renumber the rows to avoid duplicate index labels in the combined dataset.
features = pd.concat(dfs, axis=0, ignore_index=True)

In [None]:
# Preview the first rows of the combined dataset.
features.head()

In [31]:
# Persist the combined dataset. Create the target folder first so that the
# export also works on a fresh checkout where ./data does not exist yet.
Path('./data').mkdir(parents=True, exist_ok=True)
features.to_csv('./data/spotify_features.csv')