## Importing libraries

In [8]:
import json
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [9]:
# Credentials are read from the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# environment variables (spotipy's documented fallback when no arguments are
# passed), so no secret is ever stored in the notebook itself.
spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials())

## Defining a function for analyzing playlists

In [10]:
def playlist_analyzer(user, playlist_id):
    """Build a DataFrame of track metadata and audio features for a playlist.

    Every page of the playlist is fetched through the module-level ``spotify``
    client, then audio features are requested in batches instead of one HTTP
    call per track.

    Parameters
    ----------
    user : str
        Owner of the playlist, forwarded to ``spotify.user_playlist_tracks``.
    playlist_id : str
        Playlist ID or URI, forwarded to ``spotify.user_playlist_tracks``.

    Returns
    -------
    pandas.DataFrame
        One row per analysable track with the columns, in this order:
        ``names``, ``uri``, ``artists`` (list of artist names),
        ``popularity``, followed by the 13 audio-feature columns
        ``danceability`` ... ``time_signature``. Entries without a track
        object, entries whose URI is not a ``spotify:track:`` URI, and tracks
        for which the API returns no audio features are skipped. An empty
        playlist yields an empty frame with the same columns.
    """
    feature_columns = [
        "danceability", "energy", "key", "loudness", "mode", "speechiness",
        "acousticness", "instrumentalness", "liveness", "valence", "tempo",
        "duration_ms", "time_signature",
    ]
    # Column order matters: callers slice the feature columns by position.
    columns = ["names", "uri", "artists", "popularity"] + feature_columns
    # Maximum number of IDs sent per audio-features request
    # (assumed API limit of 100 - confirm against the Spotify Web API docs).
    batch_size = 100

    # The playlist is returned page by page; follow "next" until exhausted.
    page = spotify.user_playlist_tracks(user=user, playlist_id=playlist_id)
    items = list(page["items"])
    while page["next"]:
        page = spotify.next(page)
        items.extend(page["items"])

    # Keep only entries that carry a track object with a catalogue track URI;
    # a null track or a non-track URI cannot be sent to audio_features.
    tracks = [
        item["track"]
        for item in items
        if item.get("track")
        and (item["track"].get("uri") or "").startswith("spotify:track:")
    ]

    records = []
    for start in range(0, len(tracks), batch_size):
        batch = tracks[start:start + batch_size]
        batch_features = spotify.audio_features([track["uri"] for track in batch]) or []
        # Results are paired with the request by position; a None entry means
        # no features were returned for that track.
        for track, features in zip(batch, batch_features):
            if features is None:
                continue
            record = {
                "names": track["name"],
                "uri": track["uri"],
                "artists": [artist["name"] for artist in track["artists"]],
                "popularity": track["popularity"],
            }
            record.update({column: features[column] for column in feature_columns})
            records.append(record)

    # Creating a df with the song information
    return pd.DataFrame(records, columns=columns)

### Getting data from a Spotify playlist

In [11]:
# Download the playlist, then keep a copy of just the audio-feature columns
# (danceability through time_signature, i.e. positions 4 to 16).
playlist_df = playlist_analyzer(
    user="spotify",
    playlist_id="spotify:playlist:6FKDzNYZ8IW1pvYVF4zUN2",
)
features_playlist = playlist_df.loc[:, "danceability":"time_signature"].copy()

SpotifyOauthError: error: invalid_client, error_description: Invalid client

## Transforming data and creating clusters of the playlist dataframe

### StandardScaler transformation

In [7]:
# Fit the scaler on the feature columns and standardize them in a single step
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features_playlist)
features_scaled_df = pd.DataFrame(data=features_scaled)

In [8]:
# Preview the first three standardized rows
features_scaled_df.head(n=3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.404241,1.165236,0.476421,0.980355,0.76539,0.209791,-0.739022,-0.513813,0.852844,-0.658068,-0.10266,-0.042016,0.150899
1,0.94661,0.593508,1.307724,0.062216,-1.306523,-0.043335,-0.642256,-0.522759,-0.807031,-0.972301,-0.240997,-0.083179,0.150899
2,0.688038,0.061552,-1.463286,-0.017824,-1.306523,-0.589928,0.176318,-0.522759,-0.460815,1.501788,0.082191,-0.294754,0.150899


### KMeans clustering

In [41]:
# Initialize the model, fit it and predict a cluster for every track.
# n_clusters=8 spells out scikit-learn's default (the value used implicitly
# before); random_state pins the centroid initialisation so the cluster labels
# are reproducible from one run to the next.
kmeans = KMeans(n_clusters=8, random_state=42)
kmeans.fit(features_scaled)
clusters = kmeans.predict(features_scaled)
playlist_df["cluster"] = clusters  # one label per track, assigned by row position

In [44]:
# Number of tracks assigned to each cluster, ordered by cluster label
pd.Series(clusters).value_counts(sort=False).sort_index()

0     7718
1    12972
2     9893
3     5447
4     2411
5     8816
6     6582
7     3073
dtype: int64

## Exporting the dataframes to csv

In [None]:
# Write each dataframe to the shared Datasets folder, leaving out the index column
for frame, csv_path in [
    (features_playlist, "../Datasets/features_playlist.csv"),
    (playlist_df, "../Datasets/playlist_df.csv"),
    (features_scaled_df, "../Datasets/features_scales_df.csv"),
]:
    frame.to_csv(csv_path, index=False)