
Vamos a conectarnos a una API externa para procesar datos en vivo de Spotify; usaremos *pandas* para serializar los datos y luego los exportaremos a Excel.



**Crea tu API**
1. Regístrate en Spotify para obtener tus credenciales de API:
https://developer.spotify.com/dashboard/
2. Crea una app ("Create an app") y asígnale un nombre
3. Copia tus credenciales de Spotify: el Client ID y el Client Secret





In [None]:
# Instala SPOTIPY
# pip es una herramienta que importa librerias externas que no tengas instaladas
! pip install spotipy

In [None]:
# Libraries used throughout this notebook
import spotipy
# NOTE(review): client created without credentials; no later visible cell
# references `spotify` (authenticated clients come from get_spotify_credentials()).
spotify = spotipy.Spotify()
import sys
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup
import requests
import lxml
import json

# Librerías para visualización
import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Store your Spotify API credentials in these variables.
# NOTE(review): avoid committing real values if this notebook is shared.
client_id = ""
client_secret = ""

In [None]:
def get_spotify_credentials():
    """Return a spotipy client authenticated with the notebook's credentials.

    Reads the module-level ``client_id`` and ``client_secret`` and enables
    the client's ``trace`` flag before returning it.
    """
    manager = SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
    client = spotipy.Spotify(client_credentials_manager=manager)
    client.trace = True
    return client

In [None]:
def get_spotify_data(artist_name):
    """Build a DataFrame of audio features for the album tracks of an artist.

    Searches Spotify for ``artist_name``, takes the first match, collects all
    of its albums (following pagination) and requests the audio features of
    every collected track in batches of 100.

    Args:
        artist_name: Name used in the ``artist:`` search query; also stored
            verbatim in the ``artists`` column.

    Returns:
        A ``pandas.DataFrame`` indexed by ``track_name`` holding the feature
        columns returned by the API plus ``album`` and ``artists``. Fully
        duplicated rows and repeated track names are dropped (first kept).
        Tracks for which the API returns no features are kept as NaN rows.

    Raises:
        ValueError: If the search returns no artist.
    """
    # Authenticate against Spotify
    sp = get_spotify_credentials()

    # Look the artist up; fail loudly instead of hitting an unbound variable
    results = sp.search(q='artist:' + artist_name, type='artist')
    items = results['artists']['items']
    if not items:
        raise ValueError(
            "No se encontró ningún artista para: {!r}".format(artist_name))
    uri = items[0]['uri']

    # Collect every album, following the paginated responses
    results = sp.artist_albums(uri, album_type='album')
    albums = results['items']
    while results['next']:
        results = sp.next(results)
        albums.extend(results['items'])

    uris = []
    track_names = []
    album_names = []

    # Gather the tracks of each album.
    # NOTE(review): only the 'items' of the album payload are read here —
    # confirm whether very long albums need their track list paginated.
    for album in albums:
        for t in sp.album(album['uri'])['tracks']['items']:
            uris.append(t['uri'])
            track_names.append(t['name'])
            album_names.append(album['name'])

    # Request audio features in batches of 100 uris. Stepping by the batch
    # size never produces an empty trailing batch.
    features = []
    for start in range(0, len(uris), 100):
        batch = uris[start:start + 100]
        fs = list(sp.audio_features(batch) or [])
        fs += [None] * (len(batch) - len(fs))
        # Keep exactly one entry per track so the rows stay aligned with
        # track_names; a missing feature set becomes an empty (NaN) row
        # instead of discarding the whole batch.
        features.extend(f if f is not None else {} for f in fs)
    # Raw payload dump kept from the original cell for inspection
    print(features)

    # Turn the collected features into a pandas DataFrame
    dat = pd.DataFrame(features)
    dat['track_name'] = track_names
    dat['album'] = album_names
    dat['artists'] = artist_name

    dat.set_index('track_name', inplace=True)
    dat.drop_duplicates(inplace=True)
    dat = dat[~dat.index.duplicated(keep='first')]

    return dat

In [None]:
# Fetch the songs of an artist and keep the resulting DataFrame in a variable
canciones_de_artista = get_spotify_data('Carlos Vives')

In [None]:
# Inspect the result: number of rows and the available columns
print(canciones_de_artista.shape[0], "canciones")
print(canciones_de_artista.columns)


In [None]:
# Show the last two rows
canciones_de_artista.tail(2)

In [None]:
# Export the DataFrame to Excel.
# The track names live in the index (get_spotify_data sets it to
# 'track_name'), so the index must be written or the sheet loses them.
canciones_de_artista.to_excel(r'canciones.xlsx', sheet_name='canciones', index=True)

# Canciones en una playlist

**Awesome!!!**

Como era de esperar, podemos ver que para cada pista hemos obtenido las propiedades acústicas deseadas (junto con alguna información sobre su ubicación en la base de datos de Spotify). 

- Ahora ve a Spotify y averigua cuál es tu usuario
https://www.spotify.com/us/account/overview/



In [None]:
mi_username = "1226425134"  # TIP: try the user "spotify", it has great playlists!

**Encuentra el ID del playlist que quieres analizar entre todos tus playlists**
La siguiente función imprime tus playlists junto con el ID de cada una; copia al portapapeles el ID de la playlist que quieres analizar.

In [None]:
def get_user_playlist(username=mi_username, sp=None):
    """Print id, track count and name of each playlist of ``username``.

    Args:
        username: Spotify user id whose playlists are listed.
        sp: Optional spotipy client. When omitted a client is created on
            each call, instead of once at definition time as a default
            argument would do.
    """
    if sp is None:
        sp = get_spotify_credentials()

    playlists = sp.user_playlists(username)  # first page of playlists
    # Print every page, including the last (or only) one, before asking
    # for the next; stop once there is no further page.
    while playlists:
        for playlist in playlists['items']:
            print("ID: {} \t Canciones {}\t  Nombre: {} ".
                  format(playlist['id'], playlist['tracks']['total'], playlist['name']))
        playlists = sp.next(playlists) if playlists['next'] else None


get_user_playlist()

In [None]:
# A function can also fetch the data of a user's playlist.
def get_spotify_playlist_data(username, playlist_id):
    """Return one row per playlist track with metadata and audio features.

    Args:
        username: Spotify user id passed to ``sp.user_playlist``.
        playlist_id: Id of the playlist to read.

    Returns:
        A ``pandas.DataFrame`` indexed by ``track_name``. Columns are the
        track fields (``album`` reduced to its name, ``artists`` joined with
        ``', '``) followed by the audio-feature fields. When a column name or
        a track name appears more than once only the first occurrence is
        kept. Tracks without audio features keep NaN in those columns.
    """
    track_number_limit = 1000  # stop paging once this many entries are loaded

    # Log in again by reusing the shared helper
    sp = get_spotify_credentials()

    results = sp.user_playlist(username, playlist_id, fields="tracks,next")['tracks']

    # Uncomment to inspect the raw payload returned by the API:
    # print(json.dumps(results, indent=4))

    tracks = list(results['items'])
    while results['next'] and len(tracks) < track_number_limit:
        results = sp.next(results)
        tracks.extend(results['items'])

    ts = []
    track_names = []

    for t in tracks:
        track = t.get('track') if t else None
        if track is None:
            # Entries can arrive without a track payload; skip them rather
            # than crash on the lookups below.
            continue
        track['album'] = track['album']['name']
        track_names.append(track['name'])
        track['artists'] = ', '.join(a['name'] for a in track['artists'])
        ts.append(track)

    dat = pd.DataFrame(ts)

    # errors='ignore' tolerates payloads that lack some of these fields
    dat.drop(['available_markets', 'disc_number', 'external_ids', 'external_urls'],
             axis=1, inplace=True, errors='ignore')

    # Request audio features in batches of 100 uris. Stepping by the batch
    # size never produces an empty trailing batch.
    uris = dat['uri'].tolist() if 'uri' in dat.columns else []
    features = []
    for start in range(0, len(uris), 100):
        batch = uris[start:start + 100]
        fs = list(sp.audio_features(batch) or [])
        fs += [None] * (len(batch) - len(fs))
        # One entry per track keeps the positional alignment that the
        # column-wise concat below relies on.
        features.extend(f if f is not None else {} for f in fs)

    dat = pd.concat([dat, pd.DataFrame(features)], axis=1)
    dat['track_name'] = track_names

    dat.set_index('track_name', inplace=True)
    dat = dat[~dat.index.duplicated(keep='first')]
    # Drop repeated column names directly on the columns; unlike a double
    # transpose this keeps each column's dtype.
    dat = dat.loc[:, ~dat.columns.duplicated(keep='first')]

    return dat

In [None]:
miplaylist = get_spotify_playlist_data(username=mi_username, playlist_id="2bjTzirqqt8U634c6CXMqY")
# Look at the format of the API response: it is JSON, a structure similar to a dictionary.

In [None]:
miplaylist

In [None]:
miplaylist.columns

**Pasemos esto a Excel y luego a Tableau y hagamos un Dashboard de nuestro Playlist**

In [None]:
# The track names live in the index (get_spotify_playlist_data sets it to
# 'track_name'), so the index must be written or the sheet loses them.
miplaylist.to_excel(r'miplaylist.xlsx', sheet_name='miplaylist', index=True)

## Below you can find the explanation of each feature (copied from the Spotify website).

**Acousticness:** A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.

**Danceability:** Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable.

**Energy:** Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.

**Instrumentalness:** Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.

**Liveness:** Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides a strong likelihood that the track is live.

**Loudness:** The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track and are useful for comparing relative loudness of tracks. Loudness is the quality of a sound that is the primary psychological correlate of physical strength (amplitude). Values typically range between -60 and 0 dB.

**Speechiness:** Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audiobook, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks.

**Valence:** A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).

**Tempo:** The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece and derives directly from the average beat duration.

In [None]:
# Check the data type of each column
miplaylist.dtypes

In [None]:
# Cast the columns we are going to analyse to float, one column at a time
columnas_numericas = [
    'duration_ms', 'popularity', 'danceability', 'energy', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'valence', 'tempo',
]
for columna in columnas_numericas:
    miplaylist[columna] = miplaylist[columna].astype(float)

In [None]:
# List of the columns we want to summarise
columnas = ['valence', 'speechiness', 'instrumentalness', 'energy', 'danceability', 'acousticness']

# Compute the mean (not the median) of every listed column
miplaylist_mean_series = miplaylist[columnas].mean()
miplaylist_mean_series

In [None]:
# Pick a plot style; gallery of styles: https://matplotlib.org/3.5.1/gallery/style_sheets/style_sheets_reference.html
matplotlib.style.use('fast') 

In [None]:

# Bar chart of the mean of each selected feature
miplaylist_mean_series.plot(kind='bar')

In [None]:
# Turn the Series into a DataFrame
mean_dataframe = miplaylist_mean_series.to_frame()
mean_dataframe

In [None]:
# Build the frame of means and flip it so each feature is a column
mean_dataframe = miplaylist_mean_series.to_frame().T
# Tag the row with its playlist name so it can be compared with others
mean_dataframe['playlist'] = "TumpaTumpa"

mean_dataframe

In [None]:
# Histograms of two columns, one call per column

miplaylist.hist(column='popularity', bins=12, grid=True, figsize=(6,4), color='#1DB954', zorder=2, rwidth=0.9)
miplaylist.hist(column='danceability', bins=12, grid=True, figsize=(6,4), color='red', zorder=2, rwidth=0.9)


In [None]:
# Overlay several histograms with seaborn.
# histplot(kde=True, stat="density") replaces the deprecated sns.distplot:
# it draws a density-normalised histogram plus a KDE curve.

sns.histplot(miplaylist['valence'], kde=True, stat="density", label='Valence')
sns.histplot(miplaylist['danceability'], kde=True, stat="density", label='Danceability')
sns.histplot(miplaylist['energy'], kde=True, stat="density", label='Energy')


plt.ylabel("Frecuencia Relativa")
plt.legend()

plt.show()

In [None]:
# A scatter plot needs two variables to compare.

miplaylist.plot.scatter(  x='valence',
                          y='danceability',
                          figsize=(4, 4),
                      )

In [None]:
# A scatter plot can show a third variable: here 'popularity' is mapped to colour
miplaylist.plot.scatter(  x='valence',
                                y='danceability',
                                c='popularity',
                                colormap='viridis',
                                figsize=(5, 5),
                              )


In [None]:
# Box plots of five audio features in a single figure
miplaylist.boxplot(column=['valence', 'danceability', 'energy', 'speechiness', 'acousticness', ],figsize=(15,3));


In [None]:
# Build a DataFrame from another playlist to compare against

otroplaylist = get_spotify_playlist_data(username=mi_username, playlist_id="6KtVJuDqXNFXEj5fvqRy3J")
otroplaylist.head(4)

In [None]:
# astype() also accepts a {column: dtype} dictionary, which converts
# several columns in a single call.

columnas_tipos = dict.fromkeys(
    [
        'duration_ms',
        'popularity',
        'danceability',
        'energy',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'valence',
        'tempo',
    ],
    float,
)

otroplaylist = otroplaylist.astype(columnas_tipos)

In [None]:
# Compute the column means again, as done for the previous playlist.
# Reuses the `columnas` list defined earlier:
# ['valence', 'speechiness', 'instrumentalness', 'energy', 'danceability', 'acousticness']

# One line is enough: mean of the listed columns, convert to a frame, transpose.
mean_otroplaylist_dataframe = otroplaylist[columnas].mean().to_frame().transpose()

# Add the playlist column so the rows can be compared
mean_otroplaylist_dataframe['playlist'] = "Boleros"

mean_otroplaylist_dataframe

In [None]:
# Recall the previous DataFrame of means
mean_dataframe

In [None]:
# Unimos los dataframes

In [None]:
# Stack both single-row mean frames into one DataFrame
all_means = pd.concat([mean_dataframe,mean_otroplaylist_dataframe])
all_means

In [None]:
# The row index should be the playlist name
all_means = all_means.set_index('playlist')

In [None]:
# Grouped bar chart: one group per playlist, one bar per feature
all_means.plot(kind='bar')
plt.legend(loc='upper left', ncol=1, title="Caracteristica", bbox_to_anchor=(1.0, 0.5))
plt.title("Medias de las caracteristicas musicales")
plt.xlabel("Playlist")
plt.ylabel("Media")
# plt.show() renders the figure; the original plt.plot() drew nothing
plt.show()

In [None]:
all_means

In [None]:
# Same comparison transposed: one group per feature, one bar per playlist
all_means.transpose().plot(kind='bar')
plt.legend(loc='upper left', ncol=1, title="Caracteristica", bbox_to_anchor=(1.0, 0.5))
# plt.show() renders the figure; the original plt.plot() drew nothing
plt.show()

In [None]:
# Compare the danceability distribution of both playlists.
# histplot(kde=True, stat="density") replaces the deprecated sns.distplot.
sns.histplot(miplaylist['danceability'], kde=True, stat="density", label='TumpaTumpa')
sns.histplot(otroplaylist['danceability'], kde=True, stat="density", label='Sad Boleros')


plt.ylabel("Frecuencia Relativa")
plt.legend()

plt.show()

In [None]:
# Compare the energy distribution of both playlists.
# histplot(kde=True, stat="density") replaces the deprecated sns.distplot.
sns.histplot(miplaylist['energy'], kde=True, stat="density", label='TumpaTumpa')
sns.histplot(otroplaylist['energy'], kde=True, stat="density", label='Bolero')


plt.ylabel("Frecuencia Relativa")
plt.legend()

plt.show()

# Ejercicio
Unifica todos los **playlists** en un único playlist llamado **allplaylist** con una nueva columna que tenga el nombre del playlist

In [None]:
# Exercise solution: tag the rows of each playlist with its name and stack
# them. The bare "allplaylist =" was a syntax error that stopped every
# following cell from running.
# reset_index() turns the track_name index into a regular column, so the
# combined frame has a unique row index even when both playlists share a
# track name.
allplaylist = pd.concat([
    miplaylist.assign(playlist="TumpaTumpa"),
    otroplaylist.assign(playlist="Boleros"),
]).reset_index()

In [None]:

# Valence distribution of each playlist, one box per playlist
sns.boxplot(data=allplaylist, x='playlist', y='valence')

plt.show()

In [None]:
# Horizontal box plot of popularity, one box per playlist
ax = sns.boxplot(
    data=allplaylist,
    x="popularity",
    y="playlist",
    orient="h",
)

In [None]:
# pandas box plot of popularity grouped by playlist
allplaylist.boxplot(column=['popularity', ], by='playlist', figsize=(10,6),  fontsize=12);
plt.show()

In [None]:

# Box plots of four audio features, each grouped by playlist
allplaylist.boxplot(column=['energy', 'acousticness','valence', 'danceability' ], by='playlist', figsize=(30,9) );
plt.show()

In [None]:
# A scatter plot can show a third variable (popularity as colour).
# `by` is not a scatter option, so the per-playlist split is done
# explicitly: one panel per playlist. The axes are created first and
# handed to pandas, otherwise pandas opens its own figure and a separate
# plt.figure(...) stays empty.
grupos = list(allplaylist.groupby('playlist'))
fig, axes = plt.subplots(1, len(grupos), figsize=(6 * len(grupos), 6),
                         dpi=120, squeeze=False)

for ax, (nombre, grupo) in zip(axes[0], grupos):
    grupo.plot.scatter(x='valence',
                       y='danceability',
                       c='popularity',
                       colormap='viridis',
                       ax=ax,
                       title=nombre)

In [None]:
# Open a large figure; the seaborn scatter below is drawn after it
plt.figure(figsize=(10,10), dpi=120)

# Energy vs. popularity, with colour and marker style set by playlist
sns.scatterplot(x="energy", y="popularity", data=allplaylist, hue="playlist", style="playlist")
