
Vamos a conectarnos a una API externa para procesar datos en vivo de Spotify. Usaremos *pandas* para serializar los datos y luego los exportaremos a Excel.



**Crea tu API**
1. Regístrate en Spotify para obtener tus credenciales de API:
https://developer.spotify.com/dashboard/
2. Crea una app y asígnale un nombre
3. Copia tus credenciales de Spotify: el Client ID y el Client Secret






In [None]:
# Install spotipy
# pip is the tool that downloads and installs external libraries you do not have yet
! pip install spotipy

In [None]:
# Libraries used throughout this notebook
# NOTE(review): sys, BeautifulSoup, requests and lxml are not used by any cell
# visible in this notebook - confirm whether they are still needed.
import spotipy
# NOTE(review): this client is created without credentials and is not referenced
# by any cell visible here; the functions below obtain their own client through
# get_spotify_credentials().
spotify = spotipy.Spotify()
import sys
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup
import requests
import lxml

In [None]:
# Store your Spotify app credentials in these variables.
# The values below are placeholders: replace them with the Client ID and
# Client Secret of your own app before running the following cells.
client_id = "xxxxxxx"
client_secret = "xxxxxx"

In [None]:
def get_spotify_credentials():
    """Build a Spotipy client from the notebook's app credentials.

    Reads the module-level ``client_id`` and ``client_secret`` variables,
    wraps them in a ``SpotifyClientCredentials`` manager and hands that
    manager to ``spotipy.Spotify``.

    Returns:
        The ``spotipy.Spotify`` client, with its ``trace`` attribute set to
        ``True``.
    """
    client = spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret,
        )
    )
    client.trace = True
    return client

In [None]:
def get_spotify_data(artist_name, sp=None):
    """Fetch the audio features of an artist's album tracks as a DataFrame.

    Searches for ``artist_name``, takes the first artist returned, walks
    every page of that artist's albums, collects the tracks listed in each
    album and requests their audio features in batches of at most 100 URIs.
    Rows whose album or title contains one of the skip words (live, remix,
    deluxe, ...) are removed and only the first row per track title is kept.

    Args:
        artist_name: Artist name used in the search query; it is also the
            value written to the ``artists`` column.
        sp: Optional client exposing ``search``, ``artist_albums``, ``next``,
            ``album`` and ``audio_features``. When omitted, a client is
            created with ``get_spotify_credentials()``.

    Returns:
        A DataFrame indexed by ``track_name`` holding one column per audio
        feature plus ``album`` and ``artists``. When no track has audio
        features, an empty DataFrame with the same index name is returned.

    Raises:
        ValueError: If the search does not return any artist.
    """
    album_skip_words = ('live', 'deluxe', 'remix', 'rmx', 'remastered')
    title_skip_words = ('remix', 'remastered', 'live', 'version')

    def mentions_any(text, words):
        # Plain case-insensitive substring test, so 'live' also matches
        # titles such as 'Alive'.
        lowered = text.lower()
        return any(word in lowered for word in words)

    if sp is None:
        sp = get_spotify_credentials()

    # Look the artist up; fail with a clear message instead of hitting an
    # unbound local variable when the search comes back empty.
    results = sp.search(q='artist:' + artist_name, type='artist')
    items = results['artists']['items']
    if not items:
        raise ValueError('No artist found for %r' % (artist_name,))
    uri = items[0]['uri']

    # Collect every page of albums
    results = sp.artist_albums(uri, album_type='album')
    albums = results['items']
    while results['next']:
        results = sp.next(results)
        albums.extend(results['items'])

    # Keep uri, title and album together so they cannot drift apart later.
    # NOTE(review): only the tracks embedded in the album payload are read;
    # confirm whether long albums need extra paging.
    tracks = []
    for album in albums:
        for t in sp.album(album['uri'])['tracks']['items']:
            tracks.append((t['uri'], t['name'], album['name']))

    # Request audio features 100 tracks at a time. Stepping through the list
    # never produces an empty batch, and tracks without features are skipped
    # one by one so the remaining rows keep their own title and album.
    rows = []
    for start in range(0, len(tracks), 100):
        batch = tracks[start:start + 100]
        fs = sp.audio_features([track_uri for track_uri, _, _ in batch]) or []
        for (_, track_name, album_name), feature in zip(batch, fs):
            if feature is None:
                continue
            row = dict(feature)
            row['track_name'] = track_name
            row['album'] = album_name
            row['artists'] = artist_name
            rows.append(row)

    if not rows:
        return pd.DataFrame(columns=['album', 'artists'],
                            index=pd.Index([], name='track_name'))

    # Turn the rows into a pandas DataFrame
    dat = pd.DataFrame(rows)

    # Drop live, deluxe, remix and remastered albums. A boolean Series is
    # used because an empty list would be read as a column selection.
    keep_album = pd.Series(
        [not mentions_any(s, album_skip_words) for s in dat['album']],
        index=dat.index, dtype=bool)
    dat = dat[keep_album]

    # Drop remix, remastered, live and alternate-version tracks
    keep_title = pd.Series(
        [not mentions_any(s, title_skip_words) for s in dat['track_name']],
        index=dat.index, dtype=bool)
    dat = dat[keep_title]

    dat = dat.set_index('track_name')
    dat = dat.drop_duplicates()
    dat = dat[~dat.index.duplicated(keep='first')]

    return dat

In [None]:
# Look up the songs of an artist and keep the result in a variable
canciones_de_fonseca = get_spotify_data('Fonseca')

In [None]:
# canciones_de_fonseca holds the songs returned for Fonseca; preview the first rows
canciones_de_fonseca.head()

In [None]:
# Export Fonseca's songs to Excel.
# get_spotify_data() stores the song titles in the DataFrame index (track_name),
# so the index is written as well; with index=False the sheet has no titles.
canciones_de_fonseca.to_excel(r'canciones_de_fonseca.xlsx', sheet_name='canciones', index=True)

In [None]:
# Inspect the columns we got back.
# The notebook only defines canciones_de_fonseca; canciones_de_drake does not exist.
canciones_de_fonseca.columns

**Awesome!!!**

Como era de esperar, podemos ver que para cada pista hemos obtenido las propiedades acústicas deseadas (junto con alguna información sobre su ubicación en la base de datos de Spotify). 

- Ahora ve a Spotify y averigua cuál es tu nombre de usuario:
https://www.spotify.com/us/account/overview/



In [None]:
# Spotify user name passed as `username` when the playlist is loaded below
mi_username = "1226425134"

In [None]:
# We can also define a function that fetches the data of a user's playlist.

def get_spotify_playlist_data(username='spotify', playlist=None, credentials_file=None, sp=None):
    """Fetch the tracks of a user's playlist, with audio features, as a DataFrame.

    Pages through the playlists of ``username`` until one whose name equals
    ``playlist`` (case-insensitive) is found, downloads its tracks, requests
    their audio features in batches of at most 100 URIs and joins both into
    one table. Rows whose album or title contains one of the skip words
    (live, deluxe, remix, ...) are removed and only the first row per track
    title is kept.

    Args:
        username: User whose playlists are listed.
        playlist: Name of the playlist to load. When ``None``, the name of
            the first playlist returned for the user is used.
        credentials_file: Unused; kept so existing callers keep working.
        sp: Optional client exposing ``user_playlists``, ``next``,
            ``user_playlist`` and ``audio_features``. When omitted, a client
            is created with ``get_spotify_credentials()``.

    Returns:
        A DataFrame indexed by ``track_name`` with the track fields followed
        by the audio-feature columns (duplicated column names are kept once),
        or ``None`` after printing a message when no matching playlist is
        found. Tracks without audio features keep their row with empty
        feature values.
    """
    # Stop requesting further pages once this many tracks were collected
    track_number_limit = 1000
    album_skip_words = ('live', 'deluxe', 'remix')
    title_skip_words = ('remix', 'remastered', 'version')

    def mentions_any(text, words):
        # Plain case-insensitive substring test
        lowered = text.lower()
        return any(word in lowered for word in words)

    # Log in again; reusing the helper avoids repeating the setup
    if sp is None:
        sp = get_spotify_credentials()

    # List the playlists of this user
    results = sp.user_playlists(username)

    # Default to the first playlist, if the user has any
    if playlist is None and results['items']:
        playlist = results['items'][0]['name']

    # Walk the pages until a playlist with the requested name shows up
    chosen = None
    if playlist is not None:
        wanted = playlist.lower()
        while True:
            for pl in results['items']:
                if pl['name'] is not None and pl['name'].lower() == wanted:
                    chosen = pl
                    break
            if chosen is not None or not results['next']:
                break
            results = sp.next(results)

    if chosen is None:
        print('No hay plalists')
        return None

    results = sp.user_playlist(chosen['owner']['id'], chosen['id'], fields="tracks,next")['tracks']
    tracks = list(results['items'])
    while results['next'] and len(tracks) < track_number_limit:
        results = sp.next(results)
        tracks.extend(results['items'])

    ts = []
    track_names = []
    for item in tracks:
        # Entries without a track payload cannot be analysed; skip them
        # instead of failing on item['track']['album'].
        if not item or item.get('track') is None:
            continue
        track = dict(item['track'])  # shallow copy: leave the API payload untouched
        track['album'] = track['album']['name']
        track['artists'] = ', '.join(a['name'] for a in track['artists'])
        track_names.append(track['name'])
        ts.append(track)

    if not ts:
        return pd.DataFrame(columns=['album', 'artists'],
                            index=pd.Index([], name='track_name'))

    dat = pd.DataFrame(ts)

    # These columns are not needed; tolerate payloads that lack some of them
    dat = dat.drop(columns=['available_markets', 'disc_number', 'external_ids', 'external_urls'],
                   errors='ignore')

    # Request audio features 100 tracks at a time. Every track gets exactly
    # one entry (an empty dict when it has no features), so row i of the
    # features table always belongs to row i of `dat`.
    features = []
    uris = dat['uri'].tolist()
    for start in range(0, len(uris), 100):
        batch = uris[start:start + 100]
        fs = list(sp.audio_features(batch) or [])[:len(batch)]
        fs.extend([None] * (len(batch) - len(fs)))
        features.extend(f if f is not None else {} for f in fs)

    fs = pd.DataFrame(features, index=dat.index)

    dat = pd.concat([dat, fs], axis=1)
    # Track fields and audio features share some column names; keep the first
    # occurrence. Selecting columns this way keeps each column's dtype.
    dat = dat.loc[:, ~dat.columns.duplicated(keep='first')]
    dat['track_name'] = track_names

    # Ignore live, remix and deluxe album versions. A boolean Series is used
    # because an empty list would be read as a column selection.
    keep_album = pd.Series(
        [not mentions_any(s, album_skip_words) for s in dat['album']],
        index=dat.index, dtype=bool)
    dat = dat[keep_album]

    # Ignore remix, remastered and alternate-version tracks
    keep_title = pd.Series(
        [not mentions_any(s, title_skip_words) for s in dat['track_name']],
        index=dat.index, dtype=bool)
    dat = dat[keep_title]

    dat = dat.set_index('track_name')
    dat = dat[~dat.index.duplicated(keep='first')]

    return dat

In [None]:
# Load the "favoritos2020" playlist of mi_username.
# get_spotify_playlist_data() returns None when no playlist with that name is found.
miplaylist = get_spotify_playlist_data(username=mi_username, playlist="favoritos2020")

In [None]:
# Preview the first rows of the playlist table
miplaylist.head()

In [None]:
# List the columns available in the playlist table
miplaylist.columns

In [None]:
# Show the 10 least danceable tracks (lowest `danceability` first).
# NOTE(review): the original comment called these the "most depressing" songs,
# but the sort key is danceability - confirm which measure is intended.
miplaylist.sort_values(by='danceability', ascending=True).head(10)

**Pasemos esto a Excel y luego a Tableau, y hagamos un Dashboard de nuestra Playlist**

In [None]:
# Export the playlist table to Excel.
# index=False leaves the track_name index out of the sheet; the titles are still
# exported through the `name` column that comes from the track payload.
miplaylist.to_excel(r'miplaylist.xlsx', sheet_name='miplaylist', index = False)