# Análisis de éxitos nuevos y éxitos históricos
### Canciones número uno de Billboard comparadas con los éxitos actuales de Spotify

## Importamos librerías

In [1]:
import requests
import base64
import re
from urllib.parse import urlencode
import pandas as pd
from bs4 import BeautifulSoup
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import numpy as np

# Definimos todas nuestras funciones

In [2]:
# Load the Spotify API credentials from two local text files.
# Each file is opened in a context manager so its handle is always closed, and
# the first line is stripped so a trailing newline (or stray whitespace) never
# ends up inside the credential that is later sent to the API.
with open('../../spoti_client.txt', 'r') as client_id_file:
    client_ID = client_id_file.readline().strip()

with open('../../spoti_secret.txt', 'r') as client_secret_file:
    client_secret = client_secret_file.readline().strip()

In [3]:
def spoti_authorization(client_ID,client_secret):
    """Request an access token from Spotify and wrap it in a request header.

    The client id and secret are joined as ``id:secret``, base64-encoded and
    sent as an HTTP Basic ``Authorization`` header in a POST to the Spotify
    token endpoint with ``grant_type=client_credentials``.

    The original notes on this function say it can be re-run every hour
    (presumably because the token expires -- confirm against the Spotify docs).

    Args:
        client_ID: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        A dict ``{'authorization': 'Bearer <token>'}`` meant to be passed as
        ``headers`` on every direct Web API request.

    Raises:
        requests.HTTPError: if the token endpoint answers with a 4xx/5xx status.
        KeyError: if a successful response carries no ``access_token`` field.
    """
    client_creds = f"{client_ID}:{client_secret}"
    client_creds_base64 = base64.b64encode(client_creds.encode()).decode()
    token_url = 'https://accounts.spotify.com/api/token?grant_type=client_credentials'
    token_data = {'grant_type':'client_credentials'}
    token_headers = {'Authorization':f'Basic {client_creds_base64}'}
    r = requests.post(token_url, data=token_data, headers=token_headers)
    # Fail loudly on bad credentials / service errors instead of hitting an
    # opaque KeyError on 'access_token' further down.
    r.raise_for_status()
    data = r.json()
    return {'authorization':f'Bearer {data["access_token"]}'}

In [4]:
def wrap_authorization(client_ID,client_secret):
    """Build a spotipy client authenticated with the client-credentials manager.

    Args:
        client_ID: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        A ``spotipy.Spotify`` instance configured with a
        ``SpotifyClientCredentials`` manager for the given id and secret.
    """
    return spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=client_ID,
            client_secret=client_secret,
        )
    )

In [5]:
def _split_title_artist(entry):
    """Split a cleaned '<title> by <artist>' heading into (title, artist).

    The LAST ' by ' is used as the separator so titles that themselves
    contain "by" are kept whole. If no separator is present the whole text is
    returned as the title with an empty artist.
    """
    title, separator, artist = entry.rpartition(' by ')
    if not separator:
        return entry, ''
    return title, artist


def billboard_scrap ():
    """Scrape the Billboard ranking published on stacker.com.

    Pages 6 through 10 of the story are downloaded and every
    ``div.slideshow-slide__title > h2`` heading is parsed. Ranks are assigned
    by counting down from 50 in the order the headings are encountered
    (presumably 10 headings per page, ending at rank 1 -- not verifiable here).

    Returns:
        A DataFrame with columns ``rank``, ``title`` and ``artist``, sorted by
        ascending rank.

    Raises:
        requests.HTTPError: if any page answers with a 4xx/5xx status.
    """
    songs_list = []
    rank = 51
    for page in range(6, 11):
        url_billb = f'https://stacker.com/stories/3384/songs-dominated-billboard-charts-longest?page={page}'
        response = requests.get(url_billb)
        # Do not silently parse an error page into zero songs.
        response.raise_for_status()
        music_soup = BeautifulSoup(response.content,'html5')
        headings = music_soup.select('div[class="slideshow-slide__title"]>h2')
        for heading in headings:
            rank -= 1
            # Drop the leading "#<n>. " marker and any apostrophes.
            song = re.sub(r'\#\d*. ' ,'',heading.text)
            song = re.sub(r'\'','',song)
            title, artist = _split_title_artist(song)
            songs_list.append([rank, title, artist])
    # Sort once after collecting everything instead of on every iteration.
    songs_list.sort()
    return pd.DataFrame(songs_list,columns=['rank','title','artist'])

In [6]:
def search_billboard_songs(billboard_df,sp):
    """Look up every scraped Billboard song on Spotify.

    For each row the query is the song title followed by the first word of the
    artist; the first track returned by ``sp.search`` is kept. Songs whose
    search returns no tracks are skipped instead of raising ``IndexError``.

    Args:
        billboard_df: DataFrame with ``title`` and ``artist`` string columns.
        sp: object exposing ``search(query)`` that returns a dict shaped like
            ``{'tracks': {'items': [...]}}`` (e.g. a spotipy client).

    Returns:
        A DataFrame indexed by the track ``id`` with columns ``name``,
        ``artists`` and ``popularity``. It is empty (same columns) when no
        song could be matched.
    """
    wanted_columns = ['name', 'artists', 'popularity']
    df_list = []
    for title, artist in zip(billboard_df['title'], billboard_df['artist']):
        busca = sp.search(title + ' ' + artist.split(' ')[0])
        items = busca['tracks']['items']
        if not items:
            # No hit for this song: nothing to take the first element from.
            continue
        df_list.append(pd.json_normalize(items[0]))
    if not df_list:
        # pd.concat([]) raises, so build the empty result explicitly.
        return pd.DataFrame(columns=wanted_columns, index=pd.Index([], name='id'))
    return pd.concat(df_list).set_index('id')[wanted_columns]

In [7]:
def get_audio_features (songs,headers):
    """Fetch the Spotify audio features of each track id.

    One GET request per id is sent to the ``audio-features`` endpoint. The
    function is used twice in this notebook (Billboard and Spotify charts).

    Args:
        songs: iterable of Spotify track ids.
        headers: request headers carrying the authorization token.

    Returns:
        A DataFrame with one row per distinct track id, indexed by the ``id``
        field of the responses. An empty frame (index named ``id``) is
        returned when ``songs`` is empty.

    Raises:
        requests.HTTPError: if any request answers with a 4xx/5xx status.
    """
    songs_features = {}
    for track in songs:
        response = requests.get(f'https://api.spotify.com/v1/audio-features/{track}', headers=headers)
        # An error payload has no 'id' field and would corrupt the table below.
        response.raise_for_status()
        songs_features[track] = response.json()
    if not songs_features:
        # Nothing to transpose / index: avoid the KeyError on 'id'.
        return pd.DataFrame(index=pd.Index([], name='id'))
    return pd.DataFrame(songs_features).T.set_index('id')

In [8]:
def join_songs_features(df_songs,df_features,chart_type):
    """Attach the audio features to the songs table and tag the chart of origin.

    Args:
        df_songs: songs DataFrame (left side of the index join).
        df_features: features DataFrame sharing the same index values.
        chart_type: value written to every row of the ``Chart_Type`` column.

    Returns:
        A new DataFrame: ``df_songs`` joined with ``df_features`` on the index,
        plus the ``Chart_Type`` column. The inputs are not modified.
    """
    return df_songs.join(df_features).assign(Chart_Type=chart_type)

In [9]:
def busca_top50spotify(auth):
    """Return the tracks of Spotify's "top 50 global" playlist.

    Searches playlists for ``top 50 global`` (at most 5 results), keeps the
    first one whose owner id is ``spotifycharts``, downloads its tracks and
    orders them by descending popularity.

    Args:
        auth: request headers carrying the authorization token.

    Returns:
        A DataFrame indexed by ``track.id`` with columns ``track.name``,
        ``track.album.artists`` and ``track.popularity``.
    """
    base_url = "https://api.spotify.com/v1/"
    params = urlencode({'q':'top 50 global','type':'playlist','limit':5})
    search_response = requests.get(f'{base_url}search?{params}', headers=auth)

    # Flatten the search hits and keep only playlists owned by 'spotifycharts'.
    candidates = pd.json_normalize(search_response.json()['playlists']['items'])
    official = candidates.loc[candidates['owner.id']=='spotifycharts']
    playlist_id = official.iloc[0]["id"]

    tracks_response = requests.get(f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks', headers=auth)
    tracks = pd.json_normalize(tracks_response.json()['items'])
    ranked = tracks.sort_values(['track.popularity'],ascending=False)

    selected = ['track.id', 'track.name', 'track.album.artists','track.popularity']
    return ranked[selected].set_index('track.id')

# Funciones de limpieza

In [10]:
def columns_clean(df):
    """Drop the unwanted feature columns and apply the final column names.

    The frame is modified in place: five metadata columns are removed and the
    16 remaining columns are renamed by position.

    Args:
        df: joined songs/features DataFrame; after the drop it must have
            exactly 16 columns in the order of the names assigned below.

    Returns:
        The same DataFrame object that was passed in.
    """
    df.drop(
        columns=['type', 'uri','track_href', 'analysis_url', 'time_signature'],
        inplace=True,
    )
    # Positional rename: the order of this list must match the column order.
    df.columns = [
        'Track_Name', 'Artists', 'Popularity', 'Danceability',
        'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness', 'Acousticness',
        'Instrumentalness', 'Liveness', 'Valence', 'Tempo', 'Duration',
        'Chart_Type',
    ]
    return df

In [11]:
# Build a single string with the artist names found in this column's list of dicts
def artist_extract(list_dict):
    """Join the ``name`` of every artist dict into one ' - '-separated string.

    Args:
        list_dict: iterable of dicts, each with a ``name`` key.

    Returns:
        The names joined with ``' - '``; an empty string for empty input.
    """
    names = (entry['name'] for entry in list_dict)
    return ' - '.join(names)

In [12]:
def data_to_numeric (df):
    """Cast every audio-feature column to ``float64`` so it can be analysed.

    The previous version returned from inside the loop, so only the first
    column (``Danceability``) was ever converted; all twelve are cast now.

    Args:
        df: DataFrame that contains the twelve feature columns listed below.

    Returns:
        The same DataFrame object, modified in place.
    """
    numeric_cols = ['Danceability',
           'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness', 'Acousticness',
           'Instrumentalness', 'Liveness', 'Valence', 'Tempo', 'Duration']
    df[numeric_cols] = df[numeric_cols].astype('float64')
    return df

In [13]:
def seconds_convertion (df):
    """Convert the ``Duration`` column from milliseconds to seconds in place.

    Args:
        df: DataFrame with a ``Duration`` column; it is overwritten with the
            original values divided by 1000.

    Returns:
        None.
    """
    duration_ms = df['Duration']
    df['Duration'] = duration_ms / 1000

In [14]:
def data_cleansing(df):
    """Run the whole clean-up sequence on a joined songs/features DataFrame.

    Steps: rename/drop columns, cast the feature columns, turn ``Duration``
    from milliseconds into seconds and flatten the ``Artists`` lists of dicts
    into readable strings.

    Args:
        df: joined songs/features DataFrame as produced by
            ``join_songs_features``.

    Returns:
        The cleaned DataFrame.
    """
    cleaned = data_to_numeric(columns_clean(df))
    # The two column rewrites below are independent of each other.
    cleaned['Duration'] = cleaned['Duration'] / 1000
    cleaned['Artists'] = cleaned['Artists'].apply(artist_extract)
    return cleaned

## Creamos un pipeline que corra todas las funciones para el proceso de Billboard

In [15]:
def billboard_pipeline(client_ID,client_secret):
    """Run the full Billboard pipeline and save the result to disk.

    Authenticates against the API and the spotipy wrapper, scrapes the
    Billboard list, looks the songs up on Spotify, fetches their audio
    features, joins and cleans everything, and writes the result with
    ``to_csv('Billboard_output')``.

    Args:
        client_ID: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        The cleaned Billboard DataFrame (the same data that is written out).
    """
    # Credentials for direct API calls, then for the spotipy wrapper.
    api_headers = spoti_authorization(client_ID,client_secret)
    spotify_client = wrap_authorization(client_ID,client_secret)

    # Scrape the chart and match every entry to a Spotify track.
    chart = billboard_scrap()
    matched_tracks = search_billboard_songs(chart,spotify_client)

    # Audio features are fetched by Spotify track id (the frame's index).
    features = get_audio_features(matched_tracks.index,api_headers)

    # Join, tag with the chart name, and clean.
    result = data_cleansing(
        join_songs_features(matched_tracks,features,'Billboard_chart')
    )

    # Persist as CSV and hand the frame back for further inspection.
    result.to_csv('Billboard_output')
    return result

In [16]:
def spotify_pipeline(client_ID,client_secret):
    """Run the full Spotify top-50 pipeline and save the result to disk.

    Authenticates against the API, downloads the top-50 playlist tracks,
    fetches their audio features, joins and cleans everything, and writes the
    result with ``to_csv('Spotify_output')``.

    Args:
        client_ID: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        The cleaned Spotify DataFrame (the same data that is written out).
    """
    api_headers = spoti_authorization(client_ID,client_secret)

    # Playlist tracks, indexed by Spotify track id.
    playlist_tracks = busca_top50spotify(api_headers)
    features = get_audio_features(playlist_tracks.index,api_headers)

    # Join, tag with the chart name, and clean.
    result = data_cleansing(
        join_songs_features(playlist_tracks,features,'Spotify_chart')
    )

    # Persist as CSV and hand the frame back for further inspection.
    result.to_csv('Spotify_output')
    return result

In [17]:
def get_data(client_ID,client_secret):
    """Fetch both datasets: the Spotify chart first, then the Billboard chart.

    Args:
        client_ID: Spotify application client id.
        client_secret: Spotify application client secret.

    Returns:
        A ``(spotify_df, billboard_df)`` tuple of cleaned DataFrames.
    """
    # Tuple elements are evaluated left to right, so Spotify runs first.
    return (
        spotify_pipeline(client_ID,client_secret),
        billboard_pipeline(client_ID,client_secret),
    )

In [18]:
# Run both pipelines; the result is a (spotify_df, billboard_df) tuple
df_list = get_data(client_ID=client_ID, client_secret=client_secret)

In [19]:
# Unpack the two DataFrames so each one can be inspected on its own
spotify_df, billboard_df = df_list

In [20]:
# Preview the first five rows of the Spotify chart
spotify_df.head(5)

Unnamed: 0_level_0,Track_Name,Artists,Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Duration,Chart_Type
track.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
3tjFYV6RSFtuktYl3ZtYcq,Mood (feat. iann dior),24kGoldn - iann dior,100,0.7,0.722,7,-3.558,0,0.0369,0.221,0.0,0.272,0.756,90.989,140.526,Spotify_chart
47EiUVwUp4C9fGccaPuUCS,Dakiti,Bad Bunny - Jhay Cortez,99,0.731,0.573,4,-10.059,0,0.0544,0.401,5.22e-05,0.113,0.145,109.928,205.09,Spotify_chart
0t1kP63rueHleOhQkYSXFY,Dynamite,BTS,97,0.746,0.765,6,-4.41,0,0.0993,0.0112,0.0,0.0936,0.737,114.044,199.054,Spotify_chart
0VjIjW4GlUZAMYd2vXMi3b,Blinding Lights,The Weeknd,96,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.54e-05,0.0897,0.334,171.005,200.04,Spotify_chart
4Oun2ylbjFKMPTiaSbbCih,WAP (feat. Megan Thee Stallion),Cardi B - Megan Thee Stallion,96,0.935,0.454,1,-7.509,1,0.375,0.0194,0.0,0.0824,0.357,133.073,187.541,Spotify_chart


In [21]:
# Preview the first five rows of the Billboard chart
billboard_df.head(5)

Unnamed: 0_level_0,Track_Name,Artists,Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Duration,Chart_Type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2YpeDb67231RjR0MgVLzsG,Old Town Road - Remix,Lil Nas X - Billy Ray Cyrus,83,0.878,0.619,6,-5.56,1,0.102,0.0533,0.0,0.113,0.639,136.041,157.067,Billboard_chart
6habFhsOp2NvshLv26DqMb,Despacito,Luis Fonsi - Daddy Yankee,79,0.655,0.797,2,-4.787,1,0.153,0.198,0.0,0.067,0.839,177.928,229.36,Billboard_chart
7ySbfLwdCwl1EM0zNCJZ38,One Sweet Day,Mariah Carey - Boyz II Men,69,0.568,0.495,1,-8.964,1,0.0299,0.353,0.0,0.0839,0.303,128.234,281.067,Billboard_chart
2GhA8OAODRkEMUq8RYgCeF,Macarena (Bayside Boys Mix) - (Tribute to Los ...,Studio Allstars,0,0.837,0.514,1,-14.883,1,0.116,0.0539,0.000112,0.0999,0.676,104.076,223.893,Billboard_chart
3LmvfNUQtglbTrydsdIqFU,We Belong Together,Mariah Carey,70,0.837,0.462,0,-7.89,1,0.0601,0.0339,0.0,0.09,0.762,139.986,201.4,Billboard_chart
