# Obteniendo metadata de las canciones utilizando Spotify APIs

## Definimos la función `get_info` que busca canciones en la base de datos de spotify a través del API `v1/search` utilizando nombre de la canción y album por default

### Al pasar el parámetro `strict=False`, busca solamente por artista. Esto solo se utiliza en el caso del conjunto de datos relativos a 2020, en donde se revisó una por una para ver que fueran las canciones correctas



In [6]:
import requests, json, logging
import pandas as pd

def get_info(song_name = '', artist_name = '', req_type = 'track',force=True):
    r = requests.post('https://accounts.spotify.com/api/token', headers = {'Authorization': 'Basic NDM0YmFiM2VhNmM2NDg2MmI3NmJkYWUwOTA0NmU2Njg6ZjFlZmFhZmM5MjA1NDFiYzkyZGNlMTk2MzBhZjk1NzE='}, data= {'grant_type': 'client_credentials'})
    token = 'Bearer {}'.format(r.json()['access_token'])
    headers = {'Authorization': token, "Accept": 'application/json', 'Content-Type': "application/json"}
    try:
        payload = {"q" : "artist:{} track:{}".format(artist_name, song_name), "type": req_type, "limit": "1"}
        res = requests.get('https://api.spotify.com/v1/search', params = payload, headers = headers)
        res = res.json()['tracks']['items'][0]
        year = res['album']['release_date'][:4]
        artist_id = res['artists'][0]['id']
        track_id = res['id']
        track_pop = res['popularity'] 
    except:
        print(f'Unable to get {song_name} from {artist_name}')
        if force:
            print(f'Trying {song_name} by only artist {artist_name}')
            payload = {"q" : "artist:{}".format(artist_name, song_name), "type": req_type, "limit": "1"}
            res = requests.get('https://api.spotify.com/v1/search', params = payload, headers = headers)
            res = res.json()['tracks']['items'][0]
            year = res['album']['release_date'][:4]
            artist_id = res['artists'][0]['id']
            track_id = res['id']
            track_pop = res['popularity'] 
        
            
        
    res = requests.get('https://api.spotify.com/v1/audio-analysis/{}'.format(track_id), headers = headers)
    res = res.json()['track']
    
    res2 = requests.get('https://api.spotify.com/v1/audio-features/{}'.format(track_id), headers = headers)
    res2 = res2.json()
    
    duration = res['duration']
    end_fade = res['end_of_fade_in']
    key = res['key']
    key_con = res['key_confidence']
    loud = res['loudness']
    mode = res['mode']
    mode_con = res['mode_confidence']
    start_fade = res['start_of_fade_out']
    temp = res['tempo']
    time_sig = res['time_signature']
    time_sig_con = res['time_signature_confidence']
    danceability=res2['danceability']
    energy=res2['energy']
    key=res2['key']
    loudness=res2['loudness']
    mode=res2['mode']
    speechiness=res2['speechiness']
    acousticness=res2['acousticness']
    instrumentalness=res2['instrumentalness']
    liveness=res2['liveness']
    valence=res2['valence']
    tempo=res2['tempo']
    duration_ms=res2['duration_ms']
    time_signature=res2['time_signature']
    
    res = requests.get('https://api.spotify.com/v1/artists/{}'.format(artist_id), headers = headers)
    artist_hot = res.json()['popularity']/100
    
    return {'duration': duration,
            'key': key,
            'loudness': loud,
            'mode': mode,
            'tempo': temp,
            'artist_hotttnesss': artist_hot,
            'end_of_fade_in': end_fade,
            'start_of_fade_out': start_fade,
            'mode_confidence': mode_con,
            'key_confidence': key_con,
            'time_signature': time_sig,
            'time_signature_confidence': time_sig_con,
            'year': year,
            'popularity': track_pop,
            'danceability':danceability,
            'energy':energy,
            'speechiness':speechiness,
            'acousticness':acousticness,
            'instrumentalness':instrumentalness,
            'liveness':liveness,
            'valence':valence,
            'tempo':tempo,
            'duration_ms':duration_ms,
           }

### Bucle para iterar sobre todos los archivos `20*.csv` es decir (2002.csv, 2003.csv...,2019.csv)

In [7]:
import glob
lista=[]
for filename in glob.iglob('20*.csv'):
    lista.append(pd.read_csv(filename))
    
todos=pd.concat(lista)
todos.reset_index(drop=True,inplace=True)
todos

Unnamed: 0,Draw,Country,Artist,Song,Language,Place,Points,Language(s)
0,1,Romania,Mădălina and Andrada,Salvați planeta!,Romanian,9.0,58.0,
1,2,Armenia,Monica,Im Ergi Hnchyune,Armenian,8.0,59.0,
2,3,Belarus,"Dasha, Alina & Karyna",Serdtse Belarusi,"Russian, Belarusian[22]",6.0,86.0,
3,4,Russia,Mihail Puntov,Spit angel,Russian,7.0,73.0,
4,5,Greece,Niki Yiannouchu,Kapoia nychta,Greek,14.0,19.0,
...,...,...,...,...,...,...,...,...
271,12,Belgium,Lindsay,Mes rêves,French,10.0,63.0,
272,13,Malta,Thea and Friends,Make It Right!,English,16.0,18.0,
273,14,Norway,Malin,Sommer og skolefri[a],Norwegian,3.0,123.0,
274,15,Spain,Antonio José,Te traigo flores,Spanish,2.0,146.0,


### Obtenemos 276 filas, pero terminaremos con muchas menos ya que un conjunto pequeño reducido de canciones se encuentra dentro de la biblioteca de Spotify (lo cual es necesario para llamar a su API y obtener los datos que nos interesan)

### Después de llamar a los APIs de Spotify, terminamos con nuestro conjunto de entrenamient

In [8]:
lista=[]
for index, row in todos.iterrows():
    try:
        dic = get_info(song_name=row["Song"],artist_name=row["Artist"])
        dic['Points']=row['Points']
        dic['Country']=row['Country']
        lista.append(pd.DataFrame(dic,index=[index]))
        
    except Exception as e:
        print(e,f"missing {row['Song']}")

Unable to get Salvați planeta! from Mădălina and Andrada
Trying Salvați planeta! by only artist Mădălina and Andrada
list index out of range missing Salvați planeta!
Unable to get Im Ergi Hnchyune from Monica
Trying Im Ergi Hnchyune by only artist Monica
Unable to get Serdtse Belarusi  from Dasha, Alina & Karyna
Trying Serdtse Belarusi  by only artist Dasha, Alina & Karyna
list index out of range missing Serdtse Belarusi 
Unable to get Spit angel  from Mihail Puntov
Trying Spit angel  by only artist Mihail Puntov
list index out of range missing Spit angel 
Unable to get Kapoia nychta  from Niki Yiannouchu
Trying Kapoia nychta  by only artist Niki Yiannouchu
list index out of range missing Kapoia nychta 
Unable to get Bzz.. from Bzikebi
Trying Bzz.. by only artist Bzikebi
list index out of range missing Bzz..
Unable to get Edna mechta  from Krastyana Krasteva
Trying Edna mechta  by only artist Krastyana Krasteva
list index out of range missing Edna mechta 
Unable to get Uvek kad u nebo 

list index out of range missing Fili gia panta
Unable to get Mia efhi from Theodora Rafti
Trying Mia efhi by only artist Theodora Rafti
list index out of range missing Mia efhi
Unable to get Tantsuy from Volha Satsiuk
Trying Tantsuy by only artist Volha Satsiuk
list index out of range missing Tantsuy
Unable to get Ti ne me poznavaš  from Marija & Viktorija
Trying Ti ne me poznavaš  by only artist Marija & Viktorija
list index out of range missing Ti ne me poznavaš 
Unable to get Coś mnie nosi from Katarzyna Żurawik
Trying Coś mnie nosi by only artist Katarzyna Żurawik
list index out of range missing Coś mnie nosi
Unable to get Sinnsykt gal forelsket from 2U
Trying Sinnsykt gal forelsket by only artist 2U
Unable to get Tobele sunt viața mea from Bubu
Trying Tobele sunt viața mea by only artist Bubu
Unable to get De vriendschapsband from X!NK
Trying De vriendschapsband by only artist X!NK
Unable to get My Song for the World from Tom Morley
Trying My Song for the World by only artist Tom 

Unable to get Mitt mod from Lova Sönnerbo
Trying Mitt mod by only artist Lova Sönnerbo
list index out of range missing Mitt mod
Unable to get Girls and Boys  from Omar and Suada
Trying Girls and Boys  by only artist Omar and Suada
list index out of range missing Girls and Boys 
Unable to get Abracadabra from Fabian
Trying Abracadabra by only artist Fabian
Unable to get Let the Music Win from Kids.il
Trying Let the Music Win by only artist Kids.il
list index out of range missing Let the Music Win
Unable to get Kam një këngë vetëm për ju from Igzidora Gjeta
Trying Kam një këngë vetëm për ju by only artist Igzidora Gjeta
list index out of range missing Kam një këngë vetëm për ju
Unable to get Sweetie Baby from Compass Band
Trying Sweetie Baby by only artist Compass Band
Unable to get Nebo  from Anastasiya Petryk
Trying Nebo  by only artist Anastasiya Petryk
list index out of range missing Nebo 
Unable to get Funky Lemonade from Funkids
Trying Funky Lemonade by only artist Funkids
Unable t

In [9]:
train=pd.concat(lista)
train.to_csv("training6.csv",index=False)
train

Unnamed: 0,duration,key,loudness,mode,tempo,artist_hotttnesss,end_of_fade_in,start_of_fade_out,mode_confidence,key_confidence,...,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,Points,Country
1,242.77333,8,-6.863,1,81.975,0.69,0.00000,230.35938,0.224,0.192,...,0.618,0.5520,0.0981,0.176000,0.000000,0.252,0.586,242773,59.0,Armenia
6,219.96263,7,-6.869,1,129.991,0.00,0.00000,207.69089,0.755,0.954,...,0.771,0.8700,0.0456,0.000592,0.827000,0.162,0.175,219963,45.0,Belgium
9,164.78517,0,-3.892,0,173.703,0.03,0.00000,164.78517,0.487,0.308,...,0.453,0.8880,0.0634,0.098300,0.000000,0.411,0.812,164785,100.0,Malta
10,195.61333,4,-6.824,1,98.162,0.52,0.15673,190.02630,0.541,0.379,...,0.572,0.3870,0.0269,0.561000,0.000000,0.114,0.193,195613,27.0,Netherlands
11,180.97633,0,-4.275,1,131.059,0.00,0.39533,174.81143,0.529,0.523,...,0.575,0.8710,0.0740,0.069100,0.000000,0.477,0.455,180976,135.0,Ukraine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
268,167.45332,6,-4.085,1,112.028,0.33,0.10730,163.57297,0.556,0.686,...,0.688,0.6880,0.0268,0.445000,0.000000,0.306,0.687,167453,82.0,Netherlands
271,193.04000,4,-4.160,1,156.133,0.61,3.00698,179.52509,0.440,0.309,...,0.540,0.6950,0.0656,0.167000,0.000062,0.110,0.667,193040,63.0,Belgium
272,217.79741,8,-3.324,1,144.953,0.04,0.00000,210.14640,0.395,0.348,...,0.763,0.8650,0.1010,0.169000,0.000000,0.691,0.859,217797,18.0,Malta
273,227.52100,9,-9.012,0,119.005,0.61,0.00000,216.30550,0.424,0.468,...,0.717,0.6950,0.0334,0.000465,0.000041,0.380,0.409,227521,123.0,Norway


## Ahora importaremos el CSV con la información de los participantes del 2020 (sin puntuación ya que el evento todavía no ha ocurrido, esto es lo que nos gustaría predecir).

In [5]:
df_2020=pd.read_csv("2020.csv")

In [6]:
lista2=[]
for index, row in df_2020.iterrows():
    try:
        dic = get_info(song_name=row["Song"],artist_name=row["Artist"],force=True)
        dic['Country']=row['Country']
        lista2.append(pd.DataFrame(dic,index=[index]))
        
    except Exception as e:
        print(e,f"missing {row['Artist'],row['Song']}")

Unable to get Forever from Karakat Bashanova
Trying Forever by only artist Karakat Bashanova
list index out of range missing ('Karakat Bashanova', 'Forever')
Unable to get Chasing Sunsets from Chanel Monseigneur
Trying Chasing Sunsets by only artist Chanel Monseigneur
list index out of range missing ('Chanel Monseigneur', 'Chasing Sunsets')
Unable to get Ill Be Standing from Ala Tracz
Trying Ill Be Standing by only artist Ala Tracz
Unable to get Heartbeat from Petar Aničić
Trying Heartbeat by only artist Petar Aničić
list index out of range missing ('Petar Aničić', 'Heartbeat')


### Finalmente terminamos con nuestro conjunto del año 2020 enriquecido por las APIs de Spotify (sin la columna de puntuación)

In [7]:
final=pd.concat(lista2)
pd.set_option('display.max_columns', 500)
final.to_csv('final.csv',index=False)
final

Unnamed: 0,duration,key,loudness,mode,tempo,artist_hotttnesss,end_of_fade_in,start_of_fade_out,mode_confidence,key_confidence,time_signature,time_signature_confidence,year,popularity,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,Country
0,181.99773,7,-9.342,0,95.001,0.23,0.0,178.39311,0.873,0.876,4,0.941,2020,35,0.565,0.563,0.0296,0.131,2e-06,0.253,0.114,181998,Belarus
1,179.98036,0,-3.938,1,113.932,0.26,0.0,173.35439,0.636,0.742,4,1.0,2020,38,0.758,0.647,0.0419,0.433,0.0,0.172,0.597,179980,France
2,182.60023,2,-7.322,1,113.981,0.17,0.24989,176.25687,0.687,0.619,4,0.783,2020,29,0.667,0.405,0.0292,0.547,0.0,0.192,0.329,182600,Germany
5,173.1683,6,-6.834,1,101.021,0.35,0.15116,168.11247,0.768,0.742,4,1.0,2020,47,0.611,0.623,0.0367,0.0569,4.5e-05,0.093,0.428,173168,Netherlands
6,180.54675,0,-2.799,1,122.028,0.31,0.0,173.8594,0.49,0.546,4,0.899,2020,43,0.523,0.851,0.0373,0.0148,1e-06,0.292,0.181,180547,Poland
7,177.33333,8,-6.671,1,180.02,0.15,0.0,169.28508,0.311,0.507,4,1.0,2020,26,0.258,0.499,0.0377,0.455,0.0,0.0773,0.428,177333,Russia
9,167.98611,6,-6.184,0,100.04,0.3,2.61805,162.60934,0.379,0.062,4,1.0,2020,42,0.744,0.574,0.167,0.0555,0.0,0.0817,0.353,167986,Spain
10,157.90765,11,-7.37,0,153.369,0.19,0.53991,152.74086,0.655,0.696,4,0.973,2020,31,0.359,0.497,0.0439,0.436,0.0,0.0787,0.328,157908,Ukraine
