# Obteniendo metadata de las canciones utilizando Spotify APIs

## Definimos la función `get_info` que busca canciones en la base de datos de spotify a través del API `v1/search` utilizando nombre de la canción y album por default

### Al pasar el parámetro `strict=False`, busca solamente por artista. Esto solo se utiliza en el caso del conjunto de datos relativos a 2020, en donde se revisó una por una para ver que fueran las canciones correctas



In [1]:
import requests, json, logging
import pandas as pd

def get_info(song_name = '', artist_name = '', req_type = 'track',force=False):
    r = requests.post('https://accounts.spotify.com/api/token', headers = {'Authorization': 'Basic NDM0YmFiM2VhNmM2NDg2MmI3NmJkYWUwOTA0NmU2Njg6ZjFlZmFhZmM5MjA1NDFiYzkyZGNlMTk2MzBhZjk1NzE='}, data= {'grant_type': 'client_credentials'})
    token = 'Bearer {}'.format(r.json()['access_token'])
    headers = {'Authorization': token, "Accept": 'application/json', 'Content-Type': "application/json"}
    try:
        payload = {"q" : "artist:{} track:{}".format(artist_name, song_name), "type": req_type, "limit": "1"}
        res = requests.get('https://api.spotify.com/v1/search', params = payload, headers = headers)
        res = res.json()['tracks']['items'][0]
        year = res['album']['release_date'][:4]
        artist_id = res['artists'][0]['id']
        track_id = res['id']
        track_pop = res['popularity'] 
    except:
        print(f'Unable to get {song_name} from {artist_name}')
        if force:
            print(f'Trying {song_name} by only artist {artist_name}')
            payload = {"q" : "artist:{}".format(artist_name, song_name), "type": req_type, "limit": "1"}
            res = requests.get('https://api.spotify.com/v1/search', params = payload, headers = headers)
            res = res.json()['tracks']['items'][0]
            year = res['album']['release_date'][:4]
            artist_id = res['artists'][0]['id']
            track_id = res['id']
            track_pop = res['popularity'] 
        
            
        
    res = requests.get('https://api.spotify.com/v1/audio-analysis/{}'.format(track_id), headers = headers)
    res = res.json()['track']
    
    res2 = requests.get('https://api.spotify.com/v1/audio-features/{}'.format(track_id), headers = headers)
    res2 = res2.json()
    
    duration = res['duration']
    end_fade = res['end_of_fade_in']
    key = res['key']
    key_con = res['key_confidence']
    loud = res['loudness']
    mode = res['mode']
    mode_con = res['mode_confidence']
    start_fade = res['start_of_fade_out']
    temp = res['tempo']
    time_sig = res['time_signature']
    time_sig_con = res['time_signature_confidence']
    danceability=res2['danceability']
    energy=res2['energy']
    key=res2['key']
    loudness=res2['loudness']
    mode=res2['mode']
    speechiness=res2['speechiness']
    acousticness=res2['acousticness']
    instrumentalness=res2['instrumentalness']
    liveness=res2['liveness']
    valence=res2['valence']
    tempo=res2['tempo']
    duration_ms=res2['duration_ms']
    time_signature=res2['time_signature']
    
    res = requests.get('https://api.spotify.com/v1/artists/{}'.format(artist_id), headers = headers)
    artist_hot = res.json()['popularity']/100
    
    return {'duration': duration,
            'key': key,
            'loudness': loud,
            'mode': mode,
            'tempo': temp,
            'artist_hotttnesss': artist_hot,
            'end_of_fade_in': end_fade,
            'start_of_fade_out': start_fade,
            'mode_confidence': mode_con,
            'key_confidence': key_con,
            'time_signature': time_sig,
            'time_signature_confidence': time_sig_con,
            'year': year,
            'popularity': track_pop,
            'danceability':danceability,
            'energy':energy,
            'speechiness':speechiness,
            'acousticness':acousticness,
            'instrumentalness':instrumentalness,
            'liveness':liveness,
            'valence':valence,
            'tempo':tempo,
            'duration_ms':duration_ms,
           }

### Bucle para iterar sobre todos los archivos `20*.csv` es decir (2002.csv, 2003.csv...,2019.csv)

In [2]:
import glob
lista=[]
for filename in glob.iglob('20*.csv'):
    lista.append(pd.read_csv(filename))
    
todos=pd.concat(lista)
todos.reset_index(drop=True,inplace=True)
todos

Unnamed: 0,Draw,Country,Artist,Song,Language(s),Language,Place,Points
0,1,Belarus,Арина Пехтерева,Пришельцы,"Russian, English",,,
1,2,France,Valentina,Jimagine,French,,,
2,4,Germany,Susan,Stronger with You,"German, English",,,
3,5,Kazakhstan,Karakat Bashanova,Forever,"Kazakh, English",,,
4,6,Malta,Chanel Monseigneur,Chasing Sunsets,English,,,
...,...,...,...,...,...,...,...,...
77,12,Malta,Gianluca Cilia,Dawra Tond,,"English, Maltese",9.0,107.0
78,13,Russia,Polina Bogusevich,Wings,,"Russian, English",1.0,188.0
79,14,Serbia,Irina Brodić & Jana Paunović,Ceo svet je naš,,Serbian,10.0,92.0
80,15,Australia,Isabella Clarke,Speak Up,,English,3.0,172.0


### Obtenemos 276 filas, pero terminaremos con muchas menos ya que un conjunto pequeño reducido de canciones se encuentra dentro de la biblioteca de Spotify (lo cual es necesario para llamar a su API y obtener los datos que nos interesan)

### Después de llamar a los APIs de Spotify, terminamos con nuestro conjunto de entrenamient

In [3]:
lista=[]
for index, row in todos.iterrows():
    try:
        dic = get_info(song_name=row["Song"],artist_name=row["Artist"])
        dic['Points']=row['Points']
        dic['Country']=row['Country']
        lista.append(pd.DataFrame(dic,index=[index]))
        
    except Exception as e:
        print(e,f"missing {row['Song']}")

Unable to get Forever from Karakat Bashanova
local variable 'track_id' referenced before assignment missing Forever
Unable to get Chasing Sunsets from Chanel Monseigneur
local variable 'track_id' referenced before assignment missing Chasing Sunsets
Unable to get Ill Be Standing from Ala Tracz
local variable 'track_id' referenced before assignment missing Ill Be Standing
Unable to get Heartbeat from Petar Aničić
local variable 'track_id' referenced before assignment missing Heartbeat
Unable to get Marte from Melani García
local variable 'track_id' referenced before assignment missing Marte
Unable to get Pepelny   from Liza Misnikova
local variable 'track_id' referenced before assignment missing Pepelny  
Unable to get Armanyńnan qalma  from Yerzhan Maksim
local variable 'track_id' referenced before assignment missing Armanyńnan qalma 
Unable to get Colours of Your Dream from Karina Ignatyan
local variable 'track_id' referenced before assignment missing Colours of Your Dream
Unable to ge

In [4]:
train=pd.concat(lista)
train.to_csv("training6.csv",index=False)
train

Unnamed: 0,duration,key,loudness,mode,tempo,artist_hotttnesss,end_of_fade_in,start_of_fade_out,mode_confidence,key_confidence,...,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,Points,Country
0,181.99773,7,-9.342,0,95.001,0.23,0.0,178.39311,0.873,0.876,...,0.565,0.563,0.0296,0.131,2e-06,0.253,0.114,181998,,Belarus
1,179.98036,0,-3.938,1,113.932,0.26,0.0,173.35439,0.636,0.742,...,0.758,0.647,0.0419,0.433,0.0,0.172,0.597,179980,,France
2,182.60023,2,-7.322,1,113.981,0.18,0.24989,176.25687,0.687,0.619,...,0.667,0.405,0.0292,0.547,0.0,0.192,0.329,182600,,Germany
5,173.1683,6,-6.834,1,101.021,0.35,0.15116,168.11247,0.768,0.742,...,0.611,0.623,0.0367,0.0569,4.5e-05,0.093,0.428,173168,,Netherlands
7,177.33333,8,-6.671,1,180.02,0.15,0.0,169.28508,0.311,0.507,...,0.258,0.499,0.0377,0.455,0.0,0.0773,0.428,177333,,Russia
9,167.98611,6,-6.184,0,100.04,0.3,2.61805,162.60934,0.379,0.062,...,0.744,0.574,0.167,0.0555,0.0,0.0817,0.353,167986,,Spain
10,157.90765,11,-7.37,0,153.369,0.19,0.53991,152.74086,0.655,0.696,...,0.359,0.497,0.0439,0.436,0.0,0.0787,0.328,157908,,Ukraine
11,180.93202,10,-3.59,1,117.904,0.26,0.0,178.31764,0.49,0.567,...,0.561,0.778,0.0377,0.218,0.0,0.158,0.315,180932,121.0,Australia
12,175.67088,1,-4.705,1,123.052,0.5,0.36844,167.85706,0.632,0.655,...,0.828,0.732,0.0581,0.167,7e-06,0.0324,0.467,175671,169.0,France
13,180.37302,6,-7.396,1,173.87,0.07,2.48454,175.49062,0.606,0.683,...,0.468,0.553,0.0292,0.0852,0.0,0.0709,0.246,180373,72.0,Russia


## Ahora importaremos el CSV con la información de los participantes del 2020 (sin puntuación ya que el evento todavía no ha ocurrido, esto es lo que nos gustaría predecir).

In [5]:
df_2020=pd.read_csv("2020.csv")

In [6]:
lista2=[]
for index, row in df_2020.iterrows():
    try:
        dic = get_info(song_name=row["Song"],artist_name=row["Artist"],force=True)
        dic['Country']=row['Country']
        lista2.append(pd.DataFrame(dic,index=[index]))
        
    except Exception as e:
        print(e,f"missing {row['Artist'],row['Song']}")

Unable to get Forever from Karakat Bashanova
Trying Forever by only artist Karakat Bashanova
list index out of range missing ('Karakat Bashanova', 'Forever')
Unable to get Chasing Sunsets from Chanel Monseigneur
Trying Chasing Sunsets by only artist Chanel Monseigneur
list index out of range missing ('Chanel Monseigneur', 'Chasing Sunsets')
Unable to get Ill Be Standing from Ala Tracz
Trying Ill Be Standing by only artist Ala Tracz
Unable to get Heartbeat from Petar Aničić
Trying Heartbeat by only artist Petar Aničić
list index out of range missing ('Petar Aničić', 'Heartbeat')


### Finalmente terminamos con nuestro conjunto del año 2020 enriquecido por las APIs de Spotify (sin la columna de puntuación)

In [7]:
final=pd.concat(lista2)
pd.set_option('display.max_columns', 500)
final.to_csv('final.csv',index=False)
final

Unnamed: 0,duration,key,loudness,mode,tempo,artist_hotttnesss,end_of_fade_in,start_of_fade_out,mode_confidence,key_confidence,time_signature,time_signature_confidence,year,popularity,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,Country
0,181.99773,7,-9.342,0,95.001,0.23,0.0,178.39311,0.873,0.876,4,0.941,2020,35,0.565,0.563,0.0296,0.131,2e-06,0.253,0.114,181998,Belarus
1,179.98036,0,-3.938,1,113.932,0.26,0.0,173.35439,0.636,0.742,4,1.0,2020,38,0.758,0.647,0.0419,0.433,0.0,0.172,0.597,179980,France
2,182.60023,2,-7.322,1,113.981,0.18,0.24989,176.25687,0.687,0.619,4,0.783,2020,30,0.667,0.405,0.0292,0.547,0.0,0.192,0.329,182600,Germany
5,173.1683,6,-6.834,1,101.021,0.35,0.15116,168.11247,0.768,0.742,4,1.0,2020,47,0.611,0.623,0.0367,0.0569,4.5e-05,0.093,0.428,173168,Netherlands
6,180.54675,0,-2.799,1,122.028,0.31,0.0,173.8594,0.49,0.546,4,0.899,2020,43,0.523,0.851,0.0373,0.0148,1e-06,0.292,0.181,180547,Poland
7,177.33333,8,-6.671,1,180.02,0.15,0.0,169.28508,0.311,0.507,4,1.0,2020,26,0.258,0.499,0.0377,0.455,0.0,0.0773,0.428,177333,Russia
9,167.98611,6,-6.184,0,100.04,0.3,2.61805,162.60934,0.379,0.062,4,1.0,2020,42,0.744,0.574,0.167,0.0555,0.0,0.0817,0.353,167986,Spain
10,157.90765,11,-7.37,0,153.369,0.19,0.53991,152.74086,0.655,0.696,4,0.973,2020,31,0.359,0.497,0.0439,0.436,0.0,0.0787,0.328,157908,Ukraine
