# Preprocessing to upload to db

In [1]:
import numpy as np
import pandas as pd
import yaml

# Read the raw track table from the CSV that sits next to this notebook,
# then preview the first rows.
DATASET_CSV = "valence_arousal_dataset.csv"
df = pd.read_csv(DATASET_CSV)

df.head()

Unnamed: 0,id,genre,track_name,artist_name,valence,energy
0,2ZQyksYO4zzhyHNcueL0CP,acoustic,Home,Phillip Phillips,0.293,0.792
1,3U9FMzSjrXFS0AnElgcw0j,acoustic,On My Way,Boyce Avenue,0.174,0.564
2,6vrUTGn5p8IrfTZ0J6sIVM,acoustic,Iris,The Goo Goo Dolls,0.513,0.79
3,4psC5agYNMRBscGeLTqhc3,acoustic,20 Years,The Civil Wars,0.346,0.296
4,3jH92uUHDEPQfjfQKYErj1,acoustic,"You and I Both - Live at the Eagles Ballroom, ...",Jason Mraz,0.457,0.798


## Géneros

In [2]:
# Distinct genre names, kept in order of first appearance in `df`.
genre_values = df['genre'].unique()
generos = genre_values.tolist()
print(generos)

# One row per genre; the two scalar entries are broadcast to every row.
genre_columns = {
    'name': generos,
    'imagen': 'static/img/genres/song-default.jpg',
    'type': 'song',
}
df_gen = pd.DataFrame(genre_columns)
df_gen.head()

['acoustic', 'alternative', 'ambient', 'anime', 'black-metal', 'blues', 'bossanova', 'children', 'chill', 'classical', 'disco', 'dubstep', 'electro', 'funk', 'hard-rock', 'heavy-metal', 'hip-hop', 'house', 'idm', 'j-pop', 'j-rock', 'jazz', 'k-pop', 'latino', 'metal', 'party', 'piano', 'pop', 'punk', 'reggae', 'reggaeton', 'rock', 'rock-n-roll', 'romance', 'salsa', 'samba', 'sleep', 'soul', 'soundtracks', 'study', 'tango', 'techno']


Unnamed: 0,name,imagen,type
0,acoustic,static/img/genres/song-default.jpg,song
1,alternative,static/img/genres/song-default.jpg,song
2,ambient,static/img/genres/song-default.jpg,song
3,anime,static/img/genres/song-default.jpg,song
4,black-metal,static/img/genres/song-default.jpg,song


In [3]:
# Quadrant rule that turns a (valence, energy) pair into an emotion code.
def asignar_emocion(x, y, threshold=0.5):
    """Map a (valence, energy) pair to one of four integer emotion codes.

    The plane is split into quadrants at ``threshold`` on both axes. The two
    comparisons are deliberately kept as in the original and are not
    symmetric: valence uses ``>=`` while energy uses ``>``. A point exactly
    on the valence boundary therefore counts as high valence, whereas a point
    exactly on the energy boundary counts as low energy.

    Parameters
    ----------
    x : float
        Valence of the track (presumably in [0, 1] -- TODO confirm).
    y : float
        Energy of the track (presumably in [0, 1] -- TODO confirm).
    threshold : float, optional
        Cut point applied to both axes. Defaults to 0.5, which reproduces
        the previously hard-coded behaviour.

    Returns
    -------
    int
        1 -- high valence, high energy ("happy")
        4 -- high valence, low energy  ("angry")
        3 -- low valence,  high energy ("fear / anxiety")
        2 -- low valence,  low energy  ("sad")

    Notes
    -----
    Comparisons against NaN are always false, so a NaN valence is treated as
    low valence and a NaN energy as low energy; two NaNs yield 2.

    NOTE(review): the emotion names come from the original inline comments.
    What the integer codes refer to downstream is not visible in this
    notebook -- confirm them (in particular "angry" for the high-valence /
    low-energy quadrant) against the emotion table they are loaded into.
    """
    high_valence = x >= threshold
    high_energy = y > threshold

    if high_valence:
        return 1 if high_energy else 4  # happy / angry
    return 3 if high_energy else 2  # fear-anxiety / sad

In [4]:
# Tag every track with its emotion code, computed row by row from the
# track's valence and energy. The result is stored as a new column on `df`.
emotion_codes = df.apply(
    lambda track: asignar_emocion(track['valence'], track['energy']),
    axis=1,
)
df['emotion'] = emotion_codes
df.head()

Unnamed: 0,id,genre,track_name,artist_name,valence,energy,emotion
0,2ZQyksYO4zzhyHNcueL0CP,acoustic,Home,Phillip Phillips,0.293,0.792,3
1,3U9FMzSjrXFS0AnElgcw0j,acoustic,On My Way,Boyce Avenue,0.174,0.564,3
2,6vrUTGn5p8IrfTZ0J6sIVM,acoustic,Iris,The Goo Goo Dolls,0.513,0.79,1
3,4psC5agYNMRBscGeLTqhc3,acoustic,20 Years,The Civil Wars,0.346,0.296,2
4,3jH92uUHDEPQfjfQKYErj1,acoustic,"You and I Both - Live at the Eagles Ballroom, ...",Jason Mraz,0.457,0.798,3


In [5]:
# Reshape the track table into the columns exported as fixture fields:
# link, genre, title, author, emotion, type.
prefix = "https://open.spotify.com/track/"

# Genre name -> 1-based position in `generos`. The genre fixture built later
# in this notebook numbers its pk the same way (range(1, len(generos) + 1)),
# so these integers line up with the genre primary keys.
mapeo = {valor: indice + 1 for indice, valor in enumerate(generos)}

df2 = (
    df.drop(columns=["valence", "energy"])
    .rename(columns={'id': 'link', 'track_name': 'title', 'artist_name': 'author'})
    .assign(
        # Vectorised string concatenation instead of a per-row lambda.
        link=lambda tracks: prefix + tracks['link'],
        type='song',
        # `.map` recodes the whole column through the dict in one pass and
        # yields integers directly. `.replace(dict)` on an object column
        # depends on implicit downcasting, which pandas 2.2 deprecates.
        # A genre missing from `mapeo` would become NaN here rather than
        # staying as text; that cannot happen while `generos` is derived
        # from this same column.
        genre=lambda tracks: tracks['genre'].map(mapeo),
    )
)

df2.head()

Unnamed: 0,link,genre,title,author,emotion,type
0,https://open.spotify.com/track/2ZQyksYO4zzhyHN...,1,Home,Phillip Phillips,3,song
1,https://open.spotify.com/track/3U9FMzSjrXFS0An...,1,On My Way,Boyce Avenue,3,song
2,https://open.spotify.com/track/6vrUTGn5p8IrfTZ...,1,Iris,The Goo Goo Dolls,1,song
3,https://open.spotify.com/track/4psC5agYNMRBscG...,1,20 Years,The Civil Wars,2,song
4,https://open.spotify.com/track/3jH92uUHDEPQfjf...,1,"You and I Both - Live at the Eagles Ballroom, ...",Jason Mraz,3,song


## Crea los archivos fixture

In [9]:
# Wrap each genre row in a model / fields / pk record (presumably the Django
# fixture layout -- confirm against the loader). `fields` holds the row as a
# plain dict and `pk` counts up from 1.
genre_fields = df_gen.apply(pd.Series.to_dict, axis=1)
genre_pks = range(1, len(generos) + 1)
df_yaml = pd.DataFrame(
    {
        "model": "moodMatch.genre",
        "fields": genre_fields,
        "pk": genre_pks,
    }
)
df_yaml.head()

Unnamed: 0,model,fields,pk
0,moodMatch.genre,"{'name': 'acoustic', 'imagen': 'static/img/gen...",1
1,moodMatch.genre,"{'name': 'alternative', 'imagen': 'static/img/...",2
2,moodMatch.genre,"{'name': 'ambient', 'imagen': 'static/img/genr...",3
3,moodMatch.genre,"{'name': 'anime', 'imagen': 'static/img/genres...",4
4,moodMatch.genre,"{'name': 'black-metal', 'imagen': 'static/img/...",5


In [10]:
# Flatten the fixture frame into a list of {model, fields, pk} records.
data_dict = df_yaml.to_dict(orient='records')

# Render the records as YAML text, then write the text to the genre fixture.
yaml_text = yaml.dump(data_dict)
with open('genres-song.yaml', 'w') as fixture_file:
    fixture_file.write(yaml_text)

In [7]:
# Wrap each song row in a model / fields / pk record (presumably the Django
# fixture layout -- confirm against the loader). `fields` holds the row as a
# plain dict and `pk` counts up from 1.
song_fields = df2.apply(pd.Series.to_dict, axis=1)
song_pks = range(1, len(df2) + 1)
df_yaml = pd.DataFrame(
    {
        "model": "moodMatch.content",
        "fields": song_fields,
        "pk": song_pks,
    }
)
df_yaml.head()

Unnamed: 0,model,fields,pk
0,moodMatch.content,{'link': 'https://open.spotify.com/track/2ZQyk...,1
1,moodMatch.content,{'link': 'https://open.spotify.com/track/3U9FM...,2
2,moodMatch.content,{'link': 'https://open.spotify.com/track/6vrUT...,3
3,moodMatch.content,{'link': 'https://open.spotify.com/track/4psC5...,4
4,moodMatch.content,{'link': 'https://open.spotify.com/track/3jH92...,5


In [8]:
# Flatten the fixture frame into a list of {model, fields, pk} records.
data_dict = df_yaml.to_dict(orient='records')

# Render the records as YAML text, then write the text to the song fixture.
yaml_text = yaml.dump(data_dict)
with open('songs.yaml', 'w') as fixture_file:
    fixture_file.write(yaml_text)
