In [1]:
import requests
import numpy as np
import pandas as pd
from itertools import permutations

In [225]:
# Load the artist table from CSV and preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like an
# index that was saved into the CSV — confirm before changing the read call,
# because df.columns is reused further down to order the roadmap columns.
df = pd.read_csv('data_recopalooza.csv')
df.head()

Unnamed: 0,day,time,stage,artist,is_argentinian,energy,danceability,valence,acousticness,cluster_name,cluster_name_spanish
0,1,1:20 – 2:00,Alternative,Cimafunk,0,0.69,0.82,0.72,0.18,Latin trap,Reggaetón & trap en español
1,1,10:30 – 11.30,Alternative,A Day to Remember,0,0.91,0.45,0.45,0.01,International indie & rock,Rock & Indie internacional
2,1,12:30 – 1:00,Alternative,Alejo y Valentin,1,0.48,0.51,0.26,0.17,Argentinian indie,Indie en español
3,1,2:15 – 2:45,Alternative,Axel Fiks,1,0.52,0.7,0.43,0.22,Argentinian indie,Indie en español
4,1,3:15 – 4:00,Alternative,Two Feet,0,0.5,0.63,0.31,0.43,International indie & rock,Rock & Indie internacional


In [3]:
# Seed-artist groups sent to the roadmap service, one group per archetype.
# The trailing comment on each row is the matching label in `clusters`
# (labels are kept verbatim, in Spanish, because they are runtime strings).
# Order matters: the roadmaps are later sliced by position, so row k here
# must correspond to entry k of `clusters`.
lista_artistas = [
    ['Guns N’ Roses', 'The Strokes'], # Rock & Indie internacional
    ['Gwen Stefani', 'Lana del Rey'], # Pop internacional
    ['Travis Scott'], # Hip hop & rap
    ['Martin Garrix'], # Electrónica
    ['Los Fabulosos Cadillacs'], # Rock argentino
    ['DUKI'], # Trap argentino
    ['Louta'], # Indie en español
    ['James Blake'], # R&B
    ['Rels B'] # Reggaetón & trap en español
]

# Audio features whose orderings are permuted when querying the service.
audio_features = ['energy', 'danceability', 'valence', 'acousticness']

In [8]:
# Query the local roadmap service once for every combination of
# (seed-artist group, ordering of the audio features, shuffle flag) and keep
# the 'roadmap' payload of each response, in request order.
# Later cells rely on that order: consecutive pairs are shuffle=0 / shuffle=1
# for the same artists and feature ordering.
ROADMAP_URL = 'http://127.0.0.1:5000/get_roadmap'
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled server hangs the cell forever

roadmaps = []
for artistas in lista_artistas:
    for features in permutations(audio_features, 4):
        for shuffle in [0, 1]:
            params = {
                'artists': ','.join(artistas),
                'audio_feats': ','.join(features),
                'shuffle': shuffle
            }
            r = requests.get(ROADMAP_URL, params=params, timeout=REQUEST_TIMEOUT)
            # Fail loudly on an HTTP error instead of a confusing JSON/KeyError below.
            r.raise_for_status()
            roadmaps.append(r.json()['roadmap'])

In [216]:
# Archetype labels, in the same order as the seed groups in `lista_artistas`.
# The roadmaps are sliced by position per label, so this order must not change
# independently of `lista_artistas`.
clusters = ['Rock & Indie internacional', 'Pop internacional', 'Hip hop & rap', 'Electrónica',
            'Rock argentino', 'Trap argentino', 'Indie en español', 'R&B', 'Reggaetón & trap en español']

In [217]:
# For each archetype, stack all of its roadmaps into one DataFrame and compute
# the share of each cluster among the recommended rows.
#
# Each archetype owns a contiguous slice of `roadmaps`:
# 24 orderings of the 4 audio features x 2 shuffle settings = 48 requests.
ROADMAPS_PER_CLUSTER = 48

roads_dfs = []
cluster_counts = []
for idx, archetype in enumerate(clusters):
    roads = roadmaps[idx * ROADMAPS_PER_CLUSTER:(idx + 1) * ROADMAPS_PER_CLUSTER]
    # One row per roadmap entry, roadmaps concatenated in request order.
    roads_df = pd.DataFrame([entry for road in roads for entry in road])
    # Use the column order of the source table, minus the English cluster name.
    roads_df = roads_df.reindex(columns=[column for column in df.columns if column != 'cluster_name'])
    roads_df['archetype'] = archetype
    # Number the roadmaps 1..N using each roadmap's real length, so rows stay
    # correctly labelled even if a roadmap does not have the usual row count
    # (previously a fixed 41 rows per roadmap was assumed).
    roads_df['n_roadmap'] = [n for n, road in enumerate(roads, start=1) for _ in road]
    cluster_count = roads_df['cluster_name_spanish'].value_counts(normalize=True)
    roads_dfs.append(roads_df)
    cluster_counts.append(cluster_count)

In [218]:
def _as_percent(fraction):
    """Render a fraction as a percentage string rounded to two decimals."""
    return str(np.around(fraction * 100, 2)) + '%'


# One column per archetype, one row per recommended cluster (rows sorted by label).
tabla = pd.concat(cluster_counts, axis=1, sort=True, keys=clusters)
# Display-only formatting: `tabla` itself keeps the numeric fractions.
tabla.applymap(_as_percent)

Unnamed: 0,Rock & Indie internacional,Pop internacional,Hip hop & rap,Electrónica,Rock argentino,Trap argentino,Indie en español,R&B,Reggaetón & trap en español
Electrónica,14.74%,14.89%,23.42%,36.59%,17.48%,19.0%,15.45%,18.5%,17.68%
Hip hop & rap,1.27%,1.68%,9.76%,3.86%,1.47%,3.86%,3.96%,3.96%,4.67%
Indie en español,13.16%,16.21%,14.53%,13.41%,13.47%,14.79%,26.83%,17.89%,16.62%
Pop en español,2.44%,7.57%,3.71%,2.64%,6.76%,4.67%,5.28%,5.23%,4.93%
Pop internacional,6.81%,24.39%,8.59%,7.22%,7.42%,8.08%,8.49%,9.5%,8.28%
R&B,2.54%,2.08%,3.25%,2.69%,2.59%,1.93%,3.2%,7.32%,4.32%
Reggaetón & trap en español,3.15%,9.71%,12.09%,8.33%,8.28%,13.36%,10.62%,9.76%,19.51%
Rock & Indie internacional,39.02%,8.74%,13.77%,13.87%,11.84%,10.52%,11.59%,16.16%,8.03%
Rock argentino,11.89%,8.13%,4.12%,5.89%,19.51%,6.71%,5.64%,7.32%,5.13%
Trap argentino,4.98%,6.61%,6.76%,5.49%,11.18%,17.07%,8.94%,4.37%,10.82%


In [219]:
# Export every archetype's stacked roadmaps to its own sheet of one workbook.
# The context manager saves and closes the file on exit, so the handle is
# released even if writing one of the sheets raises (replaces the manual,
# deprecated writer.save() call).
with pd.ExcelWriter('tests/Roadmaps_típicos.xlsx') as writer:
    for sheet_name, sheet_df in zip(clusters, roads_dfs):
        sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)

In [220]:
# Print, for each archetype column, its five largest cluster shares as whole percentages.
percent_label = '({:.0f}%)'.format
for perfil in tabla.columns:
    top_shares = tabla[perfil].sort_values(ascending=False).head(5)
    print('Perfil {}:'.format(perfil))
    print(top_shares.apply(lambda share: percent_label(share * 100)))
    print('\n')

Perfil Rock & Indie internacional:
Rock & Indie internacional    (39%)
Electrónica                   (15%)
Indie en español              (13%)
Rock argentino                (12%)
Pop internacional              (7%)
Name: Rock & Indie internacional, dtype: object


Perfil Pop internacional:
Pop internacional              (24%)
Indie en español               (16%)
Electrónica                    (15%)
Reggaetón & trap en español    (10%)
Rock & Indie internacional      (9%)
Name: Pop internacional, dtype: object


Perfil Hip hop & rap:
Electrónica                    (23%)
Indie en español               (15%)
Rock & Indie internacional     (14%)
Reggaetón & trap en español    (12%)
Hip hop & rap                  (10%)
Name: Hip hop & rap, dtype: object


Perfil Electrónica:
Electrónica                    (37%)
Rock & Indie internacional     (14%)
Indie en español               (13%)
Reggaetón & trap en español     (8%)
Pop internacional               (7%)
Name: Electrónica, dtype: object



In [221]:
# Persist the cluster-share table. `tabla` still holds the raw fractions here,
# because the percent-formatted frame was only displayed, never assigned.
tabla.to_csv('tests/all_roadmaps.csv')

In [222]:
# Walk the roadmaps two at a time (indices 0-1, 2-3, ...). Given the request
# order, each pair is the shuffle=0 / shuffle=1 result for the same artists and
# feature ordering. For every pair, store the number of element-wise mismatches
# divided by the length of the first roadmap of the pair.
diffs = [
    (np.array(roadmaps[first]) != np.array(roadmaps[first + 1])).sum() / len(roadmaps[first])
    for first in range(0, len(roadmaps), 2)
]

In [223]:
# Average, over all roadmap pairs, of the differing fraction stored in `diffs`.
np.mean(diffs)

0.39397018970189707