In [1]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean

In [2]:
# Per-artist table keyed by artist name: t-SNE coordinates (tsne_0, tsne_1),
# cluster id, human-readable cluster name and follower count.
clusters = pd.read_csv('clusters.csv', index_col='artist')
clusters

Unnamed: 0_level_0,tsne_0,tsne_1,cluster,cluster_name,followers
artist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Travis Scott,-49.622486,-63.165020,0,Hip hop & rap,7705985
Martin Garrix,23.056034,46.405690,1,EDM & house,12574151
Los Fabulosos Cadillacs,-27.807325,33.983185,2-0,Argentinian rock,1672897
Brockhampton,-33.395466,-67.094890,0,Hip hop & rap,1186410
DUKI,84.602455,-40.784866,3,Argentinian trap,2715207
...,...,...,...,...,...
Florian,-85.219360,23.525816,2-1,Argentinian indie,2011
Miranda Johansen,-76.660540,30.444010,2-1,Argentinian indie,2006
DABOW,62.493393,62.657950,1,EDM & house,5677
El Buen Salvaje,-68.169250,-6.845174,2-1,Argentinian indie,791


In [3]:
# Centroid of every named cluster: the mean t-SNE position of its artists.
cluster_centroids = (
    clusters
    .groupby('cluster_name')[['tsne_0', 'tsne_1']]
    .mean()
)
cluster_centroids

Unnamed: 0_level_0,tsne_0,tsne_1
cluster_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentinian indie,-77.929432,19.052341
Argentinian pop,-52.877103,74.02142
Argentinian rock,-39.010427,42.015199
Argentinian trap,90.884187,-30.202852
Art pop & R&B,60.218677,19.109616
EDM & house,33.756525,54.836116
Hip hop & rap,-41.519409,-65.712097
International indie & rock,-1.587642,-27.362592
International pop,42.345227,-7.598888
Latin trap,43.099348,-70.788056


In [4]:
# List the artist names that can be looked up in `clusters` (its index).
clusters.index

Index(['Travis Scott', 'Martin Garrix', 'Los Fabulosos Cadillacs',
       'Brockhampton', 'DUKI', 'Madeon', 'Rita Ora', 'A Day to Remember',
       'King Princess', 'LP', 'WOS', 'Denzel Curry', 'Yungblud',
       'Nathy Peluso', 'J mena', 'AJR', 'Louta', 'Two Feet', 'Bizarrap',
       'Fuego', 'La Delio Valdez', 'Dani', 'Kaydy Cain', 'Maye',
       'Boombox Cartel', 'Cimafunk', 'Ms Nina', 'Feli Colina', 'Axel Fiks',
       'Lucia Tacchetti', 'DJ Sky', 'Alejo y Valentin', 'The Strokes',
       'Gwen Stefani', 'Armin Van Buuren', 'Vampire Weekend',
       'Ratones Paranoicos', 'ILLENIUM', 'Kacey Musgraves', 'Litto Nebbia',
       'Charli XCX', 'Jaden Smith', 'R3HAB', 'Hayley Kiyoko',
       'El Mató a un Policía Motorizado', 'Rels B', 'Emmanuel Horvilleur',
       'Kali Uchis', 'Paloma Mami', 'Fabiana Cantilo', 'Trueno', 'Amaia',
       'Wallows', 'Yung Beef', 'Zoe Gotusso', 'Girl Ultra',
       'Las Ligas Menores', 'Ainda', 'Ghetto Kids', 'D3FAI', 'Paco Leiva',
       'LIMON', 'Louly', 

In [5]:
# Festival timetable: one row per time slot, one column per stage.
grid = pd.read_csv('horarios.csv')
# Placeholder column for the artist picked in each slot. It is created with
# object dtype (still displayed as NaN) because artist names (strings) are
# written into it later; a float64 NaN column would need an implicit upcast
# on the first string assignment, which recent pandas versions deprecate.
grid['choice'] = pd.Series(np.nan, index=grid.index, dtype=object)
grid

Unnamed: 0,Main stage 1,Main stage 2,Alternative,Perry’s,choice
0,Cimafunk,,Alejo y Valentin,,
1,,Fuego,,Axel Fiks,
2,Maye,,Lucia Tacchetti,,
3,,J mena,,DJ Sky,
4,A Day to Remember,,Feli Colina,Nathy Peluso,
5,,La Delio Valdez,,Boombox Cartel,
6,WOS,,King Princess,,
7,,Rita Ora,Louta,Dani,
8,Yungblud,,,Kaydy Cain,
9,,DUKI,Two Feet,Bizarrap,


In [6]:
# Artists already picked by hand; they drive the cluster votes and are
# selected directly whenever they appear in a time slot.
chosen_artists = [
    'La Delio Valdez',
    'Los Fabulosos Cadillacs',
    'Guns N’ Roses',
]

In [7]:
# Weight of each cluster = number of chosen artists that belong to it.
chosen_clusters = (
    clusters.loc[chosen_artists, 'cluster_name']
    .value_counts()
    .rename('cluster_weight')
)
chosen_clusters

Argentinian rock              2
International indie & rock    1
Name: cluster_weight, dtype: int64

In [8]:
def check_artist_name(artist):
    '''
    Validates that an artist is present in the index of `clusters`.

    Parameters
    ----------
    artist : label to look up in `clusters.index` (an artist name).

    Returns
    -------
    True when the artist is found.

    Raises
    ------
    ValueError
        If the artist is not one of the labels of `clusters.index`.
    '''
    if artist not in clusters.index:
        # Build one message string: passing the name as a second positional
        # argument makes the exception render as a tuple instead of a sentence.
        raise ValueError('Artist not found: {!r}'.format(artist))
    return True

In [9]:
# Validate every hand-picked artist against `clusters`;
# check_artist_name raises ValueError on the first unknown name.
for artist in chosen_artists:
    check_artist_name(artist)

In [10]:
def closest_to_centroids(options):
    '''
    Picks the option that lies closest to any centroid of the chosen clusters.

    For every artist in `options`, the euclidean distance between the artist's
    (tsne_0, tsne_1) position in `clusters` and the centroid of each cluster
    listed in `chosen_clusters.index` is computed. The artist that owns the
    smallest of all those distances is returned. Cluster weights are not
    taken into account here.

    Parameters
    ----------
    options : iterable of artist names present in `clusters.index`.

    Returns
    -------
    The artist name with the smallest distance to any chosen-cluster centroid.
    On equal distances the first one encountered wins (np.argmin semantics).

    Raises
    ------
    ValueError
        If there is nothing to compare (no options or no chosen clusters).
    '''
    coord_cols = ['tsne_0', 'tsne_1']
    # Centroids do not depend on the option, so look them up only once.
    centroids = [
        np.asarray(cluster_centroids.loc[name], dtype=float)
        for name in chosen_clusters.index
    ]

    distances = []
    artists = []
    for opt in options:
        # One lookup per artist, cast to float so the distance function
        # receives a plain numeric vector even though the row comes from a
        # frame that also holds non-numeric columns.
        point = np.asarray(clusters.loc[opt, coord_cols], dtype=float)
        for centroid in centroids:
            distances.append(euclidean(point, centroid))
            artists.append(opt)

    if not distances:
        raise ValueError('No distances to compare: options or chosen clusters are empty')
    return artists[np.argmin(distances)]

In [11]:
def solve_tie(options, verbose=False):
    '''
    Solves a tie between options that are equally likely to be chosen.

    Each option whose cluster is one of the chosen artists' clusters gets as
    many votes as that cluster's weight in `chosen_clusters`.

    * If exactly one option holds the highest vote count, it wins.
    * If several options share the highest vote count, the one closest to a
      chosen-cluster centroid wins (see `closest_to_centroids`).
    * If no option belongs to a chosen cluster, the closest option among all
      of them wins.

    Parameters
    ----------
    options : artist names to choose from (list-like or Series of labels
        present in `clusters.index`).
    verbose : when True, prints which rule decided the outcome.

    Returns
    -------
    The winning artist name.
    '''
    if verbose:
        print('\tVoting based on chosen clusters...')
    # Inner join: keeps only the options whose cluster was voted for and
    # attaches that cluster's weight. The result stays indexed by artist name.
    votes = pd.merge(clusters.loc[options, 'cluster_name'],
                     chosen_clusters,
                     left_on='cluster_name',
                     right_index=True)

    if len(votes) > 0:
        # The winner is whoever holds the HIGHEST weight. Select by value
        # rather than by integer label: the index holds artist names, so
        # label-based lookups such as .loc[0] raise KeyError.
        top_weight = votes['cluster_weight'].max()
        winners = votes.index[votes['cluster_weight'] == top_weight]
        if len(winners) == 1:
            if verbose:
                print('\tThere is a winner')
            return winners[0]
        if verbose:
            print('\tThere is a tie. Choosing closest distance.')
        # Several options share the top weight: decide by distance among them.
        return closest_to_centroids(winners)

    # No option belongs to a chosen cluster: decide by distance among all.
    if verbose:
        print('\tThere is no winner. Choosing closest distance.')
    return closest_to_centroids(options)
    

In [15]:
def fill_slot(slot, verbose=False):
    '''
    Decides which artist to see in one time slot of `grid`.

    The candidates are the values of row `slot` in every column except the
    last one. If exactly one candidate is in `chosen_artists`, it is returned
    directly; otherwise `solve_tie` decides, either among all non-missing
    candidates (no chosen artist in the slot) or among the chosen artists
    that overlap in the slot.

    Parameters
    ----------
    slot : positional row number in `grid`.
    verbose : when True, prints the decision steps.

    Returns
    -------
    The selected artist name.
    '''
    if verbose:
        print('Filling slot ', slot)
    candidates = grid.iloc[slot, :-1]
    if verbose:
        print('Choosing between ', list(candidates))

    # Which candidates were hand-picked, and how many of them there are.
    is_chosen = candidates.isin(chosen_artists)
    n_chosen = is_chosen.sum()

    # Exactly one hand-picked artist plays in this slot: take it.
    if n_chosen == 1:
        pick = candidates[is_chosen].values[0]
        if verbose:
            print(pick, ' is among the chosen artists')
        return pick

    if n_chosen == 0:
        # Nobody hand-picked: every artist actually playing competes.
        if verbose:
            print('There are no chosen artists between the options. Breaking tie...')
        contenders = candidates.dropna()
    else:
        # Several hand-picked artists overlap: only they compete.
        if verbose:
            print('There are more than one artists between the chosen ones. Breaking tie...')
        contenders = candidates[is_chosen]

    pick = solve_tie(contenders, verbose=verbose)
    if verbose:
        print('Tie broken. Winner: ', pick)
    return pick


In [16]:
# Decide every time slot in order and store the pick in the last column of `grid`.
for index in range(grid.shape[0]):
    grid.iloc[index, -1] = fill_slot(index, verbose=True)


Filling slot  0
Choosing between  ['Cimafunk', nan, 'Alejo y Valentin', nan]
There are no chosen artists between the options. Breaking tie...
	Voting based on chosen clusters...
	There is no winner. Choosing closest distance.
Tie broken. Winner:  Cimafunk
Filling slot  1
Choosing between  [nan, 'Fuego', nan, 'Axel Fiks']
There are no chosen artists between the options. Breaking tie...
	Voting based on chosen clusters...
	There is no winner. Choosing closest distance.
Tie broken. Winner:  Axel Fiks
Filling slot  2
Choosing between  ['Maye', nan, 'Lucia Tacchetti', nan]
There are no chosen artists between the options. Breaking tie...
	Voting based on chosen clusters...
	There is a winner
Tie broken. Winner:  Maye
Filling slot  3
Choosing between  [nan, 'J mena', nan, 'DJ Sky']
There are no chosen artists between the options. Breaking tie...
	Voting based on chosen clusters...
	There is a winner
Tie broken. Winner:  DJ Sky
Filling slot  4
Choosing between  ['A Day to Remember', nan, 'Feli

In [14]:
# Show the timetable together with the artist selected for each slot.
grid

Unnamed: 0,Main stage 1,Main stage 2,Alternative,Perry’s,choice
0,Cimafunk,,Alejo y Valentin,,Cimafunk
1,,Fuego,,Axel Fiks,Axel Fiks
2,Maye,,Lucia Tacchetti,,Maye
3,,J mena,,DJ Sky,DJ Sky
4,A Day to Remember,,Feli Colina,Nathy Peluso,A Day to Remember
5,,La Delio Valdez,,Boombox Cartel,La Delio Valdez
6,WOS,,King Princess,,King Princess
7,,Rita Ora,Louta,Dani,Louta
8,Yungblud,,,Kaydy Cain,Yungblud
9,,DUKI,Two Feet,Bizarrap,Two Feet
