In [16]:
import psycopg2, joblib, os, ast
import pandas as pd
import numpy as np
import networkx as nx
from random import seed


In [17]:
seed(4)

In [18]:
INPUT_PATH = '../data'

In [19]:
def get_query(_sql_cnx, query, fetchall:bool=True, return_cursor:bool=False):
    
    cursor = _sql_cnx.cursor()
    cursor.execute(query)
    if fetchall:
        result = cursor.fetchall()
    else:
        result = cursor.fetchone()
   
    if return_cursor:
        return result, cursor
    
    else:
        cursor.close()
        return result

In [20]:
SQL_CNX = psycopg2.connect(host='localhost', database='rapdashboard', user='leo')

In [21]:
query = 'SELECT * FROM artist'
result, cursor = get_query(SQL_CNX, query, return_cursor=True)

columns = [column.name for column in cursor.description]
cursor.close()

artist_table = pd.DataFrame(result, columns=columns)
artist_table.replace('NULL', np.NaN, inplace=True)

for column in artist_table.select_dtypes('object').columns:
    artist_table[column] = artist_table[column].astype(str).str.replace('%22', '\'', regex=True).str.replace('%27', '\"', regex=True)
    
query = 'SELECT * FROM album'
    
result, cursor = get_query(SQL_CNX, query, return_cursor=True)

columns = [column.name for column in cursor.description]
cursor.close()

album_table = pd.DataFrame(result, columns=columns)
album_table.replace('NULL', np.NaN, inplace=True)

for column in album_table.select_dtypes('object').columns:
    album_table[column] = album_table[column].astype(str).str.replace('%22', '\'', regex=True).str.replace('%27', '\"', regex=True)

query = 'SELECT * FROM artist_album'

result, cursor = get_query(SQL_CNX, query, return_cursor=True)

columns = [column.name for column in cursor.description]
cursor.close()

artist_album_table = pd.DataFrame(result, columns=columns)
artist_album_table.replace('NULL', np.NaN, inplace=True)

In [22]:
# create an table with artist id, name and nb featuring
data = album_table.merge(artist_album_table, on='album_id', how='left')
data = data.merge(artist_table, on='artist_id', how='left')

In [23]:
artist_table.to_csv(os.path.join(INPUT_PATH, 'artist.csv'), columns=['artist_id', 'artist_name'], index=False)

In [24]:
data.loc[:, 'featuring_id_list'] = data.featuring_id_list.apply(lambda x: ast.literal_eval(x))
data = data.groupby('artist_id').featuring_id_list.sum().to_frame().reset_index()
data.artist_id = data.artist_id.astype(int)

In [25]:
# remove when the artist is in the featuring list
data.loc[:, 'featuring_id_list'] = data.apply(lambda x: [a for a in x['featuring_id_list'] if a != x['artist_id']], axis=1)

In [26]:
data['featuring_count'] = data.featuring_id_list.apply(len)
data['featuring_unique'] = data.featuring_id_list.apply(lambda x: len(set(x)))

In [27]:
# remove artist without featuring
data = data.loc[data.featuring_unique > 0]

In [28]:
# complete with artist in featuring list but not in artist id 

In [29]:
all_featurings = data.featuring_id_list.explode().unique()
featuring_not_in_artist_ids = all_featurings[~np.isin(all_featurings, data.artist_id)]

In [30]:
add_data = dict()
for index, row in data.iterrows():
    
    for artist_id in row['featuring_id_list']:
        if artist_id in featuring_not_in_artist_ids:
            if not artist_id in add_data:
                add_data[artist_id] = [row['artist_id']]
            else:
                if not row['artist_id'] in add_data[artist_id]:
                    add_data[artist_id].append(row['artist_id'])

In [31]:
add_data = pd.Series(add_data).to_frame().reset_index().rename(columns={'index': 'artist_id', 0: 'featuring_id_list'})

In [32]:
data = pd.concat([data, add_data], ignore_index=True)

In [33]:
data['featuring_count'] = data.featuring_id_list.apply(len)
data['featuring_unique'] = data.featuring_id_list.apply(lambda x: len(set(x)))

In [34]:
data = data.merge(artist_table, on='artist_id', how='left')

In [35]:
def rolland_gamos(artist_id:int, featuring_list:list, level:int=5, p:int=0, log:list=list()):
        
    print('p:', p)
    print('log:', log)
    
    if not artist_id in featuring_list:
        print('Fail ! No featuring between these artists.')
        
    elif not artist_id in log:
        print('Fail ! Artist already played.')
    
    else:
        log.append(artist_id)
        
        n_featurings = data.loc[data.artist_id == artist_id, 'featuring_unique'].iloc[0]
        print(n_featurings)
        if n_featurings < level:
            print('Win ! Level win.')
        else:
            l = np.random.randint(0, 100)
            if p >= l:
                print('Win ! Prob fail reach.')
            else:
                featuring_list = data.loc[data.artist_id == artist_id, "featuring_id_list"].iloc[0]
                featuring_list = data.loc[
                    (data.artist_id.isin(featuring_list)) 
                    & (data.featuring_unique >= level) 
                    & (~ data.artist_id.isin(log)),
                    'artist_id']
                
                if len(featuring_list) == 0:
                    print('Win ! No featuring found.')
                else:
                    artist_id = np.random.choice(featuring_list)
                    log.append(artist_id)
                    featuring_list = data.loc[data.artist_id == artist_id, "featuring_id_list"].iloc[0]

                    print(featuring_list)
                    artist_id = int(input("Enter an artist ID:\n"))
                    rolland_gamos(artist_id, featuring_list, level, p+1, log)

In [36]:
artist_dict = dict(artist_table.set_index('artist_id').artist_name)

In [42]:
joblib.dump(artist_dict, '../data/artist_names.p')

['../data/artist_names.p']

In [37]:
G = joblib.load(os.path.join(INPUT_PATH, 'graph.p'))

In [45]:
G.has_edge(248, 12)

True

In [42]:
G.edges()[12][248]

TypeError: cannot unpack non-iterable int object

In [20]:
def rolland_gamos(artist_id:int, featuring_list:list, level:int=5, p:int=0, log:list=list()):
        
    print(f'p: {p} %')

    if artist_id in log:
        print('Fail ! Artist already played.')
        
    elif not artist_id in featuring_list:
        print('Fail ! No featuring between these artists.')
            
    else:
        log.append(artist_id)
        
        featuring_list = [n1 for n0, n1 in G.edges([artist_id])]
                
        if len(featuring_list) < level:
            print('Win ! Level win.')
        else:
            l = np.random.randint(0, 100)
            if p >= l:
                print('Win ! Prob fail reach.')
            else:
                print('log:', log)
                featuring_list = [node for node in featuring_list if len(G.edges([node])) > level and not node in log]
                
                if len(featuring_list) == 0:
                    print('Win ! No featuring found.')
                else:
                    artist_id = np.random.choice(featuring_list)
                    log.append(artist_id)
                    print('New artist:', artist_id)
                    
                    featuring_list = [n1 for n0, n1 in G.edges([artist_id])]
                    print(featuring_list)
                    
                    artist_id = int(input("Enter an artist ID:\n"))
                    rolland_gamos(artist_id, featuring_list, level, p+1, log)
        
    

In [21]:
start_artist_list = [node for node in G.nodes() if len(G.edges([node])) > 50]

In [25]:
seed = 42
first_artist_id = np.random.choice(start_artist_list)
featuring_list = [n1 for n0, n1 in G.edges([first_artist_id])]
print(first_artist_id)
print(featuring_list)
print()
artist_id = int(input("Enter an artist ID:\n"))
rolland_gamos(artist_id, featuring_list, log=[first_artist_id])

32
[7297, 2817, 40, 5811, 7445, 5494, 918, 3930, 123, 22, 74, 229, 232, 3162, 7472, 112, 409, 282, 3003, 2175, 4479, 5674, 7456, 80, 6940, 6626, 2052, 518, 391, 6252, 6124, 6926, 2641, 6802, 4028, 330, 325, 352, 333, 16, 82, 52, 157, 407, 31, 780, 30, 78, 33, 3245, 1681, 1815, 2524, 1638, 1640]

Enter an artist ID:
22
p: 0 %
log: [32, 22]
New artist: 232
[45, 56, 44, 102, 205, 220, 32, 482, 43, 5005, 2842, 3003, 348, 3197, 31, 192, 4613, 17, 27, 5534, 33, 5680, 7473, 1973, 2364, 201, 1740, 4178, 2145, 7393, 2543, 367, 4467, 6011, 295, 2404, 2884, 88, 7514, 412, 80, 242, 13, 398, 144, 6931, 7342, 312, 64, 6084, 204, 213, 1112, 100, 6125, 118, 321, 5057, 391, 408, 91, 7581, 434, 478, 489, 566, 352, 159, 75, 520, 2275, 4245, 22, 762, 261, 764, 719, 5813, 46, 36, 6, 320, 766, 831, 5579, 549, 507]
Enter an artist ID:
31
p: 1 %
log: [32, 22, 232, 31]
New artist: 322
[6630, 905, 409, 3355, 188, 1123, 7, 5363, 4729, 4858, 31, 4457, 5869, 3379, 2868, 1109, 3935]
Enter an artist ID:
4457
p: 2 %


In [38]:
start_artist_id_list = [node for node in G.nodes() if len(G.edges([node])) > 70]

In [11]:
artist_dict = joblib.load('../data/artist_names.p')

In [13]:
artist_dict_r = {artist_name:artist_id for artist_id, artist_name in artist_dict.items()}


In [33]:
len(G.edges([126]))

52

In [39]:
[artist_dict[a] for a in start_artist_id_list]

['Booba',
 'Lino',
 'Gims',
 'LIM',
 'Oxmo Puccino',
 'Youssoupha',
 'Disiz',
 'Alonzo',
 'Kery James',
 'Seth Gueko',
 'Grödash',
 'Ol’ Kainry',
 'Mystik',
 'Rim’K',
 'La Fouine',
 'Swift Guad',
 'Sofiane',
 'Dosseh',
 'Alkpote',
 'Sadek',
 'Soprano',
 'Disiz',
 'Rockin’ Squat',
 'Mister You',
 'Rohff',
 'Lacrim',
 'DJ Hamida',
 'Leto',
 'JuL',
 'Lacraps',
 'DJ Weedim',
 'Jok’Air',
 'Néochrome',
 'Grünt',
 'Rentre dans le Cercle',
 'Le Classico Organisé']

In [None]:
pd.re

In [None]:
G.nodes()

In [32]:
artist_dict_r["Zesau"] in start_artist_id_list

True

In [36]:
len(start_artist_id_list)

48