In [1]:
import pandas as pd
import numpy as np
import json
import networkx as nx

In [2]:
# Load the album/artist credit rows. reset_index() (without drop=True) keeps
# the previous row index as an extra "index" column and leaves a fresh
# 0..n-1 index on the frame.
df = pd.read_csv("musicBrainz/albums_top5000.csv", sep=",").reset_index()

In [163]:
# Number of distinct (non-null) album ids in the CSV data.
df['album_id'].nunique()

3494

In [3]:
# Parsed content of the album details JSON file; the name is bound to None
# first so it exists even if opening or parsing the file fails.
albums_json = None
with open('data/albums/missing_albums_details.json', encoding="UTF-8") as albums_file:
    albums_json = json.load(albums_file)

In [4]:
def get_albums_info_from_json(tuple):
    """Extract the id, title and credited extra artists of one album record.

    Parameters
    ----------
    tuple : mapping
        One album record; must provide the keys ``"id"``, ``"title"`` and
        ``"extraartists"``. Each entry of ``"extraartists"`` must provide
        ``"id"``, ``"name"`` and ``"role"``.
        (The parameter name shadows the ``tuple`` builtin inside this
        function; it is kept so existing callers are unaffected.)

    Returns
    -------
    tuple
        ``(album_id, album_title, artists)`` where ``album_id`` is the record
        id converted to ``np.int64`` and ``artists`` is a list of
        ``{"id", "name", "role"}`` dicts, one per extra artist, in input order.

    Raises
    ------
    KeyError
        If one of the required keys is absent.
    """
    album_id = np.int64(tuple["id"])
    title = tuple["title"]
    # Keep only the three fields the graph construction needs.
    credits = [
        {"id": entry["id"], "name": entry["name"], "role": entry["role"]}
        for entry in tuple["extraartists"]
    ]
    return (album_id, title, credits)
        

In [7]:
def create_subgraph(G,album_id,album_title,artist_id,artist_name,nb_album,missing,role) :
    """Record one album/artist credit in the graph ``G``.

    The album node is created on first sight (and then counted in
    ``nb_album`` and appended to ``missing``), even when the credit itself is
    skipped. Credits whose role starts with "A&R" (case-insensitive) add no
    artist node and no edge. For every other credit the artist node is created
    if needed and the album-artist edge weight counts how many times that pair
    has been seen.

    Parameters
    ----------
    G : graph object providing has_node/add_node/has_edge/add_edge and
        ``G[u][v]`` edge-data access (an ``nx.Graph`` in this notebook).
    album_id : hashable node id of the album.
    album_title : stored as the ``title`` attribute of a new album node.
    artist_id : raw artist id; the node id is ``np.int64("999" + str(artist_id))``.
    artist_name : stored as the ``name`` attribute of a new artist node.
    nb_album : running count of album nodes created so far.
    missing : list mutated in place; receives each newly created album id.
    role : credit role string.

    Returns
    -------
    tuple
        ``(G, nb_album, missing)`` with the updated counter.
    """
    # Artist node ids get a "999" prefix -- presumably to keep them from
    # colliding with album ids in the same graph (TODO confirm).
    artist_node = np.int64("999" + str(artist_id))

    album_is_new = not G.has_node(album_id)
    if album_is_new:
        nb_album = nb_album + 1
        missing.append(album_id)
        G.add_node(album_id, title=album_title, type='Album')

    # A&R credits only register the album; no artist node or edge is added.
    if not role.upper().startswith("A&R"):
        if not G.has_node(artist_node):
            G.add_node(artist_node, name=artist_name, type='Artist')

        if G.has_edge(album_id, artist_node):
            # Repeated credit for the same pair: bump the weight.
            G[album_id][artist_node]['weight'] += 1
        else:
            G.add_edge(album_id, artist_node, weight=1)

    return (G, nb_album, missing)

In [8]:
def create_album_artist_bipartite_graph(missing) :
    """Build the album/artist bipartite graph from the CSV and JSON credits.

    Reads two notebook-level globals:

    * ``df`` -- one row per credit, with the columns ``album_id``, ``title``,
      ``artist_id``, ``artist_name`` and ``role``;
    * ``albums_json`` -- iterable of album records understood by
      ``get_albums_info_from_json``.

    Every credit is passed to ``create_subgraph``, which creates the album and
    artist nodes as needed and maintains the edge weights.

    Parameters
    ----------
    missing : list
        Mutated in place: each album id is appended the first time its node is
        added to the graph.

    Returns
    -------
    tuple
        ``(G, nb_album, missing)`` -- the ``nx.Graph``, the number of album
        nodes created, and the same ``missing`` list.
    """
    G = nx.Graph()
    nb_album = 0

    # CSV part: walk the rows positionally in one pass instead of doing a
    # label lookup per cell (df[col][i]), which is slow and only correct when
    # the index happens to be 0..n-1. to_numpy() keeps numpy scalar types.
    csv_columns = ('album_id', 'title', 'artist_id', 'artist_name', 'role')
    csv_credits = zip(*(df[column].to_numpy() for column in csv_columns))
    for album_id, album_title, artist_id, artist_name, role in csv_credits:
        G, nb_album, missing = create_subgraph(
            G, album_id, album_title, artist_id, artist_name, nb_album, missing, role
        )

    # JSON part: one record per album, each carrying its own list of credits.
    for alb_tuple in albums_json:
        album_id, album_title, artists = get_albums_info_from_json(alb_tuple)
        for art in artists:
            G, nb_album, missing = create_subgraph(
                G, album_id, album_title, art["id"], art["name"], nb_album, missing, art["role"]
            )

    return (G, nb_album, missing)

In [9]:
# Build the graph. `missing` is filled in place with every album id that got
# a node, so its length is the number of album nodes created.
missing = []
bipartite_g, nb_album, missing = create_album_artist_bipartite_graph(missing)
print(len(missing))

4651


In [147]:
# Sanity check: album ids that occur in the CSV data but were never recorded
# in `missing` while the graph was built.
ranks = pd.DataFrame({'album_id': missing})
results = (
    df.loc[~df['album_id'].isin(ranks['album_id']), 'album_id']
    .unique()
)

In [148]:
# List the unmatched album ids, one per line (prints nothing when all matched).
for unmatched_album_id in results:
    print(unmatched_album_id)

In [39]:
# Export the bipartite graph as a GEXF file (the target path suggests it is
# meant to be opened in Gephi).
nx.write_gexf(bipartite_g, "./networks/gephi/discogs_bipartit_graph.gexf")


In [10]:
# Basic size and degree statistics of the bipartite graph.
print("nb nodes: ", bipartite_g.number_of_nodes())
print("nb edges: ", bipartite_g.number_of_edges())

# Plain list of node degrees, in node iteration order.
deg = [node_degree for _node, node_degree in nx.degree(bipartite_g)]
print("Min node degree: ", np.min(deg))
print("Max node degree: ", np.max(deg))
print("Avg node degree: ", np.mean(deg))

nb nodes:  29122
nb edges:  56768
Min node degree:  1
Max node degree:  525
Avg node degree:  3.8986333356225535


In [178]:
# Rank all nodes by degree, highest first, and keep the entry at position 1.
# NOTE(review): index [1] is the node with the SECOND-highest degree, and the
# ranking is not restricted to nodes of type 'Artist', so the selected node
# may be an album -- confirm this is the intended "top musician".
topmusician = sorted(
    bipartite_g.degree,
    key=lambda node_and_degree: node_and_degree[1],
    reverse=True,
)[1]

In [179]:
# Display the selected (node id, degree) pair.
topmusician

(8427988, 333)

In [116]:
import uuid

In [117]:
# Print 5040 freshly generated random (version 4) UUIDs, one per line.
# NOTE(review): the count 5040 is a magic number whose purpose is not evident
# from this notebook -- consider naming it as a constant.
for i in range(5040):
    print(uuid.uuid4())

3730382d-1837-41b0-abc7-52eb8b667840
9219860f-45de-4185-a798-d60a3345ac49
92c5e99f-7683-4bde-8856-74c327ef78d3
bb80a1e6-30e3-4197-ae20-e1b710c47fa3
f5fa1029-0a0f-456a-87f0-93f2b006beab
5a9468bd-fd63-4d7e-be9c-192c5f788a5b
62f44d0d-7b53-4aac-92ac-7204b125cd15
7d9c6c6e-f2a8-401a-a15f-4a45ad6b19eb
91b21788-2c12-4ce3-aa65-e6dfa1b473d5
a6cde2c1-6a55-4d4b-846d-52cbb462d604
04098f21-c4aa-4e12-ac9f-543954f84e92
eb281e02-f5fb-49de-b386-9bf41cbef49b
08684fbb-6f41-45da-8425-2b5ca7f51508
c89951d7-7274-4abf-b668-f7167400b7c0
6bc4120d-5d1a-4f5e-977a-69b81ec1699f
0f6948b6-aacb-4247-9e92-c99ea4cca58b
67dd363a-fba2-46c1-8235-dd1a8e8d54a2
a359f049-f3b5-445b-b9df-a21dc6930f3f
a1056e0b-39b2-4f76-80c0-2340ef05fda6
5fa8827e-21d0-4443-8077-7295768f5724
64a2d6e6-a0ff-40d5-8bd4-97b9cb3e9187
6594914e-dbf9-47bb-bff1-caf6df0f7c1d
d7372d79-01bc-468e-8357-1a61f2fa8de4
78118eec-2282-45c6-a8d8-7cb0f6435966
f5f32cf4-3898-40fd-b437-b9530abd6fe0
12adcb8e-28b8-4d5b-9b69-01c5b421422a
f489704c-1a0f-4552-87f5-ab16e830587d
e