In [12]:
# Imports
import numpy as np
import pandas as pd 
import networkx as nx
from networkx.algorithms import community
import community


import ast
import matplotlib.pyplot as plt
from joblib import Parallel, delayed


from collections import defaultdict
from scipy.cluster import hierarchy
from scipy.spatial import distance
from networkx.drawing.nx_agraph import graphviz_layout



In [13]:
# Load the full genre table and use the genre name as the row label.
df_allG = pd.read_csv('all_genres.csv').set_index('GENRE')
df_allG.head()

Unnamed: 0_level_0,Unnamed: 0,SIM_GENRES,SIM_WEIGHTS,OPP_GENRES,OPP_WEIGHTS,REL_ARTISTS,ARTIST_WEIGHTS,SPOTIFY_URL
GENRE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
backgroundmusic,0,"['reiki', 'calminginstrumental', 'yoga', 'back...","['103', '120', '101', '240', '102', '125', '11...","['mahraganat', 'divahouse', 'oldschoolukhiphop...","['160', '100', '121', '110', '135', '136', '12...","['NoemiNucci', 'JuditheAbelsen', 'FabienneWill...","['113', '117', '106', '131', '109', '115', '11...",https://open.spotify.com/user/thesoundsofspoti...
classicmoroccanpop,1,"['bluesrock', 'andalusianclassical', 'chaabima...","['103', '107', '109', '100', '102', '103', '10...","['shiverpop', 'gravewave', 'tassieindie', 'est...","['112', '112', '105', '102', '124', '100', '16...","['HajHusseinToulali', 'AbdelhadiBelkheyat', 'H...","['102', '103', '103', '108', '108', '102', '10...",https://open.spotify.com/user/thesoundsofspoti...
orthodoxchant,2,"['universitychoir', 'polishchoir', 'ukrainianc...","['102', '105', '107', '104', '106', '240', '10...","['sambass', 'funkybreaks', 'hardbass', 'hardco...","['107', '110', '158', '100', '119', '111', '10...","['LegeArtis', 'PeterMichaelides', 'CapellaGreg...","['114', '100', '160', '127', '115', '115', '10...",https://open.spotify.com/user/thesoundsofspoti...
tar,3,"['oud', 'persiantraditional', 'bansuri', 'tar'...","['106', '120', '101', '240', '103', '113', '10...","['happyhardcore', 'aggrotech', 'nightcore', 'b...","['109', '107', '127', '114', '115', '120', '12...","['KiyarashSaket', 'YusefForutan', 'HamidMoteba...","['100', '100', '111', '100', '100', '100', '10...",https://open.spotify.com/user/thesoundsofspoti...
reggae,4,"['dancehall', 'brazilianreggae', 'skarevival',...","['109', '105', '114', '101', '100', '127', '11...","['brazilianlo-firock', 'martialindustrial', 'p...","['106', '107', '155', '121', '160', '108', '14...","['TheItals', 'MykalRose', 'JohnnieClark', 'Asw...","['101', '102', '100', '105', '101', '105', '10...",https://open.spotify.com/user/thesoundsofspoti...


In [14]:
# header=None: the first line of the file is treated as data, so the columns
# get the integer labels 0 and 1 (per the preview: 0 = genre name, 1 = score).
popular_genres = pd.read_csv('top_genres_scored.csv', header = None)
popular_genres.head()

Unnamed: 0,0,1
0,pop,1885.0
1,poprap,1864.0
2,hiphop,1557.0
3,rap,1511.0
4,southernhiphop,1196.0


In [15]:
# Keep only the rows whose GENRE index label is listed in column 0 of
# popular_genres, in that order.
# NOTE(review): label-list lookups with .loc may raise KeyError for labels that
# are missing from the index on recent pandas - confirm every popular genre
# exists in all_genres.csv.
df_allG = df_allG.loc[popular_genres[0].values]

In [16]:
# Sanity check: row count of the filtered genre table.
len(df_allG)

75

In [22]:
def set_genre(df, support: int = -1):
    """Collect the genres that have more than ``support`` similar-genre connections.

    Useful for checking the genres that have over <support> connections.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'SIM_GENRES' column whose values are string
        representations of Python lists (parsed with ``ast.literal_eval``).
        The genre name of each row is read from the 'GENRE' column when that
        column exists, otherwise from the row's index label (the case after
        ``df.set_index('GENRE')``).
    support : int
        The minimum number of connections a row must exceed for the row's
        genre and all of its connections to be added to the set. The default
        of -1 accepts every row.

    Returns
    -------
    tuple
        ``(support, set_genres)`` - the threshold that was applied and the set
        of qualifying genre names together with their connections.
    """
    # When GENRE has been promoted to the index there is no such column any
    # more, and row['GENRE'] would raise KeyError; fall back to the index label.
    genre_is_column = 'GENRE' in df.columns

    set_genres = set()
    for idx, row in df.iterrows():
        connections = ast.literal_eval(row['SIM_GENRES'])

        # Only add the (node, edges) into the set if the number of connections
        # is greater than the support.
        if len(connections) > support:
            set_genres.add(row['GENRE'] if genre_is_column else idx)
            set_genres.update(connections)
    return (support, set_genres)

def generate_G(df, pop_nodes, n = 5000):
    """Build an undirected, weighted similarity graph restricted to ``pop_nodes``.

    Every row of ``df`` becomes a node named after the row's index label. For
    each entry of the row's 'SIM_GENRES' list that is contained in
    ``pop_nodes``, an edge from the row's node to that entry is added, weighted
    with the integer value found at the same position in 'SIM_WEIGHTS'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'SIM_GENRES' and 'SIM_WEIGHTS' columns whose values are
        string representations of Python lists (parsed with
        ``ast.literal_eval``); the two lists are read position by position.
    pop_nodes : iterable
        Names that are allowed as edge targets. Connections not contained in
        it are skipped.
    n : int
        Early-stop marker (only useful for testing): iteration stops after the
        row whose index label equals ``n``. The comparison is made against the
        index *label*, not a row counter, so it never triggers for a frame
        indexed by strings.

    Returns
    -------
    networkx.Graph
        The graph with one node per visited row plus any edge targets.
    """
    # Hash-based lookup instead of scanning pop_nodes once per connection.
    allowed = set(pop_nodes)

    G = nx.Graph()
    for idx, row in df.iterrows():
        connections = ast.literal_eval(row['SIM_GENRES'])
        weights = ast.literal_eval(row['SIM_WEIGHTS'])

        # The row's node is always added, even if none of its connections
        # survive the pop_nodes filter.
        G.add_node(idx)
        edges = [
            (idx, neighbour, int(weights[position]))
            for position, neighbour in enumerate(connections)
            if neighbour in allowed
        ]
        G.add_weighted_edges_from(edges)

        # Useful if you only need a small sample (for testing mainly).
        # NOTE(review): this matches the index label against n; it is kept
        # as-is because existing callers may rely on it being a no-op for
        # string-labelled frames.
        if idx == n:
            break
    return G

def plot_network(G, set_genres):
    """Draw ``G`` with a spring layout and show the figure.

    Nodes listed in ``set_genres`` are drawn as small markers and every edge of
    ``G`` is drawn; node labels are not drawn and the axes frame is hidden.

    Side effect: sets the global matplotlib option ``figure.dpi`` to 300.
    """
    plt.rcParams['figure.dpi'] = 300
    figure, axes = plt.subplots(1, figsize = (16,9))

    # Node coordinates computed once and shared by the node and edge drawing calls.
    layout = nx.spring_layout(G)
    nx.draw_networkx_nodes(
        G, layout,
        nodelist = list(set_genres),
        node_size = 20,
        alpha = 1,
    )
    # NOTE(review): confirm that connectionstyle has an effect for an
    # undirected graph in the installed networkx version.
    nx.draw_networkx_edges(
        G, layout,
        width = 1.0,
        alpha = 1,
        connectionstyle = 'arc3, rad=1',
    )
    # Optional node labels (disabled):
    #     labels = nx.draw_networkx_labels(G, layout, font_size = 3)

    axes.axis('off')
    plt.show()
    # Optional export (disabled):
    #     figure.savefig('./images/spring_layout_subgenres.pdf', bbox_inches='tight')

In [23]:
# Similarity graph over the popular genres only (edge targets are restricted to
# the names in column 0 of popular_genres).
# No row limit is passed: generate_G compares its `n` argument with the index
# label, and df_allG is indexed by genre name, so an integer limit has no effect.
G1 = generate_G(df_allG, popular_genres[0].values)

In [24]:
# Community detection with python-louvain. `community` is that package here:
# the bare `import community` in the imports cell rebinds the name that
# `from networkx.algorithms import community` had bound just before it.
# The algorithm is randomised, so a fixed seed keeps the partition (and the CSV
# written from it) reproducible across kernel restarts.
LOUVAIN_SEED = 42
louvian_mod_max = community.best_partition(G1, weight = 'weight', resolution = 1.0, random_state = LOUVAIN_SEED)

In [25]:
# Show the node -> community-id mapping returned by best_partition.
louvian_mod_max

{'pop': 0,
 'indiepoptimism': 1,
 'hiphop': 0,
 'tropical': 2,
 'electrohouse': 3,
 'southernhiphop': 0,
 'urbancontemporary': 0,
 'poprap': 0,
 'rap': 0,
 'latin': 2,
 'edm': 3,
 'electropop': 1,
 'dancepop': 0,
 'dirtysouthrap': 0,
 'undergroundhiphop': 0,
 'atlhiphop': 0,
 'melodicrap': 0,
 'indiepoprap': 0,
 'gangsterrap': 0,
 'hyphy': 0,
 'westcoasttrap': 0,
 'vaportrap': 0,
 'calirap': 0,
 'escaperoom': 1,
 'vaporsoul': 1,
 'modernrock': 1,
 'indiepop': 1,
 'chamberpop': 1,
 'indietronica': 1,
 'popedm': 3,
 'artpop': 1,
 'indiesoul': 1,
 'stompandholler': 1,
 'indierock': 1,
 'indiefolk': 1,
 'alternativeemo': 4,
 'emo': 4,
 'alternativerb': 1,
 'country': 5,
 'moderncountryrock': 5,
 'countrypop': 5,
 'texascountry': 5,
 'contemporarycountry': 5,
 'countryrock': 5,
 'countryroad': 5,
 'outlawcountry': 5,
 'reddirt': 5,
 'indierb': 1,
 'zapstep': 3,
 'electronictrap': 3,
 'vaportwitch': 3,
 'brostep': 3,
 'christianpop': 6,
 'indiecoustica': 6,
 'christianalternativerock': 6,
 '

In [27]:
# Persist the genre -> community-id mapping as a two-column CSV.
# header is passed explicitly because the default of Series.to_csv differs
# between pandas versions; False keeps the file header-less.
pd.Series(louvian_mod_max).to_csv('genre_louv_cat.csv', header = False)

  """Entry point for launching an IPython kernel.
