In [1]:
import os
from os import path
import json

import networkx as nx
from networkx.algorithms import bipartite
from cdlib import algorithms, classes, evaluation, readwrite

class SpotifyGraph():
    '''Helper around the Spotify collection/track graph dataset stored on disk.

    The dataset is expected under ``<dir>/dataset`` (``graph.json``,
    ``collections.json``, ``tracks.json``) and results are written to
    ``<dir>/results``. Only ``graph.json`` is read eagerly; the collections
    metadata is read lazily the first time a method needs it, and
    ``tracks.json`` is not read by this class at all.
    '''

    def __init__(self, dir, features_dir):
        '''Set up dataset paths and load the graph file.

        Args:
            dir: Root directory containing the ``dataset`` and ``results`` folders.
            features_dir: Directory with feature files (only stored, not read here).
        '''
        self.base_dir = path.join(dir, "dataset")
        self.save_dir = path.join(dir, "results")
        self.tracks_pth = path.join(self.base_dir, "tracks.json")
        self.col_pth = path.join(self.base_dir, "collections.json")
        self.graph_pth = path.join(self.base_dir, "graph.json")

        self.ft_dir = features_dir
        self.features_dict = {}

        # Filled on demand by _get_collections(); kept as None until then so
        # constructing the dataset does not require collections.json.
        self.collections = None

        self.load()

    def load(self):
        '''Read ``graph.json`` into ``self.graph``.'''
        print("Loading graph...")
        with open(self.graph_pth, "r", encoding="utf-8") as f:
            self.graph = json.load(f)

    def _get_collections(self):
        '''Return the collections metadata, reading ``collections.json`` on first use.'''
        # getattr keeps this safe even for instances created without __init__.
        if getattr(self, "collections", None) is None:
            with open(self.col_pth, "r", encoding="utf-8") as f:
                self.collections = json.load(f)
        return self.collections

    def to_nx_graph(self, max_edges=1000):
        '''Get dataset as a NetworkX graph.

        Args:
            max_edges: Use only the first ``max_edges`` entries of
                ``self.graph["edges"]``; pass ``None`` to use every edge.

        Returns:
            An undirected ``nx.Graph`` with one edge per ``from``/``to`` pair.
        '''
        edges = self.graph["edges"]
        if max_edges is not None:
            edges = edges[:max_edges]

        g = nx.Graph()
        g.add_edges_from((e["from"], e["to"]) for e in edges)
        return g

    def get_playlists_vs_albums(self):
        '''Split collection ids by type.

        Returns:
            A ``(playlist_ids, album_ids)`` tuple of lists; a collection whose
            ``type`` contains neither "playlist" nor "album" is skipped.
        '''
        playlist_ids, album_ids = [], []
        for col_id, info in self._get_collections().items():
            if "playlist" in info["type"]:
                playlist_ids.append(col_id)
            elif "album" in info["type"]:
                album_ids.append(col_id)

        return playlist_ids, album_ids

    def get_playlists_by_keywords(self, keywords):
        '''Return ids of playlists whose name or description contains any keyword.

        Matching is a case-insensitive substring test. A missing or ``None``
        name/description is treated as an empty string.

        Args:
            keywords: Iterable of keyword strings.
        '''
        # Lowercase the keywords too; the text is lowercased before comparison,
        # so an upper-case keyword could otherwise never match.
        lowered = [word.lower() for word in keywords]

        def keywords_in_info(info):
            fields = (
                (info.get("name") or "").lower(),
                (info.get("description") or "").lower(),
            )
            return any(word in field for field in fields for word in lowered)

        return [
            col_id
            for col_id, info in self._get_collections().items()
            if "playlist" in info["type"] and keywords_in_info(info)
        ]

    def get_projected_graph(self, graph, is_multigraph=False, nodes=None):
        '''Project ``graph`` with ``bipartite.projected_graph``.

        Args:
            graph: Graph to project.
            is_multigraph: Forwarded as the ``multigraph`` argument.
            nodes: Nodes to project onto. Defaults to all nodes of ``graph``;
                pass one side of the bipartition to get a one-mode projection.
        '''
        if nodes is None:
            nodes = graph.nodes
        return bipartite.projected_graph(graph, nodes, multigraph=is_multigraph)

    def save_community(self, pred, algo_name):
        '''Write ``pred`` to ``<save_dir>/<algo_name>_communities.csv``.'''
        # Create the results folder if needed so the write does not fail on a fresh checkout.
        os.makedirs(self.save_dir, exist_ok=True)
        readwrite.write_community_csv(pred, path.join(self.save_dir, "{}_communities.csv".format(algo_name)), ",")

    def find_communities(self, g, algorithm, seeds=None):
        '''Run one community-detection algorithm on ``g`` and save its result.

        Any exception raised by the algorithm or by saving is caught and
        printed, so a failing algorithm does not interrupt the caller.

        Args:
            g: Graph to run the algorithm on.
            algorithm: Callable taking the graph; its ``__name__`` is used for
                log messages and for the output file name.
            seeds: Seed nodes passed to ``overlapping_seed_set_expansion``
                (preferably each in a different community). Defaults to an
                empty list; ignored for every other algorithm.
        '''
        algorithm_name = algorithm.__name__
        try:
            print("Starting community detection for {} algorithm".format(algorithm_name))
            if algorithm_name == "overlapping_seed_set_expansion":
                list_of_seeds = list(seeds) if seeds is not None else []
                community_prediction = algorithm(g, seeds=list_of_seeds)
            else:
                community_prediction = algorithm(g)
            self.save_community(community_prediction, algorithm_name)
        except Exception as e:
            print("Error with {} algorithm".format(algorithm_name))
            print(type(e), e)
        else:
            print("Saved communities file for {} algorithm".format(algorithm_name))


    # Example usage of the SpotifyGraph dataset class
    

    # JSON COLLECTIONS STRUCTURE FOR EACH PLAYLIST - example
    # "type": "playlist",
    # "name": "Adrenaline Workout",
    # "num_tracks": 31,
    # "description": "If your workout doubles as an outlet for your aggression",
    # "ztracks": [ track ids ]


# NOTE: the commented-out code below is just a sample taken from hw3

            # g = girvan_newman_graph(mi)
            # louvain = algorithms.louvain(g)
            # walktrap = algorithms.walktrap(g)
            # label_prop = algorithms.label_propagation(g)
            # true_labels = classes.NodeClustering([[3*i + j for i in range(24)] for j in range(3)], g)

            # a += evaluation.normalized_mutual_information(true_labels, louvain).score
            # b += evaluation.normalized_mutual_information(true_labels, walktrap).score
            # c += evaluation.normalized_mutual_information(true_labels, label_prop).score

            ##############################################################################

            # truth = [[i for i in range(1000)]]
            # g = nx.gnm_random_graph(1000, 1000*k)
            # true_labels = classes.NodeClustering(truth, g)
            # louvain = algorithms.louvain(g)
            # walktrap = algorithms.walktrap(g)
            # label_prop = algorithms.label_propagation(g)

            # a += evaluation.variation_of_information(true_labels, louvain).score
            # b += evaluation.variation_of_information(true_labels, walktrap).score
            # c += evaluation.variation_of_information(true_labels, label_prop).score



In [2]:
# The dataset is expected under the current working directory.
root = os.getcwd()
dataset = SpotifyGraph(root, None)

# Keep the bipartite graph under its own name instead of rebinding `g`,
# so each name refers to exactly one kind of graph.
bipartite_graph = dataset.to_nx_graph()

print("Starting projection...")
# `g` is the projected graph used by the community-detection cell.
g = dataset.get_projected_graph(bipartite_graph)

# Ground-truth ids for evaluation after community detection
# playlist_ids, album_ids = dataset.get_playlists_vs_albums()

# Hand-picked filter words that occur in the name or description of the playlists
# keywords = ["fitness", "workout"]
# selected_ids = dataset.get_playlists_by_keywords(keywords)

Loading graph...
Starting projection...


In [3]:
# Overlapping community-detection algorithms from cdlib, run one after another.
list_of_overlapping_algorithms = [
    algorithms.aslpaw,
    algorithms.dcs,
    algorithms.lais2,
    algorithms.overlapping_seed_set_expansion,
    algorithms.umstmo,
    algorithms.percomvc,
]

print("Starting community detection...\n")

for algo in list_of_overlapping_algorithms:
    # find_communities reports its own errors, so a failing algorithm does not stop the loop.
    dataset.find_communities(g, algo)
    # Blank line separates the log output of consecutive algorithms.
    print()

Starting community detection...

Starting community detection for aslpaw algorithm
Error with aslpaw algorithm
<class 'ModuleNotFoundError'> Optional dependency not satisfied: install gmpy (conda install gmpy2) and ASLPAw (pip install shuffle_graph>=2.1.0 similarity-index-of-label-graph>=2.0.1 ASLPAw>=2.1.0). If using a notebook, you need also to restart your runtime/kernel.

Starting community detection for dcs algorithm
Saved communities file for dcs algorithm

Starting community detection for lais2 algorithm
Saved communities file for lais2 algorithm

Starting community detection for overlapping_seed_set_expansion algorithm
Saved communities file for overlapping_seed_set_expansion algorithm

Starting community detection for umstmo algorithm
Saved communities file for umstmo algorithm

Starting community detection for percomvc algorithm
Saved communities file for percomvc algorithm

