Notebook for preparing the edge sheet (edge_sheet.csv) from network-graph.csv and merging the edge and node sheets into the network graph JSON

In [1]:
import csv
import json
import pandas as pd
from itertools import chain, combinations
from collections import Counter
import functools
import operator
from tqdm.notebook import tqdm

In [2]:
# Raise pandas' display limits (rows, column width, column count) for this session.
for option_name, limit in (
    ("display.max_rows", 500),
    ("display.max_colwidth", 500),
    ("display.max_columns", 200),
):
    pd.set_option(option_name, limit)

In [10]:
# All three CSVs share one directory, so it is spelled out once here.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook elsewhere.
NOTEBOOK_DIR = "/Users/geistling/Documents/1_Projects/bandcamp/bandcamp_viz/notebooks"

conv_file = NOTEBOOK_DIR + "/network-graph.csv"  # input: album table
edge_file = NOTEBOOK_DIR + "/edge_sheet.csv"     # output of section 1, input of section 2
node_file = NOTEBOOK_DIR + "/node_sheet.csv"     # input of section 2

In [11]:
# Load the album table that the edge tally below is built from.
album_data = pd.read_csv(conv_file)

# Fail fast with a readable message if the columns the tally relies on
# ('standard_name' for the location, 'genre' for the comma-separated genre
# list) are absent, instead of a bare KeyError further down.
missing_columns = {'standard_name', 'genre'} - set(album_data.columns)
assert not missing_columns, (
    f"{conv_file} is missing expected columns: {sorted(missing_columns)}"
)

# 1. Produce edge_sheet.csv from network-graph.csv

In [12]:
# Tally genre co-occurrence edges per location.
#
# For every location (`standard_name`), each row's comma-separated `genre`
# field is split into genres, and every unordered pair of genres on the same
# row counts as one occurrence of that edge. The result, `Collected`, has one
# row per (genre1, genre2, location) with `count` = number of rows in that
# location whose genre list contains both genres.
grouped = album_data.groupby('standard_name')
edge_records = []

for place, group in grouped:
    edge_tally = Counter()
    # Rows without a genre value contribute no edges (a NaN here would
    # otherwise break the string split below).
    for genre_field in group['genre'].dropna():
        # Strip stray whitespace, drop empty tokens and de-duplicate, so that
        # "a, b" and "a,b" yield the same pair and no genre is paired with
        # itself. Sorting makes the (genre1, genre2) order independent of the
        # order in which the genres were listed.
        genres_in_row = sorted(
            {name.strip() for name in str(genre_field).split(",")} - {""}
        )
        edge_tally.update(combinations(genres_in_row, 2))

    # A location whose rows all have fewer than two genres leaves the tally
    # empty and simply adds no rows.
    edge_records.extend(
        {'genre1': genre1, 'genre2': genre2, 'count': pair_count, 'location': place}
        for (genre1, genre2), pair_count in edge_tally.items()
    )

# Build the frame once instead of appending per location: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every call.
Collected = pd.DataFrame(edge_records, columns=['genre1', 'genre2', 'count', 'location'])

In [13]:
# Persist the tallied edges; the DataFrame index is not written to the file.
Collected.to_csv(path_or_buf=edge_file, index=False)

In [14]:
# Preview the first five edge rows.
Collected.head(n=5)

Unnamed: 0,genre1,genre2,count,location
0,ebm,electronic,28,"Adelaide SA, Australia"
1,ebm,electronica,23,"Adelaide SA, Australia"
2,ebm,industrial,27,"Adelaide SA, Australia"
3,electronic,electronica,63,"Adelaide SA, Australia"
4,electronic,industrial,27,"Adelaide SA, Australia"


# 2. Produce the JSON file from the edge and node sheets

In [15]:
# Graph JSON that section 2 reads, extends and writes back.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook elsewhere.
DATA_DIR = "/Users/geistling/Documents/1_Projects/bandcamp/bandcamp_viz/data"
json_file = DATA_DIR + "/network_graph.json"

In [16]:
# Merge the node and edge sheets into the per-location graph JSON.
#
# json_output is a list of place objects. Objects created here carry
#   'n'   list of nodes  {"g": genre, "c": count, "r": relative (3 d.p.)}
#   'l'   list of links  {"g1": genre1, "g2": genre2, "c": count}
#   'ct'  location name
#   'cor' [lng, lat]
# Objects already present in the file are matched on 'ct'.
#
# NOTE(review): rows are appended to whatever the file already contains, so
# re-running this cell against its own output adds every node and link a
# second time — confirm the input file is reset between runs.
with open(json_file, "r") as jsonFile:
    json_output = json.load(jsonFile)

# Index the place objects by name once instead of scanning the whole list for
# every CSV row. setdefault keeps the first object when a name occurs twice,
# which is the object a front-to-back scan would have picked.
places_by_name = {}
for existing_place in json_output:
    places_by_name.setdefault(existing_place['ct'], existing_place)

node_reader = pd.read_csv(node_file)
for row in node_reader.itertuples():
    place_obj = places_by_name.get(row.location)
    if place_obj is None:
        # First time this location is seen: create its place object.
        place_obj = {'n': [], 'l': [], 'ct': row.location, 'cor': [row.lng, row.lat]}
        json_output.append(place_obj)
        places_by_name[row.location] = place_obj
    # setdefault tolerates pre-existing place objects that lack an 'n' list.
    place_obj.setdefault("n", []).append(
        {"g": row.genre, "c": row.count, "r": round(row.relative, 3)}
    )

link_reader = pd.read_csv(edge_file)
for row in link_reader.itertuples():
    place_obj = places_by_name.get(row.location)
    if place_obj is None:
        # The location has no place object (it is neither in the JSON nor in
        # the node sheet): report the row and skip it rather than crash.
        print(row)
        continue
    # setdefault tolerates pre-existing place objects that lack an 'l' list.
    place_obj.setdefault("l", []).append(
        {"g1": row.genre1, "g2": row.genre2, "c": row.count}
    )

# Compact separators keep the output file small.
with open(json_file, "w") as jsonFile:
    json.dump(json_output, jsonFile, separators=(',', ':'))
