<a href="https://colab.research.google.com/github/somesyd/Colab/blob/main/notebooks/genre_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Authenticate the Colab runtime with your Google account.
# NOTE(review): presumably this is what grants the later gcloud/gsutil cells
# access to the storage bucket — confirm.
from google.colab import auth
auth.authenticate_user()

In [6]:
# file names of the two metadata CSVs (bucket object name == local file name)
tracks, genres = 'tracks.csv', 'genres.csv'

In [7]:
# Link to the Google Cloud Platform project with the storage bucket.
# The `!` line is run as a shell command; {project_id} is replaced with the
# value of the Python variable before the command executes.
project_id = 'audio-philes-test'
!gcloud config set project {project_id}

Updated property [core/project].


In [10]:
# copy the tracks and genres metadata files from the fma_metadata folder
#   audio_philes_data is the Google Storage bucket name
#   {tracks} and {genres} are filled in from the Python variables of the same name
#   saving to the /tmp directory
!gsutil cp gs://audio_philes_data/fma_metadata/{tracks} /tmp/{tracks}
!gsutil cp gs://audio_philes_data/fma_metadata/{genres} /tmp/{genres}

# print the first line of the tracks file to check the transfer was successful
!head -n 1 /tmp/{tracks}

Copying gs://audio_philes_data/fma_metadata/tracks.csv...
- [1 files][248.4 MiB/248.4 MiB]                                                
Operation completed over 1 objects/248.4 MiB.                                    
Copying gs://audio_philes_data/fma_metadata/genres.csv...
- [1 files][  3.8 KiB/  3.8 KiB]                                                
Operation completed over 1 objects/3.8 KiB.                                      
,album,album,album,album,album,album,album,album,album,album,album,album,album,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,artist,set,set,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track,track


In [11]:
import numpy as np
import pandas as pd
from google.colab import data_table
# switch on Colab's data_table formatter for DataFrame output in later cells
data_table.enable_dataframe_formatter()

In [12]:
# Load the track metadata. The file has a multi-row header: the first line
# only carries group names (album/artist/set/track), so the second line
# (header=1) is used for the column labels.
# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the stray `exec(code_obj, ...)` line in the recorded output
# looks like the tail of a mixed-type DtypeWarning, which this setting avoids.
df = pd.read_csv("/tmp/" + tracks, header=1, low_memory=False)

# fix issues with csv file:

# column 0 should be 'track_id' label; rename() builds a fresh column index
# instead of writing into the buffer of the (nominally immutable) Index
df = df.rename(columns={df.columns[0]: 'track_id'})

# remove bad row from displayed data
df = df.iloc[1:]

# print column names
df.columns


  exec(code_obj, self.user_global_ns, self.user_ns)


Index(['track_id', 'comments', 'date_created', 'date_released', 'engineer',
       'favorites', 'id', 'information', 'listens', 'producer', 'tags',
       'title', 'tracks', 'type', 'active_year_begin', 'active_year_end',
       'associated_labels', 'bio', 'comments.1', 'date_created.1',
       'favorites.1', 'id.1', 'latitude', 'location', 'longitude', 'members',
       'name', 'related_projects', 'tags.1', 'website', 'wikipedia_page',
       'split', 'subset', 'bit_rate', 'comments.2', 'composer',
       'date_created.2', 'date_recorded', 'duration', 'favorites.2',
       'genre_top', 'genres', 'genres_all', 'information.1', 'interest',
       'language_code', 'license', 'listens.1', 'lyricist', 'number',
       'publisher', 'tags.2', 'title.1'],
      dtype='object')

In [13]:
# keep only the track id and the three genre-related columns, then preview
tracks_genre = df.loc[:, ['track_id', 'genres', 'genre_top', 'genres_all']]
tracks_genre.head(20)

Unnamed: 0,track_id,genres,genre_top,genres_all
1,2,[21],Hip-Hop,[21]
2,3,[21],Hip-Hop,[21]
3,5,[21],Hip-Hop,[21]
4,10,[10],Pop,[10]
5,20,"[76, 103]",,"[17, 10, 76, 103]"
6,26,"[76, 103]",,"[17, 10, 76, 103]"
7,30,"[76, 103]",,"[17, 10, 76, 103]"
8,46,"[76, 103]",,"[17, 10, 76, 103]"
9,48,"[76, 103]",,"[17, 10, 76, 103]"
10,134,[21],Hip-Hop,[21]


In [14]:
# load the genre table that was copied to /tmp
df2 = pd.read_csv(f"/tmp/{genres}")
df2

Unnamed: 0,genre_id,#tracks,parent,title,top_level
0,1,8693,38,Avant-Garde,38
1,2,5271,0,International,2
2,3,1752,0,Blues,3
3,4,4126,0,Jazz,4
4,5,4106,0,Classical,5
...,...,...,...,...,...
158,1032,60,102,Turkish,2
159,1060,30,46,Tango,2
160,1156,26,130,Fado,2
161,1193,72,763,Christmas,38


In [15]:
# pull the four genre columns out of the table as NumPy arrays
genre_id, parent_id, genre_title, top_level = (
  df2[column].to_numpy()
  for column in ('genre_id', 'parent', 'title', 'top_level')
)
genre_title

array(['Avant-Garde', 'International', 'Blues', 'Jazz', 'Classical',
       'Novelty', 'Comedy', 'Old-Time / Historic', 'Country', 'Pop',
       'Disco', 'Rock', 'Easy Listening', 'Soul-RnB', 'Electronic',
       'Sound Effects', 'Folk', 'Soundtrack', 'Funk', 'Spoken', 'Hip-Hop',
       'Audio Collage', 'Punk', 'Post-Rock', 'Lo-Fi', 'Field Recordings',
       'Metal', 'Noise', 'Psych-Folk', 'Krautrock', 'Jazz: Vocal',
       'Experimental', 'Electroacoustic', 'Ambient Electronic',
       'Radio Art', 'Loud-Rock', 'Latin America', 'Drone', 'Free-Folk',
       'Noise-Rock', 'Psych-Rock', 'Bluegrass', 'Electro-Punk', 'Radio',
       'Indie-Rock', 'Industrial', 'No Wave', 'Free-Jazz',
       'Experimental Pop', 'French', 'Reggae - Dub', 'Afrobeat',
       'Nerdcore', 'Garage', 'Indian', 'New Wave', 'Post-Punk', 'Sludge',
       'African', 'Freak-Folk', 'Jazz: Out', 'Progressive',
       'Alternative Hip-Hop', 'Death-Metal', 'Middle East',
       'Singer-Songwriter', 'Ambient', 'Hardcore', 

In [16]:
# dictionary lookup mapping each genre_id to its top_level genre id
# NOTE(review): the stored values come from `top_level`, not `parent` —
# confirm that is the intended mapping
genre_dict = dict(zip(genre_id, top_level))

# (genre_id, title) tuples for the root genres, i.e. rows whose parent id is 0
root_genres = [
  (gid, title)
  for gid, parent, title in zip(genre_id, parent_id, genre_title)
  if parent == 0
]
root_genres


[(2, 'International'),
 (3, 'Blues'),
 (4, 'Jazz'),
 (5, 'Classical'),
 (8, 'Old-Time / Historic'),
 (9, 'Country'),
 (10, 'Pop'),
 (12, 'Rock'),
 (13, 'Easy Listening'),
 (14, 'Soul-RnB'),
 (15, 'Electronic'),
 (17, 'Folk'),
 (20, 'Spoken'),
 (21, 'Hip-Hop'),
 (38, 'Experimental'),
 (1235, 'Instrumental')]

In [None]:
!pip install dash
!pip install jupyter-dash
!pip install dash-cytoscape

In [25]:
from dash import Dash, html
import dash_cytoscape as cyto
from jupyter_dash import JupyterDash

In [None]:
# Assemble the graph elements: one node per genre, plus one edge running from
# the parent genre to the genre for every genre whose parent id is not 0.
elements = []
edges = []
for gid, parent, title in zip(genre_id, parent_id, genre_title):

  # node: ids are passed as strings, the label is the genre title
  node = {'data': {'id': str(gid), 'label': title}}
  elements.append(node)

  if parent == 0:
    # root genre: no edge, tag the node so the top-level style applies
    node['classes'] = 'top_level'
    continue

  # edge from the parent genre to this genre
  edges.append({'data': {'source': str(parent), 'target': str(gid)}})

# nodes come first in the list, followed by all edges
elements.extend(edges)

In [52]:
# Build Dash App
app = JupyterDash(__name__)

# every node shows its 'label' data field; nodes tagged with the
# 'top_level' class are drawn in blue
graph_stylesheet = [
  {
    'selector': 'node',
    'style': {'content': 'data(label)'},
  },
  {
    'selector': '.top_level',
    'style': {'background-color': 'blue', 'line-color': 'blue'},
  },
]

# the genre graph itself, laid out breadth-first
genre_graph = cyto.Cytoscape(
  id='cytoscape',
  elements=elements,
  layout={'name': 'breadthfirst'},
  style={'width': '100%', 'height': '400px'},
  stylesheet=graph_stylesheet,
)

app.layout = html.Div([html.H2("Genre Network"), genre_graph])

# render the app inline in the notebook output
app.run_server(mode='inline')

<IPython.core.display.Javascript object>