# Looking at overall song structure

In this notebook, I look at overall song structure, i.e. the order in which sections follow one another (intro -> verse -> chorus, and so on).

In [1]:
import re
from itertools import chain
import json
import os
from itertools import chain
import pandas as pd

In [19]:
# normalise a region label: lower-case, no digits, no surrounding whitespace
def clean_region(rname):
    """Return ``rname`` lower-cased with every digit character removed.

    Leading and trailing whitespace is stripped after the digits are dropped,
    so a label such as "Verse 2" becomes "verse".
    """
    lowered = rname.lower()
    without_digits = filter(lambda ch: not ch.isdigit(), lowered)
    return ''.join(without_digits).strip()

# build the section-transition counts for a list of files
def get_structure_dict(file_list):
    """Count section-to-section transitions across a list of JSON files.

    Every file is parsed with ``json.load``. For each top-level key of the
    parsed object, the entry's ``'type'`` string has its first and last
    characters dropped (presumably surrounding brackets -- TODO confirm
    against the data) and is then normalised with ``clean_region``. An
    ``"EOS"`` marker is appended after the last section of each file.

    Returns a nested dict ``{section: {next_section: count}}`` accumulated
    over all files in ``file_list``.
    """
    transition_counts = {}

    for path in file_list:
        with open(path) as handle:
            song = json.load(handle)

        labels = [clean_region(song[key]['type'][1:-1]) for key in song]
        labels.append("EOS")

        # walk over consecutive (current, next) section pairs
        for current, following in zip(labels, labels[1:]):
            successors = transition_counts.setdefault(current, {})
            successors[following] = successors.get(following, 0) + 1

    return transition_counts
    
# TODO: fix this
def clean_structure_dict(structure_dict):
    """Drop sections whose outgoing transition counts sum to exactly 1.

    Such a section shows up only once in the data, so it is probably specific
    to the tab it was taken from. Each of these sections is removed both as a
    top-level key and as a target inside every other section's sub-dict.

    ``structure_dict`` is modified in place and the same object is returned.
    Only a single pass is made: sections whose totals drop to 1 (or whose
    sub-dict becomes empty) because of the removal are kept as they are.
    """
    # sections whose outgoing counts add up to one
    rare_sections = [
        section
        for section, successors in structure_dict.items()
        if sum(successors.values()) == 1
    ]

    # nothing to prune
    if not rare_sections:
        return structure_dict

    # forget the rare sections as transition targets ...
    for successors in structure_dict.values():
        for section in rare_sections:
            successors.pop(section, None)

    # ... and as transition sources
    for section in rare_sections:
        del structure_dict[section]

    return structure_dict
        
        

## Collecting data

In [20]:
# Directory containing the JSON files read by get_structure_dict; adjust to
# wherever the data lives locally.
input_dir = "/Volumes/SECONDDRIVE/prog/ug/chord_dicts/2000/"

# os.listdir returns every entry, including hidden files (e.g. ".DS_Store" or
# "._*" on macOS volumes) and sub-directories, which open()/json.load() cannot
# handle. Keep regular, non-hidden files only, and build the paths with
# os.path.join so a missing trailing slash in input_dir does not matter.
chord_dict_paths = [
    os.path.join(input_dir, entry)
    for entry in os.listdir(input_dir)
    if not entry.startswith(".")
    and os.path.isfile(os.path.join(input_dir, entry))
]

structure_dict = clean_structure_dict(get_structure_dict(chord_dict_paths))

In [21]:
# Show the cleaned transition counts: {section: {next_section: count}}.
print(structure_dict)

{'intro': {'verse': 14}, 'verse': {'chorus': 44, 'instrumental': 1, 'pre-chorus': 10, 'verse': 4, 'bridge': 1, 'EOS': 1}, 'chorus': {'verse': 24, 'bridge': 11, 'EOS': 7, 'instrumental': 8, 'solo': 2, 'outro': 12, 'chorus': 3, 'pre-chorus': 2}, 'bridge': {'solo': 1, 'chorus': 11, 'outro': 1, 'pre-chorus': 1, 'verse': 4}, 'solo': {'verse': 2, 'chorus': 1}, 'instrumental': {'bridge': 3, 'verse': 2, 'chorus': 1, 'outro': 1, 'solo': 1}, 'outro': {'EOS': 13}, 'pre-chorus': {'chorus': 12, 'verse': 2}}


## Getting transition probability matrix

In [22]:
# Outer keys (current section) become the columns and inner keys (next
# section) become the index; transitions that were never seen are set to 0.
structure_df = pd.DataFrame(structure_dict).fillna(0)

In [23]:
# Raw counts: columns = current section, rows = section that follows it.
structure_df

Unnamed: 0,intro,verse,chorus,bridge,solo,instrumental,outro,pre-chorus
verse,14.0,4.0,24,4.0,2.0,2.0,0.0,2.0
chorus,0.0,44.0,3,11.0,1.0,1.0,0.0,12.0
instrumental,0.0,1.0,8,0.0,0.0,0.0,0.0,0.0
pre-chorus,0.0,10.0,2,1.0,0.0,0.0,0.0,0.0
bridge,0.0,1.0,11,0.0,0.0,3.0,0.0,0.0
EOS,0.0,1.0,7,0.0,0.0,0.0,13.0,0.0
solo,0.0,0.0,2,1.0,0.0,1.0,0.0,0.0
outro,0.0,0.0,12,1.0,0.0,1.0,0.0,0.0


In [24]:
# Turn the counts into probabilities: divide every column (current section)
# by its own total so that each column sums to 1.
structure_df = structure_df.div(structure_df.sum(axis=0), axis="columns")

In [25]:
# Column-normalised table: each column holds the next-section probabilities
# for the section named in its header.
structure_df

Unnamed: 0,intro,verse,chorus,bridge,solo,instrumental,outro,pre-chorus
verse,1.0,0.065574,0.347826,0.222222,0.666667,0.25,0.0,0.142857
chorus,0.0,0.721311,0.043478,0.611111,0.333333,0.125,0.0,0.857143
instrumental,0.0,0.016393,0.115942,0.0,0.0,0.0,0.0,0.0
pre-chorus,0.0,0.163934,0.028986,0.055556,0.0,0.0,0.0,0.0
bridge,0.0,0.016393,0.15942,0.0,0.0,0.375,0.0,0.0
EOS,0.0,0.016393,0.101449,0.0,0.0,0.0,1.0,0.0
solo,0.0,0.0,0.028986,0.055556,0.0,0.125,0.0,0.0
outro,0.0,0.0,0.173913,0.055556,0.0,0.125,0.0,0.0


In [26]:
# Flip the table so rows are the current section and columns the next one.
# NOTE: this rebinds structure_df, so running the cell twice flips it back.
structure_df = structure_df.T

In [27]:
# Transition matrix: rows = current section, columns = next section.
structure_df

Unnamed: 0,verse,chorus,instrumental,pre-chorus,bridge,EOS,solo,outro
intro,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
verse,0.065574,0.721311,0.016393,0.163934,0.016393,0.016393,0.0,0.0
chorus,0.347826,0.043478,0.115942,0.028986,0.15942,0.101449,0.028986,0.173913
bridge,0.222222,0.611111,0.0,0.055556,0.0,0.0,0.055556,0.055556
solo,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
instrumental,0.25,0.125,0.0,0.0,0.375,0.0,0.125,0.125
outro,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
pre-chorus,0.142857,0.857143,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Write the transition matrix (including its row labels) to a CSV file in the
# current working directory.
structure_df.to_csv("song_structure.csv")

In [None]:
#TODO: get stats for what is most frequently started with

In [None]:
#TODO: get distribution of the number of sections

## Visualize results

In [13]:
#TODO: define a specific node shape & color for each column name in structure_df 

In [14]:
#TODO: write a function to visualize the structure of an individual song

In [15]:
#TODO: visualize the overall markov chain