# Looking at chord progressions

In this notebook I'm going to look at chord progressions (overall, rather than for specific sections).

In [2]:
import re
from itertools import chain
import json
import os
from itertools import chain
import pandas as pd
import music_functions as mf

First, get chord-progression Markov chains for each section.

In [3]:
input_dir = "/Volumes/SECONDDRIVE/prog/ug/chord_dicts/"

# For every decade and every section label, build a normalized transition table
# and write it to Output/SectionStructure/<decade>_<label>.csv.
for decade in ['1970', '1980', '1990', '2000', '2010']:
    # read in the structure dictionary
    decade_dir = input_dir + decade + "/"
    # decade_dir already ends with "/", so join the file name directly instead of
    # inserting another "/" (the old code produced ".../1970//file" paths).
    chord_files = [os.path.join(decade_dir, x) for x in os.listdir(decade_dir)]
    structure_dict = mf.get_chord_structure_dict(chord_files)

    # read in the distribution stats (to know what section labels to use)
    structure_vocab = pd.read_csv("Output/SongStructure/" + decade + "_song_structure.csv")
    labels = list(structure_vocab.iloc[:, 0].values)
    # "StartOfSong" is not processed; guard the removal so a decade whose CSV
    # lacks that label does not raise ValueError.
    if "StartOfSong" in labels:
        labels.remove("StartOfSong")

    for label in labels:
        # NOTE(review): the meaning of the second argument (10) is defined by
        # mf.clean_structure_dict and is not visible here - confirm before changing.
        cleaned = mf.clean_structure_dict(structure_dict[label], 10)
        structure_df = pd.DataFrame.from_dict(cleaned).fillna(0)

        # Scale every column so that it sums to 1 ...
        column_totals = structure_df.sum(axis=0)
        structure_df = structure_df.div(column_totals, axis="columns")

        # ... then transpose, so each row of the saved table sums to 1.
        structure_df = structure_df.transpose()

        structure_df.to_csv("Output/SectionStructure/" + decade + "_" + label + ".csv")

TODO: next, build an overall chord-progression Markov chain (across whole songs rather than per section).

In [2]:
# Build the chord structure dictionary from every file in the 1970 directory.
decade_dir = "/Volumes/SECONDDRIVE/prog/ug/chord_dicts/1970/"

chord_paths = []
for entry in os.listdir(decade_dir):
    # decade_dir ends with "/", so plain concatenation yields the full path
    chord_paths.append(decade_dir + entry)

structure_dict = mf.get_chord_structure_dict(chord_paths)


TODO: get chord statistics

In [3]:
input_dir = "/Volumes/SECONDDRIVE/prog/ug/chord_dicts/"

# Column order of the per-song statistics table written to Output/ChordStats/.
stat_columns = ['song_name', 'total_chords', 'chords_in_key_sig', 'chords_not_in_key_sig',
                'num_unique_chords', 'most_frequent_chords', 'tonic', 'tonic_frequency']

# For each decade, compute per-song chord statistics and collect the raw chord
# counts, then write one CSV (stats) and one JSON (counts) per decade.
for decade in ['1970', '1980', '1990', '2000', '2010']:
    decade_dir = input_dir + decade + "/"

    # Rows are gathered in a list and turned into a DataFrame once per decade:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    song_records = []
    all_chord_dict = {}

    for fname in os.listdir(decade_dir):
        with open(decade_dir + fname) as json_file:
            data = json.load(json_file)

        tonic = data['Tonic']

        # Mapping of chord numeral -> count (it is indexed by numeral and
        # serialized with json.dump below).
        all_chords = mf.get_all_chords_numeric(data)

        # NOTE(review): split('.')[0] cuts the name at the FIRST dot, so a file
        # such as "Mr. Jones.json" becomes "Mr". Kept as-is so existing output
        # keys do not change - confirm whether any file names contain dots.
        song_name = fname.split('.')[0]
        all_chord_dict[song_name] = all_chords

        # A tonic ending in "m" is treated as a minor key.
        if tonic.endswith("m"):
            in_key_sig = mf.numerals_minor
            tonic_numeral = 'i'
        else:
            in_key_sig = mf.numerals_major
            tonic_numeral = 'I'

        # Only look up the numeral that applies to this key, and fall back to 0
        # when the song never plays its tonic chord (previously all_chords['I']
        # was read unconditionally and could raise KeyError).
        tonic_frequency = all_chords.get(tonic_numeral, 0)

        total_chords = sum(all_chords.values())
        chords_in_key_sig = sum(count for numeral, count in all_chords.items()
                                if numeral in in_key_sig)

        # Compute the top count once instead of once per key; default=0 keeps an
        # empty chord dict from raising.
        highest_count = max(all_chords.values(), default=0)
        most_frequent = [numeral for numeral, count in all_chords.items()
                         if count == highest_count]

        song_records.append({
            'song_name': song_name,
            'total_chords': total_chords,
            'chords_in_key_sig': chords_in_key_sig,
            'chords_not_in_key_sig': total_chords - chords_in_key_sig,
            'num_unique_chords': len(all_chords),
            'most_frequent_chords': ','.join(most_frequent),
            'tonic': tonic,
            'tonic_frequency': tonic_frequency
        })

    # Write the outputs once per decade rather than after every song.
    # An empty decade directory now yields a header-only CSV and "{}" JSON.
    df = pd.DataFrame(song_records, columns=stat_columns)
    df.to_csv("Output/ChordStats/" + decade + ".csv", index=False)

    with open("Output/ChordCounts/" + decade + ".json", "w") as outfile:
        json.dump(all_chord_dict, outfile)

    