In [None]:
import pandas as pd
import json
import music21

In [None]:
# Load the chart data; the `date` column is parsed into datetimes on read.
df = pd.read_csv('chordify_200_1991-01-05_to_2022-10-01.csv', parse_dates=['date'])

# Rewrite key labels into the spelling later passed to music21.key.Key:
# 'b' -> '-', 's' -> '#', '_maj' removed, '_min' -> 'm'
# (presumably flat / sharp / major / minor markers -- confirm against the data).
# Missing keys become '' so that roman_numeral() can skip them.
df['key_clean'] = df['key'].fillna('').replace(
    ['b', 's', '_maj', '_min'],
    ['-', '#', '', 'm'], regex=True
)

# Each `chords` value is JSON text that is parsed with json.loads and later
# iterated chord by chord. Missing values must therefore be filled with the
# JSON text of an empty array, '[]'. Filling with '"[]"' (a JSON *string*)
# makes json.loads return the two-character str "[]", whose characters '['
# and ']' would then be treated as two bogus chords.
df['chords_clean'] = df['chords'].fillna('[]').replace(
    ['b_', 's_', '_', 'min', 'maj6', 'maj9', 'maj11', 'maj13'],
    ['-', '#', '', 'm', '6', 'M9', 'M11', 'M13'], regex=True
).apply(json.loads)

In [None]:
symbol_for_chord = {} # Cache of chord string -> m21 chord symbol
roman_numeral_for_chord_key = {} # Cache of (chord string, key string) -> roman numeral (None if it could not be computed)

def roman_numeral(chord, key):
    """Return the roman-numeral figure of `chord` in `key`, or None.

    Parameters
    ----------
    chord : chord symbol text passed to music21.harmony.ChordSymbol.
    key : key text passed to music21.key.Key; a falsy key (e.g. '') yields None.

    Returns
    -------
    The `romanNumeral.romanNumeral` attribute of the chord symbol once its key
    has been set, or None when the key is empty or music21 raises.

    Results, including failures, are memoised per (chord, key) pair, so every
    distinct pair is analysed and reported at most once.
    """
    try:
        if not key:
            return None
        cache_key = (chord, key)
        # Look the result up before doing any music21 work: building a Key and
        # assigning it on every call would defeat the purpose of the cache.
        if cache_key in roman_numeral_for_chord_key:
            return roman_numeral_for_chord_key[cache_key]
    except Exception as er:
        # E.g. an unhashable chord value that cannot be used as a cache key.
        print(f'roman_numeral({chord!r}, {key!r}) failed: {er}')
        return None

    try:
        if chord not in symbol_for_chord:
            symbol_for_chord[chord] = music21.harmony.ChordSymbol(chord)
        chord_symbol = symbol_for_chord[chord]
        # NOTE(review): the cached ChordSymbol is shared between keys and its
        # `key` is reassigned on every cache miss -- confirm that music21
        # recomputes `romanNumeral` after the key changes.
        chord_symbol.key = music21.key.Key(key)
        numeral = chord_symbol.romanNumeral.romanNumeral
    except Exception as er:
        # Best effort: report the offending pair once and fall back to None.
        print(f'roman_numeral({chord!r}, {key!r}) failed: {er}')
        numeral = None

    # Remember failures too, so a bad pair is not re-parsed and re-reported
    # for every row it appears in.
    roman_numeral_for_chord_key[cache_key] = numeral
    return numeral

In [None]:
def _roman_numerals_json(row):
    """Return the row's chords as a JSON array of roman numerals in the row's key."""
    numerals = [roman_numeral(chord, row.key_clean) for chord in row.chords_clean]
    return json.dumps(numerals)


# One JSON-encoded list per row; entries that could not be analysed are null.
df['roman_numerals'] = df.apply(_roman_numerals_json, axis=1)

In [None]:
# Remove the intermediate helper columns before writing the result.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second run would otherwise raise KeyError.
df = df.drop(columns=['key_clean', 'chords_clean'], errors='ignore')
df.to_csv('chordify_with_rn_200_1991-01-05_to_2022-10-01.csv', index=False)

In [None]:
# Show the final frame as the cell output for a visual check.
df