In [1]:
import pandas as pd
import numpy as np

# Recreate the `np.int` attribute on the numpy module as an alias of `np.int64`.
# NOTE(review): presumably a compatibility shim for a dependency that still
# references the `np.int` alias removed from recent NumPy releases - confirm
# which library needs it and drop this once it no longer does.
np.int = np.int64

In [2]:
# Load the metadata CSV from the clean data directory and preview its first rows.
# NOTE(review): the preview contains an "Unnamed: 0" column, which suggests the
# CSV was written together with its index - consider `index_col=0` (after
# checking that label-based lookups such as `metadata["midi_filename"][298]`
# still select the same row).
metadata = pd.read_csv("../data/clean/metadata.csv")
metadata.head()

Unnamed: 0,split,midi_filename,duration
0,train,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,464.649433
1,train,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,872.640588
2,validation,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,397.857508
3,validation,2009/MIDI-Unprocessed_07_R1_2009_04-05_ORIG_MI...,400.557826
4,test,2009/MIDI-Unprocessed_11_R1_2009_06-09_ORIG_MI...,163.74583


In [15]:
def group_simultaneous_notes_with_overlap(midi_data, min_overlap=0.05):
    """
    Group notes that are played at the same time or have a significant overlap.

    Notes are processed per instrument in order of start time. A note joins the
    current group when it overlaps at least one note already in that group by
    more than `min_overlap`; otherwise the current group is closed and the note
    starts a new one. Grouping is therefore transitive: a long sustained note
    keeps its group open for every later note that overlaps it.

    Parameters:
    - midi_data: PrettyMIDI object (anything exposing `.instruments`, each with a
      `.notes` list whose items have `.start` and `.end`).
    - min_overlap: Minimum duration of overlap (in seconds) for notes to be considered grouped.

    Returns:
    - List of groups, each a non-empty list of note objects sorted by start time.
      Groups of the first instrument come first, then those of the next, and so on.
    """
    note_groups = []

    for instrument in midi_data.instruments:
        current_group = []
        # Latest end time among the notes of the current group
        group_end = None

        for candidate in sorted(instrument.notes, key=lambda n: n.start):
            # Notes arrive sorted by start, so the candidate starts no earlier
            # than any grouped note. Its largest overlap with the group is
            # therefore min(candidate.end, group_end) - candidate.start, which
            # makes a scan over every grouped note unnecessary.
            if current_group and min(candidate.end, group_end) - candidate.start > min_overlap:
                current_group.append(candidate)
                group_end = max(group_end, candidate.end)
                continue

            # No sufficient overlap: close the current group (if any) and
            # start a new one with this note
            if current_group:
                note_groups.append(current_group)
            current_group = [candidate]
            group_end = candidate.end

        # Append the last group if not empty
        if current_group:
            note_groups.append(current_group)

    return note_groups

In [16]:
import pretty_midi

def note_pitch(note):
    """Return the pitch attribute of a note."""
    return note.pitch


def note_name(note):
    """Return the note name pretty_midi assigns to the note's pitch number."""
    return pretty_midi.note_number_to_name(note_pitch(note))


# Parse the MIDI file listed under label 298 of the metadata table
midi_filename = "../data/raw/" + metadata["midi_filename"][298]
midi_data = pretty_midi.PrettyMIDI(midi_filename)

# Group overlapping notes, then show the first three groups in three forms:
# raw notes, sorted pitch numbers, and sorted note names
note_groups = group_simultaneous_notes_with_overlap(midi_data, min_overlap=0.01)
print(note_groups[:3])

note_pitch_groups = [sorted(map(note_pitch, members)) for members in note_groups]
print(note_pitch_groups[:3])

# Names are sorted as strings, so this order is alphabetical rather than by pitch
note_name_groups = [sorted(map(note_name, members)) for members in note_groups]
print(note_name_groups[:3])

[[Note(start=1.032552, end=1.403646, pitch=86, velocity=93), Note(start=1.032552, end=1.462240, pitch=78, velocity=92), Note(start=1.036458, end=1.440104, pitch=50, velocity=90), Note(start=1.039062, end=1.430990, pitch=74, velocity=90), Note(start=1.040365, end=1.389323, pitch=38, velocity=85), Note(start=1.040365, end=1.433594, pitch=81, velocity=84)], [Note(start=1.875000, end=1.927083, pitch=50, velocity=64), Note(start=1.876302, end=1.908854, pitch=62, velocity=76), Note(start=1.884115, end=1.924479, pitch=42, velocity=64), Note(start=1.895833, end=1.928385, pitch=57, velocity=49)], [Note(start=2.041667, end=2.100260, pitch=42, velocity=70), Note(start=2.042969, end=2.092448, pitch=50, velocity=75), Note(start=2.055990, end=2.098958, pitch=62, velocity=75), Note(start=2.058594, end=2.098958, pitch=57, velocity=65)]]
[[38, 50, 74, 78, 81, 86], [42, 50, 57, 62], [42, 50, 57, 62]]
[['A5', 'D2', 'D3', 'D5', 'D6', 'F#5'], ['A3', 'D3', 'D4', 'F#2'], ['A3', 'D3', 'D4', 'F#2']]


In [17]:
from music21 import chord, note, stream, roman

def detect_chords_from_notes(note_groups, song_key=None):
    """
    Label each group of pitches as a chord using music21.

    Parameters:
    - note_groups: List of groups, where each group is a list of note pitches (e.g., [60, 64, 67]).
    - song_key: Optional key handed to music21's romanNumeralFromChord. When it is
      truthy, every chord is labelled with its Roman-numeral figure relative to
      that key; otherwise the chord's commonName is used.

    Returns:
    - List with one chord label per group, in input order.
    """
    labels = []
    for pitches in note_groups:
        # Wrap every pitch in a music21 Note and build the chord from them
        group_chord = chord.Chord([note.Note(p) for p in pitches])

        if not song_key:
            # Fallback: plain chord name
            labels.append(group_chord.commonName)
            continue

        # Express the chord relative to the key
        labels.append(roman.romanNumeralFromChord(group_chord, song_key).figure)
    return labels

# Convert midi_data from pretty_midi to a music21 stream.
# The stream is bound to its own name so the `stream` module imported from
# music21 is not overwritten by an instance.
note_stream = stream.Stream()
for instrument in midi_data.instruments:
    for n in instrument.notes:
        # Only the pitch is carried over; each note is appended with music21's defaults
        note_stream.append(note.Note(n.pitch))

# Let music21 estimate the key from the collected notes
detected_key = note_stream.analyze("key")
print(f"Key: {detected_key}")

# Detect chords relative to the detected key
detected_chords = detect_chords_from_notes(note_pitch_groups, detected_key)
print(detected_chords[:10])

Key: D major
['I', 'I6', 'I6', 'I6', 'I6', 'ii76432', 'I64b3', 'I64b3', 'I64b3', 'I64b3']


In [18]:
# Lookup table of cadence patterns. Each entry holds:
#   "type"     - label reported by detect_cadences and looked up by
#                detect_keywords_from_cadence
#   "sequence" - two chord labels (previous chord, current chord); "_" is
#                treated as a wildcard by detect_cadences
#   "keywords" - descriptive tags returned by detect_keywords_from_cadence
#   "eras"     - era/genre tags (not read by any code visible in this notebook)
# Order matters: detect_cadences stops at the first entry that matches a chord
# pair, so the wildcard entry is kept last.
cadences = [
    {
        "type": "perfect",
        "sequence": ["V", "I"],
        "keywords": ["resolution", "finality", "strong", "tonal"],
        "eras": ["Classical", "Baroque", "Symphony", "Sonata"]
    },
    {
        "type": "plagal",
        "sequence": ["IV", "I"],
        "keywords": ["sacred", "amen", "soft", "religious"],
        "eras": ["Cantata", "Oratorio", "Wedding", "Sacred Music"]
    },
    {
        "type": "deceptive",
        "sequence": ["V", "vi"],
        "keywords": ["surprise", "emotional", "unexpected", "romantic"],
        "eras": ["Romantic", "Opera", "Symphony"]
    },
    {
        "type": "imperfect",
        "sequence": ["_", "V"],
        "keywords": ["suspense", "unfinished", "open-ended", "modal"],
        "eras": ["Renaissance", "Minimal", "Impressionist", "Early"]
    }
]

def detect_cadences(chords, cadence_patterns=None):
    """
    Detect cadences from a list of chords.

    Every pair of consecutive chords is compared against the cadence patterns
    in order; the first pattern whose two-element "sequence" matches the pair
    contributes its "type" to the result. A "_" in a sequence matches any
    chord. Matching is by exact string equality of the chord labels.

    Parameters:
    - chords: List of chord names.
    - cadence_patterns: Optional list of pattern dicts with "type" and
      "sequence" keys. Defaults to the module-level `cadences` table.

    Returns:
    - List of cadence types, one per matching chord pair, in order of occurrence.
    """
    if cadence_patterns is None:
        cadence_patterns = cadences

    wildcard = "_"

    def matches(expected, actual):
        return expected == wildcard or expected == actual

    cadence_types = []
    for previous_chord, current_chord in zip(chords, chords[1:]):
        for pattern in cadence_patterns:
            sequence = pattern["sequence"]
            if matches(sequence[0], previous_chord) and matches(sequence[1], current_chord):
                cadence_types.append(pattern["type"])
                # Only the first matching pattern counts for this pair
                break
    return cadence_types

detected_cadences = pd.Series(detect_cadences(detected_chords))

# Count each cadence type once and reuse the counts below
cadence_counts = detected_cadences.value_counts()

if cadence_counts.empty:
    # idxmax() raises ValueError on an empty Series, so handle the case where
    # no chord pair matched any cadence pattern
    top_cadence = None
    cadtop_cadence_count = 0
    print("No cadences detected")
else:
    top_cadence = cadence_counts.idxmax()
    cadtop_cadence_count = cadence_counts.max()
    print(f"Most frequent cadence: {top_cadence} ({cadtop_cadence_count} occurrences)")

Most frequent cadence: imperfect (174 occurrences)


In [19]:
def detect_keywords_from_cadence(cadence, cadence_patterns=None):
    """
    Detect keywords from a cadence type.

    Parameters:
    - cadence: Cadence type.
    - cadence_patterns: Optional list of pattern dicts with "type" and
      "keywords" keys. Defaults to the module-level `cadences` table.

    Returns:
    - List of keywords of the first pattern whose "type" equals `cadence`, or an
      empty list when no pattern matches. The list is a copy, so modifying it
      does not alter the pattern table.
    """
    if cadence_patterns is None:
        cadence_patterns = cadences

    for pattern in cadence_patterns:
        if pattern["type"] == cadence:
            # Copy so callers cannot mutate the shared table through the result
            return list(pattern["keywords"])
    return []

keywords = detect_keywords_from_cadence(top_cadence)
print(f"Keywords: {keywords}")

Keywords: ['suspense', 'unfinished', 'open-ended', 'modal']
