In [46]:
import music21
from collections import Counter
import re

In [47]:

# Static music-theory lookup tables used by ChordAnalyzer.
#
# Spelling conventions used throughout: "#" = sharp, "b" = flat,
# "##" or "x" = double sharp, "bb" = double flat, trailing "m" = minor,
# trailing "dim" = diminished triad, trailing "+" = augmented triad.
DATABASE = {
    # The 15 major keys (0-7 sharps, then 1-7 flats).
    "MAJOR_KEYS": ["C", "G", "D", "A", "E", "B", "F#", "C#", "F", "Bb", "Eb", "Ab", "Db", "Gb", "Cb"],

    # Major key -> its relative minor key.
    "MINOR_KEYS": {
        "C":  "Am",  "G":  "Em",  "D":  "Bm",  "A":  "F#m", "E":  "C#m",
        "B":  "G#m", "F#": "D#m", "C#": "A#m", "F":  "Dm",   "Bb": "Gm",
        "Eb": "Cm",  "Ab": "Fm",  "Db": "Bbm", "Gb": "Ebm", "Cb": "Abm"
    },

    # Major key -> the seven notes of its scale, starting on the tonic.
    "MAJOR_NOTES" : {
        "C": ["C", "D", "E", "F", "G", "A", "B"],
        "G": ["G", "A", "B", "C", "D", "E", "F#"],
        "D": ["D", "E", "F#", "G", "A", "B", "C#"],
        "A": ["A", "B", "C#", "D", "E", "F#", "G#"],
        "E": ["E", "F#", "G#", "A", "B", "C#", "D#"],
        "B": ["B", "C#", "D#", "E", "F#", "G#", "A#"],
        "F#": ["F#", "G#", "A#", "B", "C#", "D#", "E#"],
        "C#": ["C#", "D#", "E#", "F#", "G#", "A#", "B#"],
        "F": ["F", "G", "A", "Bb", "C", "D", "E"],
        "Bb": ["Bb", "C", "D", "Eb", "F", "G", "A"],
        "Eb": ["Eb", "F", "G", "Ab", "Bb", "C", "D"],
        "Ab": ["Ab", "Bb", "C", "Db", "Eb", "F", "G"],
        "Db": ["Db", "Eb", "F", "Gb", "Ab", "Bb", "C"],
        "Gb": ["Gb", "Ab", "Bb", "Cb", "Db", "Eb", "F"],
        "Cb": ["Cb", "Db", "Eb", "Fb", "Gb", "Ab", "Bb"]
        },

    # Minor key -> natural / harmonic (raised 7th) / melodic (raised 6th and 7th) scales.
    "MINOR_NOTES" : {
        # A minor group
        "Am": {
            "natural": ["A", "B", "C", "D", "E", "F", "G"],
            "harmonic": ["A", "B", "C", "D", "E", "F", "G#"],
            "melodic": ["A", "B", "C", "D", "E", "F#", "G#"]
        },

        # E minor group
        "Em": {
            "natural": ["E", "F#", "G", "A", "B", "C", "D"],
            "harmonic": ["E", "F#", "G", "A", "B", "C", "D#"],
            "melodic": ["E", "F#", "G", "A", "B", "C#", "D#"]
        },

        # B minor group
        "Bm": {
            "natural": ["B", "C#", "D", "E", "F#", "G", "A"],
            "harmonic": ["B", "C#", "D", "E", "F#", "G", "A#"],
            "melodic": ["B", "C#", "D", "E", "F#", "G#", "A#"]
        },

        # F# minor
        "F#m": {
            "natural": ["F#", "G#", "A", "B", "C#", "D", "E"],
            "harmonic": ["F#", "G#", "A", "B", "C#", "D", "E#"],
            "melodic": ["F#", "G#", "A", "B", "C#", "D#", "E#"]
        },

        # C# minor
        "C#m": {
            "natural": ["C#", "D#", "E", "F#", "G#", "A", "B"],
            "harmonic": ["C#", "D#", "E", "F#", "G#", "A", "B#"],
            "melodic": ["C#", "D#", "E", "F#", "G#", "A#", "B#"]
        },

        # G# minor
        "G#m": {
            "natural": ["G#", "A#", "B", "C#", "D#", "E", "F#"],
            "harmonic": ["G#", "A#", "B", "C#", "D#", "E", "Fx"],  # F## = Fx
            "melodic": ["G#", "A#", "B", "C#", "D#", "E#", "Fx"]
        },

        # D# minor
        "D#m": {
            "natural": ["D#", "E#", "F#", "G#", "A#", "B", "C#"],
            "harmonic": ["D#", "E#", "F#", "G#", "A#", "B", "C##"],
            "melodic": ["D#", "E#", "F#", "G#", "A#", "B#", "C##"]
        },

        # A# minor / Bb minor (enharmonic pair)
        "A#m": {
            "natural": ["A#", "B#", "C#", "D#", "E#", "F#", "G#"],
            "harmonic": ["A#", "B#", "C#", "D#", "E#", "F#", "G##"],
            "melodic": ["A#", "B#", "C#", "D#", "E#", "Fx", "G##"]
        },

        "Bbm": {
            "natural": ["Bb", "C", "Db", "Eb", "F", "Gb", "Ab"],
            "harmonic": ["Bb", "C", "Db", "Eb", "F", "Gb", "A"],
            "melodic": ["Bb", "C", "Db", "Eb", "F", "G", "A"]
        },

        # Eb minor (relative minor of Gb; previously missing from this table)
        "Ebm": {
            "natural": ["Eb", "F", "Gb", "Ab", "Bb", "Cb", "Db"],
            "harmonic": ["Eb", "F", "Gb", "Ab", "Bb", "Cb", "D"],
            "melodic": ["Eb", "F", "Gb", "Ab", "Bb", "C", "D"]
        },

        # Ab minor (relative minor of Cb; previously missing from this table)
        "Abm": {
            "natural": ["Ab", "Bb", "Cb", "Db", "Eb", "Fb", "Gb"],
            "harmonic": ["Ab", "Bb", "Cb", "Db", "Eb", "Fb", "G"],
            "melodic": ["Ab", "Bb", "Cb", "Db", "Eb", "F", "G"]
        },

        # F minor
        "Fm": {
            "natural": ["F", "G", "Ab", "Bb", "C", "Db", "Eb"],
            "harmonic": ["F", "G", "Ab", "Bb", "C", "Db", "E"],
            "melodic": ["F", "G", "Ab", "Bb", "C", "D", "E"]
        },

        # C minor
        "Cm": {
            "natural": ["C", "D", "Eb", "F", "G", "Ab", "Bb"],
            "harmonic": ["C", "D", "Eb", "F", "G", "Ab", "B"],
            "melodic": ["C", "D", "Eb", "F", "G", "A", "B"]
        },

        # G minor
        "Gm": {
            "natural": ["G", "A", "Bb", "C", "D", "Eb", "F"],
            "harmonic": ["G", "A", "Bb", "C", "D", "Eb", "F#"],
            "melodic": ["G", "A", "Bb", "C", "D", "E", "F#"]
        },

        # D minor
        "Dm": {
            "natural": ["D", "E", "F", "G", "A", "Bb", "C"],
            "harmonic": ["D", "E", "F", "G", "A", "Bb", "C#"],
            "melodic": ["D", "E", "F", "G", "A", "B", "C#"]
        }
    },

    # Key -> its seven chords, listed by scale degree (index 0 = degree 1).
    # Minor keys list a major chord on degree 5.
    "CHORDS_BY_KEY": {
        "C":  ["C",  "Dm",  "Em",  "F",  "G",  "Am",  "Bdim"],
        "G":  ["G",  "Am",  "Bm",  "C",  "D",  "Em",  "F#dim"],
        "D":  ["D",  "Em",  "F#m","G",  "A",  "Bm",  "C#dim"],
        "A":  ["A",  "Bm",  "C#m","D",  "E",  "F#m","G#dim"],
        "E":  ["E",  "F#m","G#m","A",  "B",  "C#m","D#dim"],
        "B":  ["B",  "C#m","D#m","E",  "F#", "G#m","A#dim"],
        "F#": ["F#", "G#m","A#m","B",  "C#", "D#m","E#dim"],
        "C#": ["C#", "D#m","E#m","F#", "G#", "A#m","B#dim"],

        "F":  ["F",  "Gm", "Am", "Bb", "C",  "Dm", "Edim"],
        "Bb": ["Bb", "Cm", "Dm", "Eb", "F",  "Gm", "Adim"],
        "Eb": ["Eb", "Fm", "Gm", "Ab", "Bb", "Cm", "Ddim"],
        "Ab": ["Ab", "Bbm","Cm", "Db", "Eb", "Fm", "Gdim"],
        "Db": ["Db", "Ebm","Fm", "Gb", "Ab", "Bbm","Cdim"],
        "Gb": ["Gb", "Abm","Bbm","Cb","Db", "Ebm","Fdim"],
        "Cb": ["Cb", "Dbm","Ebm","Fb","Gb", "Abm","Bbdim"],


        "Am":  ["Am",  "Bdim", "C",  "Dm",  "E",   "F",   "G"],
        "Em":  ["Em",  "F#dim","G",  "Am",  "B",   "C",   "D"],
        "Bm":  ["Bm",  "C#dim","D",  "Em",  "F#",  "G",   "A"],
        "F#m": ["F#m", "G#dim","A",  "Bm",  "C#",  "D",   "E"],
        "C#m": ["C#m", "D#dim","E",  "F#m", "G#",  "A",   "B"],
        "G#m": ["G#m", "A#dim","B",  "C#m", "D#",  "E",   "F#"],
        "D#m": ["D#m", "E#dim","F#", "G#m", "A#",  "B",   "C#"],
        "A#m": ["A#m", "B#dim","C#", "D#m", "E#",  "F#",  "G#"],

        "Dm":  ["Dm",  "Edim", "F",  "Gm",  "A",   "Bb",  "C"],
        "Gm":  ["Gm",  "Adim", "Bb", "Cm",  "D",   "Eb",  "F"],
        "Cm":  ["Cm",  "Ddim", "Eb", "Fm",  "G",   "Ab",  "Bb"],
        "Fm":  ["Fm",  "Gdim", "Ab", "Bbm", "C",   "Db",  "Eb"],
        "Bbm": ["Bbm", "Cdim", "Db", "Ebm", "F",   "Gb",  "Ab"],
        "Ebm": ["Ebm", "Fdim", "Gb", "Abm", "Bb",  "Cb",  "Db"],
        "Abm": ["Abm", "Bbdim","Cb", "Dbm", "Eb",  "Fb",  "Gb"]
    },

    # Chord name -> [root, third, fifth].
    "CHORD_NOTES" : {
    # Major
        "C":   ["C",  "E",  "G"],    "C#":  ["C#", "E#", "G#"],    "D":   ["D",  "F#", "A"],
        "Eb":  ["Eb", "G",  "Bb"],   "E":   ["E",  "G#", "B"],     "F":   ["F",  "A",  "C"],
        "F#":  ["F#", "A#", "C#"],   "G":   ["G",  "B",  "D"],     "Ab":  ["Ab", "C",  "Eb"],
        "A":   ["A",  "C#", "E"],    "Bb":  ["Bb", "D",  "F"],     "B":   ["B",  "D#", "F#"],
        "Cb":  ["Cb", "Eb", "Gb"],   "Gb":  ["Gb", "Bb", "Db"],    "Db":  ["Db", "F",  "Ab"],

        # Minor
        "Cm":  ["C",  "Eb", "G"],    "C#m": ["C#", "E",  "G#"],    "Dm":  ["D",  "F",  "A"],
        "D#m": ["D#", "F#", "A#"],   "Em":  ["E",  "G",  "B"],     "Fm":  ["F",  "Ab", "C"],
        "F#m": ["F#", "A",  "C#"],   "Gm":  ["G",  "Bb", "D"],     "G#m": ["G#", "B",  "D#"],
        "Am":  ["A",  "C",  "E"],     "Bbm": ["Bb", "Db", "F"],     "Bm":  ["B",  "D",  "F#"],
        "Abm": ["Ab", "Cb", "Eb"],    "Ebm": ["Eb", "Gb", "Bb"],    "Dbm": ["Db", "E",  "Ab"],

        # Diminished triads
        "Cdim": ["C",  "Eb", "Gb"],   "C#dim": ["C#", "E",  "G"],   "Ddim": ["D",  "F",  "Ab"],
        "D#dim":["D#", "F#", "A"],     "Edim": ["E",  "G",  "Bb"],  "Fdim": ["F",  "Ab", "Cb"],
        "F#dim":["F#", "A",  "C"],     "Gdim": ["G",  "Bb", "Db"],  "G#dim":["G#", "B",  "D"],
        "Adim": ["A",  "C",  "Eb"],    "Bbdim":["Bb", "Db", "Fb"],  "Bdim": ["B",  "D",  "F"],
        "E#dim":["E#", "G#", "B"],     "B#dim":["B#", "D#", "F#"],
        # Db diminished = Db, Fb, Abb. "DbDim" is the original (mis-cased) key,
        # kept as an alias so existing lookups keep working.
        "Dbdim":["Db", "Fb", "Abb"],   "DbDim":["Db", "Fb", "Abb"],

        # Augmented (root, major third, augmented fifth)
        "C+":   ["C",  "E",  "G#"],   "C#+": ["C#", "E#", "Gx"],   "D+":  ["D",  "F#", "A#"],
        "D#+":  ["D#", "Fx", "Ax"],   "E+":  ["E",  "G#", "B#"],   "F+":  ["F",  "A",  "C#"],
        "F#+":  ["F#", "A#", "C##"],  "G+":  ["G",  "B",  "D#"],   "G#+": ["G#", "B#", "D##"],
        "A+":   ["A",  "C#", "E#"],   "A#+": ["A#", "Cx", "Ex"],   "B+":  ["B",  "D#", "F##"],

        # Special
        "Fb": ["Fb", "Ab", "Cb"],

        # Optional
        "Am(h)": ["A", "C", "E"],  # harmonic
        "Am(m)": ["A", "C", "E"],  # melodic
    },

    # Chord progressions expressed as scale degrees (1-based).
    "COMMON_CHORD_PROGRESSIONS": [
        [1,4,5,1], [1,5,6,4], [6,4,1,5], [1,6,2,5], [1,4,2,5],
        [1,4,5,3,6,2,5,1], [6,4,5,1], [1,6,4,5], [1,4,6,5],
        [1,2,5], [1,6,3,7], [6,2,5,1], [1,6,7,6], [1,4,7],
        [1,5,1,4], [1,6,7,4], [6,7,1], [1,4,6,3], [1,3,6,7],
        [1,2,5,1]
    ],

    # Two-chord cadence patterns expressed as scale degrees (1-based).
    "CADENCE_CHORD": [
        [1, 6], [1, 4], [1, 5], [4, 1], [5, 1]
    ]
}

In [48]:

class ChordAnalyzer:
    def __init__(self, musicxml_file):
        self.musicxml_file = musicxml_file
        self.score = None
        self.key_signature = None
        self.detected_key = None
        self.bars_notes = []
        self.bars_notes_objs = []
        self.all_notes = []

        # Weights
        self.weights = {
            'base_match': 1.0,
            'duration_factor': 0.5,
            'bass_match': 5.0,
            'root_in_bass': 4.0,
            'fifth_in_bass': 2.0,
            'third_in_bass': 1.5,
            'top_note_root': 2.0,
            'top_note_third': 1.5,
            'top_note_fifth': 1.2,
            'short_note_penalty': 0.4,
            'cadence_5_1': 8.0,
            'cadence_4_1': 5.0,
            'first_bar_tonic': 10.0,
            'last_bar_tonic': 12.0,
        }
        self.note_cache = {}  # Untuk normalized note names

        self.load_musicxml()

    def load_musicxml(self):
        """Parse ``self.musicxml_file`` into ``self.score`` and extract its notes.

        Records the string form of the first ``Key`` found in the flattened
        score in ``self.key_signature``; if there is none, the first
        ``KeySignature`` is used instead, and ``None`` if neither exists.
        Then calls ``extract_notes()``.

        Raises:
            Exception: any error from parsing or extraction is printed and
                re-raised unchanged.
        """
        try:
            print(f"Loading MusicXML file: {self.musicxml_file}")
            self.score = music21.converter.parse(self.musicxml_file)

            # Prefer a full Key; fall back to a bare KeySignature
            for key_class in (music21.key.Key, music21.key.KeySignature):
                matches = self.score.flat.getElementsByClass(key_class)
                if matches:
                    self.key_signature = str(matches[0])
                    break
            else:
                self.key_signature = None

            self.extract_notes()

            print(f"Successfully loaded file. Found {len(self.bars_notes)} bars.")
        except Exception as e:
            print(f"Error loading MusicXML file: {e}")
            raise

    def normalize_note_name(self, note_name):
        if not note_name:
            return None

        if note_name in self.note_cache:
            return self.note_cache[note_name]

        # Remove octave
        name = re.sub(r'\d', '', note_name)

        # Normalize symbol
        name = name.replace("♯", "#").replace("♭", "b").replace("♮", "")

        # Normalize note name
        ENH = {
            # Flats to sharps
            "Cb": "B", "B#": "C",
            "Db": "C#", "C##": "D",
            "Eb": "D#", "D##": "E",
            "Fb": "E", "E#": "F",
            "Gb": "F#", "F##": "G",
            "Ab": "G#", "G##": "A",
            "Bb": "A#", "A##": "B",
            # Double flats/sharps
            "Cbb": "Bb", "B##": "C#",
            "Dbb": "C", "Cx": "D",  # C## = Cx
            "Ebb": "D", "Dx": "E",
            "Fbb": "Eb", "Ex": "F#",
            "Gbb": "F", "Fx": "G",
            "Abb": "G", "Gx": "A",
            "Bbb": "A", "Ax": "B",

        }

        # If already natural
        if name in ["A", "B", "C", "D", "E", "F", "G"]:
            self.note_cache[note_name] = name
            return name

        # Convert accidental notes
        if name in ENH:
            result = ENH[name]
            self.note_cache[note_name] = result
            return result

        # unexpected patterns -> try stripping unknown chars
        base = re.sub(r'[^A-G#b]', '', name)
        result = ENH.get(base, base)
        self.note_cache[note_name] = result
        return result

    def extract_notes(self):
        self.all_notes = []
        self.bars_notes = []
        self.bars_notes_objs = []

        if not self.score.parts:
            return

        parts_measures = []
        max_measure_number = 0

        # Collect bars
        for part in self.score.parts:
            pm = list(part.getElementsByClass(music21.stream.Measure))
            parts_measures.append(pm)
            max_measure_number = max(max_measure_number, len(pm))

        # For each bar
        for i in range(max_measure_number):
            merged_notes = []
            merged_objs = []
            measure_has_notes = False

            for part_measure_list in parts_measures:
                if i < len(part_measure_list):
                    measure = part_measure_list[i]

                    # Check if bar is empty or has only rests
                    if not measure.notesAndRests:
                        continue

                    for el in measure.notesAndRests:
                        # Skip rests
                        if isinstance(el, music21.note.Rest):
                            continue

                        # Single note
                        if isinstance(el, music21.note.Note):
                            entry = {
                                "name": self.normalize_note_name(el.pitch.name),
                                "duration": float(el.quarterLength),
                                "is_chord": False,
                                "obj": el
                            }
                            merged_notes.append(entry)
                            merged_objs.append(el)
                            self.all_notes.append(entry["name"])
                            measure_has_notes = True

                        # chord
                        elif isinstance(el, music21.chord.Chord):
                            dur = float(el.quarterLength)
                            for p in el.pitches:
                                entry = {
                                    "name": self.normalize_note_name(p.name),
                                    "duration": dur,
                                    "is_chord": True,
                                    "obj": el
                                }
                                merged_notes.append(entry)
                                self.all_notes.append(entry["name"])
                            merged_objs.append(el)
                            measure_has_notes = True

            # If completely empty bar, add placeholder
            if not measure_has_notes:
                merged_notes.append({
                    "name": None,
                    "duration": 1.0,
                    "is_chord": False,
                    "obj": None
                })

            self.bars_notes.append(merged_notes)
            self.bars_notes_objs.append(merged_objs)

    def normalize_key_to_db(self, tonic_name, mode):
        target = tonic_name + ("m" if mode == "minor" else "")

        if target in DATABASE["CHORDS_BY_KEY"]:
            return target

        try:
            pitch = music21.pitch.Pitch(tonic_name)
            enh = pitch.getEnharmonic().name
            target2 = enh + ("m" if mode == "minor" else "")

            if target2 in DATABASE["CHORDS_BY_KEY"]:
                return target2
        except:
            pass

        # If still nothing, try scanning all DB keys with same pitch class
        for k in DATABASE["CHORDS_BY_KEY"].keys():
            k_tonic = k.replace("m", "")
            try:
                if music21.pitch.Pitch(k_tonic).pitchClass == music21.pitch.Pitch(tonic_name).pitchClass:
                    return k
            except:
                continue

        # worst case: return original tonic with mode
        return target

    def analyze_key_signature(self):
        # 1. CHECK MUSICXML KEY SIGNATURE
        ks = None
        try:
            if hasattr(self, 'score') and self.score.parts:
                part = self.score.parts[0]
                key_sigs = part.recurse().getElementsByClass(music21.key.KeySignature)
                if key_sigs:
                    ks = key_sigs[0]
        except Exception as e:
            print(f"Error checking key signature: {e}")
            ks = None

        if ks is not None:
            sf = ks.sharps
            try:
                key_obj = music21.key.KeySignature(sf).asKey()
                tonic = key_obj.tonic.name
                mode = key_obj.mode
                final_key = self.normalize_key_to_db(tonic, mode)
                print(f"[KEY SIGNATURE] Found signature {sf} sharps → returning {final_key}")
                self.detected_key = final_key
                return final_key
            except Exception as e:
                print(f"Error processing key signature {sf}: {e}")


        # if no valid key signature
        if not self.bars_notes_objs or not self.all_notes:
            self.detected_key = None
            return None

        melodic_counter = Counter()   # top notes each measure ++
        bass_counter = Counter()      # bass notes each measure +
        chord_tone_counter = Counter()# chord tones but lower weight
        raw_note_counter = Counter([self.normalize_note_name(n) for n in self.all_notes])

        # iterate bars and collect melody (top), bass (lowest), chord tone occurrences
        for measure_objs in self.bars_notes_objs:
            measure_notes = []
            for el in measure_objs:
                if el is None:
                    continue

                try:
                    if isinstance(el, music21.note.Rest):
                        continue
                    if isinstance(el, music21.note.Note):
                        name = self.normalize_note_name(el.pitch.name)
                        dur = float(getattr(el.duration, "quarterLength", 1.0) or 1.0)
                        midi = getattr(el.pitch, "midi", None)
                        measure_notes.append({'type': 'note', 'name': name, 'dur': dur, 'midi': midi, 'obj': el})
                    elif isinstance(el, music21.chord.Chord):
                        dur = float(getattr(el.duration, "quarterLength", 1.0) or 1.0)
                        # for chord pitches add entries but keep type chord
                        for p in el.pitches:
                            name = self.normalize_note_name(p.name)
                            try:
                                midi = p.midi
                            except Exception:
                                midi = None
                            measure_notes.append({'type': 'chord_tone', 'name': name, 'dur': dur, 'midi': midi, 'obj': el})
                    elif isinstance(el, music21.harmony.ChordSymbol):
                        continue
                except Exception as e:
                    print(f"Error processing element in measure: {e}")
                    continue

            if not measure_notes:
                continue

            # choose melodic (top) note: highest notes with dur >= 0.25
            melodic_candidates = [n for n in measure_notes if n['dur'] >= 0.25]
            if not melodic_candidates:
                melodic_candidates = measure_notes

            top = None
            top_midi = None
            for n in melodic_candidates:
                if n['midi'] is not None:
                    if top is None or n['midi'] > top_midi:
                        top = n
                        top_midi = n['midi']
            if top is None:
                top = melodic_candidates[-1] if melodic_candidates else None

            if top:
                melodic_counter[top['name']] += 3   # +++ for melody

            # choose bass: lowest notes with dur>=0.25
            bass_candidates = [n for n in measure_notes if n['midi'] is not None]
            if bass_candidates:
                lowest = min(bass_candidates, key=lambda x: x['midi'])
                bass_counter[lowest['name']] += 2

            # chord-tone collection
            for n in measure_notes:
                if n['type'] == 'chord_tone':
                    # ignore short chord accidentals
                    if n['dur'] < 0.25:
                        continue
                    chord_tone_counter[n['name']] += 1

        # Counter for scoring keys
        combined_counter = Counter()
        for name, cnt in melodic_counter.items():
            combined_counter[name] += cnt * 1.0

        for name, cnt in bass_counter.items():
            combined_counter[name] += cnt * 0.8

        for name, cnt in chord_tone_counter.items():
            combined_counter[name] += cnt * 0.35

        for name, cnt in raw_note_counter.items():
            combined_counter[name] += cnt * 0.25

        # Last note cadence
        last_note_raw = self.all_notes[-1] if self.all_notes else None
        last_note = self.normalize_note_name(last_note_raw) if last_note_raw else None

        CADENCE_BONUS = 40

        best_key = None
        best_score = -1e9

        # Evaluate major and its relative natural minor
        for maj in DATABASE["MAJOR_KEYS"]:
            maj_scale = DATABASE["MAJOR_NOTES"].get(maj, [])
            if not maj_scale:
                continue

            norm_maj_scale = [self.normalize_note_name(n) for n in maj_scale]

            maj_score = sum(combined_counter.get(n, 0) for n in norm_maj_scale)

            rel_nat_minor = maj_scale[5:] + maj_scale[:5]
            norm_rel_minor = [self.normalize_note_name(n) for n in rel_nat_minor]
            rel_score = sum(combined_counter.get(n, 0) for n in norm_rel_minor)

            # cadence bonus
            if last_note:
                if last_note == self.normalize_note_name(maj):
                    maj_score += CADENCE_BONUS
                rel_tonic = norm_rel_minor[0] if norm_rel_minor else None
                if rel_tonic and last_note == rel_tonic:
                    rel_score += CADENCE_BONUS

            # tie-breaker bonus: if dominant (5th) appears in melody
            maj_fifth = norm_maj_scale[4] if len(norm_maj_scale) > 4 else None
            if maj_fifth and melodic_counter.get(maj_fifth, 0) > 0:
                maj_score += 2

            # pick best
            if maj_score > best_score:
                best_score = maj_score
                best_key = maj
            if rel_score > best_score:
                best_score = rel_score
                best_key = DATABASE["MINOR_KEYS"].get(maj, None)

        print(f"(melody-first) detected key: {best_key} (score {best_score})")
        self.detected_key = best_key
        return best_key

    def check_major_minor(self, key, notes_in_song=None):
        if notes_in_song is None:
            notes_in_song = self.all_notes

        if not key or key.endswith("m"):
            return key

        if key not in DATABASE["MINOR_KEYS"]:
            return key

        norm_song = {self.normalize_note_name(n) for n in notes_in_song}
        maj_scale = DATABASE["MAJOR_NOTES"].get(key, [])

        if not maj_scale:
            return key

        minor_indicators = 0

        # 1. Check raised 7th (harmonic minor)
        seventh = self.normalize_note_name(maj_scale[4])
        raised7 = self.half_step_up(seventh) if seventh else None
        if raised7 and self.normalize_note_name(raised7) in norm_song:
            minor_indicators += 2

        # 2. Check raised 6th (melodic minor)
        sixth = self.normalize_note_name(maj_scale[3])
        raised6 = self.half_step_up(sixth) if sixth else None
        if raised6 and self.normalize_note_name(raised6) in norm_song:
            minor_indicators += 1

        # 3. Check if minor tonic appears more than major tonic
        minor_tonic = DATABASE["MINOR_KEYS"][key]
        minor_tonic_norm = self.normalize_note_name(minor_tonic.replace("m", ""))
        major_tonic_norm = self.normalize_note_name(key)

        major_count = sum(1 for n in notes_in_song if self.normalize_note_name(n) == major_tonic_norm)
        minor_count = sum(1 for n in notes_in_song if self.normalize_note_name(n) == minor_tonic_norm)

        if minor_count > major_count:
            minor_indicators += 1

        # if strong indicators
        if minor_indicators >= 3:
            return DATABASE["MINOR_KEYS"][key]

        return key

    def half_step_up(self, note):
        """Return the note one semitone above ``note``.

        Only the 12 sharp-spelled names (``C C# D ... A# B``) are known;
        ``B`` wraps around to ``C``. Any other non-empty value is returned
        unchanged, and an empty value gives ``None``.
        """
        if not note:
            return None

        chromatic = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
        # Pair every note with its successor, wrapping B -> C
        successor = dict(zip(chromatic, chromatic[1:] + chromatic[:1]))
        return successor.get(note, note)

    def get_chord_candidates(self, key):
        chord_candidates = {}

        if not key:
            return {}

        # Get chords by key
        if key in DATABASE["CHORDS_BY_KEY"]: #if major
            chords = DATABASE["CHORDS_BY_KEY"][key]
        else: # if minor
            rel_major = None
            for maj, minv in DATABASE["MINOR_KEYS"].items():
                if minv == key:
                    rel_major = maj
                    break
            chords = DATABASE["CHORDS_BY_KEY"].get(rel_major, [])

        # get  chords' notes
        for chord in chords:
            if chord in DATABASE["CHORD_NOTES"]:
                chord_candidates[chord] = DATABASE["CHORD_NOTES"][chord]
            else:
                notes = self.construct_chord(chord)
                if notes:
                    chord_candidates[chord] = notes

        return chord_candidates

    def construct_chord(self, chord_name): #if needed for fallabck
        if chord_name.endswith('dim'):
            base = chord_name[:-3]
            # Diminished chord: root, minor third, diminished fifth
            root = self.normalize_note_name(base)
            third = self.half_step_up(self.half_step_up(root))  # Minor third = +3 half
            fifth = self.half_step_up(self.half_step_up(third))  # Diminished fifth = +3 half from third
            return [root, third, fifth] if all([root, third, fifth]) else None
        elif chord_name.endswith('m'):
            base = chord_name[:-1]
            # Minor chord: root, minor third, perfect fifth
            root = self.normalize_note_name(base)
            third = self.half_step_up(self.half_step_up(root))  # Minor third = +3 half
            fifth = self.half_step_up(self.half_step_up(self.half_step_up(third)))  # Perfect fifth = +4 half from third
            return [root, third, fifth] if all([root, third, fifth]) else None
        else:
            # Major chord: root, major third, perfect fifth
            root = self.normalize_note_name(chord_name)
            third = self.half_step_up(self.half_step_up(self.half_step_up(root)))  # Major third = +4 half
            fifth = self.half_step_up(self.half_step_up(third))  # Perfect fifth = +3 half from third
            return [root, third, fifth] if all([root, third, fifth]) else None

    def chord_degree(self, chord_name, key_name):
        if not key_name or not chord_name:
            return None

        if key_name in DATABASE["CHORDS_BY_KEY"]:
            scale = DATABASE["CHORDS_BY_KEY"][key_name]
        else:
            rel_major = None
            for maj, minv in DATABASE["MINOR_KEYS"].items():
                if minv == key_name:
                    rel_major = maj
                    break
            if rel_major and rel_major in DATABASE["CHORDS_BY_KEY"]:
                scale = DATABASE["CHORDS_BY_KEY"][rel_major]
            else:
                scale = DATABASE["CHORDS_BY_KEY"].get(key_name, None)

        if not scale:
            return None

        try:
            return scale.index(chord_name) + 1
        except ValueError:
            # make chord without extensions
            base_chord = chord_name
            if chord_name.endswith(('7', 'maj7', 'min7', 'dim7', 'aug')):
                base_chord = re.sub(r'(7|maj7|min7|dim7|aug)$', '', chord_name)

            try:
                return scale.index(base_chord) + 1
            except ValueError:
                return None

    def get_bass_of_bar(self, bar_objs):
        if not bar_objs:
            return None
        lowest = None
        for el in bar_objs:
            if el is None:
                continue
            try:
                if isinstance(el, music21.chord.Chord):
                    for p in el.pitches:
                        if lowest is None or p.midi < lowest.midi:
                            lowest = p
                elif isinstance(el, music21.note.Note):
                    p = el.pitch
                    if lowest is None or p.midi < lowest.midi:
                        lowest = p
            except Exception:
                continue
        return self.normalize_note_name(lowest.name) if lowest else None

    def find_best_chord_for_bar(self, bar_note_names, bar_objs, chord_candidates, prev_chord=None,  bar_index=0, total_bars=1):
        print(f"\n================ BAR {bar_index+1} ================")
        print("Notes:", bar_note_names)
        print("Prev chord:", prev_chord)
        print("-----------------------------------------------")

        bar_note_names = [n for n in bar_note_names if n is not None]

        try:
            if bar_objs:
                for el in bar_objs:
                    if el is None:
                        continue
                    # music21 chord symbol
                    if isinstance(el, music21.harmony.ChordSymbol):
                        figure = getattr(el, 'figure', None)
                        if figure:
                            norm_fig = figure.strip()
                            print(f"Manual chord symbol found in bar: {norm_fig} -> using it.")
                            return norm_fig

                    if hasattr(el, 'manual_chord'):
                        mc = getattr(el, 'manual_chord')
                        if mc:
                            print(f"Manual chord annotation found in bar: {mc} -> using it.")
                            return mc
        except Exception:
            pass

        note_occurrences = []   # list of dicts: {'name':str, 'dur':float, 'midi':int}
        try:
            for el in (bar_objs or []):
                if el is None:
                    continue
                if isinstance(el, music21.note.Rest):
                    continue
                if isinstance(el, music21.note.Note):
                    name = self.normalize_note_name(el.pitch.name)
                    dur = getattr(el.duration, "quarterLength", 1.0) or 1.0
                    midi = getattr(el.pitch, "midi", None)
                    note_occurrences.append({'name': name, 'dur': float(dur), 'midi': midi})
                elif isinstance(el, music21.chord.Chord):
                    dur = getattr(el.duration, "quarterLength", 1.0) or 1.0
                    # for chords, add each pitch as separate occurrence
                    for p in el.pitches:
                        name = self.normalize_note_name(p.name)
                        try:
                            midi = p.midi
                        except Exception:
                            midi = None
                        note_occurrences.append({'name': name, 'dur': float(dur), 'midi': midi})
        except Exception:
            note_occurrences = [{'name': n, 'dur': 1.0, 'midi': None} for n in bar_note_names]

        # If bar_objs missing, get bar_note_names
        if not note_occurrences and bar_note_names:
            note_occurrences = [{'name': n, 'dur': 1.0, 'midi': None} for n in bar_note_names]

        total_note_dur = sum(n['dur'] for n in note_occurrences) or 1.0

        # Find melody and bass note
        top_note = None
        lowest_midi = None
        for n in note_occurrences:
            if n['midi'] is not None:
                if top_note is None or (n['midi'] > top_note['midi']):
                    top_note = n
                if lowest_midi is None or n['midi'] < lowest_midi:
                    lowest_midi = n['midi']
        if top_note is None and note_occurrences:
            top_note = note_occurrences[-1]

        bass = self.get_bass_of_bar(bar_objs)
        print(f"(computed) bass from bar objects = {bass}")

        # 1) Base matching with configurable weights
        scores = {}
        max_base = -1e9
        print(">> BASE SCORE (duration-weighted tone matches)")

        for chord, chord_notes in chord_candidates.items():
            root = chord_notes[0] if len(chord_notes) > 0 else None
            third = chord_notes[1] if len(chord_notes) > 1 else None
            fifth = chord_notes[2] if len(chord_notes) > 2 else None
            extensions = chord_notes[3:] if len(chord_notes) > 3 else []

            base = 0.0
            tone_weights = {
                root: self.weights['root_in_bass'] if root else -self.weights['short_note_penalty'],
                third: self.weights['third_in_bass'] if third else -self.weights['short_note_penalty'],
                fifth: self.weights['fifth_in_bass'] if fifth else -self.weights['short_note_penalty']
            }

            # small weight for extensions
            ext_weight = 0.8 * self.weights['base_match']

            for occ in note_occurrences:
                name = occ['name']
                dur = occ['dur']
                dur_factor = min(dur / total_note_dur * 4.0, 2.0) * self.weights['duration_factor']
                # reduce influence of very short notes
                if dur < 0.25:
                    dur_factor *= 0.45

                if name in tone_weights and tone_weights[name] > 0:
                    base += tone_weights[name] * dur_factor
                elif name in extensions:
                    base += ext_weight * dur_factor
                else:
                    # small penalty for extraneous notes
                    base -= self.weights['short_note_penalty'] * dur_factor

            # penalty if chord has zero intersection with bar notes
            intersection = set([n['name'] for n in note_occurrences]) & set([root, third, fifth] + extensions)
            if not intersection:
                base -= 2.0 * self.weights['base_match']

            scores[chord] = {
                'base': base, 'root': root, 'third': third, 'fifth': fifth,
                'extensions': extensions
            }
            print(f"  {chord:<8} root={str(root):<3} third={str(third):<3} fifth={str(fifth):<3} -> base={base:.3f}")
            if base > max_base:
                max_base = base

        # 2) Candidate set with tolerance
        tol = 1.0 * self.weights['base_match']
        candidates = [c for c, s in scores.items() if s['base'] >= max_base - tol]
        if not candidates:
            candidates = list(scores.keys())

        print("\n>> CHORDS IN TOLERANCE (max_base =", max_base, ")")
        for c in candidates:
            print("  ", c)

        # 3) strong-tone & bass scoring with configurable weights
        print("\n>> STRONG + BASS SCORE (bass =", bass, ")")
        for c in candidates:
            s = scores[c]
            st = 0.0

            # count weighted occurrences of root/third/fifth across note_occurrences
            for occ in note_occurrences:
                name = occ['name']
                dur = occ['dur']
                dur_factor = min(dur, 2.0) * self.weights['duration_factor']
                if name == s['root']:
                    st += self.weights['root_in_bass'] * dur_factor
                elif name == s['third']:
                    st += self.weights['third_in_bass'] * dur_factor
                elif name == s['fifth']:
                    st += self.weights['fifth_in_bass'] * dur_factor
                elif name in s.get('extensions', []):
                    st += 0.8 * self.weights['base_match'] * dur_factor

            # bass match bonus
            if bass and s['root'] and bass == s['root']:
                st += self.weights['bass_match']
            elif bass and s['fifth'] and bass == s['fifth']:
                st += self.weights['fifth_in_bass'] * 1.5
            elif bass and s['third'] and bass == s['third']:
                st += self.weights['third_in_bass'] * 1.2

            # melody bonus
            top_bonus = 0.0
            if top_note:
                top_name = top_note['name']
                if top_name == s['root']:
                    top_bonus += self.weights['top_note_root']
                elif top_name == s['third']:
                    top_bonus += self.weights['top_note_third']
                elif top_name == s['fifth']:
                    top_bonus += self.weights['top_note_fifth']
                elif top_name in s.get('extensions', []):
                    top_bonus += 0.6 * self.weights['base_match']
            st += top_bonus

            scores[c]['strong'] = st
            print(f"  {c:<8} strong={st:.3f} (top_bonus={top_bonus:.2f})")

        # 4) progression/context scoring
        print("\n>> PROGRESSION SCORE")
        common_pairs = set()
        for prog in DATABASE.get("COMMON_CHORD_PROGRESSIONS", []):
            for i in range(len(prog) - 1):
                common_pairs.add((prog[i], prog[i+1]))

        for c in candidates:
            prog = 0.0
            if prev_chord:
                prev_deg = self.chord_degree(prev_chord, self.detected_key)
                curr_deg = self.chord_degree(c, self.detected_key)
                if prev_deg and curr_deg:
                    if prev_deg == 5 and curr_deg == 1:
                        prog += self.weights['cadence_5_1']
                    if prev_deg == 4 and curr_deg == 1:
                        prog += self.weights['cadence_4_1']
                    if prev_deg == 5 and curr_deg == 6:
                        prog += 0.8 * self.weights['base_match']
                    if (prev_deg, curr_deg) in common_pairs:
                        prog += 2.0 * self.weights['base_match']
                    if abs(prev_deg - curr_deg) == 1:
                        prog += 0.6 * self.weights['base_match']
                    if abs(prev_deg - curr_deg) > 3:
                        prog -= 0.8 * self.weights['base_match']

            scores[c]['prog'] = prog
            print(f"  {c:<8} prog={prog:.3f}")

        # 5) first/last
        print("\n>> SPECIAL (FIRST/LAST BAR / CADENCE)")

        for c in candidates:
            special = 0.0
            deg = self.chord_degree(c, self.detected_key)

            if bar_index == 0:
                if deg == 1:
                    special += self.weights['first_bar_tonic']
                elif deg in (3, 5):
                    special += 1.5 * self.weights['base_match']

            # last bar: strong tonic bias (cadence)
            if bar_index == total_bars - 1:
                if deg == 1:
                    special += self.weights['last_bar_tonic']
                elif deg == 5:
                    special += 4.0 * self.weights['base_match']

            # penalty highly unlikely cadence
            if bar_index == total_bars - 1 and deg in (2, 3):
                special -= 1.0 * self.weights['base_match']

            scores[c]['special'] = special
            print(f"  {c:<8} special={special:.3f}")

        # 6) Non-harmonic / passing tone penalty
        short_notes = sum(1 for occ in note_occurrences if occ['dur'] < 0.25)
        short_penalty = short_notes * self.weights['short_note_penalty']
        if short_notes:
            print(f"\n>> NON-HARMONIC INFO: short_notes={short_notes} penalty={short_penalty:.2f}")
        else:
            print("\n>> NON-HARMONIC INFO: none detected")

        # 7) total score
        print("\n>> TOTAL SCORE")
        best = None
        best_total = -1e9
        for c in candidates:
            total = scores[c]['base'] + scores[c]['strong'] + scores[c]['prog'] + scores[c]['special']

            # subtract penalty if chord relies mainly on short notes
            long_match_weight = 0.0
            for occ in note_occurrences:
                if occ['dur'] >= 0.25:
                    name = occ['name']
                    if name in [scores[c]['root'], scores[c]['third'], scores[c]['fifth']] or name in scores[c].get('extensions', []):
                        long_match_weight += min(occ['dur'], 2.0)

            if long_match_weight < 0.5 and scores[c]['base'] > 0:
                total -= 1.5 * self.weights['base_match']

            if short_notes:
                total -= short_penalty * (1.0 if long_match_weight < 1.0 else 0.2)

            scores[c]['total'] = total
            print(f"  {c:<8} total={total:.3f} (base={scores[c]['base']:.3f} strong={scores[c]['strong']:.3f} prog={scores[c]['prog']:.3f} special={scores[c]['special']:.3f})")

            if total > best_total:
                best_total = total
                best = c

        # 8) tie-breaker
        ties = [c for c in candidates if abs(scores[c]['total'] - best_total) < 1e-9]
        if len(ties) > 1:
            ties.sort(key=lambda x: (
                1 if (bass and scores[x]['root'] and bass == scores[x]['root']) else 0,
                scores[x]['strong'],
                scores[x]['base']
            ), reverse=True)
            best = ties[0]

        print("\n>>> SELECTED CHORD:", best)
        print("===================================================")
        return best

    def analyze_chord_progression(self):
        """Run the whole analysis and return one chord per bar.

        Steps, in order:
          1. Choose the key: the instance's ``key_signature`` when it names a
             known major key, otherwise the result of
             ``analyze_key_signature()``.
          2. Pass that key through ``check_major_minor`` and store the result
             in ``self.detected_key``.
          3. Build the candidate chords for the key.
          4. Pick a chord for every bar, feeding the previous bar's chord into
             the next decision.
          5. Print the progression and its Roman numerals.

        Side effects:
            Sets ``self.detected_key`` and prints a trace of every step.

        Returns:
            list: the value returned by ``find_best_chord_for_bar`` for each
            bar, in bar order. An entry may be ``None`` when no chord could be
            selected for that bar; such entries are kept in the returned list
            and shown as ``?`` in the printed summary.
        """
        print("=== MUSIC ANALYSIS ===")

        # 1) detect key
        detected_key = self.analyze_key_signature()
        print(f"Detected key from analysis: {detected_key}")

        # Prefer the key signature stored on the instance, but only when it
        # names one of the known major keys; otherwise use the analysed key.
        if self.key_signature and self.key_signature != "None" and self.key_signature in DATABASE["MAJOR_KEYS"]:
            final_key = self.key_signature
            print(f"Using key signature from file: {final_key}")
        else:
            final_key = detected_key
            print(f"Using analyzed key: {final_key}")

        # 2) check major/minor
        final_key = self.check_major_minor(final_key, self.all_notes)
        self.detected_key = final_key  # ensure stored

        # 3) chord candidates
        chord_candidates = self.get_chord_candidates(final_key)
        print(f"Chord candidates for {final_key}: {list(chord_candidates.keys())}")

        # 4) analyze per bar
        chord_progression = []
        prev_chord = None
        total_bars = len(self.bars_notes)

        print("\n=== BAR-BY-BAR ANALYSIS ===")
        for i, (bar_names, bar_objs) in enumerate(zip(self.bars_notes, self.bars_notes_objs)):
            bar_note_list = [entry["name"] for entry in bar_names if entry["name"] is not None]
            best = self.find_best_chord_for_bar(
                bar_note_list,
                bar_objs,
                chord_candidates,
                prev_chord=prev_chord,
                bar_index=i,
                total_bars=total_bars,
            )
            chord_progression.append(best)
            prev_chord = best

            # Report how many of the bar's notes belong to the chosen chord.
            chord_notes = chord_candidates.get(best, [])
            match_count = sum(1 for note in bar_note_list if note in chord_notes)
            print(f"Bar {i+1}: Notes {bar_note_list}")
            print(f"         -> Chord: {best} {chord_notes} (match: {match_count}/{len(bar_note_list)} notes)")

        # Roman numerals: use the chord list registered for the key; for a key
        # without its own entry, fall back to the chords of the major key that
        # MINOR_KEYS maps to it, and finally to the chords of C.
        if final_key in DATABASE["CHORDS_BY_KEY"]:
            chords_in_key = DATABASE["CHORDS_BY_KEY"][final_key]
        else:
            relative_major = next(
                (major for major, minor in DATABASE["MINOR_KEYS"].items() if minor == final_key),
                None,
            )
            chords_in_key = DATABASE["CHORDS_BY_KEY"].get(relative_major, DATABASE["CHORDS_BY_KEY"]["C"])

        # Built once instead of once per chord; keyed by the chord's 1-based
        # position inside chords_in_key.
        roman_by_position = {1: "I", 2: "ii", 3: "iii", 4: "IV", 5: "V", 6: "vi", 7: "vii°"}
        roman_numerals = []
        for ch in chord_progression:
            if ch in chords_in_key:
                pos = chords_in_key.index(ch) + 1
                roman_numerals.append(roman_by_position.get(pos, "?"))
            else:
                roman_numerals.append("?")

        # str.join() raises TypeError on non-string items, so a bar for which
        # no chord was selected (None) must not reach it. Only the printed
        # labels are substituted; the returned list is left untouched.
        progression_labels = ["?" if ch is None else str(ch) for ch in chord_progression]

        print("\n=== FINAL RESULTS ===")
        print(f"Key: {final_key}")
        print(f"Chord Progression: {' - '.join(progression_labels)}")
        print(f"Roman Numerals: {' - '.join(roman_numerals)}")

        return chord_progression

In [50]:
import tkinter as tk
from tkinter import filedialog

def pick_file():
    """Show an "open file" dialog and return the path the user selected.

    A hidden Tk root window is created only to host the dialog. It is always
    destroyed before returning, so repeated calls from one long-lived process
    (such as a notebook kernel) do not accumulate invisible Tk roots.

    Returns:
        The value returned by ``filedialog.askopenfilename``: the selected
        path, or a falsy value (an empty string) when the dialog is cancelled.
    """
    root = tk.Tk()
    try:
        root.withdraw()  # hide the empty root window; only the dialog should show
        root.attributes("-topmost", True)  # ask the window manager to keep it on top

        file_path = filedialog.askopenfilename(
            title="Select MusicXML File",
            filetypes=[("MusicXML files", "*.musicxml *.xml")]
        )
        return file_path
    finally:
        # Release the Tk root even if the dialog raises.
        root.destroy()

def main():
    """Ask for a MusicXML file, analyse it, and return the chord list.

    Returns:
        The chord progression produced by
        ``ChordAnalyzer.analyze_chord_progression``, or ``None`` when no file
        was selected.

    Raises:
        Exception: any error raised while constructing the analyzer or running
        the analysis is reported on stdout and then re-raised unchanged.
    """
    print("Select a MusicXML file...")
    chosen_path = pick_file()

    if chosen_path:
        print(f"Selected file: {chosen_path}")
        try:
            print("Starting Music Analysis...")
            progression = ChordAnalyzer(chosen_path).analyze_chord_progression()
            print(f"\nFinal chord list: {progression}")
            return progression
        except Exception as err:
            # Report the failure, then let the caller see the original error.
            print(f"Analysis failed: {err}")
            raise

    # Only reached when the dialog was cancelled or returned nothing.
    print("No file selected. Exiting...")
    return None

# Entry point: run the interactive analysis when this code is executed directly
# (as a script or as a notebook cell) rather than imported as a module.
if __name__ == "__main__":
    main()


Select a MusicXML file...
Selected file: C:/Users/asus/Documents/MuseScore3/Scores/jinggle_bells_piano.musicxml
Starting Music Analysis...
Loading MusicXML file: C:/Users/asus/Documents/MuseScore3/Scores/jinggle_bells_piano.musicxml
Successfully loaded file. Found 48 bars.
=== MUSIC ANALYSIS ===
[KEY SIGNATURE] Found signature 0 sharps → returning C
Detected key from analysis: C
Using analyzed key: C
Chord candidates for C: ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim']

=== BAR-BY-BAR ANALYSIS ===

Notes: ['C', 'E', 'G', 'E', 'E', 'E', 'C', 'E', 'G', 'C']
Prev chord: None
-----------------------------------------------
Manual chord symbol found in bar: C -> using it.
Bar 1: Notes ['C', 'E', 'G', 'E', 'E', 'E', 'C', 'E', 'G', 'C']
         -> Chord: C ['C', 'E', 'G'] (match: 10/10 notes)

Notes: ['C', 'E', 'G', 'E', 'E', 'E', 'C', 'E', 'G', 'C']
Prev chord: C
-----------------------------------------------
Manual chord symbol found in bar: C -> using it.
Bar 2: Notes ['C', 'E', 'G', 'E', 'E