In [3]:
import json
import os
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt

from music21 import *
from tqdm import tqdm
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [6]:
def _cento_pitch_names(cento):
    """Return ``cento`` as a list of pitch-name strings such as ``'C4'``."""
    names = []
    for item in cento:
        if isinstance(item, str):
            names.append(item)
        else:
            # Note-like objects carry the name on ``.pitch``; pitch-like
            # objects carry it directly. Anything else is kept unchanged and
            # will simply never match a pitch name from the score.
            target = getattr(item, 'pitch', item)
            names.append(getattr(target, 'nameWithOctave', item))
    return names


def search_for_centos(musicxml_file: str, centos_list: list):
    """
    Search a MusicXML file for melodic centos and report every occurrence.

    The search is performed measure by measure, so a cento is only found when
    all of its notes lie inside a single measure. Within a measure, only
    elements of class ``Note`` are considered, in the order of the flattened
    measure, and matching is done on pitch name with octave (e.g. ``'C4'``).

    Parameters:
    - musicxml_file (str): Path to the MusicXML file.
    - centos_list (list): Melodic centos. Each cento is a sequence (list or
      tuple) of pitch names, or of objects exposing ``pitch.nameWithOctave``
      or ``nameWithOctave``. Empty centos are ignored.

    Returns:
    - list: One dictionary per occurrence, ordered by part, then measure, then
      cento (in ``centos_list`` order), then start index. Each dictionary has:
        - 'cento': a copy of the cento as it was passed in.
        - 'position': {'measure', 'start_index', 'end_index'}, where 'measure'
          is the 1-based ordinal of the measure within its part (not
          necessarily the notated measure number) and the indices are
          inclusive positions in that measure's list of notes.
        - 'melodic_contour': the matched pitch names.
        - 'rhythmic_values': the ``quarterLength`` of each matched note.
      The part in which a match was found is not recorded.
    """
    # Parse the MusicXML file
    score = converter.parse(musicxml_file)

    # Normalise every cento once. Empty centos are dropped because an empty
    # pattern would otherwise "match" at every index of every measure.
    patterns = []
    for cento in centos_list:
        names = _cento_pitch_names(cento)
        if names:
            patterns.append((list(cento), names))

    found_centos_info = []

    for part in score.parts:
        for measure_number, measure in enumerate(part.getElementsByClass('Measure'), start=1):
            # ``flatten()`` replaces the deprecated ``.flat`` property.
            notes = list(measure.flatten().getElementsByClass('Note'))

            # Extract pitch names and durations once per measure instead of
            # once per candidate window.
            pitch_names = [n.pitch.nameWithOctave for n in notes]
            durations = [n.quarterLength for n in notes]

            for cento, names in patterns:
                size = len(names)

                for start in range(len(pitch_names) - size + 1):
                    stop = start + size
                    if pitch_names[start:stop] != names:
                        continue

                    # Fresh lists per result: mutating one result must not
                    # affect the caller's centos or any other result.
                    found_centos_info.append({
                        'cento': list(cento),
                        'position': {
                            'measure': measure_number,
                            'start_index': start,
                            'end_index': stop - 1,
                        },
                        'melodic_contour': list(names),
                        'rhythmic_values': durations[start:stop],
                    })

    return found_centos_info

In [7]:
# Example centos, each written as a list of pitch names with octave.
sample_centos_list = [
    # Ascending stepwise fragment
    ['C4', 'D4', 'E4'],
    # Descending stepwise fragment
    ['G4', 'F4', 'E4'],
    # Upper-neighbour figure around A4
    ['A4', 'B4', 'A4'],
    # Stepwise ascent from F4 to A4, then a step back down
    ['F4', 'G4', 'A4', 'G4'],
    # Upper-neighbour figure around D4
    ['D4', 'E4', 'D4'],
    # Add more centos as needed
]

In [11]:
# Absolute, machine-specific path to the MusicXML score to analyse; adjust it
# when running this notebook on another machine.
score_path = (
    "/Users/oriolcolomefont/Documents/AMPLAB/Module 2/assignment/tests/"
    "ArabAndalusianSelectedScores-Annotations/"
    "7448d9c2-5261-4e70-bd98-6ed8416f908f.xml"
)
found_centos_info = search_for_centos(score_path, sample_centos_list)
found_centos_info

[{'cento': ['G4', 'F4', 'E4'],
  'position': {'measure': 10, 'start_index': 0, 'end_index': 2},
  'melodic_contour': ['G4', 'F4', 'E4'],
  'rhythmic_values': [0.5, 0.5, 2.0]},
 {'cento': ['G4', 'F4', 'E4'],
  'position': {'measure': 26, 'start_index': 0, 'end_index': 2},
  'melodic_contour': ['G4', 'F4', 'E4'],
  'rhythmic_values': [2.0, 0.5, 0.5]},
 {'cento': ['G4', 'F4', 'E4'],
  'position': {'measure': 27, 'start_index': 5, 'end_index': 7},
  'melodic_contour': ['G4', 'F4', 'E4'],
  'rhythmic_values': [0.5, 0.5, 0.5]},
 {'cento': ['G4', 'F4', 'E4'],
  'position': {'measure': 40, 'start_index': 0, 'end_index': 2},
  'melodic_contour': ['G4', 'F4', 'E4'],
  'rhythmic_values': [0.5, 0.5, 2.0]},
 {'cento': ['G4', 'F4', 'E4'],
  'position': {'measure': 59, 'start_index': 0, 'end_index': 2},
  'melodic_contour': ['G4', 'F4', 'E4'],
  'rhythmic_values': [0.5, 0.5, 2.0]},
 {'cento': ['C4', 'D4', 'E4'],
  'position': {'measure': 71, 'start_index': 1, 'end_index': 3},
  'melodic_contour': ['C

In [17]:
def analyze_repetition(found_centos_info: list):
    """
    Group found centos by their pitch sequence and rhythm.

    Two entries belong to the same group when both their 'melodic_contour'
    and their 'rhythmic_values' are equal element by element.

    Parameters:
    - found_centos_info (list): Dictionaries with the keys 'melodic_contour',
      'rhythmic_values' and 'position'.

    Returns:
    - dict: Maps ``(tuple(melodic_contour), tuple(rhythmic_values))`` to a
      dictionary holding 'occurrences' (the positions, in input order) plus
      the 'melodic_contour' and 'rhythmic_values' of the first entry seen
      for that group.
    """
    grouped = {}

    for entry in found_centos_info:
        contour = entry['melodic_contour']
        rhythm = entry['rhythmic_values']
        # Lists are unhashable, so the grouping key is built from tuples.
        signature = (tuple(contour), tuple(rhythm))
        position = entry['position']

        bucket = grouped.get(signature)
        if bucket is None:
            # First time this pitch/rhythm combination is seen.
            bucket = {
                'occurrences': [],
                'melodic_contour': contour,
                'rhythmic_values': rhythm,
                # Add any other relevant details here
            }
            grouped[signature] = bucket

        bucket['occurrences'].append(position)

    return grouped

In [18]:
# Group the matches in found_centos_info by (pitch sequence, rhythm) and
# display the resulting dictionary.
repetition_info = analyze_repetition(found_centos_info)
repetition_info

{(('G4', 'F4', 'E4'),
  (0.5,
   0.5,
   2.0)): {'occurrences': [{'measure': 10,
    'start_index': 0,
    'end_index': 2}, {'measure': 40, 'start_index': 0, 'end_index': 2}, {'measure': 59,
    'start_index': 0,
    'end_index': 2}], 'melodic_contour': ['G4',
   'F4',
   'E4'], 'rhythmic_values': [0.5, 0.5, 2.0]},
 (('G4', 'F4', 'E4'),
  (2.0,
   0.5,
   0.5)): {'occurrences': [{'measure': 26,
    'start_index': 0,
    'end_index': 2}], 'melodic_contour': ['G4',
   'F4',
   'E4'], 'rhythmic_values': [2.0, 0.5, 0.5]},
 (('G4', 'F4', 'E4'),
  (0.5,
   0.5,
   0.5)): {'occurrences': [{'measure': 27,
    'start_index': 5,
    'end_index': 7}, {'measure': 286, 'start_index': 8, 'end_index': 10}, {'measure': 288,
    'start_index': 8,
    'end_index': 10}, {'measure': 289,
    'start_index': 7,
    'end_index': 9}, {'measure': 310, 'start_index': 8, 'end_index': 10}, {'measure': 312,
    'start_index': 8,
    'end_index': 10}, {'measure': 313,
    'start_index': 7,
    'end_index': 9}, {'me

In [14]:
# Each key is a (pitch-name tuple, rhythmic-value tuple) pair identifying one
# distinct melodic/rhythmic pattern found in the score.
keys = repetition_info.keys()
keys

dict_keys([(('G4', 'F4', 'E4'), (0.5, 0.5, 2.0)), (('G4', 'F4', 'E4'), (2.0, 0.5, 0.5)), (('G4', 'F4', 'E4'), (0.5, 0.5, 0.5)), (('C4', 'D4', 'E4'), (0.5, 0.5, 0.5)), (('G4', 'F4', 'E4'), (0.25, 0.25, 0.25)), (('F4', 'G4', 'A4', 'G4'), (0.25, 0.25, 0.25, 0.25)), (('G4', 'F4', 'E4'), (0.25, 0.25, 0.5)), (('F4', 'G4', 'A4', 'G4'), (2.0, 0.25, 0.25, 0.25)), (('G4', 'F4', 'E4'), (0.5, 0.5, 1.5)), (('D4', 'E4', 'D4'), (0.25, 0.25, 0.25)), (('G4', 'F4', 'E4'), (0.5, 1.0, 1.0)), (('D4', 'E4', 'D4'), (0.5, 0.5, 0.5)), (('C4', 'D4', 'E4'), (1.0, 0.5, 0.25)), (('C4', 'D4', 'E4'), (0.5, 0.5, 0.75)), (('F4', 'G4', 'A4', 'G4'), (0.5, 0.5, 1.0, 0.5)), (('C4', 'D4', 'E4'), (0.5, 0.5, 0.25)), (('G4', 'F4', 'E4'), (0.5, 0.25, 0.25)), (('G4', 'F4', 'E4'), (0.5, 0.5, 0.25)), (('C4', 'D4', 'E4'), (1.0, 1.0, 1.0)), (('G4', 'F4', 'E4'), (0.25, 0.25, 1.0)), (('G4', 'F4', 'E4'), (1.0, 0.5, 0.5)), (('G4', 'F4', 'E4'), (1.0, 1.0, 1.0)), (('G4', 'F4', 'E4'), (0.5, 0.5, 1.0))])