In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset
# paths used by the following cells (all under /content/drive/MyDrive) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Drive folder path; going by its name, presumably the JSB chorales training split.
# This cell only stores the string — nothing is read from the folder here.
bach_path = '/content/drive/MyDrive/jsb_chorales/train'

In [None]:
# Drive folder that the feature-extraction cell walks recursively for
# .mid/.midi files (the source of the non-Bach feature rows).
non_path = '/content/drive/MyDrive/extracted_data'

In [None]:
from music21 import converter
import numpy as np
import pandas as pd
import os

def extract_features_from_midi(midi_file_path):
    """Turn one MIDI file into sliding three-note feature rows.

    The file is parsed with music21 and every ``Note`` element returned by a
    recursive search is reduced to its MIDI pitch number, in the order the
    search yields them. Each run of three consecutive pitches becomes one row.

    Args:
        midi_file_path: Path of the MIDI file, passed to ``converter.parse``.

    Returns:
        A list of ``[note_minus_2, note_minus_1, current_note, avg_interval]``
        rows, where ``avg_interval`` is the mean of the two successive pitch
        differences inside the window. The list is empty when fewer than three
        notes are found.
    """
    score = converter.parse(midi_file_path)

    # Only 'Note' elements are requested.
    # NOTE(review): chords would then be skipped entirely — confirm that is intended.
    pitches = [note.pitch.midi for note in score.recurse().getElementsByClass('Note')]

    features = []
    for end in range(2, len(pitches)):
        window = pitches[end - 2:end + 1]
        # A window always holds exactly three pitches, hence exactly two
        # intervals, so no empty-input fallback is needed for the mean.
        avg_interval = np.mean(np.diff(window))
        features.append([window[0], window[1], window[2], avg_interval])

    return features

# Root directory that is searched recursively for MIDI files.
extracted_data_path = non_path

# Feature rows of every MIDI file found, concatenated in traversal order.
all_features = []

for root, dirs, files in os.walk(extracted_data_path):
    # os.walk reports entries in arbitrary filesystem order. Sorting the
    # directory list in place fixes the descent order, and sorting the file
    # names fixes the order within a directory, so the row order of the
    # resulting frame is reproducible from run to run.
    dirs.sort()
    for file in sorted(files):
        if file.endswith(('.mid', '.midi')):
            file_path = os.path.join(root, file)
            all_features.extend(extract_features_from_midi(file_path))

# One row per three-note window; columns match the row layout produced by
# extract_features_from_midi.
non_bach_df = pd.DataFrame(all_features, columns=['note_minus_2', 'note_minus_1', 'current_note', 'avg_interval'])





In [None]:
# Pitch columns that get shifted. avg_interval is a mean of pitch differences,
# so a constant shift leaves it unchanged and it is not touched here.
NOTE_COLUMNS = ['note_minus_2', 'note_minus_1', 'current_note']

# Downward shift applied to every pitch, in MIDI note numbers.
# NOTE(review): presumably chosen to line up with the note encoding of the
# chorale data — confirm.
TRANSPOSE_SEMITONES = 24

# Start from the extracted rows instead of shifting non_bach_df in place:
# the in-place version subtracted the offset again on every re-run of this
# cell (and did so on an already-filtered slice).
non_bach_transposed_df = pd.DataFrame(all_features, columns=NOTE_COLUMNS + ['avg_interval'])
non_bach_transposed_df[NOTE_COLUMNS] = non_bach_transposed_df[NOTE_COLUMNS] - TRANSPOSE_SEMITONES

# Ensure that no note values are below 0 after transposition
non_bach_df = non_bach_transposed_df[(non_bach_transposed_df['note_minus_2'] >= 0) &
                                     (non_bach_transposed_df['note_minus_1'] >= 0) &
                                     (non_bach_transposed_df['current_note'] >= 0)]

# Persist the transposed, filtered rows; the index is not written.
non_bach_df.to_csv('non_bach_features.csv', index=False)

In [None]:
# Show the frame as the cell output.
# NOTE(review): the output saved with this cell lists 'Unnamed: 0' and 'label'
# columns, which none of the preceding cells create — it appears to come from a
# different execution state. Re-run from a fresh kernel to refresh it.
non_bach_df

Unnamed: 0,note_minus_2,note_minus_1,current_note,avg_interval,label
0,21,16,9,-6.0,0
1,16,9,4,-6.0,0
2,9,4,15,3.0,0
3,4,15,28,12.0,0
4,15,28,24,4.5,0
...,...,...,...,...,...
25939,21,24,20,-0.5,0
25940,24,20,23,-0.5,0
25941,7,3,2,-2.5,0
25942,3,2,3,0.0,0


In [None]:
import pandas as pd
import numpy as np

# CSV of preprocessed chorale data; only its 'note' column is used in this cell.
bach_file_path = 'preprocessed_chorales.csv'

bach_df = pd.read_csv(bach_file_path)
# Keep rows whose note value is at least 1.
# NOTE(review): values below 1 presumably encode rests or padding — confirm.
bach_df = bach_df[bach_df['note'] >= 1]

notes = bach_df['note'].tolist()

# Slide a three-note window over the whole column. Zipping three offset views
# yields exactly the windows (i-2, i-1, i) for i = 2 .. len(notes)-1, and each
# window always has two intervals, so the mean needs no empty-input fallback.
# NOTE(review): the column is treated as one continuous sequence, so windows
# can straddle whatever boundaries (pieces, voices) the CSV rows contain — confirm.
features = [
    [first, second, third, np.mean(np.diff([first, second, third]))]
    for first, second, third in zip(notes, notes[1:], notes[2:])
]

bach_features_df = pd.DataFrame(features, columns=['note_minus_2', 'note_minus_1', 'current_note', 'avg_interval'])

# Persist all rows; the index is not written.
bach_features_df.to_csv('bach_features.csv', index=False)


In [None]:
# Keep only the first len(non_bach_df) rows, so the Bach frame ends up with at
# most as many rows as the non-Bach frame.
bach_features_df = bach_features_df.iloc[:len(non_bach_df)]

In [None]:
# Overwrite bach_features.csv with the current contents of bach_features_df;
# the index is not written.
bach_features_df.to_csv('bach_features.csv', index=False)

In [None]:
# NOTE(review): the same truncation already appears in an earlier cell; once
# that cell has run, this one leaves the frame unchanged — likely a leftover
# duplicate.
bach_features_df = bach_features_df.iloc[:len(non_bach_df)]

In [None]:
# Reload both feature tables from disk and tag every row with its class:
# 1 for the Bach rows, 0 for the non-Bach rows.
# NOTE: this rebinds bach_df and non_bach_df, which earlier cells use for the
# raw chorale table and the in-memory non-Bach features respectively.
bach_df = pd.read_csv('bach_features.csv')
bach_df['label'] = 1

non_bach_df = pd.read_csv('non_bach_features.csv')
non_bach_df['label'] = 0

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# each half keeps its own 0-based labels, so index values repeat and
# label-based lookups on combined_df return two rows.
combined_df = pd.concat([bach_df, non_bach_df], ignore_index=True)

# Save the combined, labelled dataset; the index is not written.
combined_df.to_csv('combined_dataset.csv', index=False)


