In [17]:
import os
import pretty_midi
import numpy as np


In [2]:
# Path to the directory containing MIDI files
base_dir = './Composer_Dataset/NN_midi_files_extended/train'

# List of folders (composers) within the base directory
composer_folders = os.listdir(base_dir)

# Initialize a list to store all loaded MIDI data
all_midi_data = []

# Loop through each composer folder
for composer_folder in composer_folders:
    composer_path = os.path.join(base_dir, composer_folder)
    
    # Check if the item in the base directory is a folder
    if os.path.isdir(composer_path):
        print(f"Loading MIDI files from {composer_folder}...")
        
        # Loop through each MIDI file in the composer folder
        for file_name in os.listdir(composer_path):
            file_path = os.path.join(composer_path, file_name)
            
            # Check if the item in the composer folder is a file and has a .mid or .midi extension
            if os.path.isfile(file_path) and (file_name.endswith('.mid') or file_name.endswith('.midi')):
                try:
                    midi_data = pretty_midi.PrettyMIDI(file_path)
                    all_midi_data.append({
                        'composer': composer_folder,
                        'file_name': file_name,
                        'midi_data': midi_data
                    })
                    print(f"  Loaded: {file_name}")
                except Exception as e:
                    print(f"  Error loading {file_name}: {str(e)}")

print("All MIDI files loaded successfully!")

Loading MIDI files from mozart...
  Loaded: mozart048.mid




  Loaded: mozart028.mid
  Loaded: mozart000.mid
  Loaded: mozart001.mid
  Loaded: mozart015.mid
  Loaded: mozart029.mid
  Loaded: mozart003.mid
  Loaded: mozart017.mid
  Loaded: mozart016.mid
  Loaded: mozart002.mid
  Loaded: mozart006.mid
  Loaded: mozart012.mid
  Loaded: mozart013.mid
  Loaded: mozart007.mid
  Loaded: mozart011.mid
  Loaded: mozart005.mid
  Loaded: mozart010.mid
  Loaded: mozart009.mid
  Loaded: mozart021.mid
  Loaded: mozart034.mid
  Loaded: mozart008.mid
  Loaded: mozart022.mid
  Loaded: mozart036.mid
  Loaded: mozart037.mid
  Loaded: mozart023.mid
  Loaded: mozart027.mid
  Loaded: mozart033.mid
  Loaded: mozart032.mid
  Loaded: mozart026.mid
  Loaded: mozart030.mid
  Loaded: mozart024.mid
  Loaded: mozart018.mid
  Loaded: mozart019.mid
  Loaded: mozart031.mid
  Loaded: mozart042.mid
  Loaded: mozart043.mid
  Loaded: mozart041.mid
  Loaded: mozart044.mid
  Loaded: mozart045.mid
  Loaded: mozart047.mid
  Loaded: mozart046.mid
Loading MIDI files from chopin...
  Load

In [15]:
# Example of extracing midi file metadata
composer = all_midi_data[0]['composer']
print(f"Composer (from all_midi_data[0]): {composer}")


Composer (from all_midi_data[0]): mozart


In [18]:
# Initialize lists to store extracted features
all_note_events = []
all_velocities = []
all_durations = []
all_instruments = []
all_composers = []

# Loop through each composer folder
for composer_folder in composer_folders:
    composer_path = os.path.join(base_dir, composer_folder)
    
    # Check if the item in the base directory is a folder
    if os.path.isdir(composer_path):
        print(f"Loading and processing MIDI files from {composer_folder}...")
        
        # Loop through each MIDI file in the composer folder
        for file_name in os.listdir(composer_path):
            file_path = os.path.join(composer_path, file_name)
            
            # Check if the item in the composer folder is a file and has a .mid or .midi extension
            if os.path.isfile(file_path) and (file_name.endswith('.mid') or file_name.endswith('.midi')):
                try:
                    midi_data = pretty_midi.PrettyMIDI(file_path)
                    
                    # Extract composer (assuming composer name is before the first underscore)
                    composer = file_name.split('_')[0]
                    all_composers.append(composer)
                    
                    # Extract note events, velocities, durations, and instruments
                    note_events = []
                    velocities = []
                    durations = []
                    instruments = []
                    
                    for instrument in midi_data.instruments:
                        for note in instrument.notes:
                            note_events.append(note.pitch)
                            velocities.append(note.velocity)
                            durations.append(note.end - note.start)  # Duration in seconds
                            instruments.append(instrument.program)  # MIDI program number
                    
                    all_note_events.append(note_events)
                    all_velocities.append(velocities)
                    all_durations.append(durations)
                    all_instruments.append(instruments)
                    
                except Exception as e:
                    print(f"  Error processing {file_name}: {str(e)}")

print("MIDI data processing complete!")

# Convert lists to NumPy arrays for further processing
all_note_events = np.array(all_note_events)
all_velocities = np.array(all_velocities)
all_durations = np.array(all_durations)
all_instruments = np.array(all_instruments)
all_composers = np.array(all_composers)

# Example: Print shapes of extracted features
print("Shapes of extracted features:")
print("Note Events:", all_note_events.shape)
print("Velocities:", all_velocities.shape)
print("Durations:", all_durations.shape)
print("Instruments:", all_instruments.shape)
print("Composers:", all_composers.shape)

Loading and processing MIDI files from mozart...
Loading and processing MIDI files from chopin...
Loading and processing MIDI files from handel...
Loading and processing MIDI files from byrd...
Loading and processing MIDI files from schumann...
Loading and processing MIDI files from mendelssohn...
Loading and processing MIDI files from hummel...
Loading and processing MIDI files from bach...
Loading and processing MIDI files from bartok...
MIDI data processing complete!
Shapes of extracted features:
Note Events: (369,)
Velocities: (369,)
Durations: (369,)
Instruments: (369,)
Composers: (369,)


  all_note_events = np.array(all_note_events)
  all_velocities = np.array(all_velocities)
  all_durations = np.array(all_durations)
  all_instruments = np.array(all_instruments)


In [19]:
import os
import pretty_midi
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Path to the directory containing MIDI files
base_dir = './Composer_Dataset/NN_midi_files_extended/train'

# List of folders (composers) within the base directory
composer_folders = os.listdir(base_dir)

# Initialize lists to store extracted features and labels
x_training_data = []
y_training_data = []

# Encoder for composer labels
label_encoder = LabelEncoder()

# Loop through each composer folder
for composer_folder in composer_folders:
    composer_path = os.path.join(base_dir, composer_folder)
    
    # Check if the item in the base directory is a folder
    if os.path.isdir(composer_path):
        print(f"Loading MIDI files from {composer_folder}...")
        
        # Loop through each MIDI file in the composer folder
        for file_name in os.listdir(composer_path):
            file_path = os.path.join(composer_path, file_name)
            
            # Check if the item in the composer folder is a file and has a .mid or .midi extension
            if os.path.isfile(file_path) and (file_name.endswith('.mid') or file_name.endswith('.midi')):
                try:
                    midi_data = pretty_midi.PrettyMIDI(file_path)
                    
                    # Extract musical features (example: note events)
                    note_events = []
                    for instrument in midi_data.instruments:
                        for note in instrument.notes:
                            note_events.append(note.pitch)
                    
                    # Append to x_training_data (assuming fixed sequence length)
                    # Adjust sequence length based on your needs
                    sequence_length = 100  # Example sequence length
                    if len(note_events) >= sequence_length:
                        x_training_data.append(note_events[:sequence_length])
                        y_training_data.append(composer_folder)  # Use folder name as label
                    
                except Exception as e:
                    print(f"  Error loading {file_name}: {str(e)}")

# Convert lists to numpy arrays
x_training_data = np.array(x_training_data)
y_training_data = np.array(y_training_data)

# Encode composer labels
y_training_data_encoded = label_encoder.fit_transform(y_training_data)

# Example: Print shapes of extracted features and labels
print("Shapes:")
print("x_training_data:", x_training_data.shape)
print("y_training_data_encoded:", y_training_data_encoded.shape)


Loading MIDI files from mozart...




Loading MIDI files from chopin...
Loading MIDI files from handel...
Loading MIDI files from byrd...
Loading MIDI files from schumann...
Loading MIDI files from mendelssohn...
Loading MIDI files from hummel...
Loading MIDI files from bach...
Loading MIDI files from bartok...
Shapes:
x_training_data: (369, 100)
y_training_data_encoded: (369,)


In [20]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [21]:
# Define RNN model
model = Sequential([
    LSTM(units=100, return_sequences=True, input_shape=(x_training_data.shape[1], x_training_data.shape[2])),
    Dropout(0.2),
    LSTM(units=100),
    Dropout(0.2),
    Dense(units=y_training_data.shape[1], activation='softmax')
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(x_training_data, y_training_data, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate model
# Assuming x_test_data and y_test_data are your test set
# Replace with your actual test data
loss, accuracy = model.evaluate(x_test_data, y_test_data)
print(f'Test accuracy: {accuracy}')

# Make predictions
# Assuming x_new_data is new MIDI data for prediction
# Replace with your actual new data
predictions = model.predict(x_new_data)

IndexError: tuple index out of range