In [3]:
import tensorflow as tf

# Check available GPUs: ask TensorFlow for its logical GPU devices and
# print how many were found.

gpus = tf.config.list_logical_devices('GPU')

print(len(gpus))

1


In [4]:
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 27 15:53:38 2024

@author: bplse
"""

import os
import numpy as np
import pretty_midi
from sklearn.model_selection import train_test_split

# Define paths and parameters
# Root folder of the training set; load_data() treats each sub-directory as one composer.
data_directory = './/Composer_Dataset//Composer_Dataset//NN_midi_files_extended//train'
fs = 5  # Sampling frequency passed to get_piano_roll(fs=...); presumably piano-roll columns per second - confirm against pretty_midi docs
max_length = 100  # Number of piano-roll columns (time steps) each roll is truncated or zero-padded to
number_of_composers = 1  # Adjust based on your dataset; NOTE(review): only used as the softmax width in the disabled model code further down

# Load and process MIDI files into piano rolls
def midi_to_piano_roll(midi_path):
    """Read a MIDI file and render it as a piano roll.

    The roll is sampled with the module-level ``fs`` setting and handed back
    exactly as ``pretty_midi`` produces it: nothing is transposed, truncated
    or padded here (length fixing is done by ``preprocess_piano_roll``, which
    treats axis 1 as the time axis).
    """
    return pretty_midi.PrettyMIDI(midi_path).get_piano_roll(fs=fs)

def preprocess_piano_roll(piano_roll, target_length=None):
    """Force a piano roll to a fixed number of columns along axis 1.

    Args:
        piano_roll: 2-D array whose second axis is truncated or padded.
        target_length: Desired number of columns. When omitted, the
            module-level ``max_length`` is used, which is the original
            behaviour.

    Returns:
        A 2-D array with exactly ``target_length`` columns: longer rolls are
        cut after ``target_length`` columns, shorter ones are right-padded
        with zeros.
    """
    if target_length is None:
        target_length = max_length

    n_columns = piano_roll.shape[1]
    if n_columns > target_length:
        return piano_roll[:, :target_length]

    # Pad only on the right of axis 1; axis 0 is left untouched.
    missing = target_length - n_columns
    return np.pad(piano_roll, ((0, 0), (0, missing)), 'constant')

def load_data(directory):
    """Load every ``.mid`` file under ``directory`` as a flattened piano roll.

    ``directory`` is expected to hold one sub-directory per composer. The
    sub-directories are sorted by name and numbered ``0..N-1``. Stray files
    at the top level are ignored and do not consume a label index, so the
    labels are always contiguous.

    Args:
        directory: Path of the dataset root.

    Returns:
        tuple: ``(data, labels, label_map)`` where ``data`` holds one
        flattened, length-fixed piano roll per MIDI file, ``labels`` holds
        the matching integer composer indices, and ``label_map`` maps each
        index to its composer directory name.
    """
    data = []
    labels = []
    label_map = {}

    # Filter to directories *before* enumerating so that a non-directory
    # entry cannot shift the class indices.
    composers = [
        name for name in sorted(os.listdir(directory))
        if os.path.isdir(os.path.join(directory, name))
    ]

    for label, composer in enumerate(composers):
        composer_path = os.path.join(directory, composer)
        label_map[label] = composer
        # Sorted so the sample order is reproducible from run to run.
        for midi_file in sorted(os.listdir(composer_path)):
            if not midi_file.endswith('.mid'):
                continue
            path = os.path.join(composer_path, midi_file)
            piano_roll = midi_to_piano_roll(path)
            # Fix the length, then flatten into a 1-D feature vector.
            data.append(preprocess_piano_roll(piano_roll).flatten())
            labels.append(label)

    return np.array(data), np.array(labels), label_map


# Build the piano-roll dataset: one flattened roll per MIDI file, its integer
# composer label, and the index -> composer-name mapping.
data, labels, label_map = load_data(data_directory)


# NOTE: everything below is a disabled experiment kept inside a bare string
# literal, so none of it runs; as the last expression of the cell the string
# is only echoed back as the cell output.
# NOTE(review): if this is re-enabled, both models end in a softmax over
# `number_of_composers` units and are compiled with 'categorical_crossentropy',
# while load_data() returns integer labels - confirm the label encoding
# (and that `tf` is imported) before training.
'''
# Reshape data for LSTM and CNN models
data_lstm = data.reshape(data.shape[0], 128* max_length)  # For LSTM
#data_cnn = data[..., np.newaxis]  # For CNN, adding a channel dimension

# Split data into train and test sets
X_train_lstm, X_test_lstm, y_train, y_test = train_test_split(data_lstm, labels, test_size=0.2, random_state=42)
#X_train_cnn, X_test_cnn, _, _ = train_test_split(data_cnn, labels, test_size=0.2, random_state=42)

# LSTM Model
def build_lstm_model():
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(128* max_length,1)),
        tf.keras.layers.LSTM(64, return_sequences=True),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(number_of_composers, activation='softmax')
    ])
    return model

# CNN Model
def build_cnn_model():
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(max_length, 128, 1)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(number_of_composers, activation='softmax')
    ])
    return model

# Build and compile models
lstm_model = build_lstm_model()
#cnn_model = build_cnn_model()
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
'''




"\n# Reshape data for LSTM and CNN models\ndata_lstm = data.reshape(data.shape[0], 128* max_length)  # For LSTM\n#data_cnn = data[..., np.newaxis]  # For CNN, adding a channel dimension\n\n# Split data into train and test sets\nX_train_lstm, X_test_lstm, y_train, y_test = train_test_split(data_lstm, labels, test_size=0.2, random_state=42)\n#X_train_cnn, X_test_cnn, _, _ = train_test_split(data_cnn, labels, test_size=0.2, random_state=42)\n\n# LSTM Model\ndef build_lstm_model():\n    model = tf.keras.Sequential([\n        tf.keras.layers.InputLayer(input_shape=(128* max_length,1)),\n        tf.keras.layers.LSTM(64, return_sequences=True),\n        tf.keras.layers.LSTM(64),\n        tf.keras.layers.Dense(64, activation='relu'),\n        tf.keras.layers.Dense(number_of_composers, activation='softmax')\n    ])\n    return model\n\n# CNN Model\ndef build_cnn_model():\n    model = tf.keras.Sequential([\n        tf.keras.layers.InputLayer(input_shape=(max_length, 128, 1)),\n        tf.keras.l

In [5]:
import pretty_midi
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Convert MIDI to audio
def midi_to_audio(midi_path, sample_rate=44100):
    """Synthesize a MIDI file to peak-normalized 16-bit PCM audio.

    Args:
        midi_path: Path of the MIDI file to render.
        sample_rate: Sampling rate in Hz used for synthesis and returned to
            the caller. Defaults to 44100, the value that used to be
            hard-coded in the return statement.

    Returns:
        tuple: ``(sample_rate, audio_data)`` where ``audio_data`` is an
        ``int16`` array scaled so that its peak is 32767. A silent or empty
        rendering yields an all-zero array of the same shape.

    Raises:
        ImportError: Propagated from ``pretty_midi`` when pyfluidsynth is
            not installed.
    """
    midi_data = pretty_midi.PrettyMIDI(midi_path)
    # Pass the rate explicitly so the returned rate can never drift from the
    # rate the audio was actually rendered at.
    audio_data = np.asarray(midi_data.fluidsynth(fs=sample_rate))

    peak = np.max(np.abs(audio_data)) if audio_data.size else 0.0
    if peak == 0:
        # Dividing by a zero peak would produce NaN/inf samples (and np.max
        # raises on an empty array), so return plain silence instead.
        return sample_rate, np.zeros(audio_data.shape, dtype=np.int16)

    audio_data = np.int16(audio_data / peak * 32767)
    return sample_rate, audio_data  # Return sample rate and audio data

# Generate a Mel-spectrogram from the audio
def generate_spectrogram(audio_data, sr):
    """Compute a dB-scaled Mel-spectrogram of an audio signal.

    Args:
        audio_data: Audio samples. Integer PCM (such as the ``int16`` array
            returned by ``midi_to_audio``) is rescaled to floating point
            first, because librosa only accepts floating-point audio.
        sr: Sampling rate of ``audio_data`` in Hz.

    Returns:
        The Mel power spectrogram (128 Mel bands, FFT size 2048, hop 512)
        converted to decibels relative to its maximum value.
    """
    audio_data = np.asarray(audio_data)
    if np.issubdtype(audio_data.dtype, np.integer):
        # Map the integer range onto roughly [-1, 1] before handing the
        # signal to librosa, which raises on non-floating-point input.
        full_scale = np.iinfo(audio_data.dtype).max
        audio_data = audio_data.astype(np.float32) / full_scale

    S = librosa.feature.melspectrogram(y=audio_data, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
    S_DB = librosa.power_to_db(S, ref=np.max)
    return S_DB

# Build the CNN model
def build_model(num_classes, input_shape=(128, 431, 1)):
    """Build and compile the spectrogram CNN classifier.

    Architecture: two Conv2D + MaxPooling2D stages, then Flatten, a
    128-unit dense layer with 50% dropout, and a softmax output layer.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
        input_shape: Shape of one input sample as
            ``(mel_bands, frames, channels)``. Defaults to ``(128, 431, 1)``,
            the value that used to be hard-coded; adjust it to the size of
            your spectrograms.

    Returns:
        The compiled Keras ``Sequential`` model. It is compiled with
        ``'categorical_crossentropy'``, so it must be trained on one-hot
        encoded labels.
    """
    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Example execution function
def process_midi_and_train(midi_paths, labels, num_classes):
    """Turn MIDI files into Mel-spectrograms and train the CNN on them.

    Args:
        midi_paths: Sequence of paths to MIDI files.
        labels: One-hot encoded labels, one row per entry of ``midi_paths``
            (the model is compiled with ``'categorical_crossentropy'``).
        num_classes: Number of classes, i.e. the width of each label row.

    Returns:
        The trained Keras model. Previously nothing was returned and the
        trained model was lost.
    """
    # Width (in frames) of the network input; this has to match the frame
    # dimension of the input shape that build_model() declares.
    n_frames = 431

    data = []
    for path in midi_paths:
        sr, audio_data = midi_to_audio(path)
        spectrogram = _fit_to_frames(generate_spectrogram(audio_data, sr), n_frames)
        data.append(np.expand_dims(spectrogram, axis=-1))  # Add a channel dimension

    # Stack data for training; every item now has the same shape.
    data = np.array(data)
    labels = np.asarray(labels)

    # Build and train the model
    model = build_model(num_classes)
    model.fit(data, labels, epochs=10, batch_size=1)
    return model


def _fit_to_frames(spectrogram, n_frames):
    """Truncate or right-pad a 2-D spectrogram to ``n_frames`` columns."""
    width = spectrogram.shape[1]
    if width >= n_frames:
        return spectrogram[:, :n_frames]
    # Pad with the quietest value present so the padding reads as silence;
    # -80.0 is only a fallback for a spectrogram with no frames at all.
    fill = spectrogram.min() if spectrogram.size else -80.0
    return np.pad(spectrogram, ((0, 0), (0, n_frames - width)),
                  mode='constant', constant_values=fill)

# Example MIDI files and labels
# (kept inside a bare string literal, so nothing is assigned; the string is
# only echoed back as the cell output)
'''midi_files = ['path_to_midi1.mid', 'path_to_midi2.mid']
labels = np.array([[1, 0], [0, 1]])  # Example one-hot encoded labels for 2 classes'''




"midi_files = ['path_to_midi1.mid', 'path_to_midi2.mid']\nlabels = np.array([[1, 0], [0, 1]])  # Example one-hot encoded labels for 2 classes"

In [6]:
def cnn_load_data(directory, n_frames=431):
    """Load every ``.mid`` file under ``directory`` as a Mel-spectrogram.

    ``directory`` is expected to hold one sub-directory per composer. The
    sub-directories are sorted by name and numbered ``0..N-1``; stray files
    at the top level are ignored and do not consume a label index.

    Args:
        directory: Path of the dataset root.
        n_frames: Number of time frames every spectrogram is truncated or
            right-padded to, so that clips of different duration stack into
            one array. Defaults to 431, the frame count of the CNN input.
            Pass ``None`` to keep each spectrogram at its native width.

    Returns:
        tuple: ``(data, labels, label_map)`` where ``data`` holds one
        spectrogram per MIDI file with a trailing channel axis, ``labels``
        holds the matching integer composer indices, and ``label_map`` maps
        each index to its composer directory name.
    """
    data = []
    labels = []
    label_map = {}

    # Filter to directories *before* enumerating so that a non-directory
    # entry cannot shift the class indices.
    composers = [
        name for name in sorted(os.listdir(directory))
        if os.path.isdir(os.path.join(directory, name))
    ]

    for label, composer in enumerate(composers):
        composer_path = os.path.join(directory, composer)
        label_map[label] = composer
        # Sorted so the sample order is reproducible from run to run.
        for midi_file in sorted(os.listdir(composer_path)):
            if not midi_file.endswith('.mid'):
                continue
            path = os.path.join(composer_path, midi_file)
            sr, audio_data = midi_to_audio(path)
            spectrogram = generate_spectrogram(audio_data, sr)

            if n_frames is not None:
                width = spectrogram.shape[1]
                if width >= n_frames:
                    spectrogram = spectrogram[:, :n_frames]
                else:
                    # Pad with the quietest value present so the padding
                    # reads as silence.
                    fill = spectrogram.min() if spectrogram.size else -80.0
                    spectrogram = np.pad(
                        spectrogram, ((0, 0), (0, n_frames - width)),
                        mode='constant', constant_values=fill)

            spectrogram = np.expand_dims(spectrogram, axis=-1)  # Add a channel dimension
            data.append(spectrogram)
            labels.append(label)

    return np.array(data), np.array(labels), label_map




In [7]:
# Build the spectrogram dataset. This needs pyfluidsynth, because
# midi_to_audio() calls fluidsynth(). It also rebinds `data`, `labels` and
# `label_map`, replacing the piano-roll versions loaded earlier.
data, labels, label_map = cnn_load_data(data_directory)


ImportError: fluidsynth() was called but pyfluidsynth is not installed.

In [None]:
# Process MIDI files and train the model
# process_midi_and_train() expects MIDI file *paths* and one-hot labels, so
# collect both from the dataset folder instead of passing the spectrogram
# array (and the previously undefined name `label`).
composer_dirs = [
    name for name in sorted(os.listdir(data_directory))
    if os.path.isdir(os.path.join(data_directory, name))
]

midi_paths = []
path_labels = []
for composer_index, composer in enumerate(composer_dirs):
    composer_path = os.path.join(data_directory, composer)
    for midi_file in sorted(os.listdir(composer_path)):
        if midi_file.endswith('.mid'):
            midi_paths.append(os.path.join(composer_path, midi_file))
            path_labels.append(composer_index)

# Derive the class count from the data rather than hard-coding it.
num_classes = len(composer_dirs)
# One-hot encode: the model is compiled with 'categorical_crossentropy'.
one_hot_labels = np.eye(num_classes)[np.asarray(path_labels, dtype=int)]

process_midi_and_train(midi_paths, one_hot_labels, num_classes=num_classes)