In [1]:
import pandas as pd
import numpy as np

In [2]:
import os
import numpy as np
import pretty_midi
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import pickle

DATASET_PATH = 'Dataset_midi'
SEQUENCE_LENGTH = 50  # Number of time steps

def midi_to_note_sequence(midi_path):
    try:
        midi_data = pretty_midi.PrettyMIDI(midi_path)
        notes = []
        for instrument in midi_data.instruments:
            if not instrument.is_drum:
                for note in instrument.notes:
                    notes.append(note.pitch)
        return sorted(notes)
    except Exception as e:
        print(f"Error with {midi_path}: {e}")
        return []

def process_dataset():
    all_notes = []
    genres = []
    genre_note_sequences = {}

    for genre_folder in os.listdir(DATASET_PATH):
        genre_path = os.path.join(DATASET_PATH, genre_folder)
        if not os.path.isdir(genre_path):
            continue
        genre_notes = []
        for file in os.listdir(genre_path):
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_path = os.path.join(genre_path, file)
                notes = midi_to_note_sequence(midi_path)
                if notes:
                    genre_notes.extend(notes)
                    genres.append(genre_folder)
        genre_note_sequences[genre_folder] = genre_notes
        all_notes.extend(genre_notes)

    # Create a unique note-to-int mapping
    unique_notes = sorted(set(all_notes))
    note_to_int = {note: number for number, note in enumerate(unique_notes)}
    int_to_note = {number: note for note, number in note_to_int.items()}

    X, y, genre_labels = [], [], []

    genre_encoder = LabelEncoder()
    genre_list = list(genre_note_sequences.keys())
    genre_encoder.fit(genre_list)

    for genre, notes in genre_note_sequences.items():
        encoded_notes = [note_to_int[note] for note in notes]
        for i in range(len(encoded_notes) - SEQUENCE_LENGTH):
            X.append(encoded_notes[i:i+SEQUENCE_LENGTH])
            y.append(encoded_notes[i+SEQUENCE_LENGTH])
            genre_labels.append(genre)

    X = np.array(X)
    y = to_categorical(y, num_classes=len(unique_notes))
    genre_encoded = genre_encoder.transform(genre_labels)
    genre_encoded = to_categorical(genre_encoded)

    # Save mappings
    with open('note_mappings.pkl', 'wb') as f:
        pickle.dump((note_to_int, int_to_note, genre_encoder), f)

    return X, genre_encoded, y, len(unique_notes)

if __name__ == "__main__":
    X, genre_labels, y, vocab_size = process_dataset()
    np.savez("music_dataset.npz", X=X, genre=genre_labels, y=y, vocab_size=vocab_size)


In [11]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Concatenate, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

data = np.load("music_dataset.npz", allow_pickle=True)
X = data["X"]
genre = data["genre"]
y = data["y"]
vocab_size = int(data["vocab_size"])


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [12]:
# Inputs
note_input = Input(shape=(X.shape[1],), name="note_input")
genre_input = Input(shape=(genre.shape[1],), name="genre_input")

embedding = Embedding(input_dim=vocab_size, output_dim=100)(note_input)
x = LSTM(256, return_sequences=True)(embedding)
x = LSTM(256)(x)

merged = Concatenate()([x, genre_input])
output = Dense(vocab_size, activation='softmax')(merged)

model = Model(inputs=[note_input, genre_input], outputs=output)
model.compile(loss='categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])
model.summary()

# Train
model.fit([X, genre], y, epochs=30, batch_size=64)
model.save("genre_music_lstm.h5")

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 note_input (InputLayer)        [(None, 50)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 50, 100)      8100        ['note_input[0][0]']             
                                                                                                  
 lstm (LSTM)                    (None, 50, 256)      365568      ['embedding[0][0]']              
                                                                                                  
 lstm_1 (LSTM)                  (None, 256)          525312      ['lstm[0][0]']                   
                                                                                              

In [13]:
import numpy as np
from tensorflow.keras.models import load_model
import pickle
import pretty_midi

SEQUENCE_LENGTH = 50

def generate(model, seed_seq, genre_vector, int_to_note, vocab_size, length=100):
    generated = []
    input_seq = seed_seq[:]
    for _ in range(length):
        input_seq_padded = np.array(input_seq).reshape(1, SEQUENCE_LENGTH)
        prediction = model.predict([input_seq_padded, genre_vector], verbose=0)
        next_note = np.argmax(prediction)
        generated.append(next_note)
        input_seq = input_seq[1:] + [next_note]
    return generated

def notes_to_midi(note_sequence, output_file="output.mid"):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)
    start = 0
    for note_num in note_sequence:
        note = pretty_midi.Note(velocity=100, pitch=note_num, start=start, end=start + 0.5)
        instrument.notes.append(note)
        start += 0.5
    midi.instruments.append(instrument)
    midi.write(output_file)

if __name__ == "__main__":
    with open("note_mappings.pkl", "rb") as f:
        note_to_int, int_to_note, genre_encoder = pickle.load(f)

    model = load_model("genre_music_lstm.h5")
    vocab_size = len(note_to_int)

    # Example usage:
    genre_name = "sad_midi"
    genre_vector = genre_encoder.transform([genre_name])
    genre_vector = np.eye(len(genre_encoder.classes_))[genre_vector]

    # Create a random seed
    seed = np.random.choice(list(note_to_int.values()), SEQUENCE_LENGTH).tolist()

    output_notes = generate(model, seed, genre_vector, int_to_note, vocab_size, length=100)
    output_notes_converted = [int_to_note[n] for n in output_notes]
    notes_to_midi(output_notes_converted, output_file=f"{genre_name}_generated.mid")


In [15]:
import os
import subprocess

def midi_to_wav(midi_path, output_path="output.wav", soundfont_path="example.sf2"):
    command = [
        "fluidsynth",
        "-ni", soundfont_path,
        midi_path,
        "-F", output_path,
        "-r", "44100"
    ]
    subprocess.run(command)

# Example
midi_to_wav("sad_midi_generated.mid", "sad_output.wav", "example.sf2")


FileNotFoundError: [WinError 2] The system cannot find the file specified