In [None]:
from google.colab import drive
# Mount Google Drive; the dataset and soundfont paths used below live under this mount point
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

## Introducing LSTM-Based Music Generation Model
The model is tailored to understand and replicate the complexities of musical compositions.

- This model will be compiled with sparse categorical crossentropy as the loss function, which is appropriate for this multi-class classification problem.

- Adam optimizer will be used for efficient training.


### Data Preparation
Adjusting note representation to include note duration and time offsets.

In [None]:
import os
import numpy as np
from music21 import converter, instrument, note, chord

def read_midi(file):
    """Extract the violin material of a MIDI file as a list of note tuples.

    The file is parsed with music21 and partitioned by instrument. Only parts
    whose instrument or part name contains 'Violin' are read.

    Args:
        file: Path of the MIDI file to parse.

    Returns:
        A list of ``(symbol, quarter_length, offset)`` tuples in traversal
        order. ``symbol`` is ``str(element.pitch)`` for a single note, the
        chord's normal-order values joined with '.' for a chord, and ``'rest'``
        for a rest. The list is empty when no violin part is found.
    """
    print("Loading Music File:", file)
    notes = []

    midi = converter.parse(file)
    parts = instrument.partitionByInstrument(midi)
    # Fall back to the whole score when partitioning yields nothing
    relevant_parts = parts.parts if parts else [midi]

    for part in relevant_parts:
        # The fallback stream is not necessarily a Part, so read `partName`
        # defensively instead of risking an AttributeError.
        part_name = getattr(part, 'partName', None)
        is_violin = 'Violin' in str(part.getInstrument()) or 'Violin' in str(part_name)
        if not is_violin:
            continue

        for element in part.recurse():
            if isinstance(element, note.Note):
                symbol = str(element.pitch)
            elif isinstance(element, chord.Chord):
                symbol = '.'.join(str(n) for n in element.normalOrder)
            elif isinstance(element, note.Rest):
                symbol = 'rest'
            else:
                # Anything that is not a note, chord or rest is ignored
                continue

            # NOTE(review): `element.offset` is presumably relative to the
            # element's immediate container rather than the whole part - confirm.
            notes.append((symbol, element.duration.quarterLength, element.offset))

    return notes


# Read every .mid file in the dataset folder. os.listdir() returns entries in
# arbitrary order, so the names are sorted to keep the order of the extracted
# sequences (and therefore the seeded train/validation split) reproducible.
path = '/content/gdrive/MyDrive/Violin_Comp_Data/midi_150/'
files = sorted(i for i in os.listdir(path) if i.endswith(".mid"))
notes_array = [read_midi(os.path.join(path, file)) for file in files]


### Encoding each unique note to an integer

In [None]:
# Collapse the per-file sequences into one list to build the vocabulary
all_notes = []
for sequence in notes_array:
    all_notes.extend(sequence)

# Give every distinct note tuple an integer id, assigned in sorted order
vocabulary = sorted(set(all_notes))
note_to_int = dict(zip(vocabulary, range(len(vocabulary))))

# Slide a fixed-size window over each file: the window is the model input
# and the note immediately after it is the target
no_of_timesteps = 32
input_sequences = []
output_notes = []

for notes in notes_array:
    # Encode the file once, then slice windows out of the encoded list
    encoded = [note_to_int[symbol] for symbol in notes]
    for start in range(len(encoded) - no_of_timesteps):
        stop = start + no_of_timesteps
        input_sequences.append(encoded[start:stop])
        output_notes.append(encoded[stop])

x_seq = np.array(input_sequences)
y_seq = np.array(output_notes)


### Initiating Train-Test Split & Reshaping Input for LSTM Model

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the windows for validation; the fixed seed keeps the split repeatable
x_tr, x_val, y_tr, y_val = train_test_split(
    x_seq,
    y_seq,
    test_size=0.2,
    random_state=13,
)

# Add a trailing feature axis so each sample is (timesteps, 1), matching the LSTM input shape
x_tr = x_tr.reshape(x_tr.shape[0], no_of_timesteps, 1)
x_val = x_val.reshape(x_val.shape[0], no_of_timesteps, 1)


### LSTM Model Architecture
Trying an LSTM-based architecture for better sequence generation.

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

# Two stacked LSTM layers feed a dense classifier with one output per vocabulary entry
vocab_size = len(note_to_int)

model = Sequential([
    LSTM(256, input_shape=(no_of_timesteps, 1), return_sequences=True),
    LSTM(256),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(vocab_size, activation='softmax'),
])

# Targets are integer ids, hence the sparse variant of categorical crossentropy
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

### Model Checkpoint

In [None]:
from keras.callbacks import ModelCheckpoint

# Write the model to disk only when the validation loss reaches a new minimum
mc = ModelCheckpoint(
    'best_model_lstm1.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

### Train Model

In [None]:
# Train for 50 epochs, validating on the held-out windows; `mc` checkpoints the best model
history = model.fit(
    x_tr,
    y_tr,
    batch_size=128,
    epochs=50,
    validation_data=(x_val, y_val),
    callbacks=[mc],
)

### Load Best Model

In [None]:
from keras.models import load_model
# Replace the in-memory model with the checkpoint file written during training
best_model_path = 'best_model_lstm1.h5'
model = load_model(best_model_path)

### Generate Music Predictions

In [None]:
def generate_music(model, start_sequence, length=50):
    """Sample `length` notes from the model, one step at a time.

    Args:
        model: Object whose ``predict`` is called with an array of shape
            ``(1, len(start_sequence), 1)``; the first row of its result is
            used as a probability distribution over note ids.
        start_sequence: Seed notes; each one must be a key of ``note_to_int``.
        length: Number of notes to generate.

    Returns:
        A list with the generated notes, looked up in ``x_int_to_note``.
    """
    # Encode the seed notes as integer ids; this array is the sliding input window
    window = np.array([note_to_int[symbol] for symbol in start_sequence])
    generated = []

    for _ in range(length):
        batch = window.reshape(1, len(window), 1)
        prob = model.predict(batch)[0]

        # Sample from the predicted distribution instead of always taking the most likely id
        index = np.random.choice(range(len(prob)), p=prob)
        generated.append(x_int_to_note[index])

        # Slide the window: push the sampled id in, drop the oldest one
        window = np.append(window, [index])[1:]

    return generated

# Inverse of note_to_int: integer id -> note tuple, used to decode sampled ids
x_int_to_note = {index: symbol for symbol, index in note_to_int.items()}

### Convert Back to MIDI

In [None]:
from music21 import stream, instrument, note, chord, midi

def midi_number_to_note_name(midi_number):
    """Convert a MIDI note number to a note name with octave.

    Args:
        midi_number: Integer MIDI note number.

    Returns:
        The pitch name including its octave, as reported by music21's
        ``Pitch.nameWithOctave``.
    """
    # music21.midi.translate offers no pitchToNoteName(); build a Pitch from
    # the MIDI number and read its name instead.
    from music21 import pitch

    converted = pitch.Pitch()
    converted.midi = midi_number
    return converted.nameWithOctave

def convert_to_midi(prediction_output, output_path='lstm_music1.mid'):
    """Write generated notes to a MIDI file played by a violin.

    Args:
        prediction_output: Iterable of ``(symbol, quarter_length, offset)``
            tuples. ``symbol`` is ``'rest'``, a pitch name, or '.'-joined
            integers describing a chord (the encoding ``read_midi`` derives
            from ``normalOrder``; a one-pitch chord is a bare integer string).
        output_path: Destination MIDI file; defaults to ``'lstm_music1.mid'``.
    """
    midi_stream = stream.Stream()
    midi_stream.append(instrument.Violin())

    for note_info in prediction_output:
        symbol = note_info[0]
        chord_members = symbol.split('.')

        if symbol == 'rest':
            element = note.Rest()
        elif all(member.isdigit() for member in chord_members):
            # Chord token such as '0.4.7' or '7': rebuild it from its integer
            # members. Passing the raw token to note.Note() would fail because
            # it is not a pitch name.
            element = chord.Chord([note.Note(int(member)) for member in chord_members])
        else:
            element = note.Note(symbol)

        element.duration.quarterLength = note_info[1]
        element.storedInstrument = instrument.Violin()
        # Stream.append() places each element right after the previous one, so
        # the timeline follows the durations; the recorded source offset
        # (note_info[2]) is not needed for placement.
        midi_stream.append(element)

    midi_stream.write('midi', fp=output_path)

In [None]:
from music21 import pitch

# Pick one validation window at random to seed the generator
random_index = np.random.randint(len(x_val))
start_sequence = x_val[random_index]

# The window holds integer ids; generate_music() expects the notes themselves
start_sequence_decoded = [x_int_to_note[note_id] for note_id in start_sequence.ravel()]

# Sample a continuation of the seed and write it out as a MIDI file
prediction_output = generate_music(model, start_sequence_decoded)
convert_to_midi(prediction_output)


# MIDI to WAV Conversion using FluidSynth


In [None]:
!apt install -y fluidsynth
!pip install midi2audio

In [None]:
from midi2audio import FluidSynth

# Render the generated MIDI file to WAV with the violin soundfont stored on Drive
soundfont_path = '/content/gdrive/MyDrive/Violin_Comp_Data/soundfonts/Acro_Violins.sf2'
fs = FluidSynth(soundfont_path)
fs.midi_to_audio('lstm_music1.mid', 'lstm1_acro.wav')

## Evaluating Music Generation: Pitch and Rhythm Consistency
Together, pitch and rhythm consistency form two fundamental pillars of music that determine its overall quality and appeal. By evaluating these aspects, we can gauge the success of our music generation models in producing compositions that are not just technically sound but also musically coherent and enjoyable.



### Pitch Consistency
Pitch consistency can be evaluated by extracting the pitch from the audio and then analyzing its stability and variance.

In [None]:
import librosa

def calculate_pitch_consistency(audio, sr):
    """Return the variance of the predominant pitch across all frames.

    Args:
        audio: Audio signal passed to ``librosa.piptrack`` as ``y``.
        sr: Sampling rate passed to ``librosa.piptrack``.

    Returns:
        Variance of the per-frame predominant pitch values.
    """
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)

    # For every frame (column), take the pitch of the bin with the largest magnitude
    strongest_bins = magnitudes.argmax(axis=0)
    frame_indices = np.arange(pitches.shape[1])
    predominant_pitches = pitches[strongest_bins, frame_indices]

    return np.var(predominant_pitches)

# First-iteration LSTM output: load the rendered WAV and report its pitch variance
wav_path = 'lstm1_acro.wav'
audio, sr = librosa.load(wav_path)
pitch_variance = calculate_pitch_consistency(audio, sr)
print("First Iteration LSTM Pitch Variance:", pitch_variance)


### Rhythm Consistency
Rhythm consistency can be evaluated by analyzing the beat and tempo of the generated audio.
- Extract Beat Information
- Analyze Tempo Stability Over Time

In [None]:
def calculate_rhythm_consistency(file_path):
    """Estimate the tempo of an audio file and how evenly its beats are spaced.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A ``(tempo, tempo_variability)`` tuple: the tempo reported by
        ``librosa.beat.beat_track`` and the standard deviation of the time
        between consecutive beats.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    signal, sample_rate = librosa.load(file_path, sr=None)

    tempo, beat_frames = librosa.beat.beat_track(y=signal, sr=sample_rate)

    # Convert beat frame indices to times, then measure the spread of the gaps between beats
    beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)
    tempo_variability = np.diff(beat_times).std()

    return tempo, tempo_variability

file_paths = ['lstm1_acro.wav']

# Print tempo and beat-spacing variability for every rendered file
for i, file_path in enumerate(file_paths):
    tempo, tempo_variability = calculate_rhythm_consistency(file_path)
    report_lines = [
        f"File {i + 1}:",
        f"Path: {file_path}",
        f"Tempo: {tempo}",
        f"Tempo Variability: {tempo_variability}",
        "------",
    ]
    print("\n".join(report_lines))


## Analysis of Pitch and Rhythm Consistency Results
**Pitch Consistency:** The high pitch variance is an area of concern.

**Rhythm Consistency:** The results here are more promising. The consistency in tempo suggests the model is capturing the rhythmic aspect of the music well.

## Conclusion
While the rhythm consistency aspect of this model appears solid, the pitch consistency needs improvement. Next, I will increase the complexity of the LSTM model.

# Proceed to 'Model_3_Complex_LSTM.ipynb'
``` bash
├── AI_Violinist_Intro.ipynb                <- Data capture/project overview
├── Model_1_WaveNet.ipynb                   <- Baseline/WaveNet Models
├── Model_2_LSTM.ipynb                      <- First LSTM Model
├── Model_3__Complex_LSTM.ipynb             <- Second LSTM Model
├── Visual_Analysis_Model_Comparison.ipynb  <- Model Evaluation
├── Pretrained_Model_Jukebox.ipynb          <- Generating Final Music
└── Failed_Models_Spectrograms.ipynb        <- Failed attempts

```
