In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; the MIDI folder and the sound font used by
# later cells are read from paths under this mount point.
drive.mount('/content/gdrive')

## Enhanced LSTM Model for Music Generation
Building upon the foundations of recurrent neural networks, this model leverages the power of bidirectional LSTMs and regularization techniques to create a more robust and capable system for music generation tasks.

## Enhanced Sampling for Generation
Temperature sampling controls the randomness of the model's predictions: a higher temperature produces more random outputs, while a lower temperature makes the outputs more deterministic.


### Data Preparation

In [None]:
import os
import numpy as np
from music21 import converter, instrument, note, chord

def read_midi(file):
    """Parse a MIDI file and collect the note events of its violin parts.

    The score is partitioned by instrument; when partitioning yields nothing the
    whole score is treated as a single candidate part. A part is kept when
    'Violin' appears in the string form of its instrument or of its part name.

    Args:
        file: Path of the MIDI file to parse.

    Returns:
        A list of ``(token, quarterLength, offset)`` tuples, one per Note, Chord
        or Rest found while recursing through the kept parts. The token is the
        pitch string for a note, the dot-joined ``normalOrder`` values for a
        chord, and ``'rest'`` for a rest.
    """
    print("Loading Music File:", file)

    score = converter.parse(file)
    by_instrument = instrument.partitionByInstrument(score)
    candidate_parts = by_instrument.parts if by_instrument else [score]

    events = []
    for candidate in candidate_parts:
        is_violin = 'Violin' in str(candidate.getInstrument()) or 'Violin' in str(candidate.partName)
        if not is_violin:
            continue

        for item in candidate.recurse():
            # Work out the token first; anything that is not a note, chord or
            # rest (clefs, time signatures, ...) is ignored.
            if isinstance(item, note.Note):
                token = str(item.pitch)
            elif isinstance(item, chord.Chord):
                token = '.'.join(str(n) for n in item.normalOrder)
            elif isinstance(item, note.Rest):
                token = 'rest'
            else:
                continue
            events.append((token, item.duration.quarterLength, item.offset))

    return events


# Folder on the mounted Drive that holds the training MIDI files.
path = '/content/gdrive/MyDrive/Violin_Comp_Data/midi_150/'

# os.listdir returns entries in arbitrary order. Sorting fixes the file order, and with
# it the order of the training windows built later, so the seeded train/validation
# split is the same on every run. The extension test is case-insensitive and also
# accepts ".midi" so valid files are not silently skipped.
files = sorted(i for i in os.listdir(path) if i.lower().endswith((".mid", ".midi")))

# One list of (pitch, duration, offset) events per file.
notes_array = [read_midi(os.path.join(path, file)) for file in files]


### Encoding each unique note to an integer.

In [None]:
# Flatten: merge the per-file event lists into a single list of events.
all_notes = [event for piece in notes_array for event in piece]

In [None]:
from fractions import Fraction

# Durations and offsets that arrive as Fraction are cast to float; every other
# value is passed through unchanged.
all_notes = [
    (
        name,
        length if not isinstance(length, Fraction) else float(length),
        position if not isinstance(position, Fraction) else float(position),
    )
    for name, length, position in all_notes
]


In [None]:
# Same Fraction -> float cast as for the flat list, applied to every event of
# every per-file sequence so both structures hold identical tuples.
notes_array = [
    [
        (
            name,
            length if not isinstance(length, Fraction) else float(length),
            position if not isinstance(position, Fraction) else float(position),
        )
        for name, length, position in piece
    ]
    for piece in notes_array
]


In [None]:
# Vocabulary: each distinct (pitch, duration, offset) event is mapped to its rank
# in the sorted list of distinct events.
note_to_int = dict(
    (event, index) for index, event in enumerate(sorted(set(all_notes)))
)


In [None]:
no_of_timesteps = 100  # number of events in each input window
input_sequences = []
output_notes = []

for notes in notes_array:
    # Translate the piece to integer ids once; each training example is then a
    # slice of that list plus the id that immediately follows it.
    encoded = [note_to_int[note] for note in notes]
    for i in range(len(encoded) - no_of_timesteps):
        input_sequences.append(encoded[i:i + no_of_timesteps])
        output_notes.append(encoded[i + no_of_timesteps])

x_seq = np.array(input_sequences)
y_seq = np.array(output_notes)


### Initiating Train-Test Split & Reshaping Input for LSTM Model

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the windows for validation; the fixed random_state makes the
# split repeatable for a given x_seq / y_seq.
# NOTE(review): the windows are overlapping slices of the same pieces, so a random
# split can put near-identical windows on both sides -- validation metrics may be
# optimistic. Consider splitting by file instead.
x_tr, x_val, y_tr, y_val = train_test_split(
    x_seq,
    y_seq,
    test_size=0.2,
    random_state=13,
)

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
x_tr = x_tr.reshape((x_tr.shape[0], no_of_timesteps, 1))
x_val = x_val.reshape((x_val.shape[0], no_of_timesteps, 1))


### Adjusting LSTM Model Complexity

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional
from keras.regularizers import l2
from keras.optimizers import Adam

# Model Configuration
no_of_timesteps = 100  # keep equal to the window length used to build x_seq
num_notes = len(note_to_int)  # one output class per vocabulary entry

model = Sequential([
    # First Bidirectional LSTM Layer
    Bidirectional(
        LSTM(256, return_sequences=True, kernel_regularizer=l2(0.001)),
        input_shape=(no_of_timesteps, 1),
    ),
    # Second LSTM Layer
    LSTM(128, return_sequences=True, kernel_regularizer=l2(0.001)),
    # Third Bidirectional LSTM Layer
    Bidirectional(LSTM(64, kernel_regularizer=l2(0.001))),
    # Dense Layer with Regularization
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    # Output Layer
    Dense(num_notes, activation='softmax'),
])

# Optimizer Configuration
optimizer = Adam(learning_rate=0.001)

# Targets are integer class ids, hence the sparse variant of cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

model.summary()


### Model Checkpoint

In [None]:
from keras.callbacks import ModelCheckpoint

# Write the model to 'best_model_lstm2.h5' only when the monitored validation
# loss reaches a new minimum.
mc = ModelCheckpoint(
    'best_model_lstm2.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

### Train Model

In [None]:
# Train for 50 epochs, evaluating on the held-out windows after each epoch; the
# checkpoint callback decides when the model is written to disk.
history = model.fit(
    x_tr,
    y_tr,
    epochs=50,
    batch_size=64,
    validation_data=(x_val, y_val),
    callbacks=[mc],
)

### Load Best Model

In [None]:
from keras.models import load_model
# Replace the in-memory model with the one stored in 'best_model_lstm2.h5', the
# file the ModelCheckpoint callback writes during training.
model = load_model('best_model_lstm2.h5')

### Temperature Sampling

In [None]:
def sample_with_temperature(probabilities, temperature=.96):
    """Draw a class index from a probability vector re-weighted by temperature.

    Args:
        probabilities: 1-D sequence of class probabilities (e.g. a softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. A value <= 0 disables sampling and returns the argmax.

    Returns:
        The selected index (a NumPy integer).
    """
    if temperature <= 0:
        return np.argmax(probabilities)

    probabilities = np.asarray(probabilities).astype('float64')
    # The small epsilon keeps log() finite for zero-probability entries.
    scaled_logits = np.log(probabilities + 1e-7) / temperature
    # Shift so the largest logit is 0 before exponentiating. Softmax is unchanged
    # by a constant shift, but without it a small temperature makes every exp()
    # underflow to 0, the normalisation becomes 0/0 = NaN, and np.random.choice
    # raises.
    scaled_logits -= np.max(scaled_logits)
    exp_probs = np.exp(scaled_logits)
    probabilities = exp_probs / np.sum(exp_probs)
    return np.random.choice(len(probabilities), p=probabilities)


In [None]:
def generate_music(model, start_sequence, length=50, temperature=.96, lookback_length=100):
    """Generate a continuation of ``start_sequence`` one event at a time.

    Relies on the notebook-level mappings ``note_to_int`` / ``x_int_to_note`` and
    on ``sample_with_temperature``.

    Args:
        model: Trained model whose ``predict`` accepts an array of shape
            ``(1, lookback_length, 1)`` and returns one probability row.
        start_sequence: Sequence of note tuples (keys of ``note_to_int``) used
            as the seed. Longer seeds are truncated to their last
            ``lookback_length`` items; shorter seeds are left-padded.
        length: Number of events to generate.
        temperature: Sampling temperature passed to ``sample_with_temperature``.
        lookback_length: Number of events fed to the model at each step.

    Returns:
        List of ``length`` generated note tuples.

    Raises:
        ValueError: If padding is required but no usable padding token exists
            (empty seed and ``('rest', 0, 0)`` not in the vocabulary).
        KeyError: If the seed contains a tuple that is not in ``note_to_int``.
    """
    # Accept any sequence type (tuple, array, ...) for the list operations below.
    start_sequence = list(start_sequence)

    # Ensuring start_sequence is of length lookback_length
    if len(start_sequence) > lookback_length:
        start_sequence = start_sequence[-lookback_length:]
    elif len(start_sequence) < lookback_length:
        # Pad the sequence. ('rest', 0, 0) is only usable when it is in the
        # vocabulary; otherwise fall back to repeating the first seed event
        # instead of failing with a KeyError during encoding.
        pad_note = ('rest', 0, 0)
        if pad_note not in note_to_int:
            if not start_sequence:
                raise ValueError(
                    "start_sequence is empty and ('rest', 0, 0) is not in the vocabulary; "
                    "cannot pad the seed"
                )
            pad_note = start_sequence[0]
        start_sequence = [pad_note] * (lookback_length - len(start_sequence)) + start_sequence

    start_sequence_formatted = np.array([note_to_int[note] for note in start_sequence])

    prediction_output = []
    for _ in range(length):
        prediction_input = np.reshape(start_sequence_formatted, (1, lookback_length, 1))
        # verbose=0: otherwise a progress bar is printed for every generated event.
        prob = model.predict(prediction_input, verbose=0)[0]
        index = sample_with_temperature(prob, temperature)
        prediction_output.append(x_int_to_note[index])

        # Slide the window: append the sampled id and keep the newest lookback_length ids.
        start_sequence_formatted = np.append(start_sequence_formatted, [index])[-lookback_length:]

    return prediction_output


# Inverse vocabulary: integer id -> note tuple, used to decode model predictions.
x_int_to_note = {number: note for note, number in note_to_int.items()}


### Convert to MIDI & Generate Music

In [None]:
from music21 import pitch

def midi_number_to_note_name(midi_number):
    """Return the name-with-octave string of the pitch built from a MIDI number.

    Args:
        midi_number: MIDI note number passed to ``pitch.Pitch(midi=...)``.

    Returns:
        The pitch's ``nameWithOctave`` string.
    """
    converted_pitch = pitch.Pitch(midi=midi_number)
    return converted_pitch.nameWithOctave


In [None]:
from music21 import stream, instrument, note, chord

def convert_to_midi(prediction_output, output_path='lstm_music2.mid'):
    """Write a sequence of generated events to a MIDI file as a violin line.

    Token handling:
      * ``'rest'`` becomes a rest.
      * A token made only of dot-separated integers (e.g. ``'0.4.7'`` or ``'7'``)
        is a chord token. Values below 12 are treated as pitch classes and
        placed in the octave starting at MIDI 60; values of 12 or more are used
        as MIDI numbers. One value yields a single note, several yield a chord.
        Previously multi-value tokens raised inside ``note.Note`` and were
        dropped, and single values became MIDI notes 0-11.
      * Anything else is passed to ``note.Note`` as a pitch name.

    Events are appended one after another, so each starts where the previous one
    ends. An event that cannot be converted is reported and skipped so that one
    bad token does not abort the whole export.

    Args:
        prediction_output: Iterable of tuples whose first item is the token and
            whose second item is the duration in quarter lengths.
        output_path: Destination MIDI file. Defaults to ``'lstm_music2.mid'``.
    """
    midi_stream = stream.Stream()
    midi_stream.append(instrument.Violin())

    for i, note_info in enumerate(prediction_output):
        try:
            note_name = note_info[0]

            if note_name == 'rest':
                new_element = note.Rest()
            elif all(part.isdigit() for part in note_name.split('.')):
                values = [int(part) for part in note_name.split('.')]
                pitch_names = [
                    midi_number_to_note_name(value if value >= 12 else 60 + value)
                    for value in values
                ]
                if len(pitch_names) == 1:
                    new_element = note.Note(pitch_names[0])
                else:
                    new_element = chord.Chord(pitch_names)
            else:
                new_element = note.Note(note_name)

            new_element.duration.quarterLength = note_info[1]
            new_element.storedInstrument = instrument.Violin()
            # append() places the element at the current end of the stream, so no
            # separate running offset has to be maintained.
            midi_stream.append(new_element)

        except Exception as e:
            print(f"Error processing note at position {i}: {note_info}. Error: {e}")

    midi_stream.write('midi', fp=output_path)

# Pick one validation window at random to act as the seed
random_index = np.random.randint(0, len(x_val))
start_sequence = x_val[random_index]

# The window stores integer ids; translate each id back to its note tuple
start_sequence_decoded = [x_int_to_note[note_id] for note_id in start_sequence.ravel()]

# Let the model continue the seed, then export the generated events as MIDI
prediction_output = generate_music(model, start_sequence_decoded)
convert_to_midi(prediction_output)

# MIDI to WAV Conversion using FluidSynth


In [None]:
!apt install -y fluidsynth
!pip install midi2audio

In [None]:
from midi2audio import FluidSynth

# Initialize FluidSynth with a sound font
# (the .sf2 file is read from the mounted Drive folder)
fs = FluidSynth('/content/gdrive/MyDrive/Violin_Comp_Data/soundfonts/Acro_Violins.sf2')

# Convert MIDI to WAV
# 'lstm_music2.mid' is the generated MIDI file; 'lstm2_acro.wav' is the file the
# evaluation cells load.
fs.midi_to_audio('lstm_music2.mid', 'lstm2_acro.wav')

## Evaluating Music Generation: Pitch and Rhythm Consistency
Together, pitch and rhythm consistency form two fundamental pillars of music that determine its overall quality and appeal. By evaluating these aspects, we can gauge the success of our music generation models in producing compositions that are not just technically sound but also musically coherent and enjoyable.



### Pitch Consistency

In [None]:
import librosa

def calculate_pitch_consistency(audio, sr):
    """Return the variance of the strongest tracked pitch across frames.

    Args:
        audio: Audio samples passed to ``librosa.piptrack`` as ``y``.
        sr: Sample rate of ``audio``.

    Returns:
        ``np.var`` of the per-frame predominant pitch values.
    """
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)

    # For every frame (column) pick the pitch stored in the row whose magnitude
    # is largest in that frame.
    # NOTE(review): frames without a detected pitch presumably contribute 0 here,
    # which would inflate the variance -- TODO confirm and consider masking them.
    strongest_rows = magnitudes.argmax(axis=0)
    frame_indices = np.arange(pitches.shape[1])
    predominant_pitches = pitches[strongest_rows, frame_indices]

    return np.var(predominant_pitches)

# Second-iteration LSTM output (the WAV rendered from the generated MIDI)
# NOTE(review): no sr= is passed here, whereas calculate_rhythm_consistency loads
# with sr=None -- the two metrics may be computed at different sample rates.
audio, sr = librosa.load('lstm2_acro.wav')
pitch_variance = calculate_pitch_consistency(audio, sr)
print("Second Iteration LSTM Pitch Variance:", pitch_variance)


### Rhythm Consistency

In [None]:
def calculate_rhythm_consistency(file_path):
    """Estimate the tempo of an audio file and how evenly its beats are spaced.

    Args:
        file_path: Path of the audio file; it is loaded with ``sr=None``.

    Returns:
        A ``(tempo, tempo_variability)`` tuple: the tempo reported by
        ``librosa.beat.beat_track`` and the standard deviation of the time gaps
        between consecutive detected beats.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    # Beat positions come back as frame indices; convert them to timestamps.
    estimated_tempo, beat_frames = librosa.beat.beat_track(y=signal, sr=sample_rate)
    beat_timestamps = librosa.frames_to_time(beat_frames, sr=sample_rate)

    # Spread of the inter-beat intervals.
    return estimated_tempo, np.std(np.diff(beat_timestamps))

file_paths = ['lstm2_acro.wav']

# Report tempo and tempo variability for every listed file
for i, file_path in enumerate(file_paths):
    tempo, tempo_variability = calculate_rhythm_consistency(file_path)
    # One print call, one field per line (sep="\n").
    print(
        f"File {i + 1}:",
        f"Path: {file_path}",
        f"Tempo: {tempo}",
        f"Tempo Variability: {tempo_variability}",
        "------",
        sep="\n",
    )


## Analysis of Pitch and Rhythm Consistency Results
**Pitch Consistency:** The pitch variance in the Second Iteration LSTM model is 33,523.45, which is an improvement compared to the first iteration's 42,279.527. This decrease suggests that the model is getting better at maintaining a consistent pitch pattern, leading to potentially more coherent and harmonious music. However, the variance is still relatively high, indicating room for further improvement in capturing and generating stable pitch patterns.

**Rhythm Consistency:** Tempo variability increased relative to the first LSTM model, indicating that the timing of the rhythm fluctuates more in this iteration.

## Conclusion
The second iteration LSTM model shows a promising improvement in pitch consistency but indicates a need to revisit rhythm stability. Because I suspect issues with the limited training data, I will use a pre-trained model with my input audio for the next iteration. But first, I will perform comparative model analysis to determine the best generated audio to use for the pre-trained model.








# Proceed to 'Visual_Analysis_Model_Comparison.ipynb'

``` bash
├── AI_Violinist_Intro.ipynb                <- Data capture/project overview
├── Model_1_WaveNet.ipynb                   <- Baseline/WaveNet Models
├── Model_2_LSTM.ipynb                      <- First LSTM Model
├── Model_3__Complex_LSTM.ipynb             <- Second LSTM Model
├── Visual_Analysis_Model_Comparison.ipynb  <- Model Evaluation
├── Pretrained_Model_Jukebox.ipynb          <- Generating Final Music
└── Failed_Models_Spectrograms.ipynb        <- Failed attempts

```
