In [1]:
!pip install gdown
!pip install pretty_midi
!pip install miditok
!pip install midi-clip

!wget https://raw.githubusercontent.com/roostico/NesGen/refs/heads/main/utility.py

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
Successfully installed gdown-5.2.0
Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m64.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25ldone
[?25h  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592292 sha256=3b6096077d95bcf179134e9f0ca451491fd31597ef7154699

In [2]:
import os
import random
import shutil
from tqdm import tqdm
from pathlib import Path
import pretty_midi
import numpy as np
from miditok import REMI, TokenizerConfig
import json
import tensorflow as tf
from miditok.utils import split_files_for_training
from miditok.data_augmentation import augment_dataset
import random
from random import shuffle

import sys
import pickle
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, Reshape, Dropout, LSTM, Bidirectional
from tensorflow.keras.layers import BatchNormalization, Activation, ZeroPadding2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
     

In [3]:
import tensorflow.keras.mixed_precision as mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Data section

In [4]:
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
!unzip "maestro-v3.0.0-midi.zip"
!rm "maestro-v3.0.0-midi.zip"

dataset_path = "/kaggle/working/maestro-v3.0.0"

  pid, fd = os.forkpty()


--2024-12-16 09:23:38--  https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.98.207, 173.194.203.207, 74.125.142.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.98.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58416533 (56M) [application/octet-stream]
Saving to: 'maestro-v3.0.0-midi.zip'


2024-12-16 09:23:38 (119 MB/s) - 'maestro-v3.0.0-midi.zip' saved [58416533/58416533]

Archive:  maestro-v3.0.0-midi.zip
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_08_R1_2004_01-02_ORIG_MID--AUDIO_08_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_09_R1_2004_05_ORIG_MID--AUDIO_09_R1_2004_06_Track06_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_14_R1_2004_01-03_ORIG_MID--AUDIO_14_R1_2004_01_Track01_wav.midi  
  inflating: maestro-v3.0.0/2004/MIDI-Unprocessed_XP_01_R1_2004_

## Prepare the dataset

In [5]:
# Paths to the files of the dataset

midi_paths = list(Path(dataset_path).resolve().glob("**/*.mid")) + list(Path(dataset_path).resolve().glob("**/*.midi"))

midis_dir = "midis"
os.makedirs(midis_dir, exist_ok=True)


for i, midi_path in enumerate(midi_paths):
  new_midi_path = os.path.join(midis_dir, f"{i}.midi")
  shutil.move(str(midi_path), new_midi_path)


midis = list(Path("/kaggle/working/midis").resolve().glob("**/*.mid")) + list(Path("/kaggle/working/midis").resolve().glob("**/*.midi"))

def sample():
  return str(random.choice(midis))

In [6]:
BEAT_RES = {(0, 1): 12, (1, 2): 4, (2, 4): 2, (4, 8): 1}

TOKENIZER_PARAMS = {

    "pitch_range": (21, 109),
    "beat_res": BEAT_RES,
    "num_velocities": 12,
    "special_tokens": ["PAD", "BOS", "EOS"],
    "use_chords": True,
    "use_rests": True,
    "use_tempos": True,
    "num_tempos": 16,
    "tempo_range": (50, 200),  # (min_tempo, max_tempo),
}

config = TokenizerConfig(**TOKENIZER_PARAMS)

tokenizer = REMI(config)

In [7]:
vocab_size = 1000
tokenizer.train(vocab_size=vocab_size, files_paths=midis)






In [8]:
processed = [Path(f"{s}") for s in midis]
print(len(processed))

1276


In [9]:
valid_perc = 0.3

total_num_files = len(processed)
num_files_valid = round(total_num_files * valid_perc)
shuffle(processed)
midi_paths_valid = processed[:num_files_valid]
midi_paths_train = processed[num_files_valid:]

# Chunk MIDIs and perform data augmentation on each subset independently

for files_paths, subset_name in (
    (midi_paths_train, "train"),
    (midi_paths_valid, "valid"),
):
    print(files_paths[0])

    # Split the MIDIs into chunks of sizes approximately about 1024 tokens

    subset_chunks_dir = Path(f"Maestro_{subset_name}")

    split_files_for_training(
        files_paths=files_paths,
        tokenizer=tokenizer,
        save_dir=subset_chunks_dir,
        max_seq_len=1024,
        num_overlap_bars=2,
    )

    # Perform data augmentation

    augment_dataset(
        subset_chunks_dir,
        pitch_offsets=[-12, 12],
        velocity_offsets=[-4, 4],
        duration_offsets=[-0.5, 0.5],
    )

midi_paths_train = list(Path("Maestro_train").glob("**/*.mid")) + list(Path("Maestro_train").glob("**/*.midi"))
midi_paths_valid = list(Path("Maestro_valid").glob("**/*.mid")) + list(Path("Maestro_valid").glob("**/*.midi"))

/kaggle/working/midis/999.midi


Splitting music files (Maestro_train): 100%|██████████| 893/893 [00:13<00:00, 68.25it/s]
Performing data augmentation: 100%|██████████| 12338/12338 [00:27<00:00, 449.20it/s]


/kaggle/working/midis/458.midi


Splitting music files (Maestro_valid): 100%|██████████| 383/383 [00:05<00:00, 64.34it/s]
Performing data augmentation: 100%|██████████| 5644/5644 [00:12<00:00, 467.43it/s]


In [10]:
def midi_valid(midi) -> bool:

    if any(ts.numerator != 4 for ts in midi.time_signature_changes):

        return False  # time signature different from 4/*, 4 beats per bar

    return True



if os.path.exists("tokenized"):

  shutil.rmtree("tokenized")


for dir in ("train", "valid"):
    tokenizer.tokenize_dataset(        
    
        Path(f"/kaggle/working/Maestro_{dir}"),
        Path(f"/kaggle/working/tokenized_{dir}"),
        midi_valid,
    
    )

Tokenizing music files (working/tokenized_train): 100%|██████████| 75507/75507 [22:24<00:00, 56.16it/s]
Tokenizing music files (working/tokenized_valid): 100%|██████████| 34486/34486 [10:14<00:00, 56.09it/s]


In [11]:
def read_json(path: str) -> dict:

  with open(path, "r") as f:

    return json.load(f)

def read_json_files(json_file_paths):
    """Reads a list of JSON files and returns a list of objects.
    Args:
        json_file_paths: A list of file paths to JSON files.
    Returns:
        A list of objects, where each object represents the data from a JSON file.
        Returns an empty list if any error occurs during file processing.
    """

    objects = []

    for file_path in tqdm(json_file_paths):

        try:

            objects.append(read_json(file_path))

        except FileNotFoundError:

            print(f"Error: File not found - {file_path}")

            return [] # Return empty list on error

        except json.JSONDecodeError:

            print(f"Error decoding JSON in file: {file_path}")

            return [] # Return empty list on error

    return objects


In [12]:
tokenized_train = list(Path("tokenized_train").resolve().glob("**/*.json"))
data_objects_train = read_json_files(tokenized_train)

tokenized_valid = list(Path("tokenized_valid").resolve().glob("**/*.json"))
data_objects_valid = read_json_files(tokenized_valid)

if data_objects_train:
    print(f"\nSuccessfully read {len(data_objects_train)} training JSON files.")
else:
    print("Error reading JSON files.")

100%|██████████| 75507/75507 [00:12<00:00, 5930.90it/s]
100%|██████████| 34486/34486 [00:05<00:00, 6390.58it/s]


Successfully read 75507 training JSON files.





In [13]:
encoded_train = [np.array(song["ids"][0]) for song in data_objects_train]
encoded_valid = [np.array(song["ids"][0]) for song in data_objects_valid]

In [14]:
all_ids_train = np.concatenate(encoded_train)
all_ids_valid = np.concatenate(encoded_valid)

In [24]:
import datetime
today = datetime.datetime.today()
day = today.day
month = today.month
name = "tokenizer{:d}_{:02d}{:02d}.json".format(vocab_size, month, day)
tokenizer.save(name)
np.savetxt("ids_train_{:02d}{:02d}.txt".format(month, day), all_ids_train)
np.savetxt("ids_valid_{:02d}{:02d}.txt".format(month, day), all_ids_valid)


## ... or skip all the data preparation

In [None]:
!gdown 1SDRkoWwyuSl4udoCHdcitjLLm9d0kfxS # tokenizer_maestro0612.json
!gdown 1IQToXD9s8g4L-AlK-MY4qvGoLZ-p7bMw # ids_train
!gdown 1DWjViUKpW07LfbGimlhhhGdK7oQaJpj- # ids_valid

In [None]:
tokenizer = REMI(params="tokenizer_maestro0612.json")
all_ids_train = np.loadtxt("ids_train").astype(dtype=np.int32)
all_ids_valid = np.loadtxt("ids_valid").astype(dtype=np.int32)

## Tensorflow datasets

### Recommended: limit arrays

In [20]:
perc = 0.7
all_ids_train = all_ids_train[:int(perc * len(all_ids_train))]
all_ids_valid = all_ids_valid[:int(perc * len(all_ids_valid))]
print(f"Loaded {len(all_ids_train)} training ids")

Loaded 55321303 training ids


In [21]:
ids_dataset_train = tf.data.Dataset.from_tensor_slices(all_ids_train)
ids_dataset_valid = tf.data.Dataset.from_tensor_slices(all_ids_valid)

In [25]:
seq_length = 1024
vocab_size = len(tokenizer)
BATCH_SIZE = 128
BUFFER_SIZE = 10000

def normalize_and_split(sequence):
    # Convert to float32
    input_seq = tf.cast(sequence, tf.float32)
    normalized_seq = (input_seq - vocab_size / 2) / (vocab_size / 2)
    target = tf.ones_like(normalized_seq)  # Create target tensor with all 1s
    return normalized_seq, target

train_ds = (
    ids_dataset_train
    .batch(seq_length, drop_remainder=True)  # Create sequences of shape (seq_length,)
    .map(normalize_and_split)
    .map(lambda x, y: (tf.expand_dims(x, -1), y))  # Add channel dimension: (seq_length, 1)
    .batch(BATCH_SIZE, drop_remainder=True)  # Batch for training: (batch_size, seq_length, 1)
    .prefetch(tf.data.AUTOTUNE)
)

valid_ds = (
    ids_dataset_valid
    .batch(seq_length, drop_remainder=True)  # Create sequences of shape (seq_length,)
    .map(normalize_and_split)
    .map(lambda x, y: (tf.expand_dims(x, -1), y))  # Add channel dimension: (seq_length, 1)
    .batch(BATCH_SIZE, drop_remainder=True)  # Batch for training: (batch_size, seq_length, 1)
    .prefetch(tf.data.AUTOTUNE)
)

for real_seqs, targets in train_ds.take(1):
    print(f"Input Shape: {real_seqs.shape}, Input Type: {real_seqs.dtype}")
    print(f"Target Shape: {targets.shape}, Target Type: {targets.dtype}")
    

Input Shape: (128, 1024, 1), Input Type: <dtype: 'float32'>
Target Shape: (128, 1024), Target Type: <dtype: 'float32'>


# The model

In [27]:
def generator(latent_dim, seq_shape): 
    model = Sequential()
    model.add(Input(shape=(latent_dim, 1)))
    model.add(LSTM(512, input_shape=(latent_dim, 1), return_sequences=True))
    model.add(Bidirectional(LSTM(512)))
    model.add(Dense(256))
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1024))
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(np.prod(seq_shape), activation='tanh'))
    model.add(Reshape(seq_shape))
    return model

def discriminator(seq_shape):
    model = Sequential()
    model.add(Input(shape=seq_shape))
    model.add(LSTM(512, input_shape=seq_shape, return_sequences=True))
    model.add(Bidirectional(LSTM(512, return_sequences=True)))
    model.add(Dense(512))
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(Dense(512))
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

### Smaller version

In [None]:
def generator(latent_dim, seq_shape): 
    model = Sequential()
    model.add(Input(shape=(latent_dim, 1)))
    model.add(LSTM(128, input_shape=(latent_dim, 1), return_sequences=True))  # Reduced units
    model.add(Bidirectional(LSTM(128)))  # Reduced units
    model.add(Dense(64))  # Reduced units
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(128))  # Reduced units
    model.add(LeakyReLU(negative_slope=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(np.prod(seq_shape), activation='tanh'))
    model.add(Reshape(seq_shape))
    return model


def discriminator(seq_shape):
    model = Sequential()
    model.add(Input(shape=seq_shape))
    model.add(LSTM(256, input_shape=seq_shape, return_sequences=True))  # Maintain timestep output
    model.add(Bidirectional(LSTM(256, return_sequences=True)))         # Maintain timestep output
    model.add(Dense(1, activation='sigmoid'))         # Predict for each timestep
    return model

In [28]:
class GAN():
  def __init__(self, vocab_size, seq_length, latent_dim = 1000):
    self.vocab_size = vocab_size
    self.seq_length = seq_length
    self.seq_shape = (self.seq_length, 1)
    self.latent_dim = latent_dim
    self.disc_loss = []
    self.gen_loss = []

    self.loss_fun = tf.keras.losses.BinaryCrossentropy()
    self.generator_opt = tf.keras.optimizers.Adam(1e-4)
    self.discriminator_opt = tf.keras.optimizers.Adam(1e-4)

    # Build and compile the discriminator
    self.discriminator = discriminator(self.seq_shape)
      
    # Build the generator
    self.generator = generator(self.latent_dim, self.seq_shape)

    # The generator takes noise as input and generates note sequences
    z = Input(shape=(self.latent_dim, 1))
    generated_seq = self.generator(z)

    validity = self.discriminator(generated_seq)

    # The combined model  (stacked generator and discriminator)
    # Trains the generator to fool the discriminator
    self.combined = Model(z, validity)
      
  def _discriminator_loss(self, real_output, fake_output):
    real_loss = self.loss_fun(tf.ones_like(real_output), real_output)
    fake_loss = self.loss_fun(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss
      
  def _generator_loss(self, fake_output):
    return self.loss_fun(tf.ones_like(fake_output), fake_output)
      
  def _train_step(self, real_batch_x, real_batch_y, batch_size, pbar):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # GENERATOR -> FAKE BATCH
        noise = np.random.normal(0, 1, (batch_size, self.latent_dim, 1))
        fake_batch_x = self.generator(noise, training=True)  # Shape: (batch_size, seq_length, 1)

        real_output = self.discriminator(real_batch_x, training=True)
        fake_output = self.discriminator(fake_batch_x, training=True)

        gen_loss = self._generator_loss(fake_output)
        disc_loss = self._discriminator_loss(real_output, fake_output)

        pbar.set_description(
            f"D Loss: {disc_loss:.4f}, " +
            #f"D Accuracy: {100 * d_loss[1]:.2f}%, " +
            f"G Loss: {gen_loss:.4f}"
        )
        pbar.update(1)
        
    gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

    self.generator_opt.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
    self.discriminator_opt.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))
      
  def train(self, epochs, batch_size, train_dataset, valid_dataset, sample_interval=50):
    print("\nStarting Training\n")
    iteration_count = len(train_dataset)

    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch + 1,))
        pbar = tqdm(total=iteration_count)
        
        for (real_seqs, targets) in train_dataset:
            self._train_step(real_seqs, targets, batch_size, pbar)
        pbar.close()
    
    print("\nTraining Complete.\n")
        
  def save(self):
    # create Model directory if there isn't exist
    if not os.path.exists('Model/'):
      os.makedirs('Model/')

    # save discriminator and generator trained model
    self.discriminator.save('Model/discriminator.h5')
    self.generator.save('Model/generator.h5')
    print("The trained C-RNN-GAN model (generator and discriminator) have been saved in the Model folder.")


  def generate(self):
    """ Use random noise to generate music"""
    
    # random noise for network input
    noise = np.random.normal(0, 1, (BATCH_SIZE, self.latent_dim, 1))
    predictions = self.generator.predict(noise)

    # transfer sequence numbers to notes
    boundary = int(self.vocab_size / 2)
    pred_nums = [x * boundary + boundary for x in predictions[0]]
    return pred_nums


  def plot_loss(self):
    """ Plot and save discriminator and generator loss functions per epoch diagram"""
    plt.plot(self.disc_loss, c='red')
    plt.plot(self.gen_loss, c='blue')
    plt.title("GAN Loss per Epoch")
    plt.legend(['Discriminator', 'Generator'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()
    plt.savefig('Result/GAN_Loss_per_Epoch_final.png', transparent=True)
    plt.close()

model = GAN(vocab_size, seq_length)

  super().__init__(**kwargs)


### Hide prints

In [29]:
tf.get_logger().setLevel('ERROR')

In [31]:
EPOCHS = 10


model.train(EPOCHS, BATCH_SIZE, train_ds, valid_ds, sample_interval=1)


Starting Training


Start of epoch 1



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3533, G Loss: 0.6920:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3533, G Loss: 0.6920:   0%|          | 1/422 [00:01<09:39,  1.38s/it][A
D Loss: 1.3369, G Loss: 0.6902:   0%|          | 1/422 [00:05<09:39,  1.38s/it][A
D Loss: 1.3369, G Loss: 0.6902:   0%|          | 2/422 [00:05<20:56,  2.99s/it][A
D Loss: 1.3222, G Loss: 0.6878:   0%|          | 2/422 [00:09<20:56,  2.99s/it][A
D Loss: 1.3222, G Loss: 0.6878:   1%|          | 3/422 [00:09<24:25,  3.50s/it][A
D Loss: 1.3019, G Loss: 0.6841:   1%|          | 3/422 [00:13<24:25,  3.50s/it][A
D Loss: 1.3019, G Loss: 0.6841:   1%|          | 4/422 [00:13<25:57,  3.73s/it][A
D Loss: 1.2821, G Loss: 0.6782:   1%|          | 4/422 [00:17<25:57,  3.73s/it][A
D Loss: 1.2821, G Loss: 0.6782:   1%|          | 5/422 [00:17<26:49,  3.86s/it][A
D Loss: 1.2563, G Loss: 0.6710:   1%|          | 5/422 [00:21<26:49,  3.86s/it][A
D Loss: 1.2563, G Loss: 0.6710:   1%|▏         | 6/


Start of epoch 2



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3817, G Loss: 0.6778:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3817, G Loss: 0.6778:   0%|          | 1/422 [00:01<09:37,  1.37s/it][A
D Loss: 1.3820, G Loss: 0.6888:   0%|          | 1/422 [00:05<09:37,  1.37s/it][A
D Loss: 1.3820, G Loss: 0.6888:   0%|          | 2/422 [00:05<20:48,  2.97s/it][A
D Loss: 1.3821, G Loss: 0.6875:   0%|          | 2/422 [00:09<20:48,  2.97s/it][A
D Loss: 1.3821, G Loss: 0.6875:   1%|          | 3/422 [00:09<24:20,  3.49s/it][A
D Loss: 1.3816, G Loss: 0.6786:   1%|          | 3/422 [00:13<24:20,  3.49s/it][A
D Loss: 1.3816, G Loss: 0.6786:   1%|          | 4/422 [00:13<25:57,  3.73s/it][A
D Loss: 1.3825, G Loss: 0.6713:   1%|          | 4/422 [00:17<25:57,  3.73s/it][A
D Loss: 1.3825, G Loss: 0.6713:   1%|          | 5/422 [00:17<26:47,  3.86s/it][A
D Loss: 1.3822, G Loss: 0.6748:   1%|          | 5/422 [00:21<26:47,  3.86s/it][A
D Loss: 1.3822, G Loss: 0.6748:   1%|▏         | 6/


Start of epoch 3



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3867, G Loss: 0.6995:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3867, G Loss: 0.6995:   0%|          | 1/422 [00:01<09:28,  1.35s/it][A
D Loss: 1.3867, G Loss: 0.7066:   0%|          | 1/422 [00:05<09:28,  1.35s/it][A
D Loss: 1.3867, G Loss: 0.7066:   0%|          | 2/422 [00:05<20:52,  2.98s/it][A
D Loss: 1.3863, G Loss: 0.6768:   0%|          | 2/422 [00:09<20:52,  2.98s/it][A
D Loss: 1.3863, G Loss: 0.6768:   1%|          | 3/422 [00:09<24:25,  3.50s/it][A
D Loss: 1.3870, G Loss: 0.6716:   1%|          | 3/422 [00:13<24:25,  3.50s/it][A
D Loss: 1.3870, G Loss: 0.6716:   1%|          | 4/422 [00:13<26:00,  3.73s/it][A
D Loss: 1.3864, G Loss: 0.6927:   1%|          | 4/422 [00:17<26:00,  3.73s/it][A
D Loss: 1.3864, G Loss: 0.6927:   1%|          | 5/422 [00:17<26:56,  3.88s/it][A
D Loss: 1.3865, G Loss: 0.7059:   1%|          | 5/422 [00:21<26:56,  3.88s/it][A
D Loss: 1.3865, G Loss: 0.7059:   1%|▏         | 6/


Start of epoch 4



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3833, G Loss: 0.6845:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3833, G Loss: 0.6845:   0%|          | 1/422 [00:01<09:31,  1.36s/it][A
D Loss: 1.3835, G Loss: 0.6770:   0%|          | 1/422 [00:05<09:31,  1.36s/it][A
D Loss: 1.3835, G Loss: 0.6770:   0%|          | 2/422 [00:05<20:45,  2.97s/it][A
D Loss: 1.3833, G Loss: 0.6726:   0%|          | 2/422 [00:09<20:45,  2.97s/it][A
D Loss: 1.3833, G Loss: 0.6726:   1%|          | 3/422 [00:09<24:22,  3.49s/it][A
D Loss: 1.3828, G Loss: 0.6883:   1%|          | 3/422 [00:13<24:22,  3.49s/it][A
D Loss: 1.3828, G Loss: 0.6883:   1%|          | 4/422 [00:13<25:58,  3.73s/it][A
D Loss: 1.3829, G Loss: 0.7056:   1%|          | 4/422 [00:17<25:58,  3.73s/it][A
D Loss: 1.3829, G Loss: 0.7056:   1%|          | 5/422 [00:17<26:48,  3.86s/it][A
D Loss: 1.3829, G Loss: 0.7054:   1%|          | 5/422 [00:21<26:48,  3.86s/it][A
D Loss: 1.3829, G Loss: 0.7054:   1%|▏         | 6/


Start of epoch 5



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3857, G Loss: 0.6986:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3857, G Loss: 0.6986:   0%|          | 1/422 [00:01<09:29,  1.35s/it][A
D Loss: 1.3860, G Loss: 0.7108:   0%|          | 1/422 [00:05<09:29,  1.35s/it][A
D Loss: 1.3860, G Loss: 0.7108:   0%|          | 2/422 [00:05<20:43,  2.96s/it][A
D Loss: 1.3864, G Loss: 0.7010:   0%|          | 2/422 [00:09<20:43,  2.96s/it][A
D Loss: 1.3864, G Loss: 0.7010:   1%|          | 3/422 [00:09<24:20,  3.49s/it][A
D Loss: 1.3863, G Loss: 0.6827:   1%|          | 3/422 [00:13<24:20,  3.49s/it][A
D Loss: 1.3863, G Loss: 0.6827:   1%|          | 4/422 [00:13<25:59,  3.73s/it][A
D Loss: 1.3867, G Loss: 0.6678:   1%|          | 4/422 [00:17<25:59,  3.73s/it][A
D Loss: 1.3867, G Loss: 0.6678:   1%|          | 5/422 [00:17<26:49,  3.86s/it][A
D Loss: 1.3881, G Loss: 0.6604:   1%|          | 5/422 [00:21<26:49,  3.86s/it][A
D Loss: 1.3881, G Loss: 0.6604:   1%|▏         | 6/


Start of epoch 6



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3797, G Loss: 0.7011:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3797, G Loss: 0.7011:   0%|          | 1/422 [00:01<09:26,  1.35s/it][A
D Loss: 1.3808, G Loss: 0.6733:   0%|          | 1/422 [00:05<09:26,  1.35s/it][A
D Loss: 1.3808, G Loss: 0.6733:   0%|          | 2/422 [00:05<20:43,  2.96s/it][A
D Loss: 1.3804, G Loss: 0.6985:   0%|          | 2/422 [00:09<20:43,  2.96s/it][A
D Loss: 1.3804, G Loss: 0.6985:   1%|          | 3/422 [00:09<24:17,  3.48s/it][A
D Loss: 1.3804, G Loss: 0.7068:   1%|          | 3/422 [00:13<24:17,  3.48s/it][A
D Loss: 1.3804, G Loss: 0.7068:   1%|          | 4/422 [00:13<25:55,  3.72s/it][A
D Loss: 1.3807, G Loss: 0.6914:   1%|          | 4/422 [00:17<25:55,  3.72s/it][A
D Loss: 1.3807, G Loss: 0.6914:   1%|          | 5/422 [00:17<26:48,  3.86s/it][A
D Loss: 1.3803, G Loss: 0.6797:   1%|          | 5/422 [00:21<26:48,  3.86s/it][A
D Loss: 1.3803, G Loss: 0.6797:   1%|▏         | 6/


Start of epoch 7



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3796, G Loss: 0.7310:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3796, G Loss: 0.7310:   0%|          | 1/422 [00:01<09:31,  1.36s/it][A
D Loss: 1.3795, G Loss: 0.6666:   0%|          | 1/422 [00:05<09:31,  1.36s/it][A
D Loss: 1.3795, G Loss: 0.6666:   0%|          | 2/422 [00:05<20:47,  2.97s/it][A
D Loss: 1.3793, G Loss: 0.6999:   0%|          | 2/422 [00:09<20:47,  2.97s/it][A
D Loss: 1.3793, G Loss: 0.6999:   1%|          | 3/422 [00:09<24:23,  3.49s/it][A
D Loss: 1.3794, G Loss: 0.7168:   1%|          | 3/422 [00:13<24:23,  3.49s/it][A
D Loss: 1.3794, G Loss: 0.7168:   1%|          | 4/422 [00:13<26:02,  3.74s/it][A
D Loss: 1.3800, G Loss: 0.6634:   1%|          | 4/422 [00:17<26:02,  3.74s/it][A
D Loss: 1.3800, G Loss: 0.6634:   1%|          | 5/422 [00:17<26:51,  3.86s/it][A
D Loss: 1.3784, G Loss: 0.7067:   1%|          | 5/422 [00:21<26:51,  3.86s/it][A
D Loss: 1.3784, G Loss: 0.7067:   1%|▏         | 6/


Start of epoch 8



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3884, G Loss: 0.6945:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3884, G Loss: 0.6945:   0%|          | 1/422 [00:01<09:31,  1.36s/it][A
D Loss: 1.3883, G Loss: 0.6909:   0%|          | 1/422 [00:05<09:31,  1.36s/it][A
D Loss: 1.3883, G Loss: 0.6909:   0%|          | 2/422 [00:05<20:45,  2.97s/it][A
D Loss: 1.3891, G Loss: 0.6746:   0%|          | 2/422 [00:09<20:45,  2.97s/it][A
D Loss: 1.3891, G Loss: 0.6746:   1%|          | 3/422 [00:09<24:15,  3.47s/it][A
D Loss: 1.3887, G Loss: 0.6691:   1%|          | 3/422 [00:13<24:15,  3.47s/it][A
D Loss: 1.3887, G Loss: 0.6691:   1%|          | 4/422 [00:13<25:57,  3.72s/it][A
D Loss: 1.3878, G Loss: 0.6978:   1%|          | 4/422 [00:17<25:57,  3.72s/it][A
D Loss: 1.3878, G Loss: 0.6978:   1%|          | 5/422 [00:17<26:48,  3.86s/it][A
D Loss: 1.3885, G Loss: 0.7137:   1%|          | 5/422 [00:21<26:48,  3.86s/it][A
D Loss: 1.3885, G Loss: 0.7137:   1%|▏         | 6/


Start of epoch 9



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3831, G Loss: 0.6989:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3831, G Loss: 0.6989:   0%|          | 1/422 [00:01<09:33,  1.36s/it][A
D Loss: 1.3829, G Loss: 0.6927:   0%|          | 1/422 [00:05<09:33,  1.36s/it][A
D Loss: 1.3829, G Loss: 0.6927:   0%|          | 2/422 [00:05<20:45,  2.97s/it][A
D Loss: 1.3832, G Loss: 0.6847:   0%|          | 2/422 [00:09<20:45,  2.97s/it][A
D Loss: 1.3832, G Loss: 0.6847:   1%|          | 3/422 [00:09<24:20,  3.49s/it][A
D Loss: 1.3831, G Loss: 0.6841:   1%|          | 3/422 [00:13<24:20,  3.49s/it][A
D Loss: 1.3831, G Loss: 0.6841:   1%|          | 4/422 [00:13<26:00,  3.73s/it][A
D Loss: 1.3831, G Loss: 0.6904:   1%|          | 4/422 [00:17<26:00,  3.73s/it][A
D Loss: 1.3831, G Loss: 0.6904:   1%|          | 5/422 [00:17<26:50,  3.86s/it][A
D Loss: 1.3831, G Loss: 0.6931:   1%|          | 5/422 [00:21<26:50,  3.86s/it][A
D Loss: 1.3831, G Loss: 0.6931:   1%|▏         | 6/


Start of epoch 10



  0%|          | 0/422 [00:00<?, ?it/s][A
D Loss: 1.3796, G Loss: 0.7005:   0%|          | 0/422 [00:01<?, ?it/s][A
D Loss: 1.3796, G Loss: 0.7005:   0%|          | 1/422 [00:01<09:29,  1.35s/it][A
D Loss: 1.3804, G Loss: 0.6972:   0%|          | 1/422 [00:05<09:29,  1.35s/it][A
D Loss: 1.3804, G Loss: 0.6972:   0%|          | 2/422 [00:05<20:50,  2.98s/it][A
D Loss: 1.3803, G Loss: 0.6846:   0%|          | 2/422 [00:09<20:50,  2.98s/it][A
D Loss: 1.3803, G Loss: 0.6846:   1%|          | 3/422 [00:09<24:25,  3.50s/it][A
D Loss: 1.3802, G Loss: 0.6873:   1%|          | 3/422 [00:13<24:25,  3.50s/it][A
D Loss: 1.3802, G Loss: 0.6873:   1%|          | 4/422 [00:13<26:04,  3.74s/it][A
D Loss: 1.3803, G Loss: 0.6993:   1%|          | 4/422 [00:17<26:04,  3.74s/it][A
D Loss: 1.3803, G Loss: 0.6993:   1%|          | 5/422 [00:17<26:55,  3.87s/it][A
D Loss: 1.3802, G Loss: 0.6985:   1%|          | 5/422 [00:21<26:55,  3.87s/it][A
D Loss: 1.3802, G Loss: 0.6985:   1%|▏         | 6/


Training Complete.






In [32]:
model.save()

The trained C-RNN-GAN model (generator and discriminator) have been saved in the Model folder.


In [33]:
!zip -r model.zip Model/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  adding: Model/ (stored 0%)
  adding: Model/discriminator.h5 (deflated 7%)
  adding: Model/generator.h5 (deflated 8%)


In [50]:
generated_ids = np.concatenate(model.generate()).astype(np.int32)
print(generated_ids)
for index, id in enumerate(generated_ids):
    if id >= 1000:
        print(f"Found id {id}, setting to {len(tokenizer) - 1}")
        generated_ids[index] = len(tokenizer) - 1
decoded = tokenizer.decode([generated_ids])
decoded.dump_midi("generated.mid")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 149ms/step
[1000 1000 1000 ...  936  122  831]
Found id 1000, setting to 999
Found id 1000, setting to 999
Found id 1000, setting to 999
Found id 1000, setting to 999
Found id 1000, setting to 999
Found id 1000, setting to 999
