In [3]:
!pip install music21

Collecting music21
  Using cached music21-7.3.3-py3-none-any.whl (22.4 MB)
Collecting more-itertools
  Using cached more_itertools-9.1.0-py3-none-any.whl (54 kB)
Collecting jsonpickle
  Using cached jsonpickle-3.0.1-py2.py3-none-any.whl (40 kB)
Collecting webcolors>=1.5
  Using cached webcolors-1.13-py3-none-any.whl (14 kB)
Collecting chardet
  Using cached chardet-5.1.0-py3-none-any.whl (199 kB)
Installing collected packages: webcolors, more-itertools, chardet, jsonpickle, music21
Successfully installed chardet-5.1.0 jsonpickle-3.0.1 more-itertools-9.1.0 music21-7.3.3 webcolors-1.13
[0m

In [4]:
import tensorflow as tf
from PIL import Image
import numpy as np
from music21 import instrument, note, chord, stream, converter
import sys
import os
from imageio import imwrite
# Create the output folder for the piano-roll images. exist_ok=True lets this
# cell be re-run in the same session without raising FileExistsError.
os.makedirs("generated_images", exist_ok=True)

In [5]:

class MIDIConverter:
    """Convert between MIDI files and piano-roll images.

    In a piano-roll image every row is one pitch and every column is one time
    step. A pixel brighter than half of full scale (255 / 2) means the pitch
    sounds during that step.
    """

    def __init__(self):
        # Pitch number represented by image row 0 when reading an image.
        self.lowerBoundNote = 21
        # Length of one image column in quarter notes when reading an image.
        self.resolution = 0.25

    def column2notes(self, column):
        """Return the pitch numbers that are switched on in one image column.

        Args:
            column: indexable sequence of pixel values, one per image row.

        Returns:
            List of ``row_index + self.lowerBoundNote`` for every pixel whose
            value is greater than 255 / 2.
        """
        threshold = 255 / 2
        return [i + self.lowerBoundNote
                for i in range(len(column)) if column[i] > threshold]

    def updateNotes(self, newNotes, prevNotes):
        """Extend the running duration of every currently sounding pitch.

        Args:
            newNotes: pitches sounding in the current column.
            prevNotes: dict mapping pitch -> duration accumulated so far.

        Returns:
            A new dict holding only the pitches in ``newNotes``; each duration
            is the previous duration (0 if the pitch is new) plus
            ``self.resolution``.
        """
        return {pitch: prevNotes.get(pitch, 0) + self.resolution
                for pitch in newNotes}

    def _load_grayscale(self, image_path):
        """Read an image file into a 2-D (rows, columns) pixel array."""
        with Image.open(image_path) as image:
            width, height = image.size
            im_arr = np.frombuffer(image.tobytes(), dtype=np.uint8)
            try:
                # One byte per pixel: already a single-channel image.
                im_arr = im_arr.reshape((height, width))
            except ValueError:
                # Otherwise treat the buffer as three channels per pixel and
                # average them with equal weights.
                im_arr = im_arr.reshape((height, width, 3))
                im_arr = np.dot(im_arr, [0.33, 0.33, 0.33])
        return im_arr

    def _piano_roll_to_events(self, im_arr):
        """Turn a piano-roll array into ``(pitch, start, duration)`` tuples.

        ``start`` and ``duration`` are in quarter notes. A note is emitted when
        a pitch that was sounding is absent from the next column; pitches still
        sounding in the last column are emitted at the end.
        """
        events = []
        active = {}      # pitch -> duration accumulated so far
        elapsed = 0.0    # end time of the columns consumed so far
        for column in im_arr.T:
            sounding = self.column2notes(column)
            still_on = set(sounding)
            for pitch, held in active.items():
                if pitch not in still_on:
                    # The note ended exactly at `elapsed`, so it began `held`
                    # quarter notes earlier; this is never negative.
                    events.append((pitch, elapsed - held, held))
            active = self.updateNotes(sounding, active)
            elapsed += self.resolution

        # Flush the notes that last until the right edge of the image.
        for pitch, held in active.items():
            events.append((pitch, elapsed - held, held))
        return events

    def image2midi(self, image_path):
        """Convert a piano-roll image into a MIDI file.

        The MIDI file is written to the current working directory under the
        image's base name with a ``.mid`` extension.

        Args:
            image_path: path of the image to convert.
        """
        im_arr = self._load_grayscale(image_path)

        output_notes = []
        for pitch, start, duration in self._piano_roll_to_events(im_arr):
            new_note = note.Note(pitch, quarterLength=duration)
            new_note.storedInstrument = instrument.Piano()
            new_note.offset = start
            output_notes.append(new_note)

        midi_stream = stream.Stream(output_notes)

        base_name = os.path.splitext(os.path.basename(image_path))[0]
        midi_stream.write('midi', fp=base_name + ".mid")

    def extractNote(self, element):
        """Return the pitch of ``element`` as an integer (``int(pitch.ps)``)."""
        return int(element.pitch.ps)

    def extractDuration(self, element):
        """Return the duration of ``element`` in quarter notes."""
        return element.duration.quarterLength

    def get_notes(self, notes_to_parse):
        """Collect start, pitch and duration of every note and chord member.

        Args:
            notes_to_parse: iterable of music21 elements; anything that is not
                a ``note.Note`` or ``chord.Chord`` is ignored.

        Returns:
            Dict with parallel lists under the keys ``"start"``, ``"pitch"``
            and ``"dur"``. Every member of a chord gets the chord's offset and
            the chord's duration.
        """
        durations = []
        notes = []
        start = []

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                start.append(element.offset)
                notes.append(self.extractNote(element))
                durations.append(self.extractDuration(element))

            elif isinstance(element, chord.Chord):
                for chord_note in element:
                    start.append(element.offset)
                    durations.append(self.extractDuration(element))
                    notes.append(self.extractNote(chord_note))

        return {"start": start, "pitch": notes, "dur": durations}

    def midi2image(self, midi_path, max_repetitions=float("inf"), resolution=0.25,
                   lowerBoundNote=21, upperBoundNote=127, maxSongLength=100,
                   output_dir="generated_images"):
        """Render a MIDI file as piano-roll PNG images, one set per instrument.

        Each instrument is cut into consecutive segments of ``maxSongLength``
        time steps. Every segment that contains at least one note is saved as
        ``<output_dir>/<midi base name>_<instrument>_<segment index>.png``.

        Args:
            midi_path: path of the MIDI file to read.
            max_repetitions: maximum number of segments considered per
                instrument.
            resolution: length of one image column in quarter notes.
            lowerBoundNote: pitch number mapped to image row 0.
            upperBoundNote: first pitch number above the image; the image has
                ``upperBoundNote - lowerBoundNote`` rows.
            maxSongLength: number of columns (time steps) per image.
            output_dir: directory the images are written to (created if
                missing).
        """
        mid = converter.parse(midi_path)

        instruments = instrument.partitionByInstrument(mid)

        data = {}

        try:
            i = 0
            for instrument_i in instruments.parts:
                notes_data = self.get_notes(instrument_i.recurse())
                if len(notes_data["start"]) == 0:
                    continue

                if instrument_i.partName is None:
                    data["instrument_{}".format(i)] = notes_data
                    i += 1
                else:
                    data[instrument_i.partName] = notes_data

        except Exception:
            # Best-effort fallback when the file cannot be split by instrument:
            # treat all notes as one instrument. `Exception` (rather than a
            # bare except) keeps KeyboardInterrupt / SystemExit working.
            data["instrument_0"] = self.get_notes(mid.flat.notes)

        os.makedirs(output_dir, exist_ok=True)
        stem = os.path.splitext(os.path.basename(midi_path))[0]
        n_rows = upperBoundNote - lowerBoundNote

        for instrument_name, values in data.items():
            # https://en.wikipedia.org/wiki/Scientific_pitch_notation#Similar_systems

            # Convert every note once into (row, first step, step after last).
            spans = []
            for dur, start, pitch in zip(values["dur"], values["start"], values["pitch"]):
                row = pitch - lowerBoundNote
                if not 0 <= row < n_rows:
                    # A pitch outside the image would either wrap around to a
                    # wrong row (negative index) or raise IndexError.
                    continue
                first = int(start / resolution)
                spans.append((row, first, first + int(dur / resolution)))

            # Number of segments needed to reach the end of the last note.
            total_steps = max((last for _, _, last in spans), default=0)
            n_segments = -(-total_steps // maxSongLength)
            if max_repetitions < n_segments:
                n_segments = int(max_repetitions)

            # A path separator in the part name would point into a sub-folder.
            safe_name = str(instrument_name).replace("/", "_")

            for index in range(n_segments):
                base = index * maxSongLength
                matrix = np.zeros((n_rows, maxSongLength), dtype=np.uint8)

                for row, first, last in spans:
                    # Clip the note to the part that falls inside this segment.
                    lo = max(first, base) - base
                    hi = min(last, base + maxSongLength) - base
                    if lo < hi:
                        matrix[row, lo:hi] = 255

                if not matrix.any():
                    # A silent segment is skipped; later segments are still
                    # rendered.
                    continue

                file_name = f"{stem}_{safe_name}_{index}.png"
                imwrite(os.path.join(output_dir, file_name), matrix)


In [6]:
# Generating Dataset


# os.makedirs("generated_images")
cvt = MIDIConverter()
parent_path = "/kaggle/input/classical-music-midi/mozart"

# Gather the path of every file found below parent_path.
all_files = [
    root + "/" + j
    for root, dirnames, filenames in os.walk(parent_path)
    for j in filenames
]
print(all_files)

# Render each collected file as piano-roll images.
for fname in all_files:
    cvt.midi2image(fname)


['/kaggle/input/classical-music-midi/mozart/mz_330_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_311_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_545_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_332_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_570_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_333_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_311_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_331_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_330_1.mid', '/kaggle/input/classical-music-midi/mozart/mz_570_1.mid', '/kaggle/input/classical-music-midi/mozart/mz_311_1.mid', '/kaggle/input/classical-music-midi/mozart/mz_570_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_545_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_332_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_330_3.mid', '/kaggle/input/classical-music-midi/mozart/mz_333_2.mid', '/kaggle/input/classical-music-midi/mozart/mz_545_1.mid', '/kaggle/inpu



In [7]:
path = '/kaggle/working/generated_images'
# os.listdir returns entries in arbitrary order; sorting makes the subset of
# images that is loaded below the same on every run.
img_list = sorted(os.listdir(path))


def access_images(img_list, path, length, size=106):
    """Load PNG images as normalised, zero-padded square arrays.

    Only the first ``length`` entries of ``img_list`` are considered (fewer if
    the list is shorter); entries that are not ``.png`` files are skipped.

    Args:
        img_list: file names inside ``path``.
        path: directory containing the images.
        length: maximum number of entries of ``img_list`` to look at.
        size: side length of the square output; images must not be larger
            than ``size`` in either dimension (``np.pad`` raises otherwise).

    Returns:
        Tuple ``(pixels, imgs)``: ``pixels`` is an array of shape
        ``(n, size, size, 1)`` with values 0.0 / 1.0, and ``imgs`` is the list
        of the corresponding 1-bit PIL images.
    """
    pixels = []
    imgs = []
    for name in img_list[:length]:
        if not name.lower().endswith('.png'):
            continue

        # convert() returns an independent image, so the file can be closed.
        with Image.open(os.path.join(path, name)) as src:
            img = src.convert('1')

        width, height = img.size
        pix = np.array(img.getdata(), dtype='float32').reshape(height, width)
        pix /= 255.0
        # Pad bottom and right edges separately. Padding the flattened pixels
        # and reshaping afterwards would shift every row and scramble the
        # row/column layout of the image.
        pix = np.pad(pix, ((0, size - height), (0, size - width)),
                     'constant', constant_values=0)

        pixels.append(pix.reshape(size, size, 1))
        imgs.append(img)
    return np.array(pixels), imgs


def show_image(pix_list):
    """Open one 106x106 image in the default viewer.

    Args:
        pix_list: array with 106 * 106 values. Data whose maximum is at most
            1.0 is taken to be normalised and is scaled to 0..255 first;
            casting such data directly to uint8 would give an almost black
            picture.
    """
    array = np.asarray(pix_list, dtype='float32').reshape(106, 106)
    if array.max() <= 1.0:
        array = array * 255
    new_image = Image.fromarray(array.astype(np.uint8))
    new_image.show()


# Load at most the first 200 directory entries as training images.
pixels, imgs = access_images(img_list, path, length=200)


In [8]:
# import time

# for i in range(200):
#     if (np.sum(pixels[i]) > 0):
#         print(np.sum(pixels[i]))
#         show_image(pixels[i])
#         time.sleep(1)


In [9]:
from numpy import zeros
from numpy import ones
from numpy import vstack
from numpy.random import randn
from numpy.random import randint
from keras.datasets.mnist import load_data
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Flatten, BatchNormalization
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Dropout
from matplotlib import pyplot
from IPython.display import clear_output


In [10]:
def define_discriminator(in_shape=(106, 106, 1)):
    """Build and compile the discriminator.

    Two stride-2 convolutions (each followed by LeakyReLU and dropout) feed a
    flattened, batch-normalised vector into a single sigmoid unit.

    Args:
        in_shape: shape of one input image.

    Returns:
        The model, compiled with binary cross-entropy and an accuracy metric.
    """
    model = Sequential()
    model.add(Conv2D(64, (3, 3), strides=(2, 2), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    # `learning_rate` is the supported argument name; `lr` is deprecated.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


def define_generator(latent_dim):
    """Build the (uncompiled) generator.

    A latent vector is projected to a 53x53x128 tensor, upsampled once by a
    stride-2 transposed convolution and reduced to a single sigmoid channel.

    Args:
        latent_dim: length of the latent input vector.

    Returns:
        The generator model.
    """
    projection_units = 128 * 53 * 53
    layer_stack = [
        Dense(projection_units, input_dim=latent_dim),
        LeakyReLU(alpha=0.2),
        Reshape((53, 53, 128)),
        Dense(1024),
        Conv2DTranspose(1024, (4, 4), strides=(2, 2), padding='same'),
        Dense(1024),
        LeakyReLU(alpha=0.2),
        Dense(1024),
        Conv2D(1, (7, 7), padding='same', activation='sigmoid'),
    ]
    return Sequential(layer_stack)


def define_gan(g_model, d_model):
    """Stack generator and discriminator into the model that trains the generator.

    The discriminator is marked non-trainable before the combined model is
    compiled.

    Args:
        g_model: generator model.
        d_model: discriminator model.

    Returns:
        The combined model, compiled with binary cross-entropy.
    """
    d_model.trainable = False
    model = Sequential()
    model.add(g_model)
    model.add(d_model)
    # `learning_rate` is the supported argument name; `lr` is deprecated.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model


In [11]:
def generate_real_samples(dataset, n_samples):
    """Draw ``n_samples`` random rows (with replacement) from ``dataset``.

    Returns:
        Tuple ``(X, y)`` where ``y`` is an ``(n_samples, 1)`` array of ones.
    """
    chosen = randint(0, dataset.shape[0], n_samples)
    return dataset[chosen], ones((n_samples, 1))


def generate_latent_points(latent_dim, n_samples):
    """Return an ``(n_samples, latent_dim)`` array of standard-normal noise."""
    flat_noise = randn(latent_dim * n_samples)
    return flat_noise.reshape((n_samples, latent_dim))


def generate_fake_samples(g_model, latent_dim, n_samples):
    """Generate ``n_samples`` images with ``g_model``.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the generator output and ``y`` is an
        ``(n_samples, 1)`` array of zeros.
    """
    latent_batch = generate_latent_points(latent_dim, n_samples)
    generated = g_model.predict(latent_batch)
    return generated, zeros((n_samples, 1))


In [12]:
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=100):
    """Print discriminator accuracy on real and fake samples and save the generator.

    The generator is saved as ``generator_model_<epoch + 1>.h5`` (zero-padded
    to three digits).
    """
    # Accuracy on a random batch of real images.
    real_batch, real_labels = generate_real_samples(dataset, n_samples)
    _, acc_real = d_model.evaluate(real_batch, real_labels, verbose=0)

    # Accuracy on a freshly generated batch.
    fake_batch, fake_labels = generate_fake_samples(g_model, latent_dim, n_samples)
    _, acc_fake = d_model.evaluate(fake_batch, fake_labels, verbose=0)

    report = '>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100)
    print(report)

    g_model.save('generator_model_%03d.h5' % (epoch + 1))


def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=35, n_batch=10):
    """Alternate discriminator and generator updates for ``n_epochs`` epochs.

    Each step trains the discriminator on half a batch of real and half a
    batch of generated images, then trains the generator through
    ``gan_model`` on a full batch of latent points labelled as real. Every
    tenth epoch the cell output is cleared and a performance summary is
    printed.

    Args:
        g_model: generator model.
        d_model: compiled discriminator model.
        gan_model: compiled combined model.
        dataset: array of real images, indexed along the first axis.
        latent_dim: length of the generator's latent input.
        n_epochs: number of passes over the data.
        n_batch: batch size used for the generator update.
    """
    bat_per_epo = dataset.shape[0] // n_batch
    half_batch = n_batch // 2
    for i in range(n_epochs):
        for j in range(bat_per_epo):
            # Discriminator step: half real, half generated.
            X_real, y_real = generate_real_samples(dataset, half_batch)
            X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
            X, y = vstack((X_real, X_fake)), vstack((y_real, y_fake))
            d_loss, _ = d_model.train_on_batch(X, y)
            # Generator step: latent points labelled as real.
            X_gan = generate_latent_points(latent_dim, n_batch)
            y_gan = ones((n_batch, 1))
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            print('>%d, %d/%d, d=%.3f, g=%.3f' %
                  (i+1, j+1, bat_per_epo, d_loss, g_loss))
        if (i+1) % 10 == 0:
            # Clear the per-batch log first: clearing after the summary would
            # erase the accuracy line as soon as it is printed.
            clear_output()
            summarize_performance(i, g_model, d_model, dataset, latent_dim)


In [None]:
# Build the discriminator, the generator and the combined model, then train
# on the loaded images.
latent_dim = 100
d_model = define_discriminator()
g_model = define_generator(latent_dim)
gan_model = define_gan(g_model=g_model, d_model=d_model)
print(pixels.shape)
train(g_model, d_model, gan_model,
      dataset=np.array(pixels), latent_dim=latent_dim)


  super().__init__(name, **kwargs)


(200, 106, 106, 1)


2023-04-28 14:05:18.958044: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-04-28 14:05:19.827430: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_2/sequential/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


>1, 1/20, d=0.708, g=0.866
>1, 2/20, d=0.340, g=1.098
>1, 3/20, d=0.106, g=1.168
>1, 4/20, d=0.028, g=1.215
>1, 5/20, d=0.041, g=1.063
>1, 6/20, d=0.149, g=0.882
>1, 7/20, d=0.682, g=0.775
>1, 8/20, d=0.743, g=0.733
>1, 9/20, d=0.684, g=0.704
>1, 10/20, d=0.693, g=0.686
>1, 11/20, d=0.643, g=0.669
>1, 12/20, d=0.533, g=0.662
>1, 13/20, d=0.503, g=0.665
>1, 14/20, d=0.456, g=0.674
>1, 15/20, d=0.341, g=0.680
>1, 16/20, d=0.381, g=0.681
>1, 17/20, d=0.242, g=0.681
>1, 18/20, d=0.248, g=0.677
>1, 19/20, d=0.204, g=0.677
>1, 20/20, d=0.225, g=0.679
>2, 1/20, d=0.218, g=0.675
>2, 2/20, d=0.212, g=0.673
>2, 3/20, d=0.307, g=0.660
>2, 4/20, d=0.219, g=0.646
>2, 5/20, d=0.145, g=0.645
>2, 6/20, d=0.119, g=0.637
>2, 7/20, d=0.292, g=0.616
>2, 8/20, d=0.092, g=0.610
>2, 9/20, d=0.116, g=0.619
>2, 10/20, d=0.070, g=0.632
>2, 11/20, d=0.076, g=0.642
>2, 12/20, d=0.057, g=0.651
>2, 13/20, d=0.077, g=0.646
>2, 14/20, d=0.069, g=0.642
>2, 15/20, d=0.062, g=0.637
>2, 16/20, d=0.051, g=0.640
>2, 17/20,

In [None]:
from keras.models import load_model
from numpy.random import randn
from matplotlib import pyplot


def generate_latent_points(latent_dim, n_samples):
    """Return an ``(n_samples, latent_dim)`` array of standard-normal noise."""
    return randn(latent_dim * n_samples).reshape(n_samples, latent_dim)


model = g_model
latent_points = generate_latent_points(latent_dim, 1)
X = g_model.predict(latent_points)
# The generator ends in a sigmoid, so X lies in [0, 1]. Scale to 0..255 while
# the data is still floating point; casting to uint8 first would truncate
# nearly every pixel to 0 and produce a black image.
array = (X.reshape(106, 106) * 255).astype(np.uint8)
# A 2-D uint8 array is interpreted as an 8-bit grayscale ('L') image.
new_image = Image.fromarray(array)
new_image.save('composition.png')


In [None]:
# Convert the generated piano-roll image into a MIDI file.
cvt.image2midi(image_path='composition.png')
