In [1]:
import pydotplus
import os, shutil
import matplotlib.pyplot as plt
import random
import glob
import numpy as np
import tensorflow as tf
from music21 import instrument, note, chord, stream
from PIL import Image
from numpy.random import rand, normal, randn, randint
from numpy import zeros, vstack, asarray, expand_dims, ones
from keras.models import Sequential, load_model
from keras.losses import CategoricalCrossentropy
from keras.optimizers import Adam
from keras import backend as K
from keras.layers import Dense, Reshape, BatchNormalization
from keras.layers import Conv2D, Conv2DTranspose
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LeakyReLU, ReLU
from keras.optimizers import Adam
from keras.utils.vis_utils import plot_model
from keras.preprocessing.image import ImageDataGenerator 

In [2]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [3]:
filelist = glob.glob(r'C:\Users\Harshinee Sriram\OneDrive\Desktop\UBC STUDY\CPSC 540\Project\DATASETS\Working\Train\music\*.png')
trainX = np.array([np.array(Image.open(this_img)) for this_img in filelist])
print("Size of Training X:", trainX.shape)
filelist = glob.glob(r'C:\Users\Harshinee Sriram\OneDrive\Desktop\UBC STUDY\CPSC 540\Project\DATASETS\Working\Validation\music\*.png')
testX = np.array([np.array(Image.open(this_img)) for this_img in filelist])
print("Size of Testing X:", testX.shape)
trainY = []
for i in range(866):
    trainY.append('1')
    
trainY = np.array(trainY)
print("Size of Training Y:", trainY.shape)

testY = []
for i in range(348):
    testY.append('1')
    
testY = np.array(testY)
print("Size of Testing Y:", testY.shape)

Size of Training X: (866, 106, 106)
Size of Testing X: (348, 106, 106)
Size of Training Y: (866,)
Size of Testing Y: (348,)


In [4]:
path = r'C:\Users\Harshinee Sriram\OneDrive\Desktop\UBC STUDY\CPSC 540\Project\DATASETS\Working\Train\music'
os.getcwd()
img_list = os.listdir(path)
def access_images(img_list,path,length):
    pixels = []
    imgs = []
    for i in range(length):
        if 'png' in img_list[i]:
            try:
                img = Image.open(path+'/'+img_list[i],'r')
                img = img.convert('1')
                pix = np.array(img.getdata())
                pix = pix.astype('float32')
                pix /= 255.0
                pixels.append(pix.reshape(106,106,1))
                imgs.append(img)
            except:
                pass
    return np.array(pixels),imgs
def show_image(pix_list):
    array = np.array(pix_list.reshape(106,106), dtype=np.uint8)
    new_image = Image.fromarray(array)
    new_image.show()
    
pixels,imgs = access_images(img_list,path,200)

def train_preprocessing():
    filelist = glob.glob(r'C:\Users\Harshinee Sriram\OneDrive\Desktop\UBC STUDY\CPSC 540\Project\DATASETS\Working\Train\music\*.png')
    trainX = np.array([np.array(Image.open(this_img)) for this_img in filelist])
    trainX = expand_dims(trainX, axis=-1)
    trainX = trainX.astype('float32')
    trainX = trainX/255.0
    return trainX

In [5]:
def define_discriminator(in_shape = (106,106,1)):
    model = Sequential()
    model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

In [6]:
def define_generator(latent_dim):
    model = Sequential()
    n_nodes = 128 * 53 * 53
    model.add(Dense(n_nodes, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Reshape((53, 53, 128)))
    model.add(Dense(512))
    model.add(Conv2DTranspose(512, (4,4), strides=(2,2), padding='same'))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(512))
    model.add(Conv2D(1, (7,7) , padding='same',activation = 'sigmoid'))
    return model

In [7]:
def define_gan(g_model, d_model):
    d_model.trainable = False
    model = Sequential()
    model.add(g_model)
    model.add(d_model)
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

In [8]:
def generate_real_samples(dataset, n_samples):
    ix = randint(0, dataset.shape[0], n_samples)
    X = dataset[ix]
    y = ones((n_samples, 1))
    return X, y
 
def generate_latent_points(latent_dim, n_samples):
    x_input = randn(latent_dim * n_samples)
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input
def generate_fake_samples(g_model, latent_dim, n_samples):
    x_input = generate_latent_points(latent_dim, n_samples)
    X = g_model.predict(x_input)
    y = zeros((n_samples, 1))
    return X, y

def performance(epoch, generator, discriminator, dataset, n_samples=50):
    X_real, y_real = generate_real_samples(dataset, n_samples)
    _, acc_real = discriminator.evaluate(X_real, y_real, verbose=0)
    x_fake, y_fake = generate_fake_samples(generator, 100, n_samples)
    _, acc_fake = discriminator.evaluate(x_fake, y_fake, verbose=0)
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))

In [9]:
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=50, n_batch=10):
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    for i in range(n_epochs):
        for j in range(bat_per_epo):
            X_real, y_real = generate_real_samples(dataset, half_batch)
            X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
            X, y = vstack((X_real, X_fake)), vstack((y_real, y_fake))
            d_loss, _ = d_model.train_on_batch(X, y)
            X_gan = generate_latent_points(latent_dim, n_batch)
            y_gan = ones((n_batch, 1))
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            #print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))
        if (i+1) % 10 == 0:
            performance(i, g_model, d_model, dataset)
            #clear_output()

In [10]:
latent_dim = 100
d_model = define_discriminator()
g_model = define_generator(latent_dim)
gan_model = define_gan(g_model, d_model)
print(pixels.shape)
train(g_model, d_model, gan_model, np.array(pixels), latent_dim, n_epochs=100)

(200, 106, 106, 1)
>Accuracy real: 100%, fake: 0%
>Accuracy real: 100%, fake: 34%
>Accuracy real: 64%, fake: 94%
>Accuracy real: 80%, fake: 100%
>Accuracy real: 94%, fake: 100%
>Accuracy real: 88%, fake: 100%
>Accuracy real: 90%, fake: 96%
>Accuracy real: 98%, fake: 100%
>Accuracy real: 84%, fake: 100%
>Accuracy real: 96%, fake: 100%


In [11]:
os.chdir(r'C:\Users\Harshinee Sriram\OneDrive\Desktop\UBC STUDY\CPSC 540\Project\Jupyter Notebooks\Output\Sophisticated Model\100 Epochs')
model = g_model
for i in range(5):
    latent_points = generate_latent_points(100, 1)
    X = g_model.predict(latent_points)
    array = np.array(X.reshape(106,106),dtype = np.uint8)
    array*= 255
    new_image = Image.fromarray(array,'L')
    new_image = new_image.save('composition_' + str(i) + '.png')

In [12]:
Image.MAX_IMAGE_PIXELS = None
lowerBoundNote = 21
def column2notes(column):
    notes = []
    for i in range(len(column)):
        if column[i] > 255/2:
            notes.append(i+lowerBoundNote)
    return notes

resolution = 0.25
def updateNotes(newNotes,prevNotes): 
    res = {} 
    for note in newNotes:
        if note in prevNotes:
            res[note] = prevNotes[note] + resolution
        else:
            res[note] = resolution
    return res

def image2midi(image_path):
    with Image.open(image_path) as image:
        im_arr = np.frombuffer(image.tobytes(), dtype=np.uint8)
        print(im_arr.shape)
        try:
            im_arr = im_arr.reshape((106, 106))
        except:
            im_arr = im_arr.reshape((106, 106,1))
            im_arr = np.dot(im_arr, [0.33, 0.33, 0.33])

    offset = 0
    output_notes = []
    
    prev_notes = updateNotes(im_arr.T[0,:],{})
    for column in im_arr.T[1:,:]:
        notes = column2notes(column)
        # pattern is a chord
        notes_in_chord = notes
        old_notes = prev_notes.keys()
        for old_note in old_notes:
            if not old_note in notes_in_chord:
                new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
                new_note.storedInstrument = instrument.Piano()
                if offset - prev_notes[old_note] >= 0:
                    new_note.offset = offset - prev_notes[old_note]
                    output_notes.append(new_note)
                elif offset == 0:
                    new_note.offset = offset
                    output_notes.append(new_note)                    
                else:
                    print(offset,prev_notes[old_note],old_note)

        prev_notes = updateNotes(notes_in_chord,prev_notes)

        offset += resolution

    for old_note in prev_notes.keys():
        new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
        new_note.storedInstrument = instrument.Piano()
        new_note.offset = offset - prev_notes[old_note]

        output_notes.append(new_note)

    prev_notes = updateNotes(notes_in_chord,prev_notes)

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp=image_path.split("/")[-1].replace(".png",".mid"))

for file in os.listdir(os.getcwd()):
    image_path = os.getcwd() + '\\' + file
    image2midi(image_path)

(11236,)
(11236,)
(11236,)
(11236,)
(11236,)
