In [1689]:
%load_ext autoreload
%autoreload 2
from scipy.misc import imread, imresize
import numpy as np
import tensorflow as tf
from scipy.misc import imread
import matplotlib.pyplot as plt

# Helper functions to deal with image preprocessing
from src.gen_data import *

%matplotlib inline

def get_session():
    """Create a session that dynamically allocates memory."""
    # See: https://www.tensorflow.org/tutorials/using_gpu#allowing_gpu_memory_growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    return session

def rel_error(x,y):
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Older versions of scipy.misc.imresize yield different results
# from newer versions, so we check to make sure scipy is up to date.
def check_scipy():
    import scipy
    version = scipy.__version__.split('.')
    if int(version[0]) < 1:
        assert int(version[1]) >= 16, "You must install SciPy >= 0.16.0 to complete this notebook."

check_scipy()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1690]:
def check_accuracy(sess, dset, x, scores, is_training=None):
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x: x_batch, is_training: 0}
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))
    return acc

In [1691]:
#Need data and stats

X_train = pickle.load(open("data_train.txt",'rb'))
y_train = pickle.load(open("labels_train.txt", 'rb'))

X_val = pickle.load(open("data_val.txt", 'rb'))
y_val = pickle.load(open("labels_val.txt", 'rb'))
means = pickle.load(open("means.txt", 'rb'))
stds  = pickle.load(open("stds.txt", 'rb'))
#print(X_val[0])
extremes = pickle.load(open("extremes.txt", 'rb'))

H, W, C = X_train.shape[1:]

In [1692]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
# test_dset = Dataset(X_test, y_test, batch_size=64)

In [1693]:
hidden1, hidden2, channel_1, channel_2, channel_3, channel_4, channel_5, channel_6, num_classes = 80, 250, 32, 64, 128, 256, 350, 512, 5

class MuseConvNet(tf.keras.Model):
    def layer_norm(self, x, scope, is_training):
        """
        Returns a batch normalization layer that automatically switch between train and test phases based on the 
        tensor is_training

        Args:
            x: input tensor
            scope: scope name
            is_training: boolean tensor or variable
            epsilon: epsilon parameter - see batch_norm_layer
            decay: epsilon parameter - see batch_norm_layer

        Returns:
            The correct batch normalization layer based on the value of is_training
        """
        #assert isinstance(is_training, (ops.Tensor, variables.Variable)) and is_training.dtype == tf.bool

        return tf.cond(
            is_training,
            lambda: self.layer_norm_layer(x=x, scope=scope, is_training=True, reuse=None),
            lambda: self.layer_norm_layer(x=x, scope=scope, is_training=False, reuse=True),
        )


    def layer_norm_layer(self, x, scope, is_training, reuse=None):
        return tf.contrib.layers.layer_norm(x, reuse=reuse, scope=scope, trainable=is_training)

    def __init__(self):
        super().__init__()
        
        initializer = tf.variance_scaling_initializer(scale=2.0)
        
        self.conv1 = tf.layers.Conv2D(channel_1, (8, 7), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv1")
        self.conv2 = tf.layers.Conv2D(channel_2, (6, 8), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv2")
        
        self.conv3 = tf.layers.Conv2D(channel_3, (5, 5), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv3")
        
        self.conv4 = tf.layers.Conv2D(channel_4, (6, 4), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv4")
        
        self.conv5 = tf.layers.Conv2D(channel_5, (4, 4), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv5")
        

        self.conv6 = tf.layers.Conv2D(channel_6, (4, 4), 1, "same", activation=tf.nn.relu, 
                                      use_bias=True, kernel_initializer=initializer, name="conv6") 
        
        
       
        self.fc1 = tf.layers.Dense(hidden1, use_bias = True,
                                  kernel_initializer=initializer, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.5), name="affine1")
        
        self.fc2 = tf.layers.Dense(hidden2, use_bias = True,
                                  kernel_initializer=initializer, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.5), name="affine2")
        
        self.fc = tf.layers.Dense(num_classes, use_bias = True,
                                  kernel_initializer=initializer, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.5), name="affine3")
        
    
    # Define model
    
    def call(self, inputs, is_training):
        X = inputs
        feats = []
        X = self.conv1(X)
        feats.append(X)

        X = self.layer_norm(X, "batch1", is_training)
        
        X = self.conv2(X)
        feats.append(X)
        X = self.layer_norm(X, "batch2", is_training)
        
        
        X = self.conv3(X)
        feats.append(X)
        X = self.layer_norm(X, "batch3", is_training)
        
        
        
        X = self.conv4(X)
        feats.append(X)
        X = self.layer_norm(X, "batch4", is_training)
        
        
        
        X = self.conv5(X)
        feats.append(X)
        X = self.layer_norm(X, "batch5", is_training)
        
        
        X = self.conv6(X)
        feats.append(X)
        X = self.layer_norm(X, "batch6", is_training)
        
        """
        X = self.conv7(X)
        X = tf.layers.batch_normalization(X, training=is_training, name="batch7")
        """
        X = tf.layers.flatten(X)
        
        X = self.fc1(X)
        """
        X = self.fc2(X)
        """
        scores = self.fc(X)
        
        return scores, feats
    """    
    def extract_features(self,X):
        
        feats = []
        
        X = self.conv1(X)
        X = self.layer_norm_layer(x, "batch1", is_training=False, reuse=True)
        feats.append(X)
        
        X = self.conv2(X)
        X = self.layer_norm_layer(x, "batch2", is_training=False, reuse=True)
        feats.append(X)
        
        X = self.conv3(X)
        X = self.layer_norm_layer(x, "batch3", is_training=False, reuse=True)
        feats.append(X)
        
        X = self.conv4(X)
        X = self.layer_norm_layer(x, "batch4", is_training=False, reuse=True)
        feats.append(X)
        
        
        X = self.conv5(X)
        X = self.layer_norm_layer(x, "batch5", is_training=False, reuse=True)
        feats.append(X)
        
        X = self.conv6(X)
        X = self.layer_norm_layer(x, "batch6", is_training=False, reuse=True)
        feats.append(X)
        
        X = self.conv7(X)
        X = tf.layers.batch_normalization(X, training=False, name="batch7", reuse=True)
        feats.append(X)
        
        
        return  feats
    """

In [1694]:
import os

tf.reset_default_graph() # remove all existing variables in the graph 

x = tf.placeholder(tf.float32, [None, H, W, C])
y = tf.placeholder(tf.int32, [None])
        
is_training = tf.placeholder(tf.bool, name='is_training')

model = MuseConvNet()

scores, feats = model(x, is_training)

saver = tf.train.Saver(tf.global_variables())
sess = get_session() # start a new Session

# Load pretrained SqueezeNet model
SAVE_PATH = 'src/model_test/model.ckpt'
    
saver.restore(sess, SAVE_PATH)
#sess.run(tf.global_variables_initializer())
#s = sess.run(scores, feed_dict = {x: X_val[0:5], is_training: 0})
#print(s)
content_image = np.expand_dims(X_val[10], axis=0)
style_image = np.expand_dims(X_val[0], axis=0)

#conv1_kernel_val = gr.get_tensor_by_name('batch1.moving_mean:0').eval(session=sess)
#bn = tf.get_variable('batch1.moving_mean:0').eval(session=sess)

INFO:tensorflow:Restoring parameters from src/model_test/model.ckpt


In [1695]:
#print(conv1_kernel_val)

In [1696]:
def content_loss(content_weight, content_current, content_original):
    
    s = content_current.shape
    
    M = (tf.reshape(content_current, (s[1], -1)) - tf.reshape(content_original, (s[1], -1)))
    return content_weight * tf.reduce_sum(M ** 2)


In [1697]:
def gram_matrix(features, normalize=True):
    
    shape = tf.shape(features)
    H = shape[1]
    W = shape[2]
    C = shape[3]
    r = tf.reshape(features, (-1, C))
    gram = tf.matmul(tf.linalg.transpose(r),r)
    
    if normalize:
        gram /= tf.to_float(H*W*C)
    
    return gram


In [1698]:
def style_loss(feats, style_layers, style_targets, style_weights):
    
    loss = tf.fill([1], 0.0)
    for i, l in enumerate(style_layers):
        loss += style_weights[i] * tf.reduce_sum((gram_matrix(feats[l]) - style_targets[i]) ** 2)
        
    return loss

In [1699]:
def tv_loss(img, tv_weight):
    
    # Your implementation should be vectorized and not require any loops!
    horizontal = tf.reduce_sum((img[:, :, 1:, :] - img[:, :, :-1, :]) ** 2)
    vertical = tf.reduce_sum((img[:, 1:, :, :] - img[:, :-1, :, :]) ** 2)
    
    return tv_weight * (horizontal + vertical)

In [1700]:
from contextlib import redirect_stdout
def style_transfer(content_image, style_image, content_layer, content_weight,
                   style_layers, style_weights, tv_weight, init_random = False):
    """Run style transfer!
    
    Inputs:
    - content_image: filename of content image
    - style_image: filename of style image
    - image_size: size of smallest image dimension (used for content loss and generated image)
    - style_size: size of smallest style image dimension
    - content_layer: layer to use for content loss
    - content_weight: weighting on content loss
    - style_layers: list of layers to use for style loss
    - style_weights: list of weights to use for each layer in style_layers
    - tv_weight: weight of total variation regularization term
    - init_random: initialize the starting image to uniform random noise
    """
    # Extract features from the content image
    #model = saver.restore(sess, SAVE_PATH)
    content_img = content_image
    f = sess.run(feats,
                              {x: content_img, is_training: 0})
    content_target = f[content_layer]

    # Extract features from the style image
    style_img = style_image
    f1 = sess.run(feats, {x: style_img, is_training: 0})
    style_feat_vars = [f1[idx] for idx in style_layers]
    style_target_vars = []
    # Compute list of TensorFlow Gram matrices
    for style_feat_var in style_feat_vars:
        style_target_vars.append(gram_matrix(style_feat_var))
    style_targets = style_target_vars
    # Compute list of NumPy Gram matrices by evaluating the TensorFlow graph on the style image
    #style_targets = sess.run(style_target_vars, {x: style_img[None]})

    # Initialize generated image to content image
    
    if init_random:
        img_var = tf.Variable(tf.random_uniform(content_img.shape, -1.5, 1.5, dtype=tf.float32), name="image")
    else:
        img_var = tf.Variable(content_img, name="image", dtype=tf.float32)
    
    # Extract features on generated image
    fs = sess.run(feats, {x: content_img, is_training: 0})

    
    # Compute loss
    c_loss = content_loss(content_weight, fs[content_layer], content_target)
    s_loss = style_loss(feats, style_layers, style_targets, style_weights)
    t_loss = tv_loss(img_var, tv_weight)
    loss = c_loss + s_loss + t_loss
    
    # Set up optimization hyperparameters
    initial_lr = 1e-9
    decayed_lr = 0.075
    decay_lr_at = 2
    max_iter = 5

    # Create and initialize the Adam optimizer
    lr_var = tf.Variable(initial_lr, name="lr")
    # Create train_op that updates the generated image when run
    with tf.variable_scope("optimizer") as opt_scope:
        train_op = tf.train.AdamOptimizer(lr_var).minimize(loss, var_list=[img_var])
    # Initialize the generated image and optimization variables
    opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=opt_scope.name)
    sess.run(tf.variables_initializer([lr_var, img_var] + opt_vars))
    # Create an op that will clamp the image values when run
    clamp_image_op = tf.assign(img_var, tf.clip_by_value(img_var, -1.75, 1.75))
    """
    f, axarr = plt.subplots(1,2)
    axarr[0].axis('off')
    axarr[1].axis('off')
    axarr[0].set_title('Content Source Img.')
    axarr[1].set_title('Style Source Img.')
    axarr[0].imshow(deprocess_image(content_img))
    axarr[1].imshow(deprocess_image(style_img))
    plt.show()
    plt.figure()
    """
    # Hardcoded handcrafted 
    for t in range(max_iter):
        # Take an optimization step to update img_var
        sess.run(train_op)
        if t < decay_lr_at:
            sess.run(clamp_image_op)
        if t == decay_lr_at:
            sess.run(tf.assign(lr_var, decayed_lr))
        if t % 100 == 0:
            #print('Iteration {}'.format(t))
            img = sess.run(img_var)
            s = (features_to_image(deprocess_image(img, means, stds, extremes)))
            
            #with open("pic_"+str(t)+".xml", "wb") as file:
            #    with redirect_stdout(file):
            #pickle.dump(s, open("pic_"+str(t)+".xml", "wb"))
            
            
    #print('Iteration {}'.format(t))
    img = sess.run(img_var)        
    s = (features_to_image(deprocess_image(img, means, stds, extremes)))
    #with open("pic_"+str(t)+".xml", "wb") as file:
    #    with redirect_stdout(file):
    #pickle.dump(s, open("pic_"+str(t)+".xml", "wb"))
    #s.show('text')
    s.write('musicxml', fp='test_output.xml')
    s.write('midi', fp='test_output.mid')
    s.show('text')

In [1701]:
params1 = {
    'content_image' : content_image,
    'style_image' : style_image,
    
    'content_layer' : 1,
    'content_weight' : 0.5, 
    'style_layers' : [2],
    'style_weights' : [100],
    'tv_weight' : 5e-1,
    'init_random': False
}

style_transfer(**params1)

{0.0} <music21.stream.Part 0x7fcf33c33dd8>
    {4.25} <music21.note.Note G#>
    {8.25} <music21.note.Note G#>
    {12.5} <music21.note.Note G#>
    {16.75} <music21.note.Note G#>
    {21.0} <music21.note.Note G#>
    {25.25} <music21.note.Note G#>
    {29.25} <music21.note.Note G#>
    {33.5} <music21.note.Note G#>
    {37.5} <music21.note.Note G#>
    {41.75} <music21.note.Note G#>
    {46.0} <music21.note.Note G#>
    {50.25} <music21.note.Note G#>
    {54.5} <music21.note.Note G#>
    {58.25} <music21.note.Note G#>
    {62.0} <music21.note.Note G#>
    {64.75} <music21.note.Note G->
    {66.0} <music21.note.Note F->
    {67.75} <music21.note.Note E->
    {69.5} <music21.note.Note D#>
    {70.25} <music21.note.Note G->
    {71.0} <music21.note.Note E->
    {71.75} <music21.note.Note F->
    {72.5} <music21.note.Note G->
    {73.75} <music21.note.Note A#>
    {74.5} <music21.note.Note D->
    {76.25} <music21.note.Note E->
    {78.0} <music21.note.Note D->
    {79.25} <music21.note.N

In [1702]:
img = content_image
s = (features_to_image(deprocess_image(img, means, stds, extremes)))
s.write('musicxml', fp='original.xml')
s.write('midi', fp='original.mid')
s.show('text')

{0.0} <music21.stream.Part 0x7fcfb71dff28>
    {4.0} <music21.note.Rest rest>
    {8.0} <music21.note.Rest rest>
    {12.0} <music21.note.Rest rest>
    {16.0} <music21.note.Rest rest>
    {20.0} <music21.note.Rest rest>
    {24.0} <music21.note.Rest rest>
    {28.0} <music21.note.Rest rest>
    {32.0} <music21.note.Rest rest>
    {36.0} <music21.note.Rest rest>
    {40.0} <music21.note.Rest rest>
    {44.0} <music21.note.Rest rest>
    {48.0} <music21.note.Rest rest>
    {52.0} <music21.note.Rest rest>
    {56.0} <music21.note.Rest rest>
    {60.0} <music21.note.Rest rest>
    {63.0} <music21.note.Note G>
    {64.0} <music21.note.Note F>
    {66.0} <music21.note.Note E>
    {68.0} <music21.note.Note D>
    {68.5} <music21.note.Note G>
    {69.0} <music21.note.Note E>
    {69.5} <music21.note.Note F>
    {70.0} <music21.note.Note G>
    {71.5} <music21.note.Note A>
    {72.0} <music21.note.Note D>
    {74.0} <music21.note.Note E>
    {76.0} <music21.note.Note D>
    {77.0} <music21.not

In [1703]:
img = style_image
s = (features_to_image(deprocess_image(img, means, stds, extremes)))
s.write('musicxml', fp='style.xml')
s.write('midi', fp='style.mid')
s.show('text')

{0.0} <music21.stream.Part 0x7fcfa85dcdd8>
    {2.0} <music21.note.Rest rest>
    {4.0} <music21.note.Note A>
    {8.0} <music21.note.Note A>
    {12.0} <music21.note.Note G#>
    {14.0} <music21.note.Note A>
    {16.0} <music21.note.Note A>
    {20.0} <music21.note.Note B->
    {24.0} <music21.note.Note B->
    {28.0} <music21.note.Note G>
    {32.0} <music21.note.Note G>
    {34.0} <music21.note.Rest rest>
    {36.0} <music21.note.Note C>
    {38.0} <music21.note.Note C>
    {40.0} <music21.note.Note C>
    {42.0} <music21.note.Rest rest>
    {42.5} <music21.note.Note G>
    {43.0} <music21.note.Note G>
    {43.5} <music21.note.Note F>
    {44.0} <music21.note.Note E>
    {45.0} <music21.note.Note B>
    {46.0} <music21.note.Note B>
    {46.5} <music21.note.Note B>
    {47.0} <music21.note.Note B>
    {47.5} <music21.note.Note A>
    {48.0} <music21.note.Note G>
    {49.0} <music21.note.Note C>
    {50.0} <music21.note.Note C>
    {50.5} <music21.note.Note C>
    {51.0} <music21.note