In [0]:
import numpy as np
import tables
import os
import pytz
from IPython.display import clear_output
import tensorflow as tf
import matplotlib.pyplot as plt
import time
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import keras.backend as K
import datetime
import seaborn as sns

# Apply seaborn's plot settings with font elements scaled by a factor of 1.5.
sns.set(font_scale=1.5)

# pd.set_option('display.max_rows', 30)
# pd.set_option('display.max_columns', 30)

In [0]:

class TranslationInvariant(BaseModel):
    """Three-level convolutional model on top of fixed sine/cosine filter banks.

    * Level 1 convolves the input with two constant filter banks (``wsin`` and
      ``wcos``) and adds the squared responses.
    * Level 2 is a trainable convolution + ReLU applied to the log of the
      level-1 output; it slides along the level-1 filter axis with stride 2.
    * Level 3 is a trainable convolution + ReLU whose filter is as wide as the
      whole level-2 region axis.
    * A dense layer maps the flattened level-3 output to ``MIDI_PITCH_AMOUNT``
      units.

    NOTE(review): ``start_freq``, ``end_freq``, ``wscale``, ``learning_rate``,
    ``input_X_ph``, ``input_y_ph`` and ``create_filters`` are not defined in
    this class -- presumably they are provided by ``BaseModel``; confirm.
    ``WINDOW_SIZE`` and ``MIDI_PITCH_AMOUNT`` are module-level names defined
    elsewhere.
    """

    def __init__(self):
        """Set up layer geometry, the constant level-1 filters and the
        trainable level-2 / level-3 filter variables.

        All ``lvl*_regions_*`` attributes are integers computed with the
        output-size formula of a VALID convolution:
        ``floor((input - filter) / stride) + 1``.
        """
        super().__init__()

        # ========= Layer 1 =========
        self.lvl1_stride_x = 512
        self.lvl1_filter_height = 1
        self.lvl1_filter_width = 4096

        self.lvl1_filters_amount = 512  # or so called out-channels
        # int() keeps the count integral even if WINDOW_SIZE is given as a float.
        self.lvl1_regions_x = int((WINDOW_SIZE - self.lvl1_filter_width)
                                  // self.lvl1_stride_x) + 1  # 25 when WINDOW_SIZE == 16384
        self.lvl1_regions_y = self.lvl1_filters_amount  # 512

        # Filter banks in conv2d layout (height, width, in_channels, out_channels),
        # i.e. (1, 4096, 1, 512) for now.
        lvl1_filter_shape = (self.lvl1_filter_height,
                             self.lvl1_filter_width,
                             1,
                             self.lvl1_filters_amount)
        # Allocated uninitialised here; create_filters() is called below
        # (presumably it fills both arrays -- confirm against its definition).
        self.wsin = np.empty(lvl1_filter_shape, dtype=np.float32)
        self.wcos = np.empty(lvl1_filter_shape, dtype=np.float32)

        # One frequency per level-1 filter, spaced evenly on a log scale
        # between start_freq and end_freq (both endpoints included).
        self.frequencies = np.logspace(np.log(self.start_freq),
                                       np.log(self.end_freq),
                                       self.lvl1_filters_amount,
                                       base=np.e,
                                       endpoint=True)

        self.discrete_time = np.arange(self.lvl1_filter_width)
        self.create_filters()
        # The level-1 filters are graph constants, so they are never trained.
        self.wsin_var = tf.constant(value=self.wsin, dtype='float32', name="wsin")
        self.wcos_var = tf.constant(value=self.wcos, dtype='float32', name="wcos")
        print('wsin_var: {}'.format(self.wsin_var))
        print('wcos_var: {}'.format(self.wcos_var))

        # ========= Layer 2 =========
        self.lvl2_stride_y = 2
        self.lvl2_filter_height = 128
        self.lvl2_filter_width = 1

        self.lvl2_filters_amount = 128
        # Stride 1 along x.
        self.lvl2_regions_x = self.lvl1_regions_x - self.lvl2_filter_width + 1  # 25
        self.lvl2_regions_y = ((self.lvl1_regions_y - self.lvl2_filter_height)
                               // self.lvl2_stride_y + 1)  # 193

        self.lvl2_filters = tf.Variable(
            initial_value=tf.random_normal([self.lvl2_filter_height,
                                            self.lvl2_filter_width,
                                            1,
                                            self.lvl2_filters_amount], seed=999) * self.wscale,
            dtype='float32',
            name="lvl2_filters")
        print('lvl2_filters: {}'.format(self.lvl2_filters))

        # ========= Layer 3 =========
        self.lvl3_filter_height = 1
        # The filter covers the whole x axis of the level-2 output.
        self.lvl3_filter_width = self.lvl2_regions_x

        self.lvl3_filters_amount = 256
        # Stride 1 along both axes.
        self.lvl3_regions_x = self.lvl2_regions_x - self.lvl3_filter_width + 1
        self.lvl3_regions_y = self.lvl2_regions_y - self.lvl3_filter_height + 1

        self.lvl3_filters = tf.Variable(
            initial_value=tf.random_normal([self.lvl3_filter_height,
                                            self.lvl3_filter_width,
                                            self.lvl2_filters_amount,
                                            self.lvl3_filters_amount], seed=999) * self.wscale,
            dtype='float32',
            name="lvl3_filters")
        print('lvl3_filters: {}'.format(self.lvl3_filters))

    def define_graph(self, activation=None):
        """Build the forward pass, the loss and the optimizer step.

        Args:
            activation: activation passed to the final ``tf.layers.Dense``
                layer; ``None`` leaves the dense output without an activation.

        Sets:
            self.pred_y_ph: output of the dense layer,
                one row per batch element with ``MIDI_PITCH_AMOUNT`` columns.
            self.loss: squared error summed over axis 1 and averaged over the
                batch.
            self.optimizer_step: Adam minimisation op for ``self.loss``.
        """
        lvl1_strides = [1, 1, self.lvl1_stride_x, 1]
        conv_sin = tf.nn.conv2d(input=self.input_X_ph,
                                filter=self.wsin_var,
                                strides=lvl1_strides,
                                padding="VALID")
        conv_cos = tf.nn.conv2d(input=self.input_X_ph,
                                filter=self.wcos_var,
                                strides=lvl1_strides,
                                padding="VALID")

        batch_size_ph = tf.shape(self.input_X_ph)[0]

        # Sum of squared sine and cosine responses per filter and region.
        lvl1_conv = tf.square(conv_sin) + tf.square(conv_cos)
        # Swap axes 1 and 3 so the level-1 filter axis becomes a spatial axis
        # that level 2 can convolve over.
        lvl1_conv = tf.transpose(lvl1_conv, perm=[0, 3, 2, 1])
        print("lvl1_conv shape: {}".format(lvl1_conv.get_shape()))  # (?, 512, 25, 1)

        # lvl1_conv is a sum of squares and may be exactly 0; the small offset
        # keeps the logarithm finite. (1e-11 is the same value as 10e-12.)
        log_eps = 1e-11
        lvl2_conv = tf.nn.relu(tf.nn.conv2d(input=tf.log(lvl1_conv + log_eps),
                                            filter=self.lvl2_filters,
                                            strides=[1, self.lvl2_stride_y, 1, 1],
                                            padding="VALID"))
        print("lvl2_conv shape: {}".format(lvl2_conv.get_shape()))  # (?, 193, 25, 128)

        lvl3_conv = tf.nn.relu(tf.nn.conv2d(input=lvl2_conv,
                                            filter=self.lvl3_filters,
                                            strides=[1, 1, 1, 1],
                                            padding="VALID"))
        print("lvl3_conv shape: {}".format(lvl3_conv.get_shape()))  # expected (?, 193, 1, 256)

        # Number of features per example after flattening; a plain Python int
        # so the last dimension fed to Dense is statically known.
        flat_units = int(self.lvl3_regions_x * self.lvl3_regions_y * self.lvl3_filters_amount)

        # Output tensor the same shape as inputs except the last dimension is of size units.
        dense_layer = tf.layers.Dense(units=MIDI_PITCH_AMOUNT, activation=activation)
        self.pred_y_ph = dense_layer(tf.reshape(lvl3_conv,
                                                shape=[batch_size_ph, flat_units]))

        squared_error = (self.input_y_ph - self.pred_y_ph) ** 2
        self.loss = tf.reduce_mean(tf.reduce_sum(squared_error, axis=1))

        self.optimizer_step = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
        


