In [12]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import os
import random
import csv
import json
import glob
import numpy as np
import resampy
import keras
from keras import backend as K
from keras.optimizers import Adam
from log import *
import tensorflow as tf
import soundfile as sf
import librosa
from kapre.time_frequency import Melspectrogram

In [2]:
LOGGER = logging.getLogger('quantized_inference')
LOGGER.setLevel(logging.DEBUG)

In [3]:
def load_audio(path, sr):
    """
    Load audio file
    """
    data, sr_orig = sf.read(path, dtype='float32', always_2d=True)
    data = data.mean(axis=-1)

    if sr_orig != sr:
        data = resampy.resample(data, sr_orig, sr)

    return data

def amplitude_to_db(S, amin=1e-10, dynamic_range=80.0):
    magnitude = np.abs(S)
    power = np.square(magnitude, out=magnitude)
    ref_value = power.max()

    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec -= log_spec.max()

    log_spec = np.maximum(log_spec, -dynamic_range)
    return log_spec

def get_melspectrogram(frame, n_fft=2048, mel_hop_length=242, samp_rate=48000, n_mels=256, fmax=None):
    S = np.abs(librosa.core.stft(frame, n_fft=n_fft, hop_length=mel_hop_length, window='hann', center=True, pad_mode='constant'))
    S = librosa.feature.melspectrogram(sr=samp_rate, S=S, n_fft=n_fft, n_mels=n_mels, fmax=fmax, power=1.0, htk=True)
    S = amplitude_to_db(np.array(S))
    return S

In [19]:
def initialize_uninitialized_variables(sess):
    if hasattr(tf, 'global_variables'):
        variables = tf.global_variables()
    else:
        variables = tf.all_variables()

    #print(variables)
    uninitialized_variables = []
    for v in variables:
        if not hasattr(v, '_keras_initialized') or not v._keras_initialized:
            uninitialized_variables.append(v)
            v._keras_initialized = True
    
    #print(uninitialized_variables)
    if uninitialized_variables:
        if hasattr(tf, 'variables_initializer'):
            sess.run(tf.variables_initializer(uninitialized_variables))
        else:
            sess.run(tf.initialize_variables(uninitialized_variables)) 
            
def get_l3model(model_path, saved_model_type='keras'):
    
    if saved_model_type == 'keras': 
        model = keras.models.load_model(model_path, custom_objects={'Melspectrogram': Melspectrogram})
        if 'flatten' in model.layers[-1].name:
            print("Flatten Layer is part of model")
            l3embedding_model = model
        else:
            embed_layer = model.get_layer('audio_embedding_layer')
            pool_size = tuple(embed_layer.get_output_shape_at(0)[1:3])
            y_a = keras.layers.MaxPooling2D(pool_size=pool_size, padding='same')(model.output)
            y_a = keras.layers.Flatten()(y_a)
            l3embedding_model = keras.models.Model(inputs=model.input, outputs=y_a)
        
    elif saved_model_type == 'tflite':
        tflite_model_file = model_path
        l3embedding_model = tf.lite.Interpreter(model_path=str(tflite_model_file))
    else:
        l3embedding_model = model_path
        
    return l3embedding_model

In [10]:
def load_us8k_metadata(path):
    """
    Load UrbanSound8K metadata
    Args:
        path: Path to metadata csv file
              (Type: str)
    Returns:
        metadata: List of metadata dictionaries
                  (Type: list[dict[str, *]])
    """
    metadata = [{} for _ in range(10)]
    with open(path) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            fname = row['slice_file_name']
            row['start'] = float(row['start'])
            row['end'] = float(row['end'])
            row['salience'] = float(row['salience'])
            fold_num = row['fold'] = int(row['fold'])
            row['classID'] = int(row['classID'])
            metadata[fold_num-1][fname] = row

    return metadata

def get_l3_frames_uniform_keras_quantized(audio, model_path, n_fft=2048, n_mels=256,\
                                          mel_hop_length=242, hop_size=0.1, sr=48000,\
                                          with_melSpec=None, fmax=None, **kwargs):
    if type(audio) == str:
        audio = load_audio(audio, sr)

    hop_size = hop_size
    hop_length = int(hop_size * sr)
    frame_length = sr * 1

    audio_length = len(audio)
    if audio_length < frame_length:
        # Make sure we can have at least one frame of audio
        pad_length = frame_length - audio_length
    else:
        # Zero pad so we compute embedding on all samples
        pad_length = int(np.ceil(audio_length - frame_length)/hop_length) * hop_length \
                     - (audio_length - frame_length)

    if pad_length > 0:
        # Use (roughly) symmetric padding
        left_pad = pad_length // 2
        right_pad= pad_length - left_pad
        audio = np.pad(audio, (left_pad, right_pad), mode='constant')
    
    if with_melSpec:
        #print("Melspectrogram is part of the weight file")
        # Divide into overlapping 1 second frames
        x = librosa.util.utils.frame(audio, frame_length=frame_length, hop_length=hop_length).T    
        # Add a channel dimension
        X = x.reshape((x.shape[0], 1, x.shape[-1]))
    
    else:
        #print("Melspectrogram has been removed from the weight file")
        frames = librosa.util.utils.frame(audio, frame_length=frame_length, hop_length=hop_length).T

        X = []
        for frame in frames:
            S = np.abs(librosa.core.stft(frame, n_fft=n_fft, hop_length=mel_hop_length,
                                         window='hann', center=True,
                                         pad_mode='constant'))
            S = librosa.feature.melspectrogram(sr=sr, S=S, n_mels=n_mels, fmax=fmax,
                                           power=1.0, htk=True)
            S = amplitude_to_db(np.array(S))
            X.append(S)

        X = np.array(X)[:, :, :, np.newaxis]

    # Get the L3 embedding for each frame
    l3embedding = predict_quantized_model(model_path, X)

    return l3embedding

def compute_file_features(path, feature_type, l3embedding_model=None, model_type='keras', **feature_args):
    
    if model_type == 'quantized_keras' and type(l3embedding_model) != str:
        raise ValueError('For quantized keras model, pass the model path instead of the model')
        
    if feature_type == 'l3':
        if not l3embedding_model:
            err_msg = 'Must provide L3 embedding model to use {} features'
            raise ValueError(err_msg.format(feature_type))
        #hop_size = feature_args.get('hop_size', 0.1)
        #samp_rate = feature_args.get('samp_rate', 48000)
        
        if model_type == 'keras':
            file_features = get_l3_frames_uniform(path, l3embedding_model, **feature_args)
        elif model_type == 'quantized_keras':
            file_features = get_l3_frames_uniform_keras_quantized(path, model_path=l3embedding_model, **feature_args)
        elif model_type == 'tflite':
            file_features = get_l3_frames_uniform_tflite(path, interpreter=l3embedding_model, **feature_args)
        else:
            raise ValueError('Model type not supported!')
            
    else:
        raise ValueError('Invalid feature type: {}'.format(feature_type))

    return file_features

def generate_us8k_fold_data(metadata, data_dir, fold_idx, output_dir, l3embedding_model=None, model_type='keras',
                            features='l3', random_state=12345678, **feature_args):
    """
    Generate all of the data for a specific fold

    Args:
        metadata: List of metadata dictionaries, or a path to a metadata file to be loaded
                  (Type: list[dict[str,*]] or str)

        data_dir: Path to data directory
                  (Type: str)

        fold_idx: Index of fold to load
                  (Type: int)

        output_dir: Path to output directory where fold data will be stored
                    (Type: str)

    Keyword Args:
        l3embedding_model: L3 embedding model, used if L3 features are used
                           (Type: keras.engine.training.Model or None)

        features: Type of features to be computed
                  (Type: str)

    """

    if type(metadata) == str:
        metadata = load_us8k_metadata(metadata)

    # Set random seed
    random_state = random_state + fold_idx
    random.seed(random_state)
    np.random.seed(random_state)

    audio_fold_dir = os.path.join(data_dir, "fold{}".format(fold_idx+1))

    # Create fold directory if it does not exist
    output_fold_dir = os.path.join(output_dir, "fold{}".format(fold_idx+1))
    if not os.path.isdir(output_fold_dir):
        os.makedirs(output_fold_dir)

    LOGGER.info('Generating fold {} in {}'.format(fold_idx+1, output_fold_dir))

    num_files = len(metadata[fold_idx])

    for idx, (fname, example_metadata) in enumerate(metadata[fold_idx].items()):
        desc = '({}/{}) Processed {} -'.format(idx+1, num_files, fname)
        with LogTimer(LOGGER, desc, log_level=logging.DEBUG):
            # TODO: Make sure glob doesn't catch things with numbers afterwards
            variants = [x for x in glob.glob(os.path.join(audio_fold_dir,
                '**', os.path.splitext(fname)[0] + '[!0-9]*[wm][ap][v3]'), recursive=True)
                if os.path.isfile(x) and not x.endswith('.jams')]
            num_variants = len(variants)
            for var_idx, var_path in enumerate(variants):
                audio_dir = os.path.dirname(var_path)
                var_fname = os.path.basename(var_path)
                desc = '\t({}/{}) Variants {} -'.format(var_idx+1, num_variants, var_fname)
                with LogTimer(LOGGER, desc, log_level=logging.DEBUG):
                    generate_us8k_file_data(var_fname, example_metadata, audio_dir,
                                            output_fold_dir, features,
                                            l3embedding_model, model_type, **feature_args)


def generate_us8k_file_data(fname, example_metadata, audio_fold_dir,
                            output_fold_dir, features,
                            l3embedding_model, model_type, **feature_args):
    audio_path = os.path.join(audio_fold_dir, fname)

    basename, _ = os.path.splitext(fname)
    output_path = os.path.join(output_fold_dir, basename + '.npz')

    if os.path.exists(output_path):
        LOGGER.info('File {} already exists'.format(output_path))
        return

    X = compute_file_features(audio_path, features, l3embedding_model=l3embedding_model,\
                                           model_type=model_type, **feature_args)

    # If we were not able to compute the features, skip this file
    if X is None:
        LOGGER.error('Could not generate data for {}'.format(audio_path))
        return

    class_label = example_metadata['classID']
    y = class_label

    np.savez_compressed(output_path, X=X, y=y)

    return output_path, 'success'

In [16]:
if __name__=='__main__':
    model_path = '/scratch/sk7898/l3pruning/embedding_approx_mse/embedding_approx/music/48000_256_242_2048_fmax_None/quantized_mse_original/20190930153319/model_best_valid_loss.h5'
    fold_num = 1
    metadata_path = '/beegfs/jtc440/UrbanSound8K/metadata/UrbanSound8K.csv'
    data_dir = '/beegfs/jtc440/UrbanSound8K/audio'
    dataset_output_dir = '/scratch/sk7898/test_quant_keras'
    random_state = 20180302
    samp_rate = 48000
    n_mels = 256
    n_hop = 242
    n_dft = 2048 
    fmax=None
    with_melSpec = False
    
    
    _, model_ext = os.path.splitext(os.path.basename(model_path))
    saved_model_type = 'tflite' if model_ext == '.tflite' else ('quantized_keras' if 'quantized' in model_path else 'keras')

    #l3embedding_model = get_l3model(model_path, saved_model_type=saved_model_type)

    # Generate a single fold if a fold was specified
    generate_us8k_fold_data(metadata_path, data_dir, fold_num-1, dataset_output_dir,
                            l3embedding_model=model_path, model_type=saved_model_type, 
                            features='l3', random_state=random_state,
                            mel_hop_length=n_hop, n_mels=n_mels,\
                            n_fft=n_dft, fmax=fmax, sr=samp_rate, with_melSpec=with_melSpec)
    



TypeError: predict() missing 1 required positional argument: 'x'

In [23]:
def restore_save_quantized_model(model_path):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import keras

    features = []
    eval_graph = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True    
    eval_sess = tf.Session(config=config, graph=eval_graph)
        
    K.set_session(eval_sess)

    with eval_graph.as_default():
        optimizer = Adam(lr=0.00001)
        K.set_learning_phase(0)
        eval_model = keras.models.load_model(model_path)
        tf.contrib.quantize.create_eval_graph(input_graph=eval_graph)
        initialize_uninitialized_variables(eval_sess)
        
        eval_graph_def = eval_graph.as_graph_def()
        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                                                                        eval_sess,
                                                                        eval_graph_def,
                                                                        [eval_model.output.op.name]
                                                                        )
        
        with open('/scratch/sk7898/test_quant_keras/eval_graph.pb', 'w') as f:
            f.write(str(eval_graph_def))
        
        with open('/scratch/sk7898/test_quant_keras/eval_frozen_graph.pb', 'w') as f:
            f.write(str(frozen_graph_def))
        
restore_save_quantized_model(model_path)

INFO:tensorflow:Froze 80 variables.
INFO:tensorflow:Converted 80 variables to const ops.


In [28]:
def load_graph(model_filepath):
    '''
    Lode trained model.
    '''
    print('Loading model...')
    graph = tf.Graph()

    with tf.gfile.GFile(model_filepath, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    print('Check out the input placeholders:')
    nodes = [n.name + ' => ' +  n.op for n in graph_def.node if n.op in ('Placeholder')]
    for node in nodes:
        print(node)

    # Define input tensor
    #input = tf.placeholder(np.float32, shape = [None, 32, 32, 3], name='input')
    #self.dropout_rate = tf.placeholder(tf.float32, shape = [], name = 'dropout_rate')

    #tf.import_graph_def(graph_def, {'input': self.input, 'dropout_rate': self.dropout_rate})

    print('Model loading complete!')

    '''
    # Get layer names
    layers = [op.name for op in self.graph.get_operations()]
    for layer in layers:
        print(layer)
    '''

    '''
    # Check out the weights of the nodes
    weight_nodes = [n for n in graph_def.node if n.op == 'Const']
    for n in weight_nodes:
        print("Name of the node - %s" % n.name)
        print("Value - " )
        print(tensor_util.MakeNdarray(n.attr['value'].tensor))
    '''

frozen_file_path = '/scratch/sk7898/test_quant_keras/eval_frozen_graph.pb'
load_graph(frozen_file_path)

Loading model...
Check out the input placeholders:
Model loading complete!
