##### Load pretrained models

In [None]:
#@markdown <b>Run me to import underscore module</b><br/>   {display-mode: "form"}
#@markdown <small>Method signatures:</small><br/> 
#@markdown <small><small>&nbsp; &nbsp; &nbsp; _under(source_path, target_path, copy=True, auth_on_upload=True)</small></small><br/>
#@markdown <small><small>&nbsp; &nbsp; &nbsp; _set_gh_token(token)</small></small><br/>
#@markdown <small><small>&nbsp; &nbsp; &nbsp; _from_gh(user_name, repo_name, release_name) &nbsp; &nbsp; &nbsp; <b>Returns:</B> dictionary of arrays { 'array_name' : np.ndarray }</small></small><br/>
#@markdown <small><small>&nbsp; &nbsp; &nbsp; _to_gh(user_name, repo_name, release_name, split_size=600, **arr_kwargs)</small></small><br/>
#@markdown <small><small>&nbsp; &nbsp; &nbsp; _export_model(model, model_name, model_type, val_dataset, test_dataset, params, hparams, history, log_dir, n_prep_layers=None)</small></small><br/>
!pip install -q tensorflowjs
!pip install -q githubrelease
import numpy as np
import os, glob, re, time, json
import github_release
import tensorflow.keras.backend as K
from contextlib import redirect_stdout

# Names of log sub-directories that _log_to_gh has already compressed in this
# session; _log_to_gh skips any directory whose name is in this set.
compressed_dirs = set()


def _compress(source_path, target_path, target_dir=None):
    if target_dir:
        !mkdir -p {target_dir}
    if target_path.endswith('.tar.gz'):
        !tar -czf {target_path} -C {source_path} .
    elif target_path.endswith('.tar'):
        !tar -cf {target_path} -C {source_path} .
    elif target_path.endswith('.zip'):
        !(cd {source_path} && zip -q -r {target_path} .)


def _extract(source_path, target_path):
    """Unpack an archive into a directory using the system tar/unzip tools.

    The format is chosen from the suffix of ``source_path`` ('.tar.gz',
    '.tar' or '.zip'). For any other suffix only the target directory is
    created and nothing is extracted.

    Args:
        source_path: Path of the archive file to unpack.
        target_path: Directory to extract into; created with ``mkdir -p``.
    """
    !mkdir -p {target_path}
    if source_path.endswith('.tar.gz'):
        !tar -xzf {source_path} -C {target_path}
    elif source_path.endswith('.tar'):
        !tar -xf {source_path} -C {target_path}
    elif source_path.endswith('.zip'):
        !unzip -qq {source_path} -d {target_path}


def _under(source_path, target_path, copy=True, auth_on_upload=True):
    """
    Copy, move, compress or extract between local paths and GCS ('gs://') URLs.

    The operation is inferred from the two names: everything after the FIRST
    '.' in the last path component is treated as its extension, and a name
    without any '.' is treated as a directory.

    Use cases:
        Movement:
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Compression (e.g. from dir to .tar.gz):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Extraction (e.g. from .zip to dir):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Extraction & compression (e.g. from .zip to .tar.gz):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

    Args:
        source_path: Local path or 'gs://' URL to read from. One trailing '/'
            is stripped, and local paths are made absolute.
        target_path: Local path or 'gs://' URL to write to.
        copy: Only consulted for GCS -> GCS and LOCAL -> LOCAL transfers with
            equal extensions: True copies, False moves.
        auth_on_upload: When the target is on GCS, call
            google.colab.auth.authenticate_user() before doing anything else.

    Notes:
        - Intermediate files are placed in TEMP_DIR ('/tmp_'), which is
          removed at the end of every operation that is not a plain move.
        - When the extensions differ but none of the compression/extraction
          cases match, no output is written.
    """
    COMPRESSION_FORMATS = ('zip', 'tar', 'tar.gz')
    TEMP_DIR = "/tmp_"
    LOG_TEMPLATE = "{}    from    {}    to    {}"

    # Source
    if source_path.endswith("/"):
        source_path = source_path[:-1]
    source_dir, _, source_name = source_path.rpartition('/')
    source_isgcs = source_path.startswith("gs://")
    source_islocal = not source_isgcs
    if source_islocal:
        source_path = os.path.abspath(source_path)
    # Despite its name, `source_isprefix` holds the text before the first '.'
    # (the counterpart of `target_prefix`). `source_isfile` is the '.' separator
    # itself, so it is truthy only when the name contains a dot.
    source_isprefix, source_isfile, source_ext = source_name.partition('.')
    source_isdir = not source_isfile
    source_iscompression = source_ext in COMPRESSION_FORMATS

    # Target
    target_dir, _, target_name = target_path.rpartition('/')
    target_isgcs = target_path.startswith("gs://")
    target_islocal = not target_isgcs
    # `target_isfile` is the '.' separator (truthy when the name has a dot).
    target_prefix, target_isfile, target_ext = target_name.partition('.')
    target_isdir = not target_isfile
    target_iscompression = target_ext in COMPRESSION_FORMATS

    # Flags
    # MOVE_ONLY also covers dir -> dir, where both extensions are ''.
    MOVE_ONLY = source_ext == target_ext
    # NOTE(review): GCS_ONLY is computed but not referenced below.
    GCS_ONLY = source_isgcs and target_isgcs
    RENAME = source_isprefix != target_prefix
    COMPRESSION = source_isdir and target_iscompression
    EXTRACTION = source_iscompression and target_isdir
    EXTRACTION_COMPRESSION = source_iscompression and target_iscompression and source_ext != target_ext

    # Add forward slash if file is at root level
    source_dir = "/" if not source_dir else source_dir
    target_dir = "/" if not target_dir else target_dir

    # Authenticate if writing to GCS
    if target_isgcs and auth_on_upload:
        from google.colab import auth
        auth.authenticate_user()

    # Assert that subdirectories exist if target is local
    if target_islocal:
        !mkdir -p {target_dir}

    # Movement commands
    if MOVE_ONLY:
        # GCS -> GCS
        if source_isgcs and target_isgcs:
            action = "COPYING" if copy else "MOVING"
            print(LOG_TEMPLATE.format(f"{action} (1/1)", source_path, target_path))
            if copy:
                !gsutil -m -q cp -r {source_path} {target_path}
            else:
                !gsutil -m -q mv {source_path} {target_path}

        # LOCAL -> LOCAL
        elif source_islocal and target_islocal:
            action = "COPYING" if copy else "MOVING"
            print(LOG_TEMPLATE.format(f"{action} (1/1)", source_path, target_path))
            if copy:
                !cp -r {source_path} {target_path}
            else:
                !mv {source_path} {target_path}

        # GCS -> LOCAL
        elif source_isgcs and target_islocal:
            if source_isdir:
                # The directory is downloaded into the target's parent under
                # its source name, then renamed if the target name differs.
                print(LOG_TEMPLATE.format("DOWNLOADING DIR (1/1)", source_path, target_dir))
                !gsutil -m -q cp -r {source_path} {target_dir}
                if RENAME:
                    print(LOG_TEMPLATE.format("\tRENAMING DIR", source_isprefix, target_prefix))
                    !mv {target_dir}/{source_isprefix} {target_dir}/{target_prefix}
            else:
                print(LOG_TEMPLATE.format("DOWNLOADING FILE (1/1)", source_path, target_path))
                !gsutil -m -q cp {source_path} {target_path}

        # LOCAL -> GCS
        # NOTE(review): this is a plain `if`, not `elif`. The four location
        # combinations are mutually exclusive, so at most one branch runs.
        if source_islocal and target_isgcs:
            if source_isdir:
                print(LOG_TEMPLATE.format("UPLOADING DIR (1/1)", source_path, target_path))
                !gsutil -m -q cp -r {source_path} {target_path}
            else:
                print(LOG_TEMPLATE.format("UPLOADING FILE (1/1)", source_path, target_path))
                !gsutil -m -q cp {source_path} {target_path}
        # Plain moves never create TEMP_DIR, so the cleanup at the end of the
        # function is skipped here.
        return


    # Create directory for intermediate storage if required
    if source_isgcs or target_isgcs or EXTRACTION_COMPRESSION:
        !mkdir -p {TEMP_DIR}


    # For remaining operations, download GCS source to temp and treat as local
    if source_isgcs:
        if source_isdir:
            print(LOG_TEMPLATE.format("\tDOWNLOADING DIR", source_path, TEMP_DIR))
            !gsutil -m -q cp -r {source_path} {TEMP_DIR}
        else:
            print(LOG_TEMPLATE.format("\tDOWNLOADING FILE", source_path, f"{TEMP_DIR}/{source_name}"))
            !gsutil -m -q cp {source_path} {TEMP_DIR}/{source_name}
        source_path = f"{TEMP_DIR}/{source_name}"
        source_dir = TEMP_DIR

    # Compression
    if COMPRESSION:
        if target_islocal:
            print(LOG_TEMPLATE.format("COMPRESSING (1/1)", source_path, target_path))
            _compress(source_path, target_path, target_dir=target_dir)
        else:
            # GCS target: build the archive in TEMP_DIR, then upload it.
            print(LOG_TEMPLATE.format("COMPRESSING (1/2)", source_path, f"{TEMP_DIR}/{target_name}"))
            _compress(source_path, f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING FILE (2/2)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp {TEMP_DIR}/{target_name} {target_path}

    # Extraction
    elif EXTRACTION:
        if target_islocal:
            print(LOG_TEMPLATE.format("EXTRACTING (1/1)", source_path, target_path))
            _extract(source_path, target_path)
        else:
            # GCS target: extract into TEMP_DIR, then upload the directory.
            print(LOG_TEMPLATE.format("EXTRACTING (1/2)", source_path, f"{TEMP_DIR}/{target_name}"))
            _extract(source_path, f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING DIR (2/2)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp -r {TEMP_DIR}/{target_name} {target_path}

    # Extraction & compression
    elif EXTRACTION_COMPRESSION:
        # Re-pack by extracting into TEMP_DIR/<target_prefix> and compressing
        # that directory into the new format.
        if target_islocal:
            print(LOG_TEMPLATE.format("EXTRACTING (1/2)", source_path, f"{TEMP_DIR}/{target_prefix}"))
            _extract(source_path, f"{TEMP_DIR}/{target_prefix}")
            print(LOG_TEMPLATE.format("COMPRESSING (2/2)", f"{TEMP_DIR}/{target_prefix}", target_path))
            _compress(f"{TEMP_DIR}/{target_prefix}", target_path, target_dir=target_dir)
        else:
            print(LOG_TEMPLATE.format("EXTRACTING (1/3)", source_path, f"{TEMP_DIR}/{target_prefix}"))
            _extract(source_path, f"{TEMP_DIR}/{target_prefix}")
            print(LOG_TEMPLATE.format("COMPRESSING (2/3)", f"{TEMP_DIR}/{target_prefix}", f"{TEMP_DIR}/{target_name}"))
            _compress(f"{TEMP_DIR}/{target_prefix}", f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING FILE (3/3)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp {TEMP_DIR}/{target_name} {target_path}

    # Cleanup intermediate storage
    !rm -rf {TEMP_DIR}

def _set_gh_token(token):
    os.environ["GITHUB_TOKEN"] = token


def _export_array(array, release_name, prefix="", splits=3):
    dir_path = f"/tmp_/{release_name}"
    !mkdir -p {dir_path}
    n_digits = len(str(splits - 1))
    subarrays = np.array_split(array, splits)
    for i, subarray in enumerate(subarrays):
        filename = f"{prefix}__{str(i).zfill(n_digits)}.npy"
        np.save(f"{dir_path}/{filename}", subarray)


def _concat_arrays(paths):
    return np.concatenate([np.load(path, allow_pickle=True) for path in sorted(paths)])


def _to_gh(user_name, repo_name, release_name, split_size=600, **arr_kwargs):
    # Assert that GitHub Auth token is set
    if "GITHUB_TOKEN" not in os.environ:
        print("GitHub authentication token is not set.")
        print("Set token using the '_set_gh_token(token_string)' method.")
        print("Minimal required auth scope is 'repo/public_repo' for public repositories.")
        print("URL: https://github.com/settings/tokens/new")
        return

    # Split arrays
    for prefix, array in arr_kwargs.items():
        splits = int((array.nbytes/1_000_000) // split_size) + 1
        _export_array(array, release_name, prefix=prefix, splits=splits)

    # Upload arrays
    github_release.gh_release_create(
        f"{user_name}/{repo_name}", 
        release_name, 
        publish=True, 
        name=release_name, 
        asset_pattern=f"/tmp_/{release_name}/*"
    )
    !rm -rf /tmp_/*


def _from_gh(user_name, repo_name, release_name):
    # Download release to temporary directory
    print("Downloading dataset in parallell ... ", end='\t')
    t0 = time.perf_counter()
    assets = github_release.get_assets(f"{user_name}/{repo_name}", tag_name=release_name)
    download_urls = [asset['browser_download_url'] for asset in assets]
    urls_str = " ".join(download_urls)
    !echo {urls_str} | xargs -n 1 -P 8 wget -q -P /tmp_/{release_name}_dl/
    t1 = time.perf_counter()
    print(f"done! ({t1 - t0:.3f} seconds)")

    # Load data into numpy arrays
    paths = glob.glob(f"/tmp_/{release_name}_dl/*.npy")
    groups = {}
    for path in paths:
        match = re.match(r".*/(.*)__[0-9]*\.npy", path)
        if match:
            prefix = match.group(1)
            groups[prefix] = groups.get(prefix, []) + [path]
    arrays_dict = {name: _concat_arrays(paths) for name, paths in groups.items()}
    !rm -rf /tmp_/*
    return arrays_dict
    

def _log_to_gh(user, repo, tag, log_dir="/tmp/logs"):
    # Create temporary directory for compressed logs
    !mkdir -p /tmp/compressed_logs
    
    # Compress all directories in log dir
    for dirname in os.listdir(log_dir):
        # Skip files
        if "." in dirname or dirname in compressed_dirs:
            continue

        # Compress
        _under(f"{log_dir}/{dirname}", f"/tmp/compressed_logs/{dirname}.tar.gz")
        compressed_dirs.add(dirname)

    # Upload compressed logs to GitHub
    github_release.gh_asset_upload(f"{user}/{repo}", tag, f"/tmp/compressed_logs/*.tar.gz")

    # Cleanup compressed logs
    !rm -rf /tmp/compressed_logs/*

def timeit(method):
    """Decorator that prints the wall-clock duration of each call to ``method``.

    The wrapper forwards all arguments, returns the wrapped function's result
    unchanged and prints ``"<name>: <seconds> s"`` after a successful call.
    Nothing is printed when ``method`` raises.

    Args:
        method: Callable to time.

    Returns:
        A wrapper that keeps ``method``'s name, docstring and other metadata.
    """
    # Imported locally so the decorator does not rely on imports elsewhere in
    # the notebook.
    import functools

    # Without wraps() every decorated function would report itself as "timed"
    # and lose its docstring.
    @functools.wraps(method)
    def timed(*args, **kw):
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        print(f"{method.__name__}: {te - ts:.8f} s")
        return result
    return timed

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types.

    Pass it as ``cls=NpEncoder`` to ``json.dump``/``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        Raises:
            TypeError: For any type not handled here (raised by the base class).
        """
        # np.bool_ is not a subclass of np.integer, so without this branch a
        # NumPy boolean would fall through to the base class and raise.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

@timeit
def _export_model(model, model_name, model_type, val_dataset, test_dataset, params, hparams, history, log_dir, n_prep_layers=None):
    # Create temporary directory
    target_dir = f"/tmp/models/{model_type}/{model_name}"
    !mkdir -p {target_dir}     #/tmp_/models/rnn_naive/rnn_naive_20201108_130308

    # Write export logs to file
    export_logs_path = os.path.join(target_dir, "export_logs.txt")
    with open(export_logs_path, 'w') as export_logs:
        with redirect_stdout(export_logs):
            # Profile model on inputs (average of n_runs cycles)
            n_runs = 5
            input_shape = val_dataset.element_spec[0].shape

            def time_examples(model, n):
                dummy = np.random.rand(n, *input_shape[1:])
                t0 = time.perf_counter()
                model.predict(dummy)
                return time.perf_counter() - t0

            with tf.device('/CPU:0'):
                cpu_profiles = {
                    "cpu_1": np.array([time_examples(model, 1) for _ in range(n_runs)]).mean(),
                    "cpu_10": np.array([time_examples(model, 10) for _ in range(n_runs)]).mean(),
                    "cpu_100": np.array([time_examples(model, 100) for _ in range(n_runs)]).mean()
                }

            with tf.device('/GPU:0'):
                gpu_profiles = {
                    "gpu_1": np.array([time_examples(model, 1) for _ in range(n_runs)]).mean(),
                    "gpu_10": np.array([time_examples(model, 10) for _ in range(n_runs)]).mean(),
                    "gpu_100": np.array([time_examples(model, 100) for _ in range(n_runs)]).mean()
                }

            # Get number of parameters
            params_counts = {
                "trainable_params": np.sum([K.count_params(w) for w in model.trainable_weights]),
                "non_trainable_params": np.sum([K.count_params(w) for w in model.non_trainable_weights])
            }
            params_counts["total_params"] = params_counts["trainable_params"] + params_counts["non_trainable_params"]

            # Generate evaluation metrics for validation and test set
            final_metrics_val = model.evaluate(val_dataset, return_dict=True)
            final_metrics_val = {f"final_val_{k}": v for k, v in final_metrics_val.items()}
            final_metrics_test = model.evaluate(test_dataset, return_dict=True)
            final_metrics_test = {f"final_test_{k}": v for k, v in final_metrics_test.items()}

            # Generate Dataframe and export to parquet
            logs_params = {
                **params,
                **hparams,
                **history.params,
                **cpu_profiles,
                **gpu_profiles,
                **params_counts,
                **final_metrics_val,
                **final_metrics_test
            }
            logs_df = pd.DataFrame({**history.history, "epoch": history.epoch})
            for param, value in logs_params.items():
                logs_df[param] = value
            logs_df.to_parquet(os.path.join(target_dir, f"{model_name}.parquet"))

            # Dump all parameters and metadata to .json file
            with open(os.path.join(target_dir, 'model_details.json'), 'w') as f:
                json.dump(logs_params, f, cls=NpEncoder, indent=4)

            def _convert_model(model, subdir="model"):
                # Create subdirectory
                subdir_path = os.path.join(target_dir, subdir)
                !mkdir -p {subdir_path}

                # Write model summary to file
                model_summary_path = os.path.join(subdir_path, "model_summary.txt")
                with open(model_summary_path, 'w') as model_summary:
                    with redirect_stdout(model_summary):
                        model.summary()

                # Export model summary as image
                model_summary_img_path = os.path.join(subdir_path, "model_summary.png")
                tf.keras.utils.plot_model(model, to_file=model_summary_img_path, show_shapes=True)

                # Generate model paths
                keras_model_path = os.path.join(subdir_path, "keras_model.h5")
                saved_model_path = os.path.join(subdir_path, "saved_model")
                tfjs_layers_model_path = os.path.join(subdir_path, "tfjs_layers_model")
                tfjs_graph_model_path = os.path.join(subdir_path, "tfjs_graph_model")

                # Save and convert model
                model.save(keras_model_path)
                tf.saved_model.save(model, saved_model_path)
                !tensorflowjs_converter --input_format=keras --output_format=tfjs_layers_model {keras_model_path} {tfjs_layers_model_path}
                !tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model {saved_model_path} {tfjs_graph_model_path}
            
            # Convert full model
            _convert_model(model, subdir="model")

            if n_prep_layers is not None:
                model_1 = tf.keras.Sequential(model.layers[:n_prep_layers])
                model_1.build(input_shape=input_shape)
                
                model_2 = tf.keras.Sequential(model.layers[n_prep_layers:])
                model_2.build(input_shape=model_1.layers[-1].output_shape)

                # Convert models
                _convert_model(model_1, subdir="submodel_1")
                _convert_model(model_2, subdir="submodel_2")

            # Compress TensorBoard logs
            model_log_dir = os.path.join(LOG_DIR, model_name)
            tensorboard_logs_path = os.path.join(target_dir, f"{model_name}.tar.gz")
            _under(model_log_dir, tensorboard_logs_path)

    # Upload logs to GCS
    _under(target_dir, f"gs://marvin-voice/models/{model_type}/{model_name}", auth_on_upload=False)
    return logs_df

In [None]:
# Download git repository
import os
# Clone the repository and change into it, unless the working directory
# already ends with "marvin-models". The %cd below makes that true after the
# first run, so re-running the cell does not clone again.
if not os.getcwd().endswith("marvin-models"):
    !git config --global user.email "patrikkja@gmail.com"
    !git config --global user.name "Patrik Kjærran"
    !git clone -q https://github.com/patrikkj/marvin-models.git
    %cd marvin-models

# Internal modules
import io, sys, glob, time
from datetime import datetime
from importlib import reload

# External modules
!pip install -q pydub
!pip install -q tensorflow-io
#!pip install -q -U tensorboard_plugin_profile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_io as tfio
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, LearningRateScheduler
from tensorboard.plugins.hparams import api as hp

# Colab modules
from google.colab import auth
from IPython import display
    
# Scripts (record_audio requires pydub installed)
import scripts

In [None]:
#rnn_path = "gs://marvin-voice/models/rnn_fft/rnn_fft_20201109_120328/model" best accuracy
#rnn_path = "gs://marvin-voice/models/rnn_fft/rnn_fft_20201110_184648/model"  newly created
# GCS locations of the exported "model" directory for each architecture
rnn_path = "gs://marvin-voice/models/rnn_fft/rnn_fft_20201111_022641/model"
rnn_naive_path = "gs://marvin-voice/models/rnn_naive/rnn_naive_20201115_034239/model"
cnn_path = "gs://marvin-voice/models/cnn_fft/cnn_fft_20201111_063110/model"

# Download each model directory from GCS to a local directory under /tmp
_under(rnn_path, "/tmp/rnn")
_under(rnn_naive_path, "/tmp/rnn_naive")
_under(cnn_path, "/tmp/cnn")

# Load the SavedModel export contained in each downloaded directory
rnn = tf.keras.models.load_model("/tmp/rnn/saved_model")
rnn_naive = tf.keras.models.load_model("/tmp/rnn_naive/saved_model")
cnn = tf.keras.models.load_model("/tmp/cnn/saved_model")

##### Test model on custom input and visualize

In [None]:
#@title Marvin!  { vertical-output: true, display-mode: "form" }
#&#128039;&#128039;&#128039;
#https://76bdzjdczr36-496ff2e9c6d22116-6006-colab.googleusercontent.com/#scalars
# Select which of the pre-loaded models to run (Colab form dropdown)
MODEL_TYPE = "RNN" #@param ["RNN", "CNN", "RNN (Naive)"]
if MODEL_TYPE == "RNN":
    model = rnn
elif MODEL_TYPE == "CNN":
    model = cnn
elif MODEL_TYPE == "RNN (Naive)":
    model = rnn_naive

# Record `duration` seconds of audio (Colab form slider)
duration = 2 #@param {type:"slider", min:1, max:10, step:1}
audio = scripts.record_audio.record(duration)
desired_sample_rate = 16_000
sample_rate = audio.frame_rate
raw_audio = audio.raw_data
# Hop between consecutive windows: one fifth of a second at the target rate
stride_size = desired_sample_rate//5
# Predictions above this value are treated as a detection
threshold = 0.5

# Interpret the raw bytes as int32 samples, convert to float and rescale.
# 32768.0**2 equals 2**30.
# NOTE(review): presumably this normalises 32-bit samples to a small range
# around zero — confirm against the recorder's sample format.
tensor = tf.io.decode_raw(audio.raw_data, tf.int32, fixed_length=sample_rate*duration*audio.sample_width)
tensor = tf.cast(tensor, tf.float32)
tensor /= 32768.0**2
tensor = tfio.audio.resample(tensor, sample_rate, desired_sample_rate)

# Generate sliding window dataset
# Each window holds desired_sample_rate samples (one second at the target
# rate) and consecutive windows start stride_size samples apart.
audio_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=tensor, 
    targets=None, 
    sequence_length=desired_sample_rate, 
    sequence_stride=stride_size, 
    batch_size=1)

# Model prediction
predictions = model.predict(audio_dataset)
is_marvins = predictions > threshold
clip_length = tensor.shape[0]
n_segments = predictions.shape[0]


# Plot figure
# One row for the full clip plus one row per segment; the narrow left column
# holds the prediction dots and the wide right column the waveforms.
FIG_WIDTH = 24 # Don't change
Y_SCALE_FACTOR = 0.8
DOT_SIZE = 900

fig, axes = plt.subplots(n_segments + 1, 2, figsize=(FIG_WIDTH, (n_segments + 1)*Y_SCALE_FACTOR), gridspec_kw={'width_ratios':[1,20]})
axes_flat = axes.flat
axes[0, 0].axis('off')
top_axis = axes[0, 1]
top_axis.plot(tensor.numpy(), color='white')
top_axis.axis('off')
top_axis.set_xbound(0, clip_length)

def set_size(w, h, ax):
    """Resize the figure of ``ax`` so that its subplot area measures w x h inches.

    The requested size is divided by the fraction of the figure covered by
    the subplot area (``figure.subplotpars``) to get the full figure size.
    Falls back to ``plt.gca()`` when ``ax`` is falsy.
    """
    if not ax:
        ax = plt.gca()
    figure = ax.figure
    margins = figure.subplotpars
    fig_width = float(w) / (margins.right - margins.left)
    fig_height = float(h) / (margins.top - margins.bottom)
    figure.set_size_inches(fig_width, fig_height)

# Draw one row per sliding-window segment: a prediction-coloured dot in the
# left column and the segment's waveform, placed at its offset within the
# full clip, in the right column.
for i, item in enumerate(zip(audio_dataset, predictions)):
    audio, prediction = item
    # Row i + 1 of the two-column grid (row 0 holds the full clip)
    dot_ax, ax = axes_flat[2*(i+1):2*(i+2)]

    # Fetch data
    # Use the shared `threshold` (the cut-off behind `is_marvins`) instead of
    # a second hard-coded 0.5, so the two cannot drift apart.
    is_marvin = prediction > threshold
    audio_data = audio.numpy().squeeze()
    x = np.arange(i*stride_size, i*stride_size + desired_sample_rate)

    # Create bubble
    colors = predictions
    area = DOT_SIZE #100 + is_marvins * 500
    dot_ax.scatter([0], [0], s=area, c=[prediction], alpha=0.9, cmap='RdYlGn', vmin=0, vmax=1, )
    dot_ax.set_xlim(-1, 1)
    dot_ax.axis('off')
    #set_size(1, 1, ax=dot_ax)

    # Plot segment
    #set_size(FIG_WIDTH, 1*n_segments+1, ax)
    ax.plot(x, audio_data, color='green' if is_marvin else 'grey')
    ax.set_xbound(0, clip_length)
    ax.axis('off')
    # NOTE(review): joining axes through get_shared_x_axes() is deprecated in
    # newer Matplotlib releases (ax.sharex(top_axis) is the documented
    # replacement) — confirm against the installed version.
    ax.get_shared_x_axes().join(ax, top_axis)
fig.tight_layout(rect=[0, 0, 1, 1])










# # Plot waveform with predictions
# def set_size(w,h, ax):
#     if not ax: ax=plt.gca()
#     l = ax.figure.subplotpars.left
#     r = ax.figure.subplotpars.right
#     t = ax.figure.subplotpars.top
#     b = ax.figure.subplotpars.bottom
#     figw = float(w)/(r-l)
#     figh = float(h)/(t-b)
#     ax.figure.set_size_inches(figw, figh)

# fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(24,3))

# # Create upper graph
# ax1.plot(tensor.numpy(), color='white')
# ax1.axis('off')

# # Create bubbles
# x_shift = stride_size//2
# x = np.linspace(x_shift, clip_length - stride_size//2, num=n_segments)
# y = np.ones(n_segments)
# colors = predictions
# area = 100 + is_marvins * 500
# num = 1
# ax2.scatter(x, y, s=area, c=colors, alpha=0.9, cmap='RdYlGn')
# ax2.axis('off')
# set_size(24, 1, ax=ax2)

# # Plot figure
# fig.tight_layout(rect=[0, 0, 1, 1])
# plt.show()
# plt.ioff()

##### Waveform visualizations

In [None]:
#@title Plot entire waveform
def set_size(w, h, ax):
    """Resize the figure of ``ax`` so that its subplot area measures w x h inches.

    The requested size is divided by the fraction of the figure covered by
    the subplot area (``figure.subplotpars``) to get the full figure size.
    Falls back to ``plt.gca()`` when ``ax`` is falsy.
    """
    if not ax:
        ax = plt.gca()
    figure = ax.figure
    margins = figure.subplotpars
    fig_width = float(w) / (margins.right - margins.left)
    fig_height = float(h) / (margins.top - margins.bottom)
    figure.set_size_inches(fig_width, fig_height)

# Uses globals defined by the recording/prediction cell: tensor, stride_size,
# clip_length, n_segments, predictions and is_marvins.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(24,3))

# Create upper graph
ax1.plot(tensor.numpy(), color='white')
ax1.axis('off')

# Create bubbles
# One bubble per segment, spread evenly along the clip; colour encodes the
# prediction value and bubbles above the threshold are drawn larger.
x_shift = stride_size//2
x = np.linspace(x_shift, clip_length - stride_size//2, num=n_segments)
y = np.ones(n_segments)
colors = predictions
area = 100 + is_marvins * 500
# NOTE(review): `num` is not used anywhere in this cell.
num = 1
ax2.scatter(x, y, s=area, c=colors, alpha=0.9, cmap='RdYlGn')
ax2.axis('off')
set_size(24, 2, ax=ax2)

# Plot figure
fig.tight_layout(rect=[0, 0, 1, 1])
plt.show()
plt.ioff()

In [None]:
#@title Plot segments
# Plot figure
# Uses globals defined by the recording/prediction cell: n_segments, tensor
# and clip_length.
FIG_WIDTH = 24

# One row for the full clip plus one row per segment; the narrow left column
# holds the prediction dots and the wide right column the waveforms.
fig, axes = plt.subplots(n_segments + 1, 2, figsize=(FIG_WIDTH, (n_segments + 1)*1), gridspec_kw={'width_ratios':[1,20]})
axes_flat = axes.flat
axes[0, 0].axis('off')
top_axis = axes[0, 1]
top_axis.plot(tensor.numpy(), color='white')
top_axis.axis('off')
top_axis.set_xbound(0, clip_length)

def set_size(w, h, ax):
    """Resize the figure of ``ax`` so that its subplot area measures w x h inches.

    The requested size is divided by the fraction of the figure covered by
    the subplot area (``figure.subplotpars``) to get the full figure size.
    Falls back to ``plt.gca()`` when ``ax`` is falsy.
    """
    if not ax:
        ax = plt.gca()
    figure = ax.figure
    margins = figure.subplotpars
    fig_width = float(w) / (margins.right - margins.left)
    fig_height = float(h) / (margins.top - margins.bottom)
    figure.set_size_inches(fig_width, fig_height)

# Draw one row per sliding-window segment: a prediction-coloured dot in the
# left column and the segment's waveform, placed at its offset within the
# full clip, in the right column.
for i, item in enumerate(zip(audio_dataset, predictions)):
    audio, prediction = item
    # Row i + 1 of the two-column grid (row 0 holds the full clip)
    dot_ax, ax = axes_flat[2*(i+1):2*(i+2)]

    # Fetch data
    # Use the `threshold` global set by the recording/prediction cell (the
    # cut-off behind `is_marvins`) instead of a second hard-coded 0.5.
    is_marvin = prediction > threshold
    audio_data = audio.numpy().squeeze()
    x = np.arange(i*stride_size, i*stride_size + desired_sample_rate)

    # Create bubble
    colors = predictions
    area = 1000 #100 + is_marvins * 500
    dot_ax.scatter([0], [0], s=area, c=[prediction], alpha=0.9, cmap='RdYlGn', vmin=0, vmax=1, )
    dot_ax.set_xlim(-1, 1)
    dot_ax.axis('off')
    #set_size(1, 1, ax=dot_ax)

    # Plot segment
    #set_size(FIG_WIDTH, 1*n_segments+1, ax)
    ax.plot(x, audio_data, color='green' if is_marvin else 'grey')
    ax.set_xbound(0, clip_length)
    ax.axis('off')
    # NOTE(review): joining axes through get_shared_x_axes() is deprecated in
    # newer Matplotlib releases (ax.sharex(top_axis) is the documented
    # replacement) — confirm against the installed version.
    ax.get_shared_x_axes().join(ax, top_axis)
fig.tight_layout(rect=[0, 0, 1, 1])

In [None]:
#@title  Plot detailed audio pairs
def set_size(w, h, ax):
    """Resize the figure of ``ax`` so that its subplot area measures w x h inches.

    The requested size is divided by the fraction of the figure covered by
    the subplot area (``figure.subplotpars``) to get the full figure size.
    Falls back to ``plt.gca()`` when ``ax`` is falsy.
    """
    if not ax:
        ax = plt.gca()
    figure = ax.figure
    margins = figure.subplotpars
    fig_width = float(w) / (margins.right - margins.left)
    fig_height = float(h) / (margins.top - margins.bottom)
    figure.set_size_inches(fig_width, fig_height)

# Uses globals defined by the recording/prediction cell: stride_size,
# clip_length, n_segments, predictions, is_marvins and tensor.
# One bubble per segment, spread evenly along the clip; colour encodes the
# prediction value and bubbles above the threshold are drawn larger.
x_shift = stride_size//2
x = np.linspace(x_shift, clip_length - stride_size//2, num=n_segments)
y = np.ones(n_segments)
colors = predictions
area = 100 + is_marvins * 500

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(24,3))

# Upper row: waveform of the whole clip
ax1.plot(tensor.numpy(), color='white')
ax1.axis('off')

# Lower row: prediction bubbles
ax2.scatter(x, y, s=area, c=colors, alpha=0.9, cmap='RdYlGn')
ax2.axis('off')
set_size(24, 1, ax=ax2)

fig.tight_layout(rect=[0, 0, 1, 1])
plt.show()
plt.ioff()

# Visualize outputs
# For every sliding-window segment, show an audio player followed by a small
# waveform image (green when the prediction exceeds the threshold).
for i, item in enumerate(zip(audio_dataset, predictions)):
    audio, prediction = item
    # Use the `threshold` and `desired_sample_rate` globals set by the
    # recording/prediction cell instead of repeating 0.5 and 16_000 here, so
    # this cell follows any change made there.
    is_marvin = prediction > threshold
    audio_data = audio.numpy().squeeze()

    displays = []
    #displays.append(f"{str(is_marvin).capitalize()}")
    displays.append(display.Audio(audio_data, rate=desired_sample_rate))

    # Render the waveform into an in-memory PNG so it can be shown next to
    # the audio widget without leaving an open figure behind.
    buf = io.BytesIO()
    fig = plt.figure(figsize=(8,1.2))
    plt.plot(audio_data, color='green' if is_marvin else 'grey')
    plt.axis('off')
    fig.tight_layout(rect=[0, 0, 1, 1])
    plt.savefig(buf, format='png', transparent=True)
    plt.close(fig)
    buf.seek(0)

    displays.append(display.Image(buf.read()))
    display.display(*displays)
    print('\n')
