# Music Genre Classification

Classify the musical genre of a given audio track - from data to deployed model. Adapted from the work of Huang, Serafini, and Pugh [1].

## Authors
- Sebastian Lehrig <sebastian.lehrig1@ibm.com>
- Marvin Giessing <MARVING@de.ibm.com>

## License
Apache-2.0 License

## References
[1] original paper: http://cs229.stanford.edu/proj2018/report/21.pdf

[2] code: https://github.com/derekahuang/Music-Classification

[3] Preprocessed Data: https://drive.google.com/file/d/12mCgkvbmissLh2Vop0bp_t98G8QCaV1E/view?usp=sharing

## 0.) Imports & Constants

In [None]:
import IPython.display
import json
import kfp
from kfp.components import InputPath, OutputPath
import kfp.dsl as dsl
from kfp.dsl import PipelineConf, data_passing_methods
from kubernetes.client.models import V1Volume, V1PersistentVolumeClaimVolumeSource
import librosa as lb
from librosa import display
import math
import numpy as np
import os
from pydoc import importfile
import pylab
import requests
from typing import List, NamedTuple

%load_ext lab_black

In [None]:
# Environment-specific configurations
# - Activate train-bursting by setting a CLUSTER_CONFIGURATION_SECRET containing the remote cluster configuration
# - Activate distributed training by setting NUMBER_OF_WORKERS > 1; the TRAINING_GPUS value then applies to each worker
#
# %env CLUSTER_CONFIGURATION_SECRET remote-power-cluster
# %env CLUSTER_CONFIGURATION_SECRET remote-x86-cluster
# %env CLUSTER_CONFIGURATION_SECRET remote-x86-telekom-cluster
# %env TRAINING_GPUS 0
# %env NUMBER_OF_WORKERS 1
# %env TRAINING_NODE_SELECTOR nvidia.com/gpu.product: "Tesla-V100-SXM2-32GB"
# %env TRAINING_NODE_SELECTOR kubernetes.io/hostname: node2
# %env TRAINING_NODE_SELECTOR worker_type: baremetal_worker
#
# Reset:
# del os.environ['CLUSTER_CONFIGURATION_SECRET']
# del os.environ['TRAINING_GPUS']
# del os.environ['TRAINING_NODE_SELECTOR']

In [None]:
# Container image used as base image for the custom components created in this notebook.
BASE_IMAGE = "quay.io/ibm/kubeflow-notebook-image-ppc64le:latest"

# Local checkout folder, Git repository and branch/tag of the reusable component catalog.
COMPONENT_CATALOG_FOLDER = f"{os.getenv('HOME')}/components"
COMPONENT_CATALOG_GIT = "https://github.com/lehrig/kubeflow-ppc64le-components.git"
COMPONENT_CATALOG_RELEASE = "main"

# Environment variables are strings; the value is converted with int() where a number is needed.
NUMBER_OF_WORKER = os.getenv("NUMBER_OF_WORKERS", default="1")

# Arguments handed to the pipeline run; keys match the pipeline function's parameters.
ARGUMENTS = {
    "blackboard": "artefacts",
    "dataset_url": "Lehrig/GTZAN-Collection",
    "dataset_configuration": "mel_spectrograms",
    "dataset_label_columns": ["genre"],
    "model_name": "music-classification",
    "cluster_configuration_secret": os.getenv(
        "CLUSTER_CONFIGURATION_SECRET", default=""
    ),
    "training_gpus": os.getenv("TRAINING_GPUS", default="0"),
    "number_of_workers": NUMBER_OF_WORKER,
    # A single worker runs as a plain job; more than one worker switches to MPI.
    "distribution_type": "Job" if int(NUMBER_OF_WORKER) <= 1 else "MPI",
    "training_node_selector": os.getenv("TRAINING_NODE_SELECTOR", default=""),
}
MODEL_NAME = ARGUMENTS["model_name"]

# The namespace is later used as a run namespace and inside an HTTP Host header,
# so surrounding whitespace (e.g. a trailing newline) is stripped defensively.
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
    NAMESPACE = f.read().strip()

ARGUMENTS

## 1.) Load catalog with reusable Kubeflow components

In [None]:
# Fetch the selected release of the reusable component catalog into COMPONENT_CATALOG_FOLDER.
!git clone --branch $COMPONENT_CATALOG_RELEASE $COMPONENT_CATALOG_GIT $COMPONENT_CATALOG_FOLDER

In [None]:
# Import the catalog module straight from its file path inside the cloned repository.
catalog_module_path = f"{COMPONENT_CATALOG_FOLDER}/catalog.py"
CATALOG = importfile(catalog_module_path)

## 2.) Create custom components

### 2.1) Component: Preprocess data (one hot encoding etc.)

In [None]:
def preprocess_dataset(
    dataset_dir: InputPath(str),
    train_dataset_dir: OutputPath(str),
    validation_dataset_dir: OutputPath(str),
    test_dataset_dir: OutputPath(str),
    batch_size: int = 200,
):
    """One-hot encode the genre label and export each split as a TensorFlow dataset.

    The dataset stored at `dataset_dir` is loaded, its "genre" column is replaced
    by one-hot vectors, and the "train", "validation" and "test" splits are saved
    to `train_dataset_dir`, `validation_dataset_dir` and `test_dataset_dir`.
    Only the training split is shuffled. `batch_size` is used both for the
    batched mapping step and as the batch size of the exported datasets.
    """

    from datasets import load_from_disk
    import numpy as np
    import os
    from transformers import DefaultDataCollator

    print(f"Loading input dataset from {dataset_dir}...")
    raw_splits = load_from_disk(dataset_dir)

    # Row i of the identity matrix is the one-hot vector of class index i.
    genre_feature = raw_splits["train"].features["genre"]
    identity = np.eye(genre_feature.num_classes)

    def one_hot_encode(batch):
        batch["genre"] = [identity[class_index] for class_index in batch["genre"]]
        return batch

    encoded_splits = raw_splits.map(
        one_hot_encode,
        batched=True,
        batch_size=batch_size,
        num_proc=2,
        keep_in_memory=True,
    )

    collator = DefaultDataCollator(return_tensors="tf")

    def export_split(split, target_dir, shuffle):
        # TensorFlow is imported lazily, i.e. only after the mapping step above has run.
        import tensorflow as tf

        tf_split = split.to_tf_dataset(
            columns=["mel_spectrogram"],
            label_cols=["genre"],
            shuffle=shuffle,
            batch_size=batch_size,
            collate_fn=collator,
        )

        print(f"Saving pre-processed dataset to '{target_dir}'...")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        tf.data.Dataset.save(tf_split, target_dir)

        print(f"Pre-processed dataset saved. Contents of '{target_dir}':")
        print(os.listdir(target_dir))

    # (split name, output directory, shuffle?) in the order the splits are written.
    export_plan = (
        ("train", train_dataset_dir, True),
        ("validation", validation_dataset_dir, False),
        ("test", test_dataset_dir, False),
    )
    for split_name, target_dir, shuffle in export_plan:
        export_split(encoded_splits[split_name], target_dir, shuffle)

    print("Finished.")


preprocess_dataset_comp = kfp.components.create_component_from_func(
    func=preprocess_dataset, base_image=BASE_IMAGE
)

### 2.2) Specification: Train the model (used as parameter to train component)

In [None]:
def train_model(
    train_dataset_dir: InputPath(str),
    validation_dataset_dir: InputPath(str),
    model_dir: OutputPath(str),
    epochs: int = 100,
    batch_size: int = 200,
):
    """Train the CNN genre classifier on a single worker and save it to `model_dir`.

    Args:
        train_dataset_dir: Directory of the saved TensorFlow training dataset.
        validation_dataset_dir: Directory of the saved TensorFlow validation dataset.
        model_dir: Output directory receiving the best-weights checkpoint and the
            final saved model.
        epochs: Maximum number of epochs; early stopping may end training sooner.
        batch_size: Not used by this function; the loaded datasets are passed to
            `fit` as stored. Kept so the signature matches the distributed variant.

    Raises:
        KeyError: If the `TENSORBOARD_S3_ADDRESS` environment variable is not set.
    """

    import os
    import tensorflow as tf
    from tensorflow.keras.callbacks import (
        EarlyStopping,
        ModelCheckpoint,
        ReduceLROnPlateau,
        TensorBoard,
    )
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import (
        BatchNormalization,
        Conv2D,
        Dense,
        Dropout,
        Flatten,
        MaxPooling2D,
    )
    from tensorflow.keras import regularizers
    import time

    def load_datasets():
        train_dataset = tf.data.Dataset.load(train_dataset_dir)
        validation_dataset = tf.data.Dataset.load(validation_dataset_dir)
        return (train_dataset, validation_dataset)

    def build_model():
        model = Sequential()

        # Feature Learning Layers
        model.add(
            Conv2D(
                64,
                kernel_size=(4, 4),
                activation="relu",
                kernel_regularizer=regularizers.l2(0.04),
                input_shape=(64, 173, 1),
            )
        )
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 4)))

        model.add(
            Conv2D(
                64, (3, 5), activation="relu", kernel_regularizer=regularizers.l2(0.04)
            )
        )
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(
            Conv2D(
                64, (2, 2), activation="relu", kernel_regularizer=regularizers.l2(0.04)
            )
        )
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        # Classification Layers
        model.add(Flatten())
        model.add(
            Dense(64, activation="relu", kernel_regularizer=regularizers.l2(0.04))
        )
        model.add(Dropout(0.5))
        model.add(
            Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.04))
        )
        # One output unit per class
        model.add(Dense(10, activation="softmax"))

        return model

    os.makedirs(model_dir, exist_ok=True)

    print("Loading datasets...")
    train_dataset, validation_dataset = load_datasets()

    print("Building model...")
    model = build_model()
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    print("Compiling model...")
    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["categorical_accuracy"],
    )

    print("Initializing training callbacks...")
    callbacks = [
        EarlyStopping(monitor="val_loss", patience=20, verbose=0, mode="min"),
        # NOTE(review): the best weights are written to best_model.keras but are
        # not loaded back before the final model.save() below - confirm intended.
        ModelCheckpoint(
            f"{model_dir}/best_model.keras",
            monitor="val_loss",
            save_best_only=True,
            save_weights_only=True,
            mode="min",
        ),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.1,
            patience=7,
            verbose=1,
            min_delta=0.0001,
            mode="min",
        ),
        TensorBoard(
            log_dir=os.environ["TENSORBOARD_S3_ADDRESS"],
            histogram_freq=1,
        ),
    ]

    print("Starting model training...")
    start = time.time()
    hist = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=epochs,
        callbacks=callbacks,
    )
    print("\n\nTraining took ", time.time() - start, "seconds")

    print("Model train history:")
    print(hist.history)

    print(f"Saving model to: {model_dir}")
    model.save(model_dir)
    print(f"Model saved to: {model_dir}")

    print("Finished.")

### Distributed Training: Un-collapse below cell!

In [None]:
def train_distributed_model(
    train_dataset_dir: InputPath(str),
    validation_dataset_dir: InputPath(str),
    model_dir: OutputPath(str),
    epochs: int = 100,
    batch_size: int = 200,
):
    """Train the CNN genre classifier with Horovod and save it to `model_dir`.

    Every worker trains on its own shard of the training data; only the worker
    with rank 0 writes checkpoints and the final model.

    Args:
        train_dataset_dir: Directory of the saved TensorFlow training dataset.
        validation_dataset_dir: Directory of the saved TensorFlow validation dataset.
        model_dir: Output directory receiving the best-weights checkpoint and the
            final saved model.
        epochs: Maximum number of epochs; early stopping may end training sooner.
        batch_size: Batch size applied to each worker's shard after re-batching.

    Raises:
        KeyError: If the `TENSORBOARD_S3_ADDRESS` environment variable is not set.
    """

    import horovod.tensorflow.keras as hvd
    import os
    import tensorflow as tf
    from tensorflow.keras.callbacks import (
        EarlyStopping,
        ModelCheckpoint,
        ReduceLROnPlateau,
        TensorBoard,
    )
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import (
        BatchNormalization,
        Conv2D,
        Dense,
        Dropout,
        Flatten,
        MaxPooling2D,
    )
    from tensorflow.keras import regularizers
    import tensorflow_datasets as tfds
    import time

    def load_datasets():
        train_dataset = tf.data.Dataset.load(train_dataset_dir)
        validation_dataset = tf.data.Dataset.load(validation_dataset_dir)
        return (train_dataset, validation_dataset)

    def build_model():
        model = Sequential()

        # Feature Learning Layers
        model.add(
            Conv2D(
                64,
                kernel_size=(4, 4),
                activation="relu",
                kernel_regularizer=regularizers.l2(0.04),
                input_shape=(64, 173, 1),
            )
        )
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 4)))

        model.add(
            Conv2D(
                64, (3, 5), activation="relu", kernel_regularizer=regularizers.l2(0.04)
            )
        )
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(
            Conv2D(
                64, (2, 2), activation="relu", kernel_regularizer=regularizers.l2(0.04)
            )
        )
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        # Classification Layers
        model.add(Flatten())
        model.add(
            Dense(64, activation="relu", kernel_regularizer=regularizers.l2(0.04))
        )
        model.add(Dropout(0.5))
        model.add(
            Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.04))
        )
        # One output unit per class
        model.add(Dense(10, activation="softmax"))

        return model

    # Runs on every worker. exist_ok avoids a check-then-create race when several
    # workers target the same directory at the same time.
    os.makedirs(model_dir, exist_ok=True)

    print("Initializing Horovod/MPI for distributed training...")
    hvd.init()

    # Pin GPU to be used to process local rank (one GPU per process)
    gpus = tf.config.experimental.list_physical_devices("GPU")
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], "GPU")

    # Prepare distributed training with GPU support
    os.environ["NCCL_DEBUG"] = "INFO"
    tfds.disable_progress_bar()

    # see https://horovod.readthedocs.io/en/stable/api.html
    print("==============================================")
    print(f"hvd.rank(): {hvd.rank()}")
    print(f"hvd.local_rank(): {hvd.local_rank()}")
    print(f"hvd.size(): {hvd.size()}")
    print(f"hvd.local_size(): {hvd.local_size()}")
    print("gpus:")
    print(gpus)
    print("==============================================")

    print("Loading datasets...")
    train_dataset, validation_dataset = load_datasets()

    print("Making traininig dataset ready for distributed training...")
    # Best shuffling needs a buffer with size equal to the size of the
    # dataset. Approximate values should be fine here.
    dataset_elements = 1000  # hard to determine dynamically in TFDataset
    approx_shard_train_size = dataset_elements // hvd.size() + 1

    # References:
    # - shard: https://github.com/horovod/horovod/issues/2623#issuecomment-768435610
    # - cache & prefetch: https://stackoverflow.com/questions/59228816/what-do-the-tensorflow-datasets-functions-cache-and-prefetch-do
    # - shuffle: https://stackoverflow.com/questions/64372390/what-does-buffer-size-do-in-tensorflow-dataset-shuffling
    distributed_train_dataset = (
        train_dataset.unbatch()  # Batch after sharding
        .shard(num_shards=hvd.size(), index=hvd.rank())  # 1 shard per worker
        .cache()  # Reuse data on next epoch
        .shuffle(
            buffer_size=approx_shard_train_size, seed=42, reshuffle_each_iteration=False
        )  # Randomize shards
        .batch(batch_size)
        .repeat()  # Avoid last batch being of unequal size
        .prefetch(tf.data.AUTOTUNE)  # Overlap preprocessing and training
    )

    print("Building model...")
    model = build_model()
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    # The base learning rate is scaled by the number of workers.
    opt = tf.keras.optimizers.Adam(learning_rate=0.001 * hvd.size())
    # Horovod: add Horovod DistributedOptimizer.
    opt = hvd.DistributedOptimizer(opt)

    print("Compiling model...")
    model.compile(
        loss="categorical_crossentropy",
        optimizer=opt,
        metrics=["categorical_accuracy"],
        experimental_run_tf_function=False,
    )

    print("Initializing training callbacks...")
    callbacks = [
        EarlyStopping(monitor="val_loss", patience=20, verbose=0, mode="min"),
        ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.1,
            patience=7,
            verbose=1,
            min_delta=0.0001,
            mode="min",
        ),
        TensorBoard(
            log_dir=os.environ["TENSORBOARD_S3_ADDRESS"],
            histogram_freq=1,
        ),
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    ]
    # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(
            ModelCheckpoint(
                f"{model_dir}/best_model.keras",
                monitor="val_loss",
                save_best_only=True,
                save_weights_only=True,
                mode="min",
            )
        )

    print("Starting model training...")
    start = time.time()
    hist = model.fit(
        distributed_train_dataset,
        validation_data=validation_dataset,
        epochs=epochs,
        steps_per_epoch=approx_shard_train_size // batch_size
        + 1,  # Needed when using repeat()
        callbacks=callbacks,
        verbose=1 if hvd.rank() == 0 else 0,
    )

    # Only rank 0 reports results and persists the final model.
    if hvd.rank() == 0:
        print("\n\nTraining took ", time.time() - start, "seconds")

        print("Model train history:")
        print(hist.history)

        print(f"Saving model to: {model_dir}")
        model.save(model_dir)
        print(f"Model saved to: {model_dir}")

        print("Finished.")

In [None]:
# Serialize the training function matching the configured worker count: the
# single-worker variant for at most one worker, the Horovod variant otherwise.
if int(ARGUMENTS["number_of_workers"]) <= 1:
    train_specification = kfp.components.func_to_component_text(func=train_model)
else:
    train_specification = kfp.components.func_to_component_text(
        func=train_distributed_model
    )

### 2.3) Component: Evaluate model with test data

In [None]:
def evaluate_model(
    test_dataset_dir: InputPath(str), model_dir: InputPath(str), batch_size: int = 20
) -> NamedTuple("EvaluationOutput", [("mlpipeline_metrics", "Metrics")]):
    """Evaluate a saved model on a saved test dataset and report loss and accuracy.

    Args:
        test_dataset_dir: Directory of the saved TensorFlow test dataset.
        model_dir: Directory of the saved Keras model.
        batch_size: Not used by this function; the dataset is evaluated as stored.

    Returns:
        A named tuple whose `mlpipeline_metrics` field holds the JSON-encoded
        metrics ("loss" and "accuracy") for Kubeflow Pipelines.
    """

    from collections import namedtuple
    import json
    import tensorflow as tf

    # Same loader as used by the training functions (the datasets are written
    # with tf.data.Dataset.save).
    test_dataset = tf.data.Dataset.load(test_dataset_dir)
    model = tf.keras.models.load_model(model_dir)
    (loss, accuracy) = model.evaluate(test_dataset)

    print((loss, accuracy))

    # Metric values are emitted as JSON numbers. The loss is not a ratio, so it
    # is reported as a raw value instead of a percentage.
    metrics = {
        "metrics": [
            {"name": "loss", "numberValue": float(loss), "format": "RAW"},
            {
                "name": "accuracy",
                "numberValue": float(accuracy),
                "format": "PERCENTAGE",
            },
        ]
    }

    out_tuple = namedtuple("EvaluationOutput", ["mlpipeline_metrics"])

    return out_tuple(json.dumps(metrics))


evaluate_model_comp = kfp.components.create_component_from_func(
    func=evaluate_model, base_image=BASE_IMAGE
)

## 3.) Create the actual pipeline by combining the components

In [None]:
# Pipeline definition: load dataset -> quality report & preprocessing -> training
# -> evaluation & confusion matrix -> ONNX conversion -> upload -> KServe deployment.
# Parameter names match the keys of the ARGUMENTS dictionary passed at run creation.
@dsl.pipeline(
    name="Music Genre Classification",
    description="An example pipeline that performs a music genre classification on audio data",
)
def music_genre_classification_pipeline(
    blackboard: str,
    dataset_url: str,
    dataset_configuration: str,
    dataset_label_columns: List[str],
    model_name: str,
    cluster_configuration_secret: str,
    training_gpus: int,
    number_of_workers: int,
    distribution_type: str,
    training_node_selector: str,
):
    # Volume named after `blackboard`; the run configuration references a claim
    # with the same name suffix for passing data between steps.
    create_blackboard = dsl.VolumeOp(
        name="Create Artefacts Blackboard",
        resource_name=blackboard,
        modes=dsl.VOLUME_MODE_RWO,
        size="4Gi",
        set_owner_reference=True,
    )

    load_dataset_task = CATALOG.load_dataset_comp(
        path=dataset_url,
        configuration=dataset_configuration,
        label_columns=dataset_label_columns,
    )
    # Explicit ordering: no output of the volume op is consumed by this task.
    load_dataset_task.after(create_blackboard)

    # The report is built on the "test" split only; its result is not consumed
    # by any later step in this pipeline.
    CATALOG.create_dataset_quality_report(
        dataset_dir=load_dataset_task.outputs["dataset_dir"],
        dataset_type="huggingface",
        additional_args={"split": "test"},
    )

    preprocess_dataset_task = preprocess_dataset_comp(
        dataset_dir=load_dataset_task.outputs["dataset_dir"],
    )

    # Provides the TensorBoard log address that is passed to the training step below.
    monitor_training_task = CATALOG.monitor_training_comp()

    # Mapping for the InputPath/OutputPath parameters of the training function
    # ("train_dataset_dir", "validation_dataset_dir", "model_dir"):
    # Use name of parameters of train component on right-hand side.
    train_parameters = {
        "train_dataset_dir": "train_dataset_dir",
        "validation_dataset_dir": "validation_dataset_dir",
        "model_dir": "model_dir",
    }

    distribution_specification = {
        "distribution_type": distribution_type,
        "number_of_workers": number_of_workers,
    }

    # `train_specification` is the serialized training function created earlier
    # in this notebook.
    train_model_task = CATALOG.train_model_comp(
        preprocess_dataset_task.outputs["train_dataset_dir"],
        preprocess_dataset_task.outputs["validation_dataset_dir"],
        train_specification,
        train_parameters,
        model_name=model_name,
        gpus=training_gpus,
        node_selector=training_node_selector,
        tensorboard_s3_address=monitor_training_task.outputs["tensorboard_s3_address"],
        cluster_configuration_secret=cluster_configuration_secret,
        distribution_specification=distribution_specification,
    )

    # Evaluation runs on the held-out test split.
    evaluate_model_comp(
        preprocess_dataset_task.outputs["test_dataset_dir"],
        train_model_task.outputs["model_dir"],
    )

    CATALOG.plot_confusion_matrix_comp(
        input_columns=["mel_spectrogram"],
        label_columns=load_dataset_task.outputs["labels"],
        test_dataset_dir=preprocess_dataset_task.outputs["test_dataset_dir"],
        model_dir=train_model_task.outputs["model_dir"],
    )

    convert_model_to_onnx_task = CATALOG.convert_model_to_onnx_comp(
        train_model_task.outputs["model_dir"]
    )

    upload_model_task = CATALOG.upload_model_comp(
        convert_model_to_onnx_task.outputs["onnx_model_dir"], project_name=model_name
    )

    deploy_model_with_kserve_task = CATALOG.deploy_model_with_kserve_comp(
        project_name=model_name,
        model_version=upload_model_task.outputs["model_version"],
    )

    # Ordering is stated explicitly in addition to the `model_version` data dependency.
    deploy_model_with_kserve_task.after(upload_model_task)

## 4.) Run the pipeline within an experiment
Create a pipeline run, using a pipeline configuration that:
- enables data passing via persistent volumes (faster than the default MinIO-based passing)
- disables caching (which currently is not supported for data passing via volumes)

In [None]:
# See: https://www.kubeflow.org/docs/components/pipelines/overview/caching/#managing-caching-staleness
def disable_cache_transformer(op):
    """Set the maximum cache staleness of `op` to "P0D" and return the op.

    Container ops are configured through their execution options; every other
    op type receives the equivalent pod annotation.
    """
    if not isinstance(op, dsl.ContainerOp):
        op.add_pod_annotation(
            name="pipelines.kubeflow.org/max_cache_staleness", value="P0D"
        )
        return op

    op.execution_options.caching_strategy.max_cache_staleness = "P0D"
    return op


# The claim name is prefixed with the workflow-name placeholder, followed by the
# blackboard name taken from the pipeline arguments.
blackboard_name = ARGUMENTS["blackboard"]
blackboard_volume = V1Volume(
    name=blackboard_name,
    persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
        "{{workflow.name}}-" + blackboard_name
    ),
)

# Disable caching for every op and pass data between steps via the blackboard volume.
pipeline_conf = PipelineConf()
pipeline_conf.add_op_transformer(disable_cache_transformer)
pipeline_conf.data_passing_method = data_passing_methods.KubernetesVolume(
    volume=blackboard_volume,
    path_prefix=blackboard_name + "/",
)

kfp.Client().create_run_from_pipeline_func(
    music_genre_classification_pipeline,
    arguments=ARGUMENTS,
    namespace=NAMESPACE,
    pipeline_conf=pipeline_conf,
)

## 5.) Test model deployment
See API documentation: https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md

### 5.1) Check model endpoint availability

In [None]:
# Service name of the deployed predictor; used for both the Host header and the URL.
PREDICTOR_SERVICE = f"{MODEL_NAME}-predictor-default"

HOST = f"{PREDICTOR_SERVICE}.{NAMESPACE}"
HEADERS = {"Host": HOST}
MODEL_ENDPOINT = f"http://{PREDICTOR_SERVICE}/v2/models/{MODEL_NAME}"

# Query the model endpoint and show the decoded JSON answer.
res = requests.get(MODEL_ENDPOINT, headers=HEADERS)
response = json.loads(res.text)
response

Note you can also do this:
```curl -H "Host: $HOST" $MODEL_ENDPOINT```

### 5.2) Get test audio
See: https://commons.wikimedia.org/wiki/Category:Audio_files_of_blues

In [None]:
# Local file name under which the downloaded sample is stored.
AUDIO = "audio.ogg"

# Sample track used for testing; the commented URL below is an alternative sample.
AUDIO_URL = "https://upload.wikimedia.org/wikipedia/commons/7/7c/Boogie_lead_riff.ogg"
# AUDIO_URL = "https://upload.wikimedia.org/wikipedia/commons/9/99/Blues_Rock.ogg"

# Download the sample to the local file name defined above.
!wget $AUDIO_URL -O $AUDIO

In [None]:
# Render an inline audio player for the downloaded sample.
IPython.display.Audio(AUDIO)

### 5.3) Convert test audio to 2-second Mel Spectrograms

In [None]:
# Sampling rate in Hz. NOTE(review): not referenced by the functions in this
# cell, which call librosa without an explicit rate - confirm the defaults match.
SR = 22050
# FFT window size passed to the mel spectrogram computation.
N_FFT = 512
# Hop of half a window, i.e. consecutive windows overlap by 50%.
HOP_LENGTH = N_FFT // 2
# Number of mel bands; equals the first dimension of the model's input shape.
N_MELS = 64


def convert_to_melspecs(filename):
    """Cut the audio file into segments and return their log mel spectrograms."""
    return batch_log_melspectrogram(get_batches(filename))


def get_batches(audio):
    """Split an audio file into consecutive 2-second mono segments.

    Segments start at offsets 2, 4, ... seconds up to (but excluding) the
    rounded-down duration, so the first two seconds are skipped.

    Args:
        audio: Path of the audio file to load.

    Returns:
        A list with one sample array per segment (empty for very short files).
    """
    y, sr = lb.load(audio, mono=True)

    # Keyword arguments are required here: recent librosa releases no longer
    # accept the signal as a positional argument.
    duration = lb.get_duration(y=y, sr=sr)

    # prune first 2 seconds and ending (assumption: does not include important data)
    return [
        lb.load(audio, mono=True, offset=offset, duration=2.0)[0]
        for offset in range(2, math.floor(duration), 2)
    ]


def log_melspectrogram(data):
    """Return the dB-scaled, squared mel spectrogram of one audio segment."""
    mel = lb.feature.melspectrogram(
        y=data, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS
    )
    # NOTE(review): the spectrogram is squared before the dB conversion;
    # presumably this mirrors how the training data was produced - confirm
    # before changing.
    squared = np.square(mel)
    return lb.power_to_db(squared)


def batch_log_melspectrogram(data_list):
    """Stack the log mel spectrograms of all but the last segment.

    A trailing axis of size 1 is appended to the stacked array.
    """
    # The last entry of the list is deliberately left out.
    stacked = np.asarray([log_melspectrogram(segment) for segment in data_list[:-1]])
    segments, bands, frames = stacked.shape[0], stacked.shape[1], stacked.shape[2]
    return stacked.reshape(segments, bands, frames, 1)


melspecs = convert_to_melspecs(AUDIO)

### 5.4) Visualize a Mel Spectrogram

In [None]:
# Pick the spectrogram in the middle of the track and drop its trailing axis.
middle = melspecs.shape[0] // 2
example_melspec = melspecs[middle].reshape(melspecs.shape[1], melspecs.shape[2])

# Plot without axis decorations; the axes span the whole figure (no white edge).
pylab.axis("off")
pylab.axes([0.0, 0.0, 1.0, 1.0], frameon=False, xticks=[], yticks=[])
display.specshow(example_melspec, y_axis="mel", x_axis="time")

### 5.5) Score example Mel Spectrogram against model 

In [None]:
def score(melspec):
    """Send one mel spectrogram to the deployed model and return its output data.

    The spectrogram is posted to the model's `/infer` endpoint as a single FP32
    input named "conv2d_input" with a declared shape of [1, 64, 173, 1]. The
    `data` field of the first output in the response is returned.
    """
    inference_request = {
        "inputs": [
            {
                "name": "conv2d_input",
                "shape": [1, 64, 173, 1],
                "datatype": "FP32",
                "data": melspec.tolist(),
            }
        ]
    }

    reply = requests.post(
        MODEL_ENDPOINT + "/infer",
        headers=HEADERS,
        data=json.dumps(inference_request),
    )
    return json.loads(reply.text)["outputs"][0]["data"]


test_score = score(example_melspec)
test_score

In [None]:
# Human-readable genre names, indexed by the position of the model output.
# NOTE(review): the order is assumed to match the class indices of the
# dataset's genre feature - confirm against the dataset.
GENRE_LABELS = [
    "Blues",
    "Classical",
    "Country",
    "Disco",
    "Hip hop",
    "Jazz",
    "Metal",
    "Pop",
    "Reggae",
    "Rock",
]

# Label with the highest score for the example spectrogram.
best_genre_index = np.argmax(test_score)
GENRE_LABELS[best_genre_index]

### 5.6) Score each Mel Spectrogram against deployed model & aggregate results

In [None]:
# Per-genre sum of predicted scores and number of segments won by each genre.
probabilities = [0.0] * len(GENRE_LABELS)
counts = [0] * len(GENRE_LABELS)

for melspec in melspecs:
    predictions = score(melspec)
    for i, _ in enumerate(GENRE_LABELS):
        probabilities[i] += predictions[i]
    # The genre with the highest score wins this segment.
    counts[np.argmax(predictions)] += 1

### 5.7) Aggregate results

In [None]:
# Turn the summed scores into per-genre averages (in place) and print them
# together with the number of segments each genre won.
for i, genre in enumerate(GENRE_LABELS):
    probabilities[i] = probabilities[i] / len(melspecs)
    print(f"{genre}: {probabilities[i]} ({counts[i]}x)")