In [None]:
# IMPORTS

%load_ext autoreload
%autoreload 2

import train
import data_processor as dp
import commons
import models

import torch
import numpy as np
import pandas as pd
import sklearn
import importlib
import pylab
import matplotlib.pyplot as plt
import os
import pickle
import shutil
import itertools

In [None]:
# DATA LOADER

# Block Vars
_quiet = False

# Pick Model
# Each entry is (model name, encoding layer key or None, run K-Means analysis?).
all_models = [
    ('conv_ae_shared_test', None, True),  #0
    ('classifier_test', 'L15', True),  #1
    ('conv_ae_shared', None, True),  #2
    ('conv_ae_skip', None, True),  #3
    ('conv_ae_not_shared', None, True),  #4
    ('alexnet', 'L6', True),  #5
    ('vgg11', 'L8', True),  #6
    ('vgg13', 'L10', True),  #7
    ('vgg16', 'L13', True),  #8
    ('vgg16', 'L14', True),  #9
    ('vgg16', 'L15', True),  #10
]

curr_model = 0
fresh_analysis = False # Warning: this deletes all saved data for the current model including encodings, clusters, etc
compute_stats = False

model_name, model_layer, do_kmeans = all_models[curr_model]
print('Model: {}\tLayer: {}\tDo K-Means: {}'.format(model_name, model_layer, do_kmeans))

# Resolve the (block, layer index) pair used when encoding with a classifier.
# Models without an entry keep both values as None.
classifier_block, classifier_layer_index = None, None
try:
    classifier_block, classifier_layer_index = models.encoding_layer_options[model_name][model_layer]
except (LookupError, TypeError):
    # Only a missing model/layer entry is treated as "no layer info". A bare
    # `except:` would also hide KeyboardInterrupt and unrelated bugs.
    print('No layer info for current model')

# Load config
# Look up the saved training config registered for the selected model and load it.
train_config_path = models.trained_model_configs[model_name]
train_config = train.TrainingConfig.load_from_file(train_config_path)

# Load model
# Use CUDA whenever torch reports that it is available.
cuda = torch.cuda.is_available()
# cuda = False  # uncomment to override the detected value
model = train_config.get_by_model_key(cuda)
checkpoint = models.ModelCheckpoint(model)
# Populate the model and the checkpoint from the paths the config resolves for
# 'state_best' / 'checkpoint_best' (presumably the best training state -- TODO confirm).
model.load_state(train_config.get_model_path('state_best'))
checkpoint.load(train_config.get_model_path('checkpoint_best'))
if not _quiet:
    print('Model [{}] loaded with weights. Cuda:{}.\nConfig:\n{}\nCheckpoint:\n{}\n'
          .format(train_config.name, cuda, train_config.get_dict(), checkpoint.get_dict()))
    
# Analysis Dir
# Outputs go to <models_dir>/analysis, with one extra sub-directory per
# encoding layer when a layer is selected.
_analysis_root = os.path.join(train_config.models_dir, 'analysis')
analysis_dir = _analysis_root if model_layer is None else os.path.join(_analysis_root, model_layer)

if fresh_analysis:
    # Destructive: removes everything previously written under analysis_dir.
    shutil.rmtree(analysis_dir, ignore_errors=True)
if fresh_analysis and not _quiet:
    print('Analysis directory has been deleted if it existed')

os.makedirs(analysis_dir, exist_ok=True)
if not _quiet:
    print("Analysis dir: {}".format(analysis_dir))

# Load Dataset
# The dataset location is taken from the training config loaded above.
dataset_config = dp.DataPrepConfig.load_from_dataset(train_config.dataset_path)
train_parts, cv_part, test_part = dp.load_created_partitions(train_config.dataset_path)
# The analysis cells in this notebook read only the test partition, so stop early when it is empty.
if test_part.get_num_segments() == 0:
    raise Exception('No data in test set')
if not _quiet:
    print('Dataset [{}] loaded. Config:\n{}\n'.format(dataset_config.name, dataset_config.get_dict()))

# Batch the test partition using the batch size stored in the training config.
test_set = dp.PartitionBatchGenerator(test_part, train_config.batch_size, mode='test')
# Kept as a global: eval_test() divides the summed loss by this batch count.
test_set_len = len(test_set)
if not _quiet:
    print('Test Set Loaded. Batch Size:{} Num Batches:{}'.format(test_set.batch_size, test_set_len))

# Load Tracks
# FMA track metadata for the metadata directory and FMA type named in the dataset config.
tracks = commons.get_fma_meta(dataset_config.fma_meta_dir, dataset_config.fma_type)
if not _quiet:
    print('FMA metadata loaded. Shape {}'.format(tracks.shape))

In [None]:
# EVALUATION
def eval_test():
    """Evaluate the loaded model on every test batch and print the mean loss per batch.

    Reads the notebook-level ``model``, ``test_set`` and ``test_set_len``.
    Nothing is returned; the average is only printed.
    """
    model.begin_evaluation()
    # sum() starts from 0 and adds the batch losses in iteration order.
    total_loss = sum(model.evaluate(inputs, targets) for inputs, targets in test_set)
    print('Average test loss per batch:', total_loss / test_set_len)
    model.post_evaluation()
# Manual toggle: flip to True to run the evaluation.
if False:
    eval_test()

In [None]:
# GENERATE ENCODINGS
def get_test_enc(train_config, test_set, classifier_block=None, classifier_layer_index=None, quiet=False):
    """Encode every batch of ``test_set`` with the notebook-level ``model``.

    Args:
        train_config: Object whose ``model`` attribute is either
            'cnn_classifier' or 'conv_autoencoder'; it selects how
            ``model.encode`` is called.
        test_set: Iterable yielding ``(x_test, y_test)`` batches.
        classifier_block: Passed to ``model.encode`` for 'cnn_classifier' only.
        classifier_layer_index: Passed to ``model.encode`` for 'cnn_classifier' only.
        quiet: When True, the shape messages are not printed.

    Returns:
        ``(test_enc, enc_segs)``: the encodings flattened to two dimensions
        (first axis kept, the rest merged) and the concatenated ``y_test``
        values of all batches.

    Raises:
        ValueError: If ``train_config.model`` is not a supported model type or
            ``test_set`` yields no batches.
    """
    model_kind = train_config.model
    if model_kind not in ('cnn_classifier', 'conv_autoencoder'):
        # Previously this surfaced as an unbound local `enc` on the first batch.
        raise ValueError('Unsupported model type for encoding: {}'.format(model_kind))

    # Collect per-batch arrays and concatenate once; concatenating inside the
    # loop re-copies everything gathered so far on every batch.
    enc_batches, seg_batches = [], []
    for x_test, y_test in test_set:
        with torch.no_grad():
            if model_kind == 'cnn_classifier':
                enc = model.encode(x_test, classifier_block, classifier_layer_index)
            else:
                enc = model.encode(x_test)
        enc_batches.append(enc.cpu().numpy())
        seg_batches.append(np.asarray(y_test))

    if not enc_batches:
        raise ValueError('Test set produced no batches to encode')

    test_enc = np.concatenate(enc_batches)
    enc_segs = np.concatenate(seg_batches)
    if not quiet: print('Test set encoding shape: {}'.format(test_enc.shape))
    test_enc = test_enc.reshape(test_enc.shape[0], -1)
    if not quiet: print('Test set encoding reshaped: {}'.format(test_enc.shape))
    return test_enc, enc_segs
    
def get_or_load_test_enc():
    """Return the test-set encodings, reusing the on-disk cache when both cache files exist.

    The cache lives in ``<analysis_dir>/cached`` as ``test_enc.npy`` and
    ``enc_segs.npy``. When either file is missing the encodings are generated
    with ``get_test_enc`` from the notebook-level config, test set and layer
    selection, and then written to those files.

    Returns:
        ``(test_enc, enc_segs)`` as loaded from, or just saved to, the cache.
    """
    use_cache = True
    cache_dir = os.path.join(analysis_dir, 'cached')
    os.makedirs(cache_dir, exist_ok=True)
    enc_path = os.path.join(cache_dir, 'test_enc.npy')
    segs_path = os.path.join(cache_dir, 'enc_segs.npy')

    cache_hit = use_cache and all(os.path.isfile(path) for path in (enc_path, segs_path))
    if not cache_hit:
        print('Generating encodings')
        encodings, segments = get_test_enc(train_config, test_set, classifier_block, classifier_layer_index)
        np.save(enc_path, encodings)
        np.save(segs_path, segments)
    else:
        print('Loading saved encodings')
        encodings = np.load(enc_path)
        segments = np.load(segs_path)

    print(encodings.shape, segments.shape)
    return encodings, segments

# Manual toggle: the later analysis cells read test_enc / enc_segs.
if True:
    test_enc, enc_segs = get_or_load_test_enc()

In [None]:
# SCALE ENCODINGS
def get_enc_scaled(enc, mode='across', method='standard', std_scale=0.4, save=False, load=False, save_dir=None, prefix=None):
    """Rescale an encoding array and return it with its original shape.

    Args:
        enc: Array to scale; it is not modified.
        mode: 'features' passes ``enc`` to the scaler unchanged; 'across'
            first flattens it to a single column so that one scaler is fitted
            over all values.
        method: 'minmax' uses ``MinMaxScaler``; 'standard' uses
            ``StandardScaler``, after which the values are multiplied by
            ``std_scale``, shifted by 0.5 and clipped to [0, 1].
        std_scale: Multiplier applied to the standardised values.
        save: Pickle a newly fitted scaler under ``save_dir``.
        load: Reuse a previously pickled scaler when its file exists.
        save_dir: Directory of the scaler file; required when ``save`` is True.
        prefix: First component of the scaler file name
            ``<prefix>.<mode>.<method>.scaler``.

    Returns:
        The scaled values reshaped to ``enc.shape``.

    Raises:
        ValueError: If ``save`` is True but ``save_dir`` is None.
    """
    assert mode in ['features', 'across']
    assert method in ['minmax', 'standard']
    if save and save_dir is None:
        raise ValueError('save_dir is required when save=True')
    print('Scaling encoding with mode: {} and method: {}'.format(mode, method))

    enc_shape = enc.shape
    if mode == 'across':
        enc = enc.reshape(-1, 1)

    # Without a save_dir there is no scaler file; previously scaler_path was
    # left unbound and load=True/save=True raised a NameError.
    scaler_path = None
    if save_dir is not None:
        scaler_path = os.path.join(save_dir, '{}.{}.{}.scaler'.format(prefix, mode, method))

    if load and scaler_path is not None and os.path.isfile(scaler_path):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # save_dir at files written by this notebook.
        with open(scaler_path, 'rb') as modfile:
            print('Loading saved scaler {}'.format(scaler_path))
            scaler = pickle.load(modfile)
    else:
        if method == 'standard':
            scaler = sklearn.preprocessing.StandardScaler()
        else:
            scaler = sklearn.preprocessing.MinMaxScaler()
        scaler.fit(enc)
        if save:
            os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
            with open(scaler_path, 'wb') as modfile:
                pickle.dump(scaler, modfile)
                print('Scaler saved to: {}'.format(scaler_path))

    enc = scaler.transform(enc)
    if method == 'standard':
        enc = (enc * std_scale) + 0.5  # Scale between 0 and 1
        enc = np.clip(enc, 0, 1)

    return enc.reshape(enc_shape)

In [None]:
# ENCODING STATS

# Overall stats
def print_enc_stats(enc, max_segs=1000000, max_encs=10000, dpi=80, save_plots=False, save_dir=None, save_file_prefix=None):
    """Print summary statistics of an encoding and plot per-component mean/min/max/std.

    Args:
        enc: 2-D array indexed as ``[segment, component]``.
        max_segs: When there are more segments than this, a random subset of
            this size is used for the plots.
        max_encs: When there are more components than this, a random subset of
            this size (kept in ascending index order) is plotted.
        dpi: Resolution passed to ``plt.savefig``.
        save_plots: Save the figure as ``<save_dir>/<save_file_prefix>.desc.jpg``.
        save_dir: Output directory; required when ``save_plots`` is True.
        save_file_prefix: File name prefix of the saved figure.

    Raises:
        ValueError: If ``save_plots`` is True but ``save_dir`` is None.

    The random subsets are drawn from NumPy's global random state.
    """
    if save_plots and save_dir is None:
        # Fail before any work instead of with a TypeError inside os.path.join.
        raise ValueError('save_dir is required when save_plots=True')

    print('Num segments:', enc.shape[0])
    print('Distribution across entire encoding')
    print(pd.Series(enc.reshape(-1)).describe())

    pylab.rcParams['figure.figsize'] = (14,8)

    enc_idx = np.arange(enc.shape[1])
    if enc_idx.size > max_encs:
        print('Keeping only {} components'.format(max_encs))
        np.random.shuffle(enc_idx)
        enc_idx = enc_idx[:max_encs]
        enc_idx.sort()
        enc = enc[:, enc_idx]
    if enc.shape[0] > max_segs:
        print('Keeping only {} segments'.format(max_segs))
        seg_idx = np.arange(enc.shape[0])
        np.random.shuffle(seg_idx)
        # Use the shuffled indices; the old code computed them but then
        # always took the first max_segs rows.
        enc = enc[seg_idx[:max_segs], :]

    print('Plotting stats for {} components'.format(enc_idx.size))

    x_label = 'Encoding Component'
    # (subplot position in the 2x2 grid, y-axis label, per-component values)
    panels = [
        (1, 'Mean', enc.mean(axis=0)),
        (2, 'Min', enc.min(axis=0)),
        (4, 'Max', enc.max(axis=0)),
        (3, 'Standard Deviation', enc.std(axis=0)),
    ]
    for position, y_label, values in panels:
        plt.subplot(2, 2, position)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.bar(enc_idx, values)

    if save_plots:
        path = os.path.join(save_dir, "{}.desc.jpg".format(save_file_prefix))
        os.makedirs(os.path.dirname(path), exist_ok=True)
        plt.savefig(path, dpi=dpi)
        print('Plots saved to: {}'.format(path))
    plt.show()
    
#     print('Plotting percentiles {} components'.format(enc_idx.size))
#     pylab.rcParams['figure.figsize'] = (14,12)
#     percentiles = [10, 30, 50 ,70, 90, 100]
#     for i, p in enumerate(percentiles):
#         plt.subplot(3, 2, i+1)
#         plt.xlabel(x_label)
#         plt.ylabel('{} Percentile'.format(p))
#         plt.bar(enc_idx, np.percentile(enc, p, axis=0))
#     if save_plots:
#         path = os.path.join(save_dir, "{}.percetiles.jpg".format(save_file_prefix))
#         os.makedirs(os.path.dirname(path), exist_ok=True)
#         plt.savefig(path, dpi=dpi)
#         print('Plots saved to: {}'.format(path))
#     plt.show()

In [None]:
# RAW ENCODING ANALYSIS
def raw_enc_analysis(test_enc, save_plots=True, save_dir=None, prefix='', scaler_dir=None, compute_stats=True):
    """Run the statistics pass on the raw encodings, unscaled and under every scaling variant.

    The variants are the unscaled data followed by each combination of mode
    ('features', 'across') and method ('minmax', 'standard'). Scalers are
    loaded from / saved to ``scaler_dir`` (defaults to ``save_dir``) with the
    file prefix 'stats'. Statistics are printed and plotted only when
    ``compute_stats`` is True; the scalers are fitted or loaded either way.

    Args:
        test_enc: Encodings to analyse.
        save_plots: Forwarded to ``print_enc_stats``.
        save_dir: Directory for the saved plots.
        prefix: Prepended to every plot file prefix.
        scaler_dir: Directory holding the pickled scalers.
        compute_stats: Whether to call ``print_enc_stats`` for each variant.
    """
    scaler_dir = save_dir if scaler_dir is None else scaler_dir
    variants = itertools.chain(
        [(None, None)],
        itertools.product(['features', 'across'], ['minmax', 'standard']),
    )
    for scale_mode, scale_method in variants:
        print('\n\nRaw Encoding Analysis. Scale: {}. Method: {}\n\n'.format(scale_mode, scale_method))
        if scale_mode is not None:
            enc_variant = get_enc_scaled(test_enc, scale_mode, scale_method, save=True, load=True,
                                         save_dir=scaler_dir, prefix='stats')
            stats_prefix = '{}scaled_{}.method_{}.stats'.format(prefix, scale_mode, scale_method)
        else:
            enc_variant = test_enc
            stats_prefix = '{}unscaled.stats'.format(prefix)
        if not compute_stats:
            continue
        print_enc_stats(enc_variant, save_plots=save_plots, save_dir=save_dir, save_file_prefix=stats_prefix)
# Manual toggle for the whole-test-set raw analysis.
if True:
    raw_enc_analysis(test_enc, save_dir=os.path.join(analysis_dir, 'raw'), compute_stats=compute_stats)

In [None]:
# ENCODING SCATTER PLOTS

def show_enc_scatter(enc, num_plots=10):
    """Scatter-plot randomly chosen pairs of encoding dimensions against each other.

    Args:
        enc: 2-D array indexed as ``[sample, dimension]``.
        num_plots: Number of dimension pairs to draw; the panels are laid out
            three per row.

    The pairs are drawn from NumPy's global random state; the two dimensions
    of a pair are drawn independently, so they may coincide.
    """
    pylab.rcParams['figure.figsize'] = (20, 20)
    n_dims = enc.shape[1]
    dims_x = np.random.randint(0, n_dims, num_plots)
    dims_y = np.random.randint(0, n_dims, num_plots)
    n_rows = int(num_plots / 3) + 1
    for panel, (dim_a, dim_b) in enumerate(zip(dims_x, dims_y), start=1):
        plt.subplot(n_rows, 3, panel)
        plt.xlabel('Dim {0}'.format(dim_a))
        plt.ylabel('Dim {0}'.format(dim_b))
        plt.scatter(enc[:, dim_a], enc[:, dim_b], marker='^', c='blue')
# Manual toggle (disabled).
# NOTE(review): `enc_scaled` is not defined at notebook level in the cells
# shown here, so enabling this as-is would raise NameError -- confirm the
# intended input before switching it on.
if False:
    show_enc_scatter(enc_scaled, num_plots=20)

In [None]:
# PCA

def get_enc_pca(enc, reduced_dims, save=False, load=False, save_dir=None, save_file_prefix=None):
    """Min-max scale ``enc`` and project it with PCA.

    Args:
        enc: 2-D array to reduce.
        reduced_dims: Number of PCA components for a newly fitted model. It is
            ignored when a saved model is loaded.
        save: Pickle a newly fitted scaler and PCA model under ``save_dir``.
        load: Reuse the pickled scaler and PCA model when both files exist.
        save_dir: Directory of ``<prefix>.pca.scaler`` / ``<prefix>.pca.model``;
            required when ``save`` is True.
        save_file_prefix: Prefix of the two file names.

    Returns:
        The PCA-transformed, scaled data.

    Raises:
        ValueError: If ``save`` is True but ``save_dir`` is None.
    """
    if save and save_dir is None:
        raise ValueError('save_dir is required when save=True')

    # The defaults (save_dir=None) previously crashed inside os.path.join;
    # without a directory there is simply nothing to load or save.
    scaler_path = pca_model_path = None
    if save_dir is not None:
        scaler_path = os.path.join(save_dir, '{}.pca.scaler'.format(save_file_prefix))
        pca_model_path = os.path.join(save_dir, '{}.pca.model'.format(save_file_prefix))

    have_saved = (load and pca_model_path is not None
                  and os.path.isfile(pca_model_path) and os.path.isfile(scaler_path))
    if have_saved:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files written by this notebook.
        with open(scaler_path, 'rb') as modfile:
            print('Loading saved scaler {}'.format(scaler_path))
            scaler = pickle.load(modfile)
        enc_scaled = scaler.transform(enc)
        with open(pca_model_path, 'rb') as modfile:
            print('Loading saved model {}'.format(pca_model_path))
            pca = pickle.load(modfile)
    else:
        scaler = sklearn.preprocessing.MinMaxScaler()
        enc_scaled = scaler.fit_transform(enc)
        pca = sklearn.decomposition.PCA(n_components=reduced_dims)
        pca.fit(enc_scaled)
        if save:
            os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
            with open(scaler_path, 'wb') as modfile:
                pickle.dump(scaler, modfile)
                print('Scaler saved to: {}'.format(scaler_path))
            with open(pca_model_path, 'wb') as modfile:
                pickle.dump(pca, modfile)
                print('Model saved to: {}'.format(pca_model_path))

    enc_pca = pca.transform(enc_scaled)
    print('Variance retained: {}%'.format(pca.explained_variance_ratio_.sum()*100))
    print('Variance by components')
    print(pca.explained_variance_ratio_.cumsum())
    return enc_pca

In [None]:
# PCA ANALYSIS
def pca_analysis(test_enc, model_dir=None, save_plots=True, save_dir=None, prefix='', scaler_dir=None, compute_stats=True):
    """Reduce the encodings to 10 PCA components and run the statistics pass on every scaling variant.

    The PCA model and its scaler are loaded from / saved to ``model_dir``
    under the prefix ``train_config.name``. ``save_dir`` and ``scaler_dir``
    both default to ``model_dir``. Statistics are printed and plotted only
    when ``compute_stats`` is True.

    Args:
        test_enc: Encodings to analyse.
        model_dir: Directory of the pickled PCA model and scaler.
        save_plots: Forwarded to ``print_enc_stats``.
        save_dir: Directory for the saved plots.
        prefix: Prepended to every plot file prefix.
        scaler_dir: Directory of the per-variant scalers.
        compute_stats: Whether to call ``print_enc_stats`` for each variant.
    """
    save_dir = model_dir if save_dir is None else save_dir
    scaler_dir = model_dir if scaler_dir is None else scaler_dir
    model_prefix = train_config.name
    reduced = get_enc_pca(test_enc, 10, save=True, load=True, save_dir=model_dir, save_file_prefix=model_prefix)

    variants = itertools.chain(
        [(None, None)],
        itertools.product(['features', 'across'], ['minmax', 'standard']),
    )
    for scale_mode, scale_method in variants:
        print('\n\nPCA Analysis. Scale: {}. Method: {}\n\n'.format(scale_mode, scale_method))
        if scale_mode is not None:
            enc_variant = get_enc_scaled(reduced, scale_mode, scale_method, save=True, load=True,
                                         save_dir=scaler_dir, prefix=model_prefix)
            stats_prefix = '{}pca.scaled_{}.method_{}.stats'.format(prefix, scale_mode, scale_method)
        else:
            enc_variant = reduced
            stats_prefix = '{}pca.unscaled.stats'.format(prefix)
        if not compute_stats:
            continue
        print_enc_stats(enc_variant, save_plots=save_plots, save_dir=save_dir, save_file_prefix=stats_prefix)
# Manual toggle for the whole-test-set PCA analysis.
if True:
    pca_analysis(test_enc, model_dir=os.path.join(analysis_dir, 'pca'), compute_stats=compute_stats)

In [None]:
# BEST CLUSTER

def get_best_cluster(enc, try_clusters=10):
    """Fit K-Means for 1..``try_clusters`` clusters and plot the inertia curve.

    Args:
        enc: 2-D array to cluster; it is min-max scaled first.
        try_clusters: Largest number of clusters to try (inclusive).

    Returns:
        A DataFrame with the columns ``num_clusters`` and ``cluster_errors``
        (the fitted model's ``inertia_``), one row per cluster count.
    """
    scaler = sklearn.preprocessing.MinMaxScaler()
    enc_scaled = scaler.fit_transform(enc)

    # Inclusive upper bound: the old range(1, try_clusters) stopped one short
    # while the progress message counted from 2 up to try_clusters.
    cluster_range = range(1, try_clusters + 1)
    cluster_errors = []

    for num_clusters in cluster_range:
        print('Checking cluster {} of {}'.format(num_clusters, try_clusters))
        clusters = sklearn.cluster.KMeans(n_clusters=num_clusters)
        clusters.fit(enc_scaled)
        cluster_errors.append(clusters.inertia_)

    clusters_df = pd.DataFrame({"num_clusters": list(cluster_range), "cluster_errors": cluster_errors})
    print('Cluster Errors')
    print(clusters_df)

    plt.figure(figsize=(12,6))
    plt.plot(clusters_df.num_clusters, clusters_df.cluster_errors, marker="o")
    return clusters_df
# Manual toggle (disabled).
if False:
    get_best_cluster(test_enc)

In [None]:
# KMEANS

def get_enc_kmeans(enc, reduced_dims, save=False, load=False, save_dir=None, save_file_prefix=None):
    """Min-max scale ``enc`` and map it to inverse distances from K-Means cluster centres.

    Args:
        enc: 2-D array to transform.
        reduced_dims: Number of clusters for a newly fitted model. It is
            ignored when a saved model is loaded.
        save: Pickle a newly fitted scaler and K-Means model under ``save_dir``.
        load: Reuse the pickled scaler and model when both files exist.
        save_dir: Directory of ``<prefix>.kmeans.scaler`` /
            ``<prefix>.kmeans.model``; required when ``save`` is True.
        save_file_prefix: Prefix of the two file names.

    Returns:
        ``1 / kmeans.transform(enc_scaled)``, one column per cluster.

    Raises:
        ValueError: If ``save`` is True but ``save_dir`` is None.
    """
    if save and save_dir is None:
        raise ValueError('save_dir is required when save=True')

    # The defaults (save_dir=None) previously crashed inside os.path.join;
    # without a directory there is simply nothing to load or save.
    scaler_path = model_path = None
    if save_dir is not None:
        scaler_path = os.path.join(save_dir, '{}.kmeans.scaler'.format(save_file_prefix))
        model_path = os.path.join(save_dir, '{}.kmeans.model'.format(save_file_prefix))

    have_saved = (load and model_path is not None
                  and os.path.isfile(model_path) and os.path.isfile(scaler_path))
    if have_saved:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files written by this notebook.
        with open(scaler_path, 'rb') as modfile:
            print('Loading saved scaler {}'.format(scaler_path))
            scaler = pickle.load(modfile)
        enc_scaled = scaler.transform(enc)
        with open(model_path, 'rb') as modfile:
            print('Loading saved model {}'.format(model_path))
            kmeans = pickle.load(modfile)
    else:
        scaler = sklearn.preprocessing.MinMaxScaler()
        enc_scaled = scaler.fit_transform(enc)
        kmeans = sklearn.cluster.KMeans(n_clusters=reduced_dims)
        kmeans.fit(enc_scaled)
        if save:
            os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
            with open(scaler_path, 'wb') as modfile:
                pickle.dump(scaler, modfile)
                print('Scaler saved to: {}'.format(scaler_path))
            with open(model_path, 'wb') as modfile:
                pickle.dump(kmeans, modfile)
                print('Model saved to: {}'.format(model_path))

    enc_kmeans = kmeans.transform(enc_scaled)
    # Score the scaled data: the model was fitted on enc_scaled, so scoring
    # the raw encodings reported a meaningless value.
    print('Score', kmeans.score(enc_scaled))
    print('Data transformed', pd.Series(enc_kmeans.reshape(-1)).describe())
    # NOTE(review): a sample lying exactly on a cluster centre has distance 0
    # and therefore becomes inf here.
    enc_kmeans = 1 / (enc_kmeans)
    print('Data similarity', pd.Series(enc_kmeans.reshape(-1)).describe())

    return enc_kmeans

In [None]:
# KMEANS ANALYSIS
def kmeans_analysis(test_enc, model_dir=None, save_plots=True, save_dir=None, prefix='', scaler_dir=None, compute_stats=True):
    """Map the encodings to 10 K-Means similarities and run the statistics pass on every scaling variant.

    Does nothing except print a notice when the notebook-level ``do_kmeans``
    flag is off. The K-Means model and its scaler are loaded from / saved to
    ``model_dir`` under the prefix ``train_config.name``; ``save_dir`` and
    ``scaler_dir`` both default to ``model_dir``.

    Args:
        test_enc: Encodings to analyse.
        model_dir: Directory of the pickled K-Means model and scaler.
        save_plots: Forwarded to ``print_enc_stats``.
        save_dir: Directory for the saved plots.
        prefix: Prepended to every plot file prefix.
        scaler_dir: Directory of the per-variant scalers.
        compute_stats: Whether to call ``print_enc_stats`` for each variant.
    """
    if not do_kmeans:
        print('K-Means analysis is not allowed')
        return

    save_dir = model_dir if save_dir is None else save_dir
    scaler_dir = model_dir if scaler_dir is None else scaler_dir
    model_prefix = train_config.name
    similarities = get_enc_kmeans(test_enc, 10, save=True, load=True, save_dir=model_dir,
                                  save_file_prefix=model_prefix)

    variants = itertools.chain(
        [(None, None)],
        itertools.product(['features', 'across'], ['minmax', 'standard']),
    )
    for scale_mode, scale_method in variants:
        print('\n\nK-Means Analysis. Scale: {}. Method: {}\n\n'.format(scale_mode, scale_method))
        if scale_mode is not None:
            enc_variant = get_enc_scaled(similarities, scale_mode, scale_method, save=True, load=True,
                                         save_dir=scaler_dir, prefix=model_prefix)
            stats_prefix = '{}kmeans.scaled_{}.method_{}.stats'.format(prefix, scale_mode, scale_method)
        else:
            enc_variant = similarities
            stats_prefix = '{}kmeans.unscaled.stats'.format(prefix)
        if not compute_stats:
            continue
        print_enc_stats(enc_variant, save_plots=save_plots, save_dir=save_dir, save_file_prefix=stats_prefix)
# Manual toggle for the whole-test-set K-Means analysis.
if True:
    kmeans_analysis(test_enc, model_dir=os.path.join(analysis_dir, 'kmeans'), compute_stats=compute_stats)

In [None]:
# KMEANS PCA ANALYSIS
def kmeans_pca_analysis(test_enc, kmeans_model_dir=None, kmeans_pca_model_dir=None, save_plots=True, save_dir=None, prefix='', scaler_dir=None, compute_stats=True):
    """Apply K-Means similarities, then PCA, and run the statistics pass on every scaling variant.

    Does nothing except print a notice when the notebook-level ``do_kmeans``
    flag is off. The K-Means model is kept in ``kmeans_model_dir`` under the
    prefix ``train_config.name``; the PCA model is kept in
    ``kmeans_pca_model_dir`` under ``<train_config.name>.kmeans-pca``.
    ``save_dir`` and ``scaler_dir`` both default to ``kmeans_pca_model_dir``.

    Args:
        test_enc: Encodings to analyse.
        kmeans_model_dir: Directory of the pickled K-Means model and scaler.
        kmeans_pca_model_dir: Directory of the pickled PCA model and scaler.
        save_plots: Forwarded to ``print_enc_stats``.
        save_dir: Directory for the saved plots.
        prefix: Prepended to every plot file prefix.
        scaler_dir: Directory of the per-variant scalers.
        compute_stats: Whether to call ``print_enc_stats`` for each variant.
    """
    if not do_kmeans:
        print('K-Means PCA analysis is not allowed')
        return

    save_dir = kmeans_pca_model_dir if save_dir is None else save_dir
    scaler_dir = kmeans_pca_model_dir if scaler_dir is None else scaler_dir
    kmeans_prefix = train_config.name
    pca_prefix = "{}.kmeans-pca".format(train_config.name)

    similarities = get_enc_kmeans(test_enc, 10, save=True, load=True, save_dir=kmeans_model_dir,
                                  save_file_prefix=kmeans_prefix)
    reduced = get_enc_pca(similarities, 10, save=True, load=True, save_dir=kmeans_pca_model_dir,
                          save_file_prefix=pca_prefix)

    variants = itertools.chain(
        [(None, None)],
        itertools.product(['features', 'across'], ['minmax', 'standard']),
    )
    for scale_mode, scale_method in variants:
        print('\n\nK-Means PCA Analysis. Scale: {}. Method: {}\n\n'.format(scale_mode, scale_method))
        if scale_mode is not None:
            # The per-variant scalers are stored under the K-Means prefix.
            enc_variant = get_enc_scaled(reduced, scale_mode, scale_method, save=True, load=True,
                                         save_dir=scaler_dir, prefix=kmeans_prefix)
            stats_prefix = '{}kmeans-pca.scaled_{}.method_{}.stats'.format(prefix, scale_mode, scale_method)
        else:
            enc_variant = reduced
            stats_prefix = '{}kmeans-pca.unscaled.stats'.format(prefix)
        if not compute_stats:
            continue
        print_enc_stats(enc_variant, save_plots=save_plots, save_dir=save_dir, save_file_prefix=stats_prefix)
# Manual toggle for the whole-test-set K-Means + PCA analysis.
if True:
    kmeans_pca_analysis(test_enc, kmeans_model_dir=os.path.join(analysis_dir, 'kmeans'),
                       kmeans_pca_model_dir=os.path.join(analysis_dir, 'kmeans-pca'), compute_stats=compute_stats)

In [None]:
# GENRE WISE ANALYSIS
def genre_wise_analysis():
    """Repeat the raw, PCA, K-Means and K-Means+PCA analyses per genre.

    For each of 'Electronic', 'Rock' and 'Folk', the rows of the
    notebook-level ``test_enc`` whose entry in the genre lookup equals that
    genre's value in ``genres_map`` are selected. The scalers and models in
    the shared ``raw`` / ``pca`` / ``kmeans`` / ``kmeans-pca`` directories
    are used, while plots go to ``<analysis_dir>/genre-wise/<genre>/...``.
    """
    genres_map = commons.get_genres_map(dataset_config.datasets_dir, dataset_config.fma_type, reverse=True)
    enc_genres = commons.map_indices_to_genre(enc_segs, dataset_config.fma_meta_dir, dataset_config.fma_type)
    for genre in ('Electronic', 'Rock', 'Folk'):
        print('ANALYSING GENRE: {}'.format(genre))
        enc = np.array([row for pos, row in enumerate(test_enc) if enc_genres[pos] == genres_map[genre]])

        genre_prefix = "{}_".format(genre)
        genre_dir = os.path.join(analysis_dir, 'genre-wise', genre)
        kmeans_dir = os.path.join(analysis_dir, 'kmeans')

        raw_enc_analysis(enc, save_dir=os.path.join(genre_dir, 'raw'),
                         prefix=genre_prefix, scaler_dir=os.path.join(analysis_dir, 'raw'))
        pca_analysis(enc, model_dir=os.path.join(analysis_dir, 'pca'),
                     save_dir=os.path.join(genre_dir, 'pca'), prefix=genre_prefix)
        kmeans_analysis(enc, model_dir=kmeans_dir,
                        save_dir=os.path.join(genre_dir, 'kmeans'), prefix=genre_prefix)
        kmeans_pca_analysis(enc, kmeans_model_dir=kmeans_dir,
                            kmeans_pca_model_dir=os.path.join(analysis_dir, 'kmeans-pca'),
                            save_dir=os.path.join(genre_dir, 'kmeans-pca'), prefix=genre_prefix)
# Runs only when statistics are enabled in the configuration cell.
if True and compute_stats:
    genre_wise_analysis()

In [None]:
# TRACK WISE ANALYSIS
def track_wise_analysis(num_tracks):
    """Repeat the raw, PCA, K-Means and K-Means+PCA analyses for randomly chosen tracks.

    Args:
        num_tracks: Number of distinct tracks to analyse. When fewer distinct
            values exist in the notebook-level ``enc_segs``, all of them are
            analysed.

    For every chosen track, the rows of ``test_enc`` whose ``enc_segs`` entry
    equals the track id are analysed with the shared scalers/models, and plots
    go to ``<analysis_dir>/track-wise/<track>_<genre>/...``. The tracks are
    drawn from NumPy's global random state.
    """
    candidate_tracks = np.unique(enc_segs)
    # Sample without replacement so that no track is analysed twice (the
    # default of np.random.choice is replace=True), and never request more
    # tracks than exist, which replace=False would reject.
    sample_size = min(num_tracks, candidate_tracks.size)
    random_tracks = np.random.choice(candidate_tracks, sample_size, replace=False)
    genre_idx = commons.map_indices_to_genre(random_tracks, dataset_config.fma_meta_dir, dataset_config.fma_type)
    genres_map = commons.get_genres_map(dataset_config.datasets_dir, dataset_config.fma_type, reverse=False)
    for pos, track_idx in enumerate(random_tracks):
        genre = genres_map[genre_idx[pos]]
        prefix = "{}_{}".format(track_idx, genre)
        print('ANALYSING TRACK: {} GENRE: {}'.format(track_idx, genre))
        enc = np.array([row for seg_pos, row in enumerate(test_enc) if enc_segs[seg_pos] == track_idx])
        raw_enc_analysis(enc, save_dir=os.path.join(analysis_dir, 'track-wise', prefix, 'raw'),
                         prefix="{}_".format(prefix), scaler_dir=os.path.join(analysis_dir, 'raw'))
        pca_analysis(enc, model_dir=os.path.join(analysis_dir, 'pca'),
                     save_dir=os.path.join(analysis_dir, 'track-wise', prefix, 'pca'), prefix="{}_".format(prefix))
        kmeans_analysis(enc, model_dir=os.path.join(analysis_dir, 'kmeans'),
                     save_dir=os.path.join(analysis_dir, 'track-wise', prefix, 'kmeans'), prefix="{}_".format(prefix))
        kmeans_pca_analysis(enc, kmeans_model_dir=os.path.join(analysis_dir, 'kmeans'),
                            kmeans_pca_model_dir=os.path.join(analysis_dir, 'kmeans-pca'),
                            save_dir=os.path.join(analysis_dir, 'track-wise', prefix, 'kmeans-pca'), prefix="{}_".format(prefix))
# Manual toggle (disabled); additionally requires statistics to be enabled.
if False and compute_stats:
    track_wise_analysis(3)

In [None]:
# TRACK WISE ANALYSIS - SPECIFIC TRACK
def specific_track_analysis(track_idx):
    """Run the raw, PCA, K-Means and K-Means+PCA analyses for one track without saving plots.

    Args:
        track_idx: Value to match against the notebook-level ``enc_segs``;
            the matching rows of ``test_enc`` are analysed.

    The scalers and models of the shared ``raw`` / ``pca`` / ``kmeans`` /
    ``kmeans-pca`` directories under ``analysis_dir`` are used.
    """
    enc = np.array([row for pos, row in enumerate(test_enc) if enc_segs[pos] == track_idx])

    raw_dir = os.path.join(analysis_dir, 'raw')
    pca_dir = os.path.join(analysis_dir, 'pca')
    kmeans_dir = os.path.join(analysis_dir, 'kmeans')
    kmeans_pca_dir = os.path.join(analysis_dir, 'kmeans-pca')

    raw_enc_analysis(enc, save_plots=False, scaler_dir=raw_dir)
    pca_analysis(enc, save_plots=False, model_dir=pca_dir)
    kmeans_analysis(enc, save_plots=False, model_dir=kmeans_dir)
    kmeans_pca_analysis(enc, save_plots=False, kmeans_model_dir=kmeans_dir,
                        kmeans_pca_model_dir=kmeans_pca_dir)
# Manual toggle (disabled).
if False:
    specific_track_analysis(125548)

In [None]:
# Completion Music

import IPython
import fma_utils

# Audible "run finished" cue: pick one random track id from the metadata index
# and auto-play its audio file.
# NOTE: the paths and the 'small' FMA type are hard-coded here rather than read
# from dataset_config, and `tracks` rebinds the metadata loaded in the data
# loader cell.
_completion_meta_dir = "datasets/fma/fma_metadata"
_completion_audio_dir = 'datasets/fma/fma_small'
tracks = commons.get_fma_meta(_completion_meta_dir, 'small')
track_id = np.random.choice(tracks.index)
track_path = fma_utils.get_audio_path(_completion_audio_dir, int(track_id))
IPython.display.Audio(filename=track_path, autoplay=True)