In [3]:
# Imports

import dataset
import importlib
import nets
import numpy as np
import pandas as pd
import os
import pickle
import sklearn
import keras
import scipy
import matplotlib.pyplot as plt
import pylab
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

%matplotlib inline

# Reload any packages here

# importlib.reload(dataset)

# Analysis config

# Directory containing the cached artifacts that the cells below load
save_dir = 'cached/fma_small_mfcc_conv_m10_fps5_test'
# Paths of the individual artifacts inside save_dir
mfcc_save_path = os.path.join(save_dir, 'mfcc.npy')
tracks_save_path = os.path.join(save_dir, 'tracks')
data_prep_params_save_path = os.path.join(save_dir, 'data_prep_params')
training_params_save_path = os.path.join(save_dir, 'training_params')
encoder_save_path = os.path.join(save_dir, 'encoder')
model_save_path = os.path.join(save_dir, 'model')

analysis_mode = 'ae' # options: ae, genre
# Selects which split (x_test / x_train and the matching tracks) the analysis runs on
analysis_data_type = 'test' # options: test, train

In [4]:
# Load all data

# MFCC features for all tracks
x = np.load(mfcc_save_path)
print('MFCCs data loaded. Data size', x.size)

# Track metadata table
tracks = pd.read_pickle(tracks_save_path)
print('Tracks data loaded. Data size', tracks.shape)

# Dataset processing params.
# NOTE(review): pickle.load can execute arbitrary code; only open cache files you trust.
with open(data_prep_params_save_path, 'rb') as prep_file:
    data_prep_params = pickle.load(prep_file)
# NOTE: this unpacking rebinds save_dir (set in the config cell) to the value stored in the cache
(num_tracks, sr, fps, num_mfcc,
 num_segments_per_track, save_dir) = data_prep_params
# params_dict = {k: v for k, v in list(locals().items()) if len(list(filter(lambda x: x is v, params))) == 1}
print('Data prep params loaded', data_prep_params)

# Normalization statistics, split ratio and network depth saved with the training params
with open(training_params_save_path, 'rb') as train_file:
    training_params = pickle.load(train_file)
(mean, std, data_split_ratio, num_net_scale_downs) = training_params
print('Training params loaded', training_params)

MFCCs data loaded. Data size 72000
Tracks data loaded. Data size (10, 52)
Data prep params loaded (10, 44100, 5, 20, 20, 'cached/fma_small_mfcc_conv_m10_fps5_test')
Training params loaded (-1.8013664173677022, 58.824029222879325, 0.8, 3)


In [5]:
# Prepare test data
# NOTE: this cell rebinds x in place, so it must be run exactly once after the load cell.

# Reshape into (segments, mfcc coefficients, frames, channel)
_num_segments = num_tracks * num_segments_per_track
num_mfcc_frames = x.size // (_num_segments * num_mfcc)
x = x.reshape(_num_segments, num_mfcc, num_mfcc_frames, 1)
print('Data reshaped', x.shape)

# Zero-pad both mfcc axes so that each is divisible by 2 ** num_net_scale_downs
divisor = 2 ** num_net_scale_downs

# -n % divisor is the gap from n up to the next multiple of divisor (0 when already aligned)
num_pad_mfcc_frames = -num_mfcc_frames % divisor
x_pad_frames = np.zeros((_num_segments, num_mfcc, num_pad_mfcc_frames, 1))
x = np.concatenate((x, x_pad_frames), axis=2)
num_mfcc_frames_new = x.shape[2]

num_pad_mfcc = -num_mfcc % divisor
x_pad_mfcc = np.zeros((_num_segments, num_pad_mfcc, num_mfcc_frames_new, 1))
x = np.concatenate((x, x_pad_mfcc), axis=1)
num_mfcc_new = x.shape[1]
print('Data padded', x.shape)
print('New mfcc dimensions', (num_mfcc_new, num_mfcc_frames_new))

# Split
x_train, x_test = dataset.split_data(x, data_split_ratio)
print('Training shape', x_train.shape)
print('Test shape', x_test.shape)

# Choose data
if analysis_data_type == 'test':
    x_analysis = x_test
else:
    x_analysis = x_train

# Normalize with the mean / std loaded from the training params
x_analysis = (x_analysis - mean) / std
print('Data normalized', scipy.stats.describe(x_analysis.reshape(-1)))

Data reshaped (200, 20, 18, 1)
Data padded (200, 24, 24, 1)
New mfcc dimensions (24, 24)
Training shape (160, 24, 24, 1)
Test shape (40, 24, 24, 1)
Data normalized DescribeResult(nobs=23040, minmax=(-5.380438857628132, 4.67300144232842), mean=0.05002037655904822, variance=1.0362581139814935, skewness=-0.27678801801817365, kurtosis=12.901607840377752)


In [6]:
# Process tracks metadata

track_index = tracks.index

# The first num_tracks_train index entries are taken as the training tracks and the
# remainder as the test tracks (assumes metadata rows follow the segment order — TODO confirm)
num_segements_train = x_train.shape[0]
num_tracks_train = int(num_segements_train / num_segments_per_track)
tracks_train_idx = track_index[:num_tracks_train]
tracks_train = tracks.loc[tracks_train_idx, :]
print('Training tracks shape', tracks_train.shape)

num_segements_test = x_test.shape[0]
num_tracks_test = int(num_segements_test / num_segments_per_track)
tracks_test_idx = track_index[num_tracks_train:]
tracks_test = tracks.loc[tracks_test_idx, :]
print('Test tracks shape', tracks_test.shape)

# Pick the split selected in the config cell
if analysis_data_type == 'test':
    tracks_analysis = tracks_test
    num_segements_analysis = num_segements_test
    num_tracks_analysis = num_tracks_test
else:
    tracks_analysis = tracks_train
    num_segements_analysis = num_segements_train
    num_tracks_analysis = num_tracks_train
tracks_analysis_idx = tracks_analysis.index
print('Analysis data type:', analysis_data_type, (num_segements_analysis, num_tracks_analysis))

# Separate a few genres (any of these frames may be empty for a small split)
_genre_top = tracks_analysis['track', 'genre_top']
hiphop = tracks_analysis[_genre_top == 'Hip-Hop']
folk = tracks_analysis[_genre_top == 'Folk']
pop = tracks_analysis[_genre_top == 'Pop']
electronic = tracks_analysis[_genre_top == 'Electronic']
instrumental = tracks_analysis[_genre_top == 'Instrumental']
experimental = tracks_analysis[_genre_top == 'Experimental']
international = tracks_analysis[_genre_top == 'International']
rock = tracks_analysis[_genre_top == 'Rock']

Training tracks shape (8, 52)
Test tracks shape (2, 52)
Analysis data type: test (40, 2)


In [7]:
# Predictions

# Load the saved full model and the saved encoder from their configured paths
model = keras.models.load_model(model_save_path)
print('Model loaded', (len(model.layers),))
encoder = keras.models.load_model(encoder_save_path)
print('Encoder loaded', (len(encoder.layers),))
# model, encoder = classifiers.genre_classifier_conv(inpdimx, inpdimy, 2)
# model.load_weights(net_save_path)

# Predict: only the encoder is run here, on the normalized analysis data
y = encoder.predict(x_analysis)
print('Prediction shape', y.shape)
# print(len(y))

Model loaded (15,)
Encoder loaded (8,)
Prediction shape (40, 3, 3, 1)




In [8]:
# The reshapes below need a single array; a list means the encoder returned several outputs
if type(y) is list:
    print('Multi layer output')
    raise Exception('Merge layers here')

# Reshape encodings: first one flat row per segment ...
_segments_total = num_tracks_analysis * num_segments_per_track
y_segment = y.reshape(_segments_total, -1)
print('Flattened encodings by segment shape', y_segment.shape)

# ... then the same rows grouped as (track, segment, component)
_by_track_shape = (num_tracks_analysis, num_segments_per_track, -1)
y = y.reshape(*_by_track_shape)
print('Flattened encodings by track shape', y.shape)

# Number of values in one flattened segment encoding
raw_encoding_length = int(y_segment.size / (num_segements_analysis))
print('Raw encoding length:', raw_encoding_length)

Flattened encodings by segment shape (40, 9)
Flattened encodings by track shape (2, 20, 9)
Raw encoding length: 9


In [None]:
def generate_encodings(y_curr, tracks_curr_idx):
    """Map each track id to that track's block of segment encodings.

    Args:
        y_curr: 3-D array indexed as [track position, segment, encoding component].
        tracks_curr_idx: iterable of track ids, ordered like the first axis of ``y_curr``.

    Returns:
        dict mapping each track id to the 2-D slice ``y_curr[i, :, :]`` at the same
        position. The dict is empty when ``tracks_curr_idx`` is empty.

    Raises:
        IndexError: if ``tracks_curr_idx`` has more entries than ``y_curr`` has tracks.
    """
    encodings = {idx: y_curr[i, :, :] for i, idx in enumerate(tracks_curr_idx)}
    # An empty index used to crash on list(...)[0]; report it instead
    if encodings:
        print('Generated encoding shape', next(iter(encodings.values())).shape)
    else:
        print('No encodings generated: track index is empty')
    return encodings

# Build the {track id: per-segment encodings} lookup for the tracks under analysis
encodings_map = generate_encodings(y, tracks_analysis_idx)
print('Encodings map generated', (len(encodings_map),))

In [None]:
# Transform encodings to principal components

def transform_encodings_with_pca(y_segment_curr=y_segment,
                                 reduced_dim=raw_encoding_length,
                                 scale='feature',
                                 _range=(0, 100)):
    """Project segment encodings onto principal components and min-max scale them.

    NOTE: the defaults of ``y_segment_curr`` and ``reduced_dim`` are captured when this
    cell is executed. Re-run the cell after ``y_segment`` changes, or pass them explicitly.

    Args:
        y_segment_curr: 2-D array with one row per segment.
        reduced_dim: number of principal components to keep.
        scale: ``'feature'`` scales every principal component to ``_range`` independently;
            ``'all'`` scales with one global min/max taken over all components;
            any other value returns the unscaled projection.
        _range: ``(min, max)`` target range for the scaling.

    Returns:
        2-D array of shape ``(num_segments, reduced_dim)``.
    """
    # Local import: `import sklearn` alone does not guarantee that the
    # `sklearn.preprocessing` submodule has been loaded.
    from sklearn.preprocessing import MinMaxScaler

    pca = PCA(n_components=reduced_dim)
    pca.fit(y_segment_curr)
    y_segment_pca = pca.transform(y_segment_curr)
    print('Variance retained: {}%'.format(pca.explained_variance_ratio_.sum()*100))
    print('Data transformed', pd.Series(y_segment_pca.reshape(-1)).describe())

    if scale == 'feature':
        # Fit on the 2-D matrix so each component gets its own min/max. Fitting on the
        # flattened column (as before) made the scaler expect a single feature, which
        # either raised in transform() or silently behaved like scale='all'.
        scaler = MinMaxScaler(feature_range=_range)
        y_segment_pca = scaler.fit(y_segment_pca).transform(y_segment_pca)
        print('Data scaled', pd.Series(y_segment_pca.reshape(-1)).describe())
    elif scale == 'all':
        # One global min/max: scale the flattened values, then restore the 2-D shape
        y_segment_flat = y_segment_pca.reshape(-1, 1)
        scaler = MinMaxScaler(feature_range=_range)
        scaler.fit(y_segment_flat)
        y_segment_pca = scaler.transform(y_segment_flat).reshape(y_segment_pca.shape[0], -1)
        print('Data scaled', pd.Series(y_segment_pca.reshape(-1)).describe())

    print('Variance by components')
    print(pca.explained_variance_ratio_.cumsum())

    return y_segment_pca
    
# y_segment_pca = transform_encodings_with_pca()
# Run the PCA transform with its default arguments
y_segment_pca = transform_encodings_with_pca()
print('Transformed encodings by segment shape', y_segment_pca.shape)
# Regroup the per-segment rows as (track, segment, component)
y_pca = y_segment_pca.reshape(num_tracks_analysis, num_segments_per_track, -1)
print('Transformed encodings by track shape', y_pca.shape)



In [None]:
# Find best cluster: fit k-means for a range of k and plot the inertia (elbow plot)
y_segment_curr = y_segment_pca

# k-means cannot fit more clusters than there are samples, so cap the search at that
_max_clusters = min(19, y_segment_curr.shape[0])
_cluster_range = range(1, _max_clusters + 1)
_cluster_errors = []

for _num_clusters in _cluster_range:
    _clusters = KMeans(n_clusters=_num_clusters)
    # Fit the array selected above (the loop used to ignore y_segment_curr)
    _clusters.fit(y_segment_curr)
    _cluster_errors.append(_clusters.inertia_)

_clusters_df = pd.DataFrame({"num_clusters": list(_cluster_range), "cluster_errors": _cluster_errors})
print(_clusters_df)

plt.figure(figsize=(12,6))
plt.plot(_clusters_df.num_clusters, _clusters_df.cluster_errors, marker="o")
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()


In [None]:
# Reduce dimensionality by using K means

# TODO: Scale to a range
def transform_encodings_with_kmeans(y_segment_curr=y_segment, reduced_dim=4):
    """Re-express each segment encoding as its similarity to k-means cluster centres.

    NOTE: the default of ``y_segment_curr`` is captured when this cell is executed.

    Args:
        y_segment_curr: 2-D array with one row per segment.
        reduced_dim: number of clusters, which is also the output width.

    Returns:
        2-D array of shape ``(num_segments, reduced_dim)`` holding ``1 / (1 + d)``,
        where ``d`` is the value returned by ``KMeans.transform`` for that cluster.
    """
    model = KMeans(n_clusters=reduced_dim)
    model.fit(y_segment_curr)

    distances = model.transform(y_segment_curr)
    print('Data transformed', pd.Series(distances.reshape(-1)).describe())
    print('Score', model.score(y_segment_curr))

    # Turn distances into a bounded similarity: 1 at the centre, tending to 0 far away
    similarity = 1 / (1 + distances)
    print('Data similarity', pd.Series(similarity.reshape(-1)).describe())
    return similarity

# Run the k-means reduction with its default arguments
y_segment_kmeans = transform_encodings_with_kmeans()
# Message spelling fixed ('Tansformed') to match the equivalent PCA log line
print('Transformed encodings by segment shape', y_segment_kmeans.shape)
# Regroup the per-segment rows as (track, segment, component)
y_kmeans = y_segment_kmeans.reshape(num_tracks_analysis, num_segments_per_track, -1)
print('Reduced encodings by track shape', y_kmeans.shape)


In [None]:
# Analysis: Encoding stats
y_curr = y  # options: y, y_pca, y_kmeans
y_segment_curr = y_segment  # options: y_segment, y_segment_pca, y_segment_kmeans

# Section toggles (set to False to skip a section)
_show_overall = True
_show_component_stats = True
_show_percentiles_combined = True
_show_percentiles_separated = True


def _select_components(num_components, max_outputs):
    """Return all component indices, or a random subset of at most ``max_outputs`` of them."""
    idx = np.arange(num_components)
    if idx.size > max_outputs:
        np.random.shuffle(idx)
        idx = idx[:max_outputs]
    return idx


# Overall stats
if _show_overall:
    print('Num tracks:', y_curr.shape[0])
    print('Num segments:', y_segment_curr.shape[0])
    print('Distribution across entire encoding')
    print(pd.Series(y_curr.reshape(-1)).describe())

# Components shared by the two summary sections below. The columns are selected once;
# indexing the subset with _idx a second time (as before) picks wrong columns or goes
# out of bounds as soon as the random subsampling kicks in.
_max_outputs = 200
_idx = _select_components(y_segment_curr.shape[1], _max_outputs)
_y_segment_curr = y_segment_curr[:, _idx]
_positions = np.arange(_idx.size)

# Stats for all encoding components
if _show_component_stats:
    pylab.rcParams['figure.figsize'] = (14,8)
    print('Plotting stats for {} components'.format(_idx.size))

    # (subplot slot, title, per-component statistic)
    _panels = [
        (1, 'Mean', _y_segment_curr.mean(axis=0)),
        (2, 'Min', _y_segment_curr.min(axis=0)),
        (4, 'Max', _y_segment_curr.max(axis=0)),
        (3, 'Variance', _y_segment_curr.var(axis=0)),
    ]
    for _slot, _title, _values in _panels:
        plt.subplot(2, 2, _slot)
        plt.title(_title)
        plt.bar(_positions, _values)

    plt.show()

# Percentiles combined
if _show_percentiles_combined:
    print('Plotting percentiles {} components'.format(_idx.size))
    pylab.rcParams['figure.figsize'] = (14,12)
    _percentiles = [10, 30, 50 ,70, 90, 100]
    for i, _p in enumerate(_percentiles):
        plt.subplot(3, 2, i+1)
        plt.title('{} Percentile'.format(_p))
        plt.bar(_positions, np.percentile(_y_segment_curr, _p, axis=0))
    plt.show()

# Percentiles separated
if _show_percentiles_separated:
    pylab.rcParams['figure.figsize'] = (14,12)
    _max_outputs = 20
    _percentiles = list(range(0, 100, 10))
    _idx = _select_components(y_segment_curr.shape[1], _max_outputs)
    # Near-square grid that always has room for every component; the previous
    # int(n/2) x (int(n/2)+1) grid was too small for 1 or 3 components.
    _n_cols = max(1, int(np.ceil(np.sqrt(_idx.size))))
    _n_rows = max(1, int(np.ceil(_idx.size / _n_cols)))
    for i, _index in enumerate(_idx):
        plt.subplot(_n_rows, _n_cols, i+1)
        plt.title('Component {}'.format(_index))
        # Index the full array by the original component id. y_segment_curr is no longer
        # overwritten with the subset, so the id and the column always agree.
        plt.bar(_percentiles, np.percentile(y_segment_curr[:, _index], _percentiles))
#         plt.ylim(0,100)
    plt.show()

    
# print(print(pd.Series(y_segment[:,_index]).describe()))

In [None]:
# Visualize 2 dimensions of the encodings for multiple segments in scatter plots

pylab.rcParams['figure.figsize'] = (20, 4)

_y_curr = y # options: y, y_red
_y_segment_curr = y_segment # options: y_segment, y_segment_red
_encoding_length = int(_y_segment_curr.size / num_segements_analysis)

_encodings_map = generate_encodings(_y_curr, tracks_analysis_idx)

# Random ordering of the segment positions within a track
_sample = np.arange(int(num_segments_per_track))
np.random.shuffle(_sample)

# Consecutive dimension pairs (0,1), (2,3), ...; a trailing unpaired dimension is skipped
_toplot = range(0, _encoding_length, 2)

for _genre in (folk, electronic):
    for _track_pos, _idx in enumerate(_genre.index):
        if _track_pos == 0:
            print('Plotting {0}'.format(_genre['track', 'genre_top'][_idx]))
        _encoded = _encodings_map[_idx]
        for _col, _dim1 in enumerate(_toplot, start=1):
            _dim2 = _dim1 + 1
            if _dim2 < _encoding_length:
                # Every track of the genre is drawn into the same row of subplots
                plt.subplot(1, len(_toplot), _col)
                plt.xlabel('Dim {0}'.format(_dim1))
                plt.ylabel('Dim {0}'.format(_dim2))
                plt.scatter(_encoded[_sample, _dim1], _encoded[_sample, _dim2], marker='^', c='blue')
    plt.show()

In [None]:
# Visualize the encoding of the first segment of one random track from each genre

pylab.rcParams['figure.figsize'] = (20, 12)

_y_curr = y # options: y, y_red
_y_segment_curr = y_segment # options: y_segment, y_segment_red
_encoding_length = int(_y_segment_curr.size / num_segements_analysis)

_encodings_map = generate_encodings(_y_curr, tracks_analysis_idx)

for i, _genre in enumerate([hiphop, folk, electronic, rock, instrumental, international, experimental]):
# for i, _genre in enumerate([folk, electronic]):
    # A small analysis split may hold no track of this genre; np.random.randint(0)
    # would raise ValueError, so leave that subplot slot empty instead.
    if _genre.shape[0] == 0:
        continue
    _track_idx = _genre.index[np.random.randint(_genre.shape[0])]
    _encoding = _encodings_map[_track_idx]
    plt.subplot(3, 3, i+1)
    plt.title(_genre['track', 'genre_top'][_track_idx])
    # Row 0 is the first segment of the chosen track
    plt.bar(np.arange(_encoding.shape[1]), _encoding[0,:])
plt.show()

In [None]:
# Visualize the encodings of multiple segments of the same track

pylab.rcParams['figure.figsize'] = (20, 3)

_y_curr = y # options: y, y_red
_y_segment_curr = y_segment # options: y_segment, y_segment_red
# Use this cell's own selection; the previous code read y_segment_curr, a leftover
# variable from another cell.
_encoding_length = int(_y_segment_curr.size / num_segements_analysis)

_encodings_map = generate_encodings(_y_curr, tracks_analysis_idx)
_segments_to_visulize = 10

# for _genre in [hiphop, folk, electronic, rock, instrumental, international, experimental]:
for _genre in [folk, electronic]:
    # Skip genres with no track in the analysis split (np.random.randint(0) would raise)
    if _genre.shape[0] == 0:
        continue
    _track_idx = _genre.index[np.random.randint(_genre.shape[0])]
    print('Plotting {0}'.format(_genre['track', 'genre_top'][_track_idx]))
    _encoding = _encodings_map[_track_idx]
    # Never ask for more segments than the track actually has
    _num_shown = min(_segments_to_visulize, _encoding.shape[0])
    for _segment in range(_num_shown):
        plt.subplot(1, _num_shown, _segment + 1)
        plt.bar(np.arange(_encoding.shape[1]), _encoding[_segment, :])
    plt.show()