In [2]:
import os
import pickle
import matplotlib.pyplot as plt  # import plt before librosa, conflict!
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import librosa.display
from sklearn.decomposition import PCA

# Directory holding the cached pickle files. Written as a raw string so the
# Windows backslashes are taken literally; the previous plain literal relied
# on invalid escape sequences (\9, \O, \B, \p), which newer Python versions
# warn about and will eventually reject.
PICKLE_DIR = r'D:\9999_OneDrive_ZHAW\OneDrive - ZHAW\BA_ZHAW_RTO\pickle'
# Root directory of the dataset (same value as before, raw-string form).
root = r'Z:\BA\mimii_baseline\dataset'

# Functions

In [8]:
# pickle I/O
def save_pickle(filename, save_data, root=PICKLE_DIR):
    """
    Serialize an object to '<root>/<filename>.pickle'.

    filename : str
        pickle filename, without the '.pickle' extension
    save_data : free datatype
        object that will be pickled
    root : str
        directory the file is written to (defaults to PICKLE_DIR)

    return : None
    """
    # The extension is appended after joining, so callers pass the bare name.
    base_path = os.path.join(root, filename)
    with open(base_path + '.pickle', 'wb') as handle:
        pickle.dump(save_data, handle)


def load_pickle(filename, root=PICKLE_DIR):
    """
    Deserialize the object stored in '<root>/<filename>.pickle'.

    filename : str
        pickle filename, without the '.pickle' extension
    root : str
        directory the file is read from (defaults to PICKLE_DIR)

    return : data
        whatever object was stored in the pickle file
    """
    # The extension is appended after joining, so callers pass the bare name.
    base_path = os.path.join(root, filename)
    with open(base_path + '.pickle', 'rb') as handle:
        return pickle.load(handle)


# Normalization
def normalize_data(x, lb, ub, max_v=1.0, min_v=-1.0):
    '''
    Column-wise max-min normalization of 'x' into the range [min_v, max_v].

    x : 2-D array-like (rows x columns)
        data to normalize; each column is scaled independently
    lb : array-like, empty sequence or None
        per-column lower bounds; if empty/None, the column minima of 'x' are used
    ub : array-like, empty sequence or None
        per-column upper bounds; if empty/None, the column maxima of 'x' are used
    max_v, min_v : float
        upper / lower end of the target range

    return : (x_n, lb, ub)
        x_n is the normalized data; lb and ub are the bounds actually used,
        each reshaped to (1, n_columns) so they can be passed to a later call.

    Columns whose upper and lower bound coincide cannot be scaled; for those
    columns the result is 'x - lb' (not mapped onto [min_v, max_v]).
    '''

    x = np.asarray(x)

    # Set-up: fall back to the data's own extrema when no bound is supplied.
    if ub is None or len(ub) == 0:
        ub = x.max(0)  # OPTION 1, applied along the first dimension (per column)
        #ub = np.percentile(x, 99.9, axis=0, keepdims=True) # OPTION 2:

    if lb is None or len(lb) == 0:
        lb = x.min(0)
        #lb = np.percentile(x, 0.1, axis=0, keepdims=True)

    # reshape() yields a new array object, so bounds passed in by the caller are
    # not reshaped in place; np.asarray also lets plain lists be used as bounds.
    ub = np.asarray(ub).reshape(1, -1)
    lb = np.asarray(lb).reshape(1, -1)
    max_min = max_v - min_v
    delta = ub - lb

    # Constant columns have delta == 0. Divide by 1 there instead, which avoids
    # the divide-by-zero warning; those columns are overwritten just below.
    zero_span = delta == 0
    safe_delta = np.where(zero_span, np.ones_like(delta), delta)

    # Compute
    x_n = max_min * (x - lb) / safe_delta + min_v
    if zero_span.any():
        idx = np.ravel(zero_span)
        x_n[:, idx] = x[:, idx] - lb[:, idx]

    return x_n, lb, ub


# extended pickle loader
def load_data(key_name, pickle_dir):
    """
    Load the four pickles that belong to one dataset key.

    key_name : str
        common filename prefix of the pickles
    pickle_dir : str
        directory the pickles are read from

    return : tuple
        (file_locs, data, labels, data_n), read from '<key_name>',
        '<key_name>_data', '<key_name>_labels' and '<key_name>_data_n'.
    """
    # Suffix order fixes both the load order and the order of the returned tuple.
    suffixes = ('', '_data', '_labels', '_data_n')
    return tuple(
        load_pickle(f'{key_name}{suffix}', root=pickle_dir)
        for suffix in suffixes
    )

# load data & normalize

## load total 6dB

In [5]:
# Un-suffixed pickles for the 6 dB set (presumably file locations, by analogy
# with load_data, which calls the un-suffixed pickle 'file_locs').
six_norm, six_abnorm = load_pickle('six_norm'), load_pickle('six_abnorm')
# Matching '_data' pickles for the normal / abnormal recordings.
six_norm_data, six_abnorm_data = load_pickle('six_norm_data'), load_pickle('six_abnorm_data')

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
six_norm_data_n, lb, ub = normalize_data(six_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)
six_abnorm_data_n, lb, ub = normalize_data(six_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)

print(six_norm_data_n)
print(six_abnorm_data_n)

[[0.50680536 0.49459252 0.5638561  ... 0.52019787 0.5181766  0.5108569 ]
 [0.6925782  0.6364584  0.70216686 ... 0.5696227  0.57920265 0.6265522 ]
 [0.65501666 0.5968383  0.70810354 ... 0.5323048  0.5360604  0.5510274 ]
 ...
 [0.73291856 0.76494384 0.623315   ... 0.28893778 0.29114878 0.24939412]
 [0.7974568  0.69309485 0.67207146 ... 0.30297497 0.2746842  0.2960698 ]
 [0.8139511  0.7084099  0.6487108  ... 0.25420263 0.2499758  0.27348074]]
[[0.33771682 0.27785784 0.26559427 ... 0.18123129 0.19518875 0.1974282 ]
 [0.28140482 0.26396242 0.3544001  ... 0.19414781 0.20980471 0.2064001 ]
 [0.31561092 0.3216098  0.32545492 ... 0.16656087 0.18521224 0.214315  ]
 ...
 [0.8104279  0.81536496 0.9197512  ... 0.13417602 0.16365665 0.20735407]
 [0.9157321  0.85799456 0.8580185  ... 0.1800388  0.1731957  0.19838832]
 [0.78237855 0.8666426  0.8218058  ... 0.17870124 0.17531979 0.22040789]]


## load total 0dB

In [None]:
# Un-suffixed pickles for the 0 dB set (presumably file locations, by analogy
# with load_data, which calls the un-suffixed pickle 'file_locs').
zero_norm, zero_abnorm = load_pickle('zero_norm'), load_pickle('zero_abnorm')
# Matching '_data' pickles for the normal / abnormal recordings.
zero_norm_data, zero_abnorm_data = load_pickle('zero_norm_data'), load_pickle('zero_abnorm_data')

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
zero_norm_data_n, lb, ub = normalize_data(zero_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)
zero_abnorm_data_n, lb, ub = normalize_data(zero_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)

print(zero_norm_data_n)
print(zero_abnorm_data_n)

## load total min6dB

In [None]:
# Un-suffixed pickles for the -6 dB set (presumably file locations, by analogy
# with load_data, which calls the un-suffixed pickle 'file_locs').
min_six_norm, min_six_abnorm = load_pickle('min_six_norm'), load_pickle('min_six_abnorm')
# Matching '_data' pickles for the normal / abnormal recordings.
min_six_norm_data, min_six_abnorm_data = (
    load_pickle('min_six_norm_data'),
    load_pickle('min_six_abnorm_data'),
)

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
min_six_norm_data_n, lb, ub = normalize_data(min_six_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)
min_six_abnorm_data_n, lb, ub = normalize_data(min_six_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0)

print(min_six_norm_data_n)
print(min_six_abnorm_data_n)

## load partial: id_00 all dB 

In [None]:
# Un-suffixed pickles for id_00 across all dB levels (presumably file locations,
# by analogy with load_data, which calls the un-suffixed pickle 'file_locs').
id00_all_dB_norm, id00_all_dB_abnorm = (
    load_pickle('id00_all_dB_norm'),
    load_pickle('id00_all_dB_abnorm'),
)
# Matching '_data' pickles for the normal / abnormal recordings.
id00_all_dB_norm_data, id00_all_dB_abnorm_data = (
    load_pickle('id00_all_dB_norm_data'),
    load_pickle('id00_all_dB_abnorm_data'),
)

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
id00_all_dB_norm_data_n, lb, ub = normalize_data(
    id00_all_dB_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)
id00_all_dB_abnorm_data_n, lb, ub = normalize_data(
    id00_all_dB_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)

print(id00_all_dB_norm_data_n)
print(id00_all_dB_abnorm_data_n)

### load partial: id_00 6dB 

In [10]:
# Common prefix of the id_00 / 6 dB pickles; '_norm' and '_abnorm' select the subset.
file_name = 'id00_6dB'

# load_data returns (file_locs, data, labels, data_n) for the given key.
(
    id00_6dB_norm,
    id00_6dB_norm_data,
    id00_6dB_norm_labels,
    id00_6dB_norm_data_n,
) = load_data(f'{file_name}_norm', PICKLE_DIR)
print(id00_6dB_norm_data_n)

(
    id00_6dB_abnorm,
    id00_6dB_abnorm_data,
    id00_6dB_abnorm_labels,
    id00_6dB_abnorm_data_n,
) = load_data(f'{file_name}_abnorm', PICKLE_DIR)
print(id00_6dB_abnorm_data_n)


[[0.50680536 0.49459252 0.534202   ... 0.48582694 0.43123582 0.4465847 ]
 [0.6925782  0.6364584  0.6819166  ... 0.5535278  0.5072264  0.592123  ]
 [0.65501666 0.5968383  0.688257   ... 0.5024107  0.45350498 0.49711692]
 ...
 [0.5391299  0.4365312  0.63625014 ... 0.5339344  0.48305562 0.5366724 ]
 [0.39504427 0.34794715 0.5226912  ... 0.49982813 0.5188925  0.5795343 ]
 [0.5010365  0.34402224 0.5903437  ... 0.5364552  0.5471933  0.5185452 ]]
[[0.33771682 0.27785784 0.26559427 ... 0.18123129 0.19518875 0.1974282 ]
 [0.28140482 0.26396242 0.3544001  ... 0.19414781 0.20980471 0.2064001 ]
 [0.31561092 0.3216098  0.32545492 ... 0.16656087 0.18521224 0.214315  ]
 ...
 [0.6177849  0.5451435  0.72553694 ... 0.37715858 0.41393426 0.4761081 ]
 [0.53179485 0.5609169  0.6898305  ... 0.32866263 0.41131774 0.47761726]
 [0.5921611  0.6126838  0.6097828  ... 0.28921074 0.36880115 0.4058862 ]]


### load partial: id_00 0dB 

In [11]:
# Common prefix of the id_00 / 0 dB pickles; '_norm' and '_abnorm' select the subset.
file_name = 'id00_0dB'

# The 0 dB data gets its own id00_0dB_* names. This cell previously assigned to
# the id00_6dB_* variables, silently replacing the 6 dB data loaded by the
# id_00 6 dB cell. Code that wants the 0 dB data must use the id00_0dB_* names.
# load_data returns (file_locs, data, labels, data_n) for the given key.
(
    id00_0dB_norm,
    id00_0dB_norm_data,
    id00_0dB_norm_labels,
    id00_0dB_norm_data_n,
) = load_data(f'{file_name}_norm', PICKLE_DIR)
print(id00_0dB_norm_data_n)

(
    id00_0dB_abnorm,
    id00_0dB_abnorm_data,
    id00_0dB_abnorm_labels,
    id00_0dB_abnorm_data_n,
) = load_data(f'{file_name}_abnorm', PICKLE_DIR)
print(id00_0dB_abnorm_data_n)

[[0.7248219  0.6460692  0.6492822  ... 0.5508336  0.5041934  0.49037144]
 [0.84171057 0.80061233 0.81332225 ... 0.5977947  0.55973077 0.6350596 ]
 [0.86478496 0.8329002  0.7758017  ... 0.5705719  0.5328736  0.5486187 ]
 ...
 [0.75962055 0.773591   0.7736992  ... 0.54087806 0.4846559  0.55415016]
 [0.79902726 0.81111616 0.6325882  ... 0.52410984 0.5458681  0.60637325]
 [0.9471523  0.8824311  0.7789625  ... 0.5593604  0.57958084 0.55678195]]
[[0.21201208 0.20135647 0.41367725 ... 0.21131293 0.25032797 0.27131045]
 [0.13641445 0.24955566 0.4225957  ... 0.26952186 0.30411327 0.2917224 ]
 [0.37400025 0.29043868 0.30046636 ... 0.21397838 0.2161795  0.29956993]
 ...
 [0.8225706  0.68023056 0.4863877  ... 0.3685288  0.46625975 0.5089374 ]
 [0.7546989  0.72134197 0.7071129  ... 0.32083333 0.38415465 0.4734852 ]
 [0.8540363  0.72514147 0.7368955  ... 0.32999572 0.4417715  0.50300443]]


### load partial: id_00 -6dB 

## load partial: id_02 all dB 

In [None]:
# Un-suffixed pickles for id_02 across all dB levels (presumably file locations,
# by analogy with load_data, which calls the un-suffixed pickle 'file_locs').
id02_all_dB_norm, id02_all_dB_abnorm = (
    load_pickle('id02_all_dB_norm'),
    load_pickle('id02_all_dB_abnorm'),
)
# Matching '_data' pickles for the normal / abnormal recordings.
id02_all_dB_norm_data, id02_all_dB_abnorm_data = (
    load_pickle('id02_all_dB_norm_data'),
    load_pickle('id02_all_dB_abnorm_data'),
)

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
id02_all_dB_norm_data_n, lb, ub = normalize_data(
    id02_all_dB_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)
id02_all_dB_abnorm_data_n, lb, ub = normalize_data(
    id02_all_dB_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)

print(id02_all_dB_norm_data_n)
print(id02_all_dB_abnorm_data_n)

### load partial: id_02 6dB 

### load partial: id_02 0dB 

### load partial: id_02 -6dB 

## load partial: id_04 all dB 

In [None]:
# Un-suffixed pickles for id_04 across all dB levels (presumably file locations,
# by analogy with load_data, which calls the un-suffixed pickle 'file_locs').
id04_all_dB_norm, id04_all_dB_abnorm = (
    load_pickle('id04_all_dB_norm'),
    load_pickle('id04_all_dB_abnorm'),
)
# Matching '_data' pickles for the normal / abnormal recordings.
id04_all_dB_norm_data, id04_all_dB_abnorm_data = (
    load_pickle('id04_all_dB_norm_data'),
    load_pickle('id04_all_dB_abnorm_data'),
)

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
id04_all_dB_norm_data_n, lb, ub = normalize_data(
    id04_all_dB_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)
id04_all_dB_abnorm_data_n, lb, ub = normalize_data(
    id04_all_dB_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)

print(id04_all_dB_norm_data_n)
print(id04_all_dB_abnorm_data_n)

## load partial: id_06 all dB 

In [None]:
# Un-suffixed pickles for id_06 across all dB levels (presumably file locations,
# by analogy with load_data, which calls the un-suffixed pickle 'file_locs').
id06_all_dB_norm, id06_all_dB_abnorm = (
    load_pickle('id06_all_dB_norm'),
    load_pickle('id06_all_dB_abnorm'),
)
# Matching '_data' pickles for the normal / abnormal recordings.
id06_all_dB_norm_data, id06_all_dB_abnorm_data = (
    load_pickle('id06_all_dB_norm_data'),
    load_pickle('id06_all_dB_abnorm_data'),
)

# Scale each matrix to [0, 1]. Empty bounds make normalize_data take the
# per-column min/max from the matrix itself, so the two sets are scaled
# independently; lb/ub end up holding the bounds of the abnormal set.
id06_all_dB_norm_data_n, lb, ub = normalize_data(
    id06_all_dB_norm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)
id06_all_dB_abnorm_data_n, lb, ub = normalize_data(
    id06_all_dB_abnorm_data, lb=[], ub=[], min_v=0.0, max_v=1.0
)

print(id06_all_dB_norm_data_n)
print(id06_all_dB_abnorm_data_n)

# AE