In [1]:
import h5py
import numpy as np
import os
import sys
import sklearn.decomposition

sys.path.append("../src")

import localmodule

composers = localmodule.get_composers()
#data_dir = localmodule.get_data_dir()
data_dir = '/Users/vl238/nemisig2018/nemisig2018_data'
dataset_name = localmodule.get_dataset_name()
eigenprogression_name = "_".join([
    dataset_name,
    "eigenprogression-transforms"])
eigenprogression_dir = os.path.join(
    data_dir, eigenprogression_name)
S2s = []
S2s_composers = []
ys = []

for composer_id, composer_str in enumerate(composers):
    S2s_composer = []
    composer_dir = os.path.join(
        eigenprogression_dir, composer_str)
    eigenprogression_names = os.listdir(composer_dir)
    n_pieces = len(eigenprogression_names)

    for piece_id in range(n_pieces):
        eigenprogression_name = eigenprogression_names[piece_id]
        eigenprogression_path = os.path.join(
            composer_dir, eigenprogression_name)

        h5py_file = h5py.File(eigenprogression_path)
        h5py_keys = list(h5py_file.keys())
        h5py_key = h5py_keys[0]
        S2 = h5py_file[h5py_key][:]
        S2s.append(S2)
        S2s_composer.append(S2)
        h5py_file.close()
        ys.append(composer_id)
        
    S2s_composer = np.stack(S2s_composer)
    S2s_composers.append(S2s_composer)
        
S2 = np.stack(S2s)
y = np.array(ys)
        
S2_slices = []
S2_slices.append(S2[:, :, 0, 0, 0, 0])
S2_slices.append(S2[:, :, :, 0, 0, 0])
S2_slices.append(S2[:, :, :, :, 0, 0])
S2_slices.append(S2[:, :, :, :, :, 0])
S2_slices.append(S2)

Xs = []
for S2_slice in S2_slices:
    # Collapse feature tensor into a feature vector.
    X = np.reshape(S2_slice, (S2_slice.shape[0], -1))
    
    # Discard the dimensions with zero variance.
    # They correspond to decreasing scattering time scales.
    X_stds = np.std(X, axis=0)
    #X_stds = np.linalg.norm(X, 2, axis=0)
    sorting_indices = np.argsort(X_stds)[::-1]
    X = X[:, sorting_indices]
    sorted_stds = X_stds[sorting_indices]
    dimensionality = np.where(sorted_stds < 1e-6)[0]
    if len(dimensionality) > 0:
        X = X[:, :dimensionality[0]]
    Xs.append(X)

In [10]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, Normalizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def select_features(X, n_features=1000):
    return X[:, :n_features]
select_features = FunctionTransformer(
    select_features, kw_args={"n_features": 1080})

normalizer = Normalizer()

svc = SVC(kernel="linear", C = 10000.0)

estimators = [
    ("Feature selection", select_features),
    ("Normalization", normalizer),
    ("SVM", svc)]

pipeline = Pipeline(estimators)
params = {"n_features": [1000, 1080, 1100]}
loo = LeaveOneOut()

names = [
    "Wavelets         ",
    "Eigentriads      ",
    "Scattering       ",
    "Eigenprogressions",
    "Spiral           "]

X = Xs[-1]
r = range(53, 55)
y_pred = []
# r = range(49, 59)

for test_id in r:
    y_train = np.delete(y, test_id)
    X_train = np.delete(X, test_id, axis=0)
    grid_search = GridSearchCV(pipeline, param_grid=params, cv=loo)
    grid_search.fit(X_train, y_train)
    print("Val:  {:5.2f}%.".format(100 * grid_search.best_score_))
    best_estimator = grid_search.best_estimator_
    test_pred = best_estimator.predict(
        X_train[test_id, :].reshape(1, -1))
    y_pred.append(test_pred)

y_pred = np.array(y_pred)
print("Test: {:5.2f}%".format(100*accuracy_score(y[r], y_pred)))

ValueError: Invalid parameter n_features for estimator Pipeline. Check the list of available parameters with `estimator.get_params().keys()`.

In [17]:



select_features.transform(X).shape

(107, 1080)

In [136]:
X_stds = np.std(X, axis=0)
sorting_indices = np.argsort(X_stds)[::-1]
X = X[:, sorting_indices]
sorted_stds = X_stds[sorting_indices]

tmp = np.linalg.norm(X, 2, axis=0) / np.linalg.norm(X, 2)
np.cumsum(tmp*tmp)[1650]

0.95084155

In [117]:
np.linalg.norm(np.linalg.norm(X, 2, axis=0) / np.linalg.norm(X, 2))

1.0115258

In [4]:
print("""1000 Features -> Normalizer -> linear SVM (C=10000)
Wavelets           67.29%
Eigentriads        71.03%
Scattering         71.96%
Eigenprogressions  76.64%
Spiral             80.37% 
""")



1000 Features -> Log -> Normalizer -> linear SVM (C=10000)
Wavelets           67.29%
Eigentriads        71.03%
Scattering         71.96%
Eigenprogressions  76.64%
Spiral             80.37% 



In [71]:
def mean_sparsity_ratio(X):
    l1_norms = np.linalg.norm(X, 1, axis=1)
    l2_norms = np.linalg.norm(X, 2, axis=1)
    return np.mean(l1_norms / l2_norms)

list(map(mean_sparsity_ratio, Xs))

[2.5666959, 4.5978212, 6.1332026, 16.925119, 42.431236]

(107, 1872)