In [1]:
import h5py
import numpy as np
import os
import sys
import sklearn.decomposition

sys.path.append("../src")

import localmodule

# Locate the directory that holds one sub-folder of HDF5 files per composer.
composers = localmodule.get_composers()
#data_dir = localmodule.get_data_dir()
# NOTE(review): machine-specific absolute path; the commented-out call above
# looks like the portable alternative -- confirm before sharing the notebook.
data_dir = '/Users/vl238/nemisig2018/nemisig2018_data'
dataset_name = localmodule.get_dataset_name()
eigenprogression_name = "_".join([
    dataset_name,
    "eigenprogression-transforms"])
eigenprogression_dir = os.path.join(
    data_dir, eigenprogression_name)

S2s = []            # one array per piece, all composers pooled together
S2s_composers = []  # one stacked array per composer
ys = []             # composer index of every piece, aligned with S2s

for composer_id, composer_str in enumerate(composers):
    composer_dir = os.path.join(
        eigenprogression_dir, composer_str)

    # os.listdir returns entries in arbitrary order, so sort them to make
    # the row order of S2 / y reproducible across runs and machines.
    # Hidden entries (e.g. ".DS_Store") are skipped: they are not HDF5 files.
    piece_names = sorted(
        entry for entry in os.listdir(composer_dir)
        if not entry.startswith("."))

    S2s_composer = []
    for piece_name in piece_names:
        # A dedicated loop variable keeps `eigenprogression_name` (the
        # dataset folder name defined above) from being overwritten.
        piece_path = os.path.join(composer_dir, piece_name)

        # Open read-only, and let the context manager close the handle even
        # if reading the dataset raises.
        with h5py.File(piece_path, "r") as h5py_file:
            # Each file is read through its first key only.
            h5py_key = list(h5py_file.keys())[0]
            S2 = h5py_file[h5py_key][:]

        S2s.append(S2)
        S2s_composer.append(S2)
        ys.append(composer_id)

    S2s_composer = np.stack(S2s_composer)
    S2s_composers.append(S2s_composer)

# Stack all pieces along a new leading axis (one row per piece).
S2 = np.stack(S2s)


y = np.array(ys)
        
# Nested family of feature tensors: each entry keeps one more trailing axis
# of S2 than the previous one (the dropped axes are fixed at index 0), and
# the last entry is the full tensor.
S2_slices = [
    S2[:, :, 0, 0, 0, 0],
    S2[:, :, :, 0, 0, 0],
    S2[:, :, :, :, 0, 0],
    S2[:, :, :, :, :, 0],
    S2,
]

Xs = []
for S2_slice in S2_slices:
    # Flatten everything but the leading (piece) axis into one feature axis.
    X = S2_slice.reshape(S2_slice.shape[0], -1)

    # Order the columns by decreasing standard deviation across pieces.
    column_stds = X.std(axis=0)
    descending_order = np.argsort(column_stds)[::-1]
    X = X[:, descending_order]
    descending_stds = column_stds[descending_order]

    # Cut at the first column whose standard deviation is (numerically)
    # zero; every later column is at most as variable. The original note
    # says these columns correspond to decreasing scattering time scales.
    constant_columns = np.flatnonzero(descending_stds < 1e-6)
    if constant_columns.size > 0:
        X = X[:, :constant_columns[0]]

    Xs.append(X)

In [9]:
# Number of feature columns kept for each feature matrix in Xs.
[X.shape[1] for X in Xs]

[8, 24, 129, 1677, 8385]

In [3]:
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, Normalizer
from sklearn.svm import SVC

# Baseline experiment: Normalizer (default settings) followed by a linear
# SVM, scored with leave-one-out cross-validation on every feature matrix.
log_transformer = FunctionTransformer(np.log)  # defined but not used below
normalizer = Normalizer()
svc = SVC(kernel="linear", C=10000.0)

estimators = [("normalizer", normalizer), ("SVM", svc)]
pipe = Pipeline(estimators)

# The grid holds a single candidate (the normalizer already in the
# pipeline), so GridSearchCV acts purely as a leave-one-out scorer here.
params = {"normalizer": [normalizer]}
loo = LeaveOneOut()

# Display labels, padded to equal width; they pair up with Xs in order.
names = [
    "Wavelets         ",
    "Eigentriads      ",
    "Scattering       ",
    "Eigenprogressions",
    "Spiral           "]

for name, X in zip(names, Xs):
    grid_search = GridSearchCV(pipe, param_grid=params, cv=loo, n_jobs=-1)
    grid_search.fit(X, y)
    accuracy_percent = 100 * grid_search.best_score_
    print(name + "  {:5.2f}%".format(accuracy_percent))
    grid_search.best_estimator_

Wavelets           67.29%
Eigentriads        71.03%
Scattering         71.96%
Eigenprogressions  76.64%
Spiral             77.57%


In [4]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, Normalizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import time
from tqdm import tqdm


def select_features(X, n_features=1000):
    """Return the leading columns of a 2-D feature matrix.

    Parameters
    ----------
    X : array supporting ``X[:, slice]`` indexing
        Feature matrix with one row per sample.
    n_features : int, default 1000
        Upper bound of the column slice; fewer columns are returned when
        ``X`` is narrower than that.

    Returns
    -------
    The sub-array ``X[:, :n_features]``.
    """
    leading_columns = slice(None, n_features)
    return X[:, leading_columns]
# Wrap the column selector so it can be used as a Pipeline step; from here
# on `select_features` names the transformer, not the plain function.
select_features = FunctionTransformer(
    select_features)

normalizer = Normalizer()
svc = SVC(kernel="linear", C = 10000.0)
estimators = [
    ("select_features", select_features),
    ("normalizer", normalizer),
    ("SVM", svc)]
pipeline = Pipeline(estimators)
# Candidate values for the number of leading columns kept by the selector.
# Only one value is listed, so the inner search has a single candidate.
param_grid = [
    {"select_features__kw_args":
         [{"n_features": n} for n in [1000]]}]
loo = LeaveOneOut()
X = Xs[-1]
y_pred = []

# Outer leave-one-out loop: hold out one sample, run the (inner,
# leave-one-out) grid search on the remaining samples, then predict the
# held-out sample with the refitted best pipeline.
for test_id in tqdm(range(len(y))):
    y_train = np.delete(y, test_id)
    X_train = np.delete(X, test_id, axis=0)
    grid_search = GridSearchCV(
        pipeline, param_grid=param_grid, cv=loo)
    grid_search.fit(X_train, y_train)
    best_estimator = grid_search.best_estimator_
    test_pred = best_estimator.predict(X[test_id, :].reshape(1, -1))
    # predict() returns a length-1 array for a single row. Store the scalar
    # label so that y_pred ends up with shape (n,), matching y; appending
    # the array itself would give shape (n, 1), and an elementwise
    # comparison with y would then silently broadcast to (n, n).
    y_pred.append(test_pred[0])

y_pred = np.array(y_pred)
print("Test: {:5.2f}%".format(100*accuracy_score(y, y_pred)))
sota_y_pred = y_pred

 15%|█▍        | 16/107 [00:51<04:51,  3.20s/it]

KeyboardInterrupt: 

In [None]:
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, Normalizer
from sklearn.svm import SVC


# Variant of the baseline: linear SVM with C=1000 on at most the first 1000
# columns of each feature matrix, scored by leave-one-out cross-validation.
normalizer = Normalizer()
svc = SVC(kernel="linear", C=1000.0)

estimators = [("normalizer", normalizer), ("SVM", svc)]
pipe = Pipeline(estimators)

# The only candidate replaces the "normalizer" step with None, which by
# scikit-learn's Pipeline convention turns that step off.
params = {"normalizer": [None]}
loo = LeaveOneOut()

# Display labels, padded to equal width; they pair up with Xs in order.
names = [
    "Wavelets         ",
    "Eigentriads      ",
    "Scattering       ",
    "Eigenprogressions",
    "Spiral           "]

for name, X in zip(names, Xs):
    grid_search = GridSearchCV(pipe, param_grid=params, cv=loo, n_jobs=-1)
    # A different column window, X[:, 5:1080], was tried here previously.
    grid_search.fit(X[:, :1000], y)
    accuracy_percent = 100 * grid_search.best_score_
    print(name + "  {:5.2f}%".format(accuracy_percent))
    grid_search.best_estimator_

In [None]:
from sklearn.decomposition import DictionaryLearning
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, Normalizer
from sklearn.svm import SVC


# NMF encoding (100 components) -> Normalizer -> linear SVM, scored with
# leave-one-out cross-validation on the first 100 columns of each matrix.
normalizer = Normalizer()
# Seed NMF: whenever it falls back to a random initialisation, an unseeded
# model gives scores that change from one run of the notebook to the next.
coder = sklearn.decomposition.NMF(n_components=100, random_state=0)
svc = SVC(kernel="linear", C=1000.0)


estimators = [
    ("coder", coder),
    ("normalizer", normalizer),
    ("SVM", svc)]
pipe = Pipeline(estimators)
# Single-candidate grid: GridSearchCV is used only as a leave-one-out scorer.
params = dict(normalizer = [normalizer])
loo = LeaveOneOut()

# Display labels, padded to equal width; they pair up with Xs in order.
names = [
    "Wavelets         ",
    "Eigentriads      ",
    "Scattering       ",
    "Eigenprogressions",
    "Spiral           "]

for name, X in zip(names, Xs):
    grid_search = GridSearchCV(pipe, param_grid=params, cv=loo, n_jobs=-1)
    # Matrices narrower than 100 columns are passed whole, so for those the
    # number of NMF components exceeds the number of input features.
    grid_search.fit(X[:,:100], y)
    print(name + "  {:5.2f}%".format(100 * grid_search.best_score_))
    grid_search.best_estimator_

In [87]:
# Fit a DictionaryLearning model (fit_algorithm="cd") on columns 5 through
# 1079 of X. X is not assigned in this cell: it is whatever an earlier cell
# left bound to that name. The recorded run ended in a KeyboardInterrupt.
DictionaryLearning(fit_algorithm="cd").fit(X[:, 5:1080])

KeyboardInterrupt: 

In [71]:
def mean_sparsity_ratio(X):
    """Average, over the rows of ``X``, of the L1-norm / L2-norm ratio.

    Parameters
    ----------
    X : 2-D array-like
        One sample per row.

    Returns
    -------
    The mean of ``||row||_1 / ||row||_2`` taken over all rows. A row that
    is entirely zero yields a 0/0 division and therefore a NaN result.
    """
    row_l1 = np.linalg.norm(X, ord=1, axis=1)
    row_l2 = np.linalg.norm(X, ord=2, axis=1)
    ratios = row_l1 / row_l2
    return np.mean(ratios)

# Apply mean_sparsity_ratio to every feature matrix in Xs, in order.
list(map(mean_sparsity_ratio, Xs))

[2.5666959, 4.5978212, 6.1332026, 16.925119, 42.431236]

In [None]:
# Show the entries of y at positions 49 through 59.
y[49:60]