In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import pandas as pd
from pathlib import Path

sys.path.append("..")

In [78]:
import tensorflow as tf
from librep.transforms import TSNE
from librep.transforms import UMAP
from librep.datasets.multimodal import TransformMultiModalDataset, ArrayMultiModalDataset
from librep.metrics.dimred_evaluator import DimensionalityReductionQualityReport

In [4]:
dataset = tf.keras.datasets.mnist.load_data(path="mnist.npz")
(train_x, train_y), (test_x, test_y) = dataset

In [7]:
train_x_reordered = train_x.reshape((60000,-1))
print('TRAIN_X_REORDERED', train_x_reordered.shape)

test_x_reordered = test_x.reshape((10000,-1))
print('TEST_X_REORDERED', test_x_reordered.shape)


TRAIN_X_REORDERED (60000, 784)
TEST_X_REORDERED (10000, 784)


In [13]:
mnist_dataset_train = ArrayMultiModalDataset(X=train_x_reordered, y=train_y, window_slices=[(0, 28*28)], 
                                             window_names=["px"])
mnist_dataset_test = ArrayMultiModalDataset(X=test_x_reordered, y=test_y, window_slices=[(0, 28*28)], 
                                             window_names=["px"])

In [14]:
transform_tsne = TSNE()
transformer = TransformMultiModalDataset(transforms=[transform_tsne])
train_applied_tsne = transformer(mnist_dataset_train)
test_applied_tsne = transformer(mnist_dataset_test)



In [175]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=60000)
metrics_train_applied_tsne = metrics_reporter.evaluate([mnist_dataset_train, train_applied_tsne])
print(metrics_train_applied_tsne)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_5650/3781300316.py", line 2, in <cell line: 2>
    metrics_train_applied_tsne = metrics_reporter.evaluate([mnist_dataset_train, train_applied_tsne])
  File "/home/hubert/librep-hiaac/experiments/../librep/metrics/dimred_evaluator.py", line 70, in evaluate
    drms.append(DRMetrics(X_highdim[i[0]:i[1]], X_lowdim[i[0]:i[1]]))
  File "/usr/local/lib/python3.8/dist-packages/pyDRMetrics/pyDRMetrics.py", line 45, in __init__
    self.Dz = pd.DataFrame(pairwise_distances(dfz.values)).values
  File "/usr/local/lib/python3.8/dist-packages/sklearn/metrics/pairwise.py", line 2022, in pairwise_distances
    return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/metrics/pairwise.py", line 1563, in _parallel_pairwise
    return func(

In [77]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=128)
metrics_train_applied_tsne = metrics_reporter.evaluate([mnist_dataset_train, train_applied_tsne])
print(metrics_train_applied_tsne)

{'residual variance (pearson)': 0.8301890626247299, 'residual variance (spearman)': 0.8775358629453783, 'trustworthiness': 0.8225352120810003, 'continuity': 0.8062537115864876, 'co k nearest neighbor size': 0.4635446369478756, 'local continuity meta criterion': 0.33646833802563925, 'local property': 0.46590098965171045, 'global property': 0.651624257710338}


In [36]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=500)
metrics_train_applied_tsne = metrics_reporter.evaluate([mnist_dataset_train, train_applied_tsne])
print(metrics_train_applied_tsne)

{'residual variance (pearson)': 0.8628833319928074, 'residual variance (spearman)': 0.8932985862170509, 'trustworthiness': 0.9176937877653166, 'continuity': 0.8874884473916779, 'co k nearest neighbor size': 0.49067092518370076, 'local continuity meta criterion': 0.45854241112747585, 'local property': 0.4432237923261308, 'global property': 0.6448800761210081}


In [37]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=1000)
metrics_train_applied_tsne = metrics_reporter.evaluate([mnist_dataset_train, train_applied_tsne])
print(metrics_train_applied_tsne)

{'residual variance (pearson)': 0.8711973356490541, 'residual variance (spearman)': 0.8974326725581581, 'trustworthiness': 0.9404433765742684, 'continuity': 0.9142095596783625, 'co k nearest neighbor size': 0.4543731231231231, 'local continuity meta criterion': 0.4383410589948666, 'local property': 0.4350409710732697, 'global property': 0.6426242381124041}


In [81]:
transform_umap = UMAP()
transformer = TransformMultiModalDataset(transforms=[transform_umap])
train_applied_umap = transformer(mnist_dataset_train)
test_applied_umap = transformer(mnist_dataset_test)

In [82]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=128)
metrics_train_applied_umap = metrics_reporter.evaluate([mnist_dataset_train, train_applied_umap])
print(metrics_train_applied_umap)

{'residual variance (pearson)': 0.8789082633392178, 'residual variance (spearman)': 0.9321168529678, 'trustworthiness': 0.780808962260786, 'continuity': 0.7810681328944254, 'co k nearest neighbor size': 0.41886090093019596, 'local continuity meta criterion': 0.2917846020079596, 'local property': 0.4768176073417678, 'global property': 0.630489444423573}


In [83]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=500)
metrics_train_applied_umap = metrics_reporter.evaluate([mnist_dataset_train, train_applied_umap])
print(metrics_train_applied_umap)

{'residual variance (pearson)': 0.9120860777989777, 'residual variance (spearman)': 0.9455947185002721, 'trustworthiness': 0.9261186710895912, 'continuity': 0.8660834047321697, 'co k nearest neighbor size': 0.5160091015364061, 'local continuity meta criterion': 0.48388058748018126, 'local property': 0.447971114794818, 'global property': 0.6236250835603652}


In [84]:
metrics_reporter = DimensionalityReductionQualityReport(sampling_threshold=1000)
metrics_train_applied_umap = metrics_reporter.evaluate([mnist_dataset_train, train_applied_umap])
print(metrics_train_applied_umap)

{'residual variance (pearson)': 0.920167565088619, 'residual variance (spearman)': 0.9489185806764261, 'trustworthiness': 0.9429731560570406, 'continuity': 0.8963074648487076, 'co k nearest neighbor size': 0.4733702452452453, 'local continuity meta criterion': 0.4573381811169888, 'local property': 0.4360080097788009, 'global property': 0.6212713223190469}


In [64]:
from librep.transforms.topo_ae import TopologicalDimensionalityReduction
from librep.estimators.ae.torch.models.topological_ae.topological_ae import TopologicallyRegularizedAutoencoder

In [129]:
topo_transformer = TopologicallyRegularizedAutoencoder()
optimizer = torch.optim.Adam(topo_transformer.parameters(), lr=1e-3, weight_decay=1e-5)
# topo_transformer.fit(train_x)
# autoencoder_model='DeepAE'
data_loader = torch.utils.data.DataLoader(dataset=train_x, batch_size=128, shuffle=True)
for img in data_loader:
    orig = img
    reshaped = 
    print('IMG', img.shape)
    assert 1==0

Using python to compute signatures
IMG torch.Size([128, 28, 28])


AssertionError: 

In [162]:
import torch
from torchvision import transforms as torch_transforms
import numpy as np
torch_transformer = torch_transforms.ToTensor()
X = train_x
patience = 10
max_loss = 1000
num_epochs = 1000
batch = 128
for epoch in range(num_epochs):
    topo_transformer.train()
    for img in data_loader:
        # print(img)
        # assert 1==0
        reshaped = np.reshape(img, (-1,1,28,28))
        reshaped = torch.Tensor(reshaped).float()
        loss, _ = topo_transformer(reshaped)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch:{epoch+1}, Loss:{loss.item():.4f}')
    if max_loss < loss.item():
        if patience == 0:
            break
        patience -= 1
    else:
        max_loss = loss.item()

Epoch:1, Loss:6598.7891


KeyboardInterrupt: 

In [None]:
import torch
from torchvision import transforms as torch_transforms
import numpy as np
torch_transformer = torch_transforms.ToTensor()
X = train_x
patience = 10
max_loss = 1000
num_epochs = 1000
batch = 128
for epoch in range(num_epochs):
    topo_transformer.train()
    for i in range(0,len(X), batch):
        # row = np.array([X[i]])
        # print('row',row.shape)
        orig_value = np.array(X[i:i+batch])
        # print('ORIG_VAL', orig_value.shape)
        reshaped = np.reshape(orig_value, (-1,1, 28, 28))
        # print('RESHAPED', reshaped.shape)
        # print(np.array(X[20*i:20*i + 200]).shape)
        sample = torch.Tensor(reshaped)
        # sample = torch_transformer(reshaped)
        # torch.squeeze(sample)
        loss, _ = topo_transformer(sample)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch:{epoch+1}, Loss:{loss.item():.4f}')
    if max_loss < loss.item():
        if patience == 0:
            break
        patience -= 1
    else:
        max_loss = loss.item()

Epoch:1, Loss:6032.2686


KeyboardInterrupt: 

In [163]:
# ae_model='DeepAE'
transform_topoae = TopologicalDimensionalityReduction()
transform_topoae.fit(train_x)

Using python to compute signatures
Epoch:1, Loss:7108.2979
Epoch:2, Loss:7092.2686
Epoch:3, Loss:7153.6641
Epoch:4, Loss:7176.3931
Epoch:5, Loss:7584.6494
Epoch:6, Loss:7030.2422
Epoch:7, Loss:7398.6040
Epoch:8, Loss:7235.0044
Epoch:9, Loss:7495.4717
Epoch:10, Loss:7051.5610
Epoch:11, Loss:7236.7314
Epoch:12, Loss:6753.2441
Epoch:13, Loss:7045.3691
Epoch:14, Loss:7645.3496
Epoch:15, Loss:7203.8330


<librep.transforms.topo_ae.TopologicalDimensionalityReduction at 0x7f89d7cefa30>

In [166]:
transform_topoae.fit(mnist_dataset_train.X)

Epoch:1, Loss:7023.1533


KeyboardInterrupt: 

In [171]:
transform_topoae.transform(mnist_dataset_test.X)

array([[[[ 6.76202   ,  6.76202   ],
         [ 2.9179964 ,  3.2432091 ]],

        [[42.01605   , 42.01605   ],
         [ 1.1252633 ,  3.9875753 ]],

        [[ 9.8636265 , 13.577072  ],
         [ 8.03079   ,  8.03079   ]],

        ...,

        [[11.331206  ,  5.5345616 ],
         [ 3.228788  ,  0.        ]],

        [[12.323138  ,  0.38622716],
         [12.323138  ,  2.823908  ]],

        [[27.669516  , 27.669516  ],
         [ 5.7188435 ,  5.500233  ]]],


       [[[ 6.6657114 , 18.82138   ],
         [ 0.        , 18.82138   ]],

        [[13.873988  , 18.49059   ],
         [ 8.47049   , 18.49059   ]],

        [[22.105213  , 12.938071  ],
         [22.105213  ,  1.5794593 ]],

        ...,

        [[ 6.8279905 ,  1.345787  ],
         [ 6.293439  ,  1.345787  ]],

        [[16.840761  ,  6.886689  ],
         [26.06776   , 26.06776   ]],

        [[23.972586  , 11.059387  ],
         [23.972586  , 11.059387  ]]],


       [[[ 8.110809  ,  8.429537  ],
         [ 0.458900

In [164]:
transformer = TransformMultiModalDataset(transforms=[transform_topoae])
train_applied_topoae = transformer(mnist_dataset_train)
test_applied_topoae = transformer(mnist_dataset_test)

Epoch:1, Loss:7181.7002
Epoch:2, Loss:7269.8188
Epoch:3, Loss:7055.5576
Epoch:4, Loss:6776.0493
Epoch:5, Loss:7176.3638
Epoch:6, Loss:6871.8525
Epoch:7, Loss:7178.8257
Epoch:8, Loss:6921.0229
Epoch:9, Loss:6966.6348
Epoch:10, Loss:7577.4990
Epoch:11, Loss:7870.6362
Epoch:12, Loss:7291.6445
Epoch:13, Loss:7430.4556
Epoch:14, Loss:7559.8550
TRANSFORM [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)


In [5]:
trains_by_y = [[], [], [], [], [], [], [], [], [], []]

In [None]:
# ADD TUTORIAL
import numpy as np
import pandas as pd

from librep.base.data import SimpleDataset
from librep.transforms import UMAP
from librep.datasets.multimodal import TransformMultiModalDataset, ArrayMultiModalDataset

In [None]:
mnist_dataset_train = ArrayMultiModalDataset(X=train_x_reordered, y=train_y, window_slices=[(0, 28*28)], 
                                             window_names=["px"])
mnist_dataset_test = SimpleDataset(X=test_x_reordered, y=test_y)

In [None]:
transform_umap = UMAP()
transformer = TransformMultiModalDataset(transforms=[transform_umap])
transformed_dataset = transformer(mnist_dataset_train)

In [None]:
print(transformed_dataset.X.shape)

In [None]:
transformer = TransformMultiModalDataset(transforms=[transform_umap])
transformed_dataset = transformer(mnist_dataset_train.X)


In [None]:
mnist_dataset_train.X.shape

In [None]:
transformer.transforms

In [None]:
transform_umap = UMAP()
transformed_dataset = transform_umap.fit_transform(train_x_reordered)
# transformer = TransformMultiModalDataset(transforms=[transform_umap])
# transformed_dataset = transformer(mnist_dataset_train)
print(transformed_dataset)

In [None]:
class MNISTDataset(Dataset):
    def __init__(self, X: list, y: list):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return (self.X[index], self.y[index])

    # (optional) Simple interface to describe the object as string
    def __str__(self) -> str:
        return f"MNISTDataset: {len(self)} samples; {len(set(self.y))} classes"

    def __repr__(self) -> str:
        return str(self)

In [None]:
samples = [[1, 2, 3], [3, 2, 1], [0.5, 0.5, 0.5], [0, 0, 0]]
labels = [0, 0, 1, 1]

dataset = MNISTDataset(X=samples, y=labels)
dataset

In [None]:
dataset[0]

In [None]:
(train_x, train_y), (test_x, test_y) = dataset
trains_by_y = [[], [], [], [], [], [], [], [], [], []]

In [None]:
import numpy as np
from sklearn.manifold import TSNE
tsne_reducer = TSNE()
np_train_x = np.array(train_x)
print(np_train_x.shape)
reordered = np_train_x.reshape((60000,-1))
print(reordered.shape)
train_x_2d = tsne_reducer.fit_transform(np.array(reordered))

In [None]:
for index in range(len(train_y)):
    y_val = train_y[index]
    trains_by_y[y_val].append(train_x_2d[index])
# print(trains_by_y[0])

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
figure(figsize=(10, 10), dpi=80)
for i in range(len(trains_by_y)):
    data = trains_by_y[i]
    plt.plot(np.array(data)[:,0], np.array(data)[:,1], '.', markersize=10, label=str(i))
plt.legend()

In [None]:
from pathlib import Path  # For defining dataset Paths
import sys                # For include librep package

# This must be done if librep is not installed via pip,
# as this directory (examples) is appart from librep package root
sys.path.append("..")

# Third party imports
import pandas as pd
import numpy as np

# Librep imports
from librep.utils.dataset import PandasDatasetsIO          # For quick load train, test and validation CSVs
from librep.datasets.multimodal import PandasMultiModalDataset # Wrap CSVs to librep's `Dataset` interface

In [None]:
# Path for KuHar balanced view with the same activities (and labels numbers) as MotionSense
# It is assumed that the directory will contain (train.csv, test.csv and validation.csv)
kuhar_dataset_path = Path("../data/views/KuHar/balanced_motionsense_equivalent_view")

# Path for Motionsese balanced view
motionsense_dataset_path = Path("../data/views/MotionSense/balanced_view")