# Getting the embeddings

> This notebook gets the embeddings (or latent space) of a multivariate time series, 
as produced by an encoder (e.g., an autoencoder)

In [1]:
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader



[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7f2c40378430>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [2]:
# Load the configuration for this embeddings stage together with the W&B job
# type used when the run is created below. The helper comes from the `dvats`
# star import; presumably it reads the YAML file mentioned above and
# `verbose = 0` silences its diagnostics — TODO confirm against dvats.
config, job_type = get_artifact_config_embeddings(verbose = 0)

In [3]:
# Display the loaded configuration, one `key: value` pair per line.
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:latest
input_ar: None
cpu: False


### Show configuration artifact

In [4]:
# Print every configuration entry as `key: value` using a plain loop over the
# config items (the recorded output matches the `show_attrdict` listing).
for key, value in config.items():
    print(f"{key}: {value}")

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:latest
input_ar: None
cpu: False


## Build W&B artifact

In [5]:
import os

# Name of this notebook (without extension); it doubles as the W&B run name.
name = "03a_embeddings"
# Directory that holds the pipeline notebooks, with `~` expanded to the home dir.
path = os.path.expanduser("~/work/nbs_pipeline/")

# W&B reads this variable to associate the run with the notebook file.
os.environ["WANDB_NOTEBOOK_NAME"] = f"{path}{name}.ipynb"

runname = name
print(f"runname: {runname}")

runname: 03a_embeddings


In [6]:
# Create (or resume) the W&B run for this notebook.
# When `config.use_wandb` is false the run is created in 'disabled' mode,
# under the fallback project 'work-nbs' and with anonymous access set to 'must'.
run = wandb.init(
    name=runname,
    job_type=job_type,
    group=config.wandb_group,
    entity=config.wandb_entity,
    project=config.wandb_project if config.use_wandb else 'work-nbs',
    mode='online' if config.use_wandb else 'disabled',
    anonymous='never' if config.use_wandb else 'must',
    config=config,
    resume='allow',
)

[34m[1mwandb[0m: Currently logged in as: [33mmi-santamaria[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Get trained model artifact

### Build artifact selector
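> Workaround to use artifacts offline: when W&B is disabled, artifacts are fetched through the public API instead of through the run.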

In [7]:
# Pick the callable used to fetch artifacts for the rest of the notebook:
# through the active run when W&B is enabled, otherwise through the public API
# client created in the imports cell.
if config.use_wandb:
    artifacts_gettr = run.use_artifact
else:
    artifacts_gettr = wandb_api.artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [8]:
# Fetch the encoder artifact named in the configuration (`enc_artifact`),
# requesting it with artifact type 'learner'.
enc_artifact = artifacts_gettr(config.enc_artifact, type='learner')

In [9]:
# Restore the trained encoder learner from the artifact.
# TODO: find the root cause — the first `to_obj()` call has been observed to
# fail while a second call succeeds, so the call is retried exactly once.
# Only `Exception` is caught (not a bare `except`), so KeyboardInterrupt and
# SystemExit still propagate, and the first error is reported instead of being
# silently discarded. A failure on the retry is raised as usual.
try:
    enc_learner = enc_artifact.to_obj()
except Exception as first_error:
    print(f"enc_artifact.to_obj() failed on the first attempt ({first_error!r}); retrying once")
    enc_learner = enc_artifact.to_obj()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


## Get dataset artifact from W&B
### Restore the dataset artifact used for training the encoder. 
> Even if we do not compute the dimensionality reduction over this dataset, we need the metadata of the encoder's training set to check that it matches the dataset that we want to reduce.

In [10]:
# Look up the W&B run that logged the encoder artifact; its config holds the
# training hyperparameters reused below (window length, stride, batch size).
enc_run = enc_artifact.logged_by()
# Fetch the dataset artifact recorded under `train_artifact` in that run's config.
enc_artifact_train = artifacts_gettr(enc_run.config['train_artifact'], type='dataset')
# Display the artifact name (including its version) as the cell output.
enc_artifact_train.name

'toy:v2'

In [11]:
# Display the configuration of the run that trained the encoder.
dvats.config.show_attrdict(enc_run.config)

r: 0.71
w: 30
MVP: {'r': 0.71, 'lm': 3, 'crit': None, 'sync': False, 'fname': 'encoder_MVP', 'dropout': 0.1, 'verbose': False, 'stateful': True, 'save_best': True, 'nan_to_num': 0, 'custom_mask': None, 'future_mask': False, 'weights_path': None, 'variable_mask': False, 'subsequence_mask': True}
freq: 1s
alias: toy
n_inp: 1
device: cuda
epochs: 100
frozen: False
mvp_ws: [10, 30]
stride: 1
Learner: {'lr': 0.001, 'wd': None, 'arch': 'tsai.models.InceptionTimePlus.InceptionTimePlus', 'moms': [0.95, 0.85, 0.95], 'path': '.', '_name': '<fastai.learner.Learner object at 0x7f9c2ff38b20>', 'metrics': None, 'opt_func': 'fastai.optimizer.Adam', 'splitter': 'tsai.models.utils.ts_splitter', 'train_bn': True, 'loss_func': {'axis': -1, '_name': {'axis': -1, '_name': 'FlattenedLoss of MSELoss()', 'is_2d': False, 'flatten': True, 'floatify': True}, 'is_2d': False, 'flatten': True, 'floatify': True}, 'model_dir': 'models', 'wd_bn_bias': False, 'default_cbs': True}
Recorder: {'add_time': True, 'train_met

### Specify the dataset artifact that we want to get the embeddings from
> If no input artifact is defined in the configuration (`input_ar` is `None`), the artifact to reduce defaults to the dataset artifact the encoder was trained on.

In [12]:
# Batch size recorded in the encoder's training run; it is passed to the
# embedding computation further down.
enc_run.config['batch_size']

32

In [13]:
# Use the artifact named in the configuration; when `config.input_ar` is None,
# fall back to the fully qualified name (entity/project/name) of the encoder's
# training dataset artifact.
input_ar_name = ifnone(
    config.input_ar, 
    f'{enc_artifact_train.entity}/{enc_artifact_train.project}/{enc_artifact_train.name}'
)
# Record the resolved artifact name in the run config; `allow_val_change=True`
# is needed because `input_ar` was already set when the run was initialised.
wandb.config.update({'input_ar': input_ar_name}, allow_val_change=True)
input_ar = artifacts_gettr(input_ar_name)
# Display the resolved artifact name as the cell output.
input_ar.name

'toy:v2'

In [14]:
# Materialise the input artifact as a DataFrame (`to_df` is not defined in this
# notebook — presumably a dvats extension of the artifact class; TODO confirm)
# and preview its first rows.
df = input_ar.to_df()
df.head()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,T3,T2,T1
1970-01-01 00:00:00,0.741822,0.63718,0.565117
1970-01-01 00:00:01,0.739731,0.629415,0.493513
1970-01-01 00:00:02,0.718757,0.53922,0.46935
1970-01-01 00:00:03,0.730169,0.57767,0.4441
1970-01-01 00:00:04,0.752406,0.57018,0.373008


In [15]:
# (rows, columns) of the input time series.
df.shape

(550, 3)

In [16]:
# Slice the series into windows using the window length (`w`) and stride that
# the encoder was trained with. `get_y=[]` means no target is built, so the
# second returned value is discarded.
enc_input, _ = SlidingWindow(window_len=enc_run.config['w'], 
                             stride=enc_run.config['stride'], 
                             get_y=[])(df)
# In the recorded run (550 rows, w=30, stride=1) this shows (521, 3, 30).
enc_input.shape

(521, 3, 30)

In [17]:
# Batch size of the dataloaders restored with the learner. In the recorded run
# this is 1, which differs from the training batch size stored in `enc_run.config`.
enc_learner.dls.bs

1

In [18]:
# Inspect the dataloaders object attached to the restored learner.
enc_learner.dls

<tsai.data.core.TSDataLoaders at 0x7f2b9ef9e230>

In [19]:
import dvats.utils as ut

In [20]:
# Start a timer around the embedding computation; `start()` returns the start
# timestamp, which is shown as the cell output.
timer = ut.Time()
timer.start()

1727174150.2361288

In [31]:
# Compute the embeddings of every window with the restored encoder.
# Stride and batch size are taken from the encoder's training run, the device
# choice from this notebook's config, and the result is requested as a NumPy
# array (`to_numpy=True`). `verbose=1` prints the progress log.
embs = get_enc_embs_set_stride_set_batch_size(
    X=enc_input,
    enc_learn=enc_learner,
    stride=enc_run.config['stride'],
    batch_size=enc_run.config['batch_size'],
    cpu=config.cpu,
    to_numpy=True,
    verbose=1,
)

--> get_enc_embs_set_stride_set_batch_size
get_enc_embs_set_stride_set_batch_size | Check CUDA | X ~  521
get_enc_embs_set_stride_set_batch_size | CUDA device id: 0
get_enc_embs_set_stride_set_batch_size | CUDA device name:  NVIDIA GeForce RTX 3090
get_enc_embs_set_stride_set_batch_size | Ensure empty cache & move 2 GPU
get_enc_embs_set_stride_set_batch_size | Set dataset from X (enc_learn does not contain dls)
get_enc_embs_set_stride_set_batch_size | Get module
get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len 17
get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.batch_len  1
get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.bs  32
get_enc_embs_set_stride_set_batch_size | Get acts and grads | total_mem  25438126080
get_enc_embs_set_stride_set_batch_size | Get acts and grads | used_mem  3659776
get_enc_embs_set_stride_set_batch_size | Get acts and grads | reserved_mem  10485760
get_enc_embs_set_stride_set_batch_size | Get acts

In [22]:
# Stop the timer and print start, end and duration; the duration in seconds is
# also shown as the cell output.
timer.end()
timer.show()

[] Start: 1727174150.2361288 | End: 1727174151.3655138 | Duration: 1.129384994506836 seconds


1.129384994506836

In [32]:
# Shape of the embeddings array: (521, 128) in the recorded run, i.e. one row
# per input window.
embs.shape

(521, 128)

In [24]:
# Log the embeddings as a W&B artifact of type 'embeddings', attaching the run
# config as metadata. Skipped entirely when W&B is disabled.
if config.use_wandb:
    run.log_artifact(
        ReferenceArtifact(embs, 'embeddings', metadata=dict(run.config)),
        # `aliases` is documented as a list of strings, so the single alias is
        # wrapped in a list rather than passed as a bare string.
        aliases=[f'run-{run.project}-{run.id}'],
    )

In [25]:
# Mark the W&B run as finished.
run.finish()

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…