# Getting the embeddings

> This notebook gets the embeddings (or latent space) of a multivariate time series 
given by an encoder (e.g., an autoencoder)

In [1]:
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut



[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7f444aa24ac0>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [2]:
# Load the configuration for this notebook together with the W&B job type
# that is later passed to `wandb.init`.
config, job_type = get_artifact_config_embeddings(verbose = 0)

In [3]:
# Print the loaded configuration entries (see the output below).
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:latest
input_ar: None
cpu: False


### Show configuration artifact

In [4]:
# Print every configuration entry as "key: value", one entry per line.
for key in config.keys():
    value = config[key]
    print(f"{key}: {value}")

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:latest
input_ar: None
cpu: False


## Build W&B artifact

In [6]:
import os

# Tell W&B which notebook file this run belongs to, and reuse the notebook's
# base name as the name of the run.
name = "03a_embeddings-hooks"
path = os.path.expanduser("~/work/nbs_pipeline/")
os.environ["WANDB_NOTEBOOK_NAME"] = os.path.join(path, name + ".ipynb")
runname = name
print("runname: ", runname, sep="")

runname: 03a_embeddings-hooks


In [9]:
# Choose the settings that depend on whether W&B tracking is enabled, then
# start (or resume) the run for this notebook.
if config.use_wandb:
    _project, _mode, _anonymous = config.wandb_project, 'online', 'never'
else:
    _project, _mode, _anonymous = 'work-nbs', 'disabled', 'must'

run = wandb.init(
    name=runname,
    entity=config.wandb_entity,
    project=_project,
    group=config.wandb_group,
    job_type=job_type,
    mode=_mode,
    anonymous=_anonymous,
    config=config,
    resume='allow',
)

[34m[1mwandb[0m: Currently logged in as: [33mmi-santamaria[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Get trained model artifact

### Build artifact selector
> Workaround to use artifacts offline

In [10]:
# Pick how artifacts are fetched: through the active run (`run.use_artifact`)
# when W&B is enabled, otherwise through the public API (`wandb_api.artifact`).
artifacts_gettr = run.use_artifact if config.use_wandb else wandb_api.artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [11]:
# Fetch the encoder artifact named in the configuration, requesting type 'learner'.
enc_artifact = artifacts_gettr(config.enc_artifact, type='learner')

In [12]:
# TODO: `to_obj()` has been observed to fail on the first call and succeed on
# the second; the root cause is still unknown, so retry exactly once.
try:
    enc_learner = enc_artifact.to_obj()
except Exception as first_error:
    # Catch `Exception` only, so that interrupting the (large) download with
    # Ctrl-C is not swallowed and does not trigger a second download. Report
    # the first failure so the underlying problem stays visible.
    print(f"enc_artifact.to_obj() failed on the first attempt ({first_error!r}); retrying once")
    enc_learner = enc_artifact.to_obj()

[34m[1mwandb[0m: Downloading large artifact mvp:latest, 1321.41MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:3.0


## Get dataset artifact from W&B
### Restore the dataset artifact used for training the encoder. 
> Even if we do not compute the dimensionality reduction over this dataset, we need to know the metadata of the encoder training set to check that it matches the dataset that we want to reduce.

In [13]:
# Look up the run that logged the encoder and fetch the dataset artifact
# that run recorded as its training set.
enc_run = enc_artifact.logged_by()
_train_artifact_path = enc_run.config['train_artifact']
enc_artifact_train = artifacts_gettr(_train_artifact_path, type='dataset')
enc_artifact_train.name

'toy:v2'

In [14]:
# Print the configuration of the run that logged the encoder.
dvats.config.show_attrdict(enc_run.config)

r: 0.71
w: 30
freq: 1s
alias: toy
epochs: 100
mvp_ws: [10, 30]
stride: 1
time_col: None
data_cols: []
mask_sync: False
use_wandb: True
batch_size: 32
csv_config: {}
data_fpath: ~/data/toy.csv
valid_size: 0.2
mask_future: False
wandb_group: None
analysis_mode: online
artifact_name: toy
mask_stateful: True
norm_by_sample: False
train_artifact: mi-santamaria/deepvats/toy:latest
valid_artifact: None
norm_use_single_batch: False
norm_use_by_single_batch: [False]


### Specify the dataset artifact that we want to get the embeddings from
> If no artifact is defined, the artifact to reduce will be the one used to train the encoder.

In [15]:
# Batch size recorded in the encoder's training run; the same value is passed
# to the embedding computation further down.
enc_run.config['batch_size']

32

In [16]:
# Fall back to the encoder's training artifact when the configuration does
# not name an input artifact.
_train_ar_name = f'{enc_artifact_train.entity}/{enc_artifact_train.project}/{enc_artifact_train.name}'
input_ar_name = ifnone(config.input_ar, _train_ar_name)

# Record the resolved artifact name in the run configuration, then fetch it.
wandb.config.update(dict(input_ar=input_ar_name), allow_val_change=True)
input_ar = artifacts_gettr(input_ar_name)
input_ar.name

'toy:v2'

In [17]:
# Load the input artifact's contents into `df` and preview the first rows.
df = input_ar.to_df()
df.head()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,T3,T2,T1
1970-01-01 00:00:00,0.741822,0.63718,0.565117
1970-01-01 00:00:01,0.739731,0.629415,0.493513
1970-01-01 00:00:02,0.718757,0.53922,0.46935
1970-01-01 00:00:03,0.730169,0.57767,0.4441
1970-01-01 00:00:04,0.752406,0.57018,0.373008


In [18]:
# Shape of the input data as (rows, columns).
df.shape

(550, 3)

In [19]:
# Slice the data into windows using the window length and stride recorded in
# the encoder's training run; no target is extracted (`get_y=[]`).
_window_builder = SlidingWindow(
    window_len=enc_run.config['w'],
    stride=enc_run.config['stride'],
    get_y=[],
)
enc_input, _ = _window_builder(df)
enc_input.shape

(521, 3, 30)

In [20]:
# Start timing; the timer is stopped and reported by the `timer.end()` /
# `timer.show()` cell near the end of the notebook.
timer = ut.Time()
timer.start()

1728121830.848305

### Trying to use hooks

In [25]:
import torch

In [26]:
class VerboseExecution(torch.nn.Module):
    """Wrap a model and print the output of each of its direct children.

    A forward hook is registered on every module yielded by
    ``model.named_children()``. The hooks live on the wrapped model itself,
    so they also fire when ``model`` is called directly, and wrapping the
    same model twice prints every output twice. Call ``remove_hooks()`` to
    detach the hooks registered by this wrapper.

    Attributes:
        model: the wrapped module.
        hook_handles: handles returned by ``register_forward_hook``, one per
            direct child of ``model``.
    """

    def __init__(self, model: torch.nn.Module):
        """Register a printing forward hook on each direct child of ``model``."""
        # `Module.__init__` has to succeed before a submodule can be assigned,
        # so a failure here must propagate instead of being swallowed.
        super().__init__()
        self.model = model
        self.hook_handles = []

        # Register a hook for each layer
        for name, layer in self.model.named_children():
            # The hook reads the child's name back from the module it receives.
            layer.__name__ = name
            handle = layer.register_forward_hook(
                lambda layer, _, output: print(f"{layer.__name__}: {output}")
            )
            # Keep the handle so the hook can be detached later.
            self.hook_handles.append(handle)

    def remove_hooks(self):
        """Detach every forward hook registered by this wrapper."""
        for handle in self.hook_handles:
            handle.remove()
        self.hook_handles.clear()

    def forward(self, *module_args, **module_kwargs):
        """Forward all positional and keyword arguments to the wrapped model."""
        return self.model(*module_args, **module_kwargs)

In [29]:
# Wrap the encoder so that each of its direct children prints its output on a
# forward pass. The hooks are registered on `enc_learner` itself, so they stay
# active for any later cell that calls `enc_learner`.
enc_learner_full = VerboseExecution(enc_learner)

In [41]:
# IPython help lookup for `create_future_mask` (development aid; see the output below).
? dvats.encoder.create_future_mask

[0;31mSignature:[0m  [0mdvats[0m[0;34m.[0m[0mencoder[0m[0;34m.[0m[0mcreate_future_mask[0m[0;34m([0m[0mo[0m[0;34m,[0m [0mr[0m[0;34m=[0m[0;36m0.15[0m[0;34m,[0m [0msync[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      /usr/local/share/miniconda3/envs/env/lib/python3.10/site-packages/tsai/callback/MVP.py
[0;31mType:[0m      function

In [47]:
x_enc = enc_input
# Masking parameters: read them from the encoder's training run so they cannot
# drift from the encoder being analysed. The fallbacks are the values that
# were previously hard-coded in this cell.
r = enc_run.config.get('r', 0.71)
sync = enc_run.config.get('mask_sync', False)
stateful = enc_run.config.get('mask_stateful', True)
# Alternative mask, kept for reference:
#future_mask = dvats.encoder.create_future_mask(torch.from_numpy(x_enc[0]), r, sync=sync)
# The mask is built from the first window only (`x_enc[0]`).
input_mask = dvats.encoder.create_subsequence_mask(torch.from_numpy(x_enc[0]), r, stateful=stateful, sync=sync)


In [50]:
# NOTE(review): this call currently fails with the RuntimeError shown in the
# output below. Presumably `input_mask` (built from the single window
# `x_enc[0]`) does not have the shape the model expects, and `x_enc` appears
# to still be a NumPy array rather than a tensor here. TODO: confirm.
enc_learner_full(x_enc = x_enc, mask = None, input_mask = input_mask)

RuntimeError: Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor

In [21]:
# Display the encoder's module structure.
enc_learner

MOMENTPipeline(
  (normalizer): RevIN()
  (tokenizer): Patching()
  (patch_embedding): PatchEmbedding(
    (value_embedding): Linear(in_features=8, out_features=1024, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
  

In [18]:
# Compute the embeddings of the windows in `enc_input`, reusing the stride and
# batch size recorded in the encoder's training run; the `cpu` flag comes from
# this notebook's configuration.
embs = get_enc_embs_set_stride_set_batch_size(
    X          = enc_input, 
    enc_learn  = enc_learner, 
    stride     = enc_run.config['stride'],
    cpu        = config.cpu, 
    to_numpy   = True,
    batch_size = enc_run.config['batch_size'],
    verbose    = 1
)

--> get_enc_embs_moment
get_enc_embs_moment | Using CUDA
get_enc_embs_moment | Get Outputs
get_enc_embs_moment | Get Embeddings
get_enc_embs_moment -->


In [19]:
# Stop the timer and report the elapsed time (see the output below).
timer.end()
timer.show()

[] Start: 1727276588.427751 | End: 1727276589.7187667 | Duration: 1.291015625 seconds


1.291015625

In [20]:
# Shape of the embeddings; the first dimension matches the number of windows in `enc_input`.
embs.shape

(521, 1024)