In [1]:
#| export
# Run parameters for this notebook. A value of None means "not set here": the
# following cell replaces each None with its default value.
print_flag                    = None
check_memory_usage            = None
time_flag                     = None
show_plots                    = None
reset_kernel                  = None
# Gates the parameter dump printed a couple of cells below.
check_parameters              = True
cuda_device                   = None
# Encoder artifact "name:version"; appended after the last '/' of
# config.enc_artifact further down.
enc_artifact                  = "mvp:v93"

In [2]:
#| export
# Fill in defaults: any parameter still None after the parameter cell gets its
# fallback value; anything that was set explicitly is left untouched.
if print_flag is None:
    print_flag = True
if check_memory_usage is None:
    check_memory_usage = True
if time_flag is None:
    time_flag = True
if show_plots is None:
    show_plots = False
if reset_kernel is None:
    reset_kernel = False
if cuda_device is None:
    cuda_device = 0
if enc_artifact is None:
    enc_artifact = "MVP:latest"

In [3]:
if check_parameters:
    print("--- Check parameters ---")
    # (label, value) pairs, flattened into a single space-separated print call.
    labelled_params = (
        ("print_flag:", print_flag),
        ("check_memory_usage", check_memory_usage),
        ("time_flag:", time_flag),
        ("show_plots:", show_plots),
        ("reset_kernel:", reset_kernel),
        ("cuda_device", cuda_device),
        ("enc_artifact", enc_artifact),
    )
    print(*(part for pair in labelled_params for part in pair))

--- Check parameters ---
print_flag: True check_memory_usage True time_flag: True show_plots: False reset_kernel: False cuda_device 0 enc_artifact mvp:v93


In [4]:
#| export
# This is only needed if the notebook is run in VSCode
import sys
import dvats.utils as ut
# The VSCode case is detected through a '--vscode' entry in sys.argv.
if '--vscode' in sys.argv:
    print("Executing inside vscode")
    # Replace DisplayHandle.update (as exposed by dvats.utils) with dvats' update_patch.
    ut.DisplayHandle.update = ut.update_patch

In [5]:
#| export
import dvats.config as cfg_

# Getting the embeddings

> This notebook gets the embeddings (or latent space) from a multivariate time series
given by an encoder (e.g., an autoencoder)

In [6]:
# Star imports: helpers used below without a prefix (e.g. get_artifact_config_embeddings,
# get_enc_embs, ReferenceArtifact, ifnone) are not defined in this notebook, so they
# presumably come from dvats.all / fastcore.all — TODO confirm.
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
# Public-API client; used further down as the artifact getter when config.use_wandb is false.
wandb_api = wandb.Api()
# NOTE(review): load / FullLoader are not referenced in the cells visible here.
from yaml import load, FullLoader

[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7f3a829e2e90>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [7]:
# Load the embeddings configuration and the job type later passed to wandb.init.
# print_flag=False is forwarded to the helper (presumably silencing its own output — confirm in dvats).
config, job_type = get_artifact_config_embeddings(print_flag = False)

In [8]:
# Replace the artifact name at the end of config.enc_artifact with the requested
# enc_artifact, keeping whatever precedes the last '/'.
# rpartition is used instead of slicing with rfind(): when the path has no '/',
# rfind() returns -1 and the slice silently drops the last character of the old value.
_enc_prefix, _enc_sep, _enc_old_name = config.enc_artifact.rpartition('/')
# String concatenation (not an f-string) so a non-string enc_artifact still raises TypeError.
config.enc_artifact = (_enc_prefix + "/" + enc_artifact) if _enc_sep else enc_artifact

In [9]:
# Display the resulting encoder artifact path.
config.enc_artifact

'mi-santamaria/deepvats/mvp:v93'

In [10]:
# Print the configuration through the dvats.config helper.
cfg_.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:v93
input_ar: None
cpu: False


### Show configuration artifact

In [11]:
# One "key: value" line per configuration entry.
for name, setting in config.items():
    print(f"{name}: {setting}")

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/mvp:v93
input_ar: None
cpu: False


## Build W&B artifact

In [12]:
import os

# Tell W&B which notebook file this run belongs to, and reuse the notebook's
# base name as the run name.
name = "03a_embeddings"
path = os.path.expanduser("~/work/nbs_pipeline/")
os.environ["WANDB_NOTEBOOK_NAME"] = f"{path}{name}.ipynb"
runname = name
print(f"runname: {runname}")

runname: 03a_embeddings


In [13]:
# Start (or resume) the W&B run for this notebook. With use_wandb off, the run is
# created in 'disabled' mode against the 'work-nbs' project with anonymous='must'.
wandb_enabled = config.use_wandb
run = wandb.init(
    name=runname,
    job_type=job_type,
    group=config.wandb_group,
    entity=config.wandb_entity,
    project=config.wandb_project if wandb_enabled else 'work-nbs',
    mode='online' if wandb_enabled else 'disabled',
    anonymous='never' if wandb_enabled else 'must',
    resume='allow',
    config=config,
)

[34m[1mwandb[0m: Currently logged in as: [33mmi-santamaria[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Get trained model artifact

### Build artifact selector
> Workaround to use artifacts offline

In [14]:
# Pick the callable used to fetch artifacts: through the active run when W&B is
# enabled, otherwise through the public API client.
if config.use_wandb:
    artifacts_gettr = run.use_artifact
else:
    artifacts_gettr = wandb_api.artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [15]:
# Fetch the encoder artifact (type 'learner') named in the config.
# NOTE(review): this rebinds enc_artifact — until here the "name:version" parameter —
# to the object returned by the getter, so re-running the earlier cell that
# concatenates enc_artifact into config.enc_artifact presumably fails afterwards.
enc_artifact = artifacts_gettr(config.enc_artifact, type='learner')

In [16]:
# TODO: to_obj() has been seen to fail on the first call and succeed on the second;
# the root cause is unknown, so retry exactly once.
try:
    enc_learner = enc_artifact.to_obj()
except Exception as first_error:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate; report the swallowed error before retrying.
    print(f"enc_artifact.to_obj() failed on first attempt ({first_error!r}); retrying once")
    enc_learner = enc_artifact.to_obj()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


## Get dataset artifact from W&B
### Restore the dataset artifact used for training the encoder. 
> Even if we do not compute the dimensionality reduction over this dataset, we need to know the metadata of the encoder training set, to check that it matches with the dataset that we want to reduce.

In [17]:
# Look up the run that logged the encoder artifact; its config names a dataset
# artifact under 'train_artifact', which is fetched here as type 'dataset'.
enc_run = enc_artifact.logged_by()
enc_artifact_train = artifacts_gettr(enc_run.config['train_artifact'], type='dataset')
enc_artifact_train.name

'toy:v2'

### Specify the dataset artifact that we want to get the embeddings from
> If no artifact is defined, the artifact to reduce will be the one used to train the encoder.

In [18]:
# Use the artifact named in the config if there is one; otherwise fall back to
# the encoder's training artifact, addressed as entity/project/name.
fallback_ar_name = (
    f'{enc_artifact_train.entity}/{enc_artifact_train.project}/{enc_artifact_train.name}'
)
input_ar_name = ifnone(config.input_ar, fallback_ar_name)
# Record the artifact actually used in the run config.
wandb.config.update({'input_ar': input_ar_name}, allow_val_change=True)
input_ar = artifacts_gettr(input_ar_name)
input_ar.name

'toy:v2'

In [19]:
# Turn the input artifact into a frame via its to_df() method and preview the first rows.
df = input_ar.to_df()
df.head()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,T3,T2,T1
1970-01-01 00:00:00,0.741822,0.63718,0.565117
1970-01-01 00:00:01,0.739731,0.629415,0.493513
1970-01-01 00:00:02,0.718757,0.53922,0.46935
1970-01-01 00:00:03,0.730169,0.57767,0.4441
1970-01-01 00:00:04,0.752406,0.57018,0.373008


In [20]:
# Size of the input data before windowing.
df.shape

(550, 3)

In [21]:
# Slice the series into sliding windows; window length and stride are read from
# the encoder run's config. The second value returned by the windowing call is discarded.
window_len = enc_run.config['w']
window_stride = enc_run.config['stride']
make_windows = SlidingWindow(window_len=window_len, stride=window_stride, get_y=[])
enc_input, _ = make_windows(df)
enc_input.shape

(521, 3, 30)

In [22]:
# Run the windows through the encoder learner to obtain the embeddings. config.cpu is
# passed as the cpu flag; to_numpy=True presumably requests a NumPy result (confirm in dvats).
embs = get_enc_embs(enc_input, enc_learner, cpu=config.cpu, to_numpy=True)

--> Check CUDA
--> Ensure empty cache
--> Use CUDA |Get enc embs GPU 
CUDA está disponible
Dispositivo CUDA actual:  0
Nombre del dispositivo CUDA actual:  NVIDIA GeForce RTX 3090
--> Set dataset from X (enc_learn does not contain dls)
--> Get module
--> Get enc embs bs:  1
--> Concat
Fit in GPU
--> reduce
--> 2 numpy


In [23]:
if config.use_wandb:
    # Log the embeddings as a ReferenceArtifact carrying the run config as metadata.
    # `aliases` is passed as a list of strings, the type log_artifact documents,
    # instead of relying on a bare string being accepted.
    embs_artifact = ReferenceArtifact(embs, 'embeddings', metadata=dict(run.config))
    run.log_artifact(embs_artifact, aliases=[f'run-{run.project}-{run.id}'])

In [24]:
# Close the W&B run.
run.finish()

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [None]:
from dvats.imports import beep
# End-of-notebook signal (presumably audible — confirm in dvats.imports).
beep(1)
if reset_kernel:
    import os
    # The os module has no `exit` attribute, so the previous call raised AttributeError.
    # os._exit terminates the kernel process immediately, without running cleanup handlers.
    os._exit(0)