# Pretrain Downprojections

In [19]:
from umap import UMAP

In [20]:
import torch
import numpy as np

In [21]:
device = ("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [22]:
# Import local modules from 'src/utils' as package 'utils'
import sys; sys.path.insert(0, '/mnt/src')

## Load ParallelTrajectoryDatasets

In [23]:
from pathlib import Path
from utils.file_io import read_parallel_trajectory_datasets, save_dataset
from torch.utils.data import DataLoader

In [24]:
data_path = Path("/mnt/data/")

feature_columns = [
    'left_boom_base_yaw_joint', 
    'left_boom_base_pitch_joint',
    'left_boom_main_prismatic_joint',
    'left_boom_second_roll_joint',
    'left_boom_second_yaw_joint',
    'left_boom_top_pitch_joint',
    'left_boom_ee_joint',
    'cable1_property(length,youngsmodule(bend,twist))',
    'cable2_property(length,youngsmodule(bend,twist))',
    'cable3_property(length,youngsmodule(bend,twist))'
]

label_features = [
    ('cable1_lowest_point', np.array([2], dtype=np.int64)),
    ('cable2_lowest_point', np.array([2], dtype=np.int64)),
    ('cable3_lowest_point', np.array([2], dtype=np.int64))
]

normalized_features = [
    ('cable1_property(length,youngsmodule(bend,twist))', np.array([1,2], dtype=np.int64)),
    ('cable2_property(length,youngsmodule(bend,twist))', np.array([1,2], dtype=np.int64)),
    ('cable3_property(length,youngsmodule(bend,twist))', np.array([1,2], dtype=np.int64))
]

In [25]:
data_path = Path("/mnt/data")
train_set, _, validation_set, _ = read_parallel_trajectory_datasets(data_path, 0.8, 0, 0.2, window_size=256, 
                                                                  feature_columns=feature_columns, label_features=label_features, 
                                                                  normalized_features=normalized_features)

Reading .csv files: 1it [00:00,  3.22it/s]
  return np.where(x_max != x_min, (features - x_min) / (x_max - x_min), 1).astype(dtype=np.float32)
  return np.where(x_max != x_min, (features - x_min) / (x_max - x_min), 1).astype(dtype=np.float32)


Reshaping dataframe for learning


In [26]:
features, labels = train_set[0] 
print(features.shape, labels.shape)
input_shape, output_shape = features.shape[-1], labels.shape[-1]
num_parallel_trajectories = features.shape[0]
print(f"Data shape {input_shape} / {output_shape} of total {len(train_set) + len(validation_set)} data rows!")

torch.Size([1, 256, 16]) torch.Size([1, 256, 3])
Data shape 16 / 3 of total 40 data rows!


In [27]:
pretrain_path = Path("/mnt/models/two_stage/")
data_path = pretrain_path / "data"
train_path = data_path / "train_set.pt"
validation_path = data_path / "validation_set.pt"

In [28]:
save_dataset(train_set, train_path)
save_dataset(validation_set, validation_path)

train_dataloader = DataLoader(train_set, batch_size=128, shuffle = True)
validation_dataloader = DataLoader(validation_set, batch_size=128, shuffle = True)

In [29]:
model_dim_params = []

n_neighbors = [3, 5, 7, 10, 15]
model_dim = [output_shape]

for d in model_dim:
    num_heads = []
    for i in range(2, d + 1):
        if d % i == 0: num_heads.append(i)

    for h in num_heads:
        for n in n_neighbors:
            model_dim_params.append((d, h, n))

In [30]:
from models.transformer import pretrain_downprojections
from utils.file_io import load_downprojections

In [31]:
downprojections_dir = pretrain_path / "downprojections"

In [32]:
downprojections = pretrain_downprojections(model_dim_params, train_dataloader, downprojections_dir)

Pretraining downprojections: 100%|█████████████████████████████████████| 5/5 [00:33<00:00,  6.70s/it]


In [33]:
loaded = load_downprojections(downprojections_dir)

In [34]:
features, labels = train_set[0]
diffs = []
for _, _, n_neighbors in model_dim_params:
    loaded_proj = loaded[n_neighbors]
    proj = downprojections[n_neighbors]
    feats = features.flatten(start_dim=0, end_dim=-2).cpu()
    loaded_proj_feats = loaded_proj.transform(feats)
    proj_feats = proj.transform(feats)
    diffs.append(np.sum(loaded_proj_feats - proj_feats))

print(diffs)

[-0.3397229, -0.0011856556, -0.7934592, 0.39487523, 0.02472993]


In [35]:
import shutil

In [36]:
#shutil.rmtree(pretrain_path)

print(pretrain_path.exists())

True
