# Verify the Conv-VAE sensorprocessing

Load a pre-trained model specified by an experiment/run trained by Train-Conv-VAE

This notebook runs a number of visualizations that allow to illustrate the performance of the trained encoding. The verification here primarily happens through visual observation. 

In [1]:
import sys
sys.path.append("..")
# adding the Julian-8897-Conv-VAE-PyTorch into the path

from exp_run_config import Config
Config.PROJECTNAME = "BerryPicker"

sys.path.append(Config()["conv_vae"]["code_dir"])
# from encoding_conv_vae.conv_vae import latest_json_and_model

from sensorprocessing import sp_conv_vae
from sensorprocessing import sp_helper
from demonstration.demonstration import Demonstration

import matplotlib.pyplot as plt
# import pathlib
import random

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# At some point in the development, this hack was necessary for some reason. 
# It seems that as of Feb 2025, the code runs on Windows and Linux without it.
#temp = pathlib.PosixPath
#pathlib.PosixPath = pathlib.WindowsPath

***ExpRun**: Loading pointer config file:
	C:\Users\lboloni\.config\BerryPicker\mainsettings.yaml
***ExpRun**: Loading machine-specific config file:
	G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\settings-LotziYoga.yaml
Using device: cuda


In [2]:
# Specify the experiment
experiment = "sensorprocessing_conv_vae"
run = "proprio_128" 
exp = Config().get_experiment(experiment, run)
print(exp)
# import yaml
# import textwrap
# text = yaml.dump(exp.values)
# text = textwrap.indent(text, prefix="    ")
# text = "Experiment:" + "\n" + text
# print(text)

sp = sp_conv_vae.ConvVaeSensorProcessing(exp, device)

***ExpRun**: Configuration for exp/run: sensorprocessing_conv_vae/proprio_128 successfully loaded
Experiment:
    data_dir: c:\Users\lboloni\Documents\Code\_TempData\BerryPicker-experiments\sensorprocessing_conv_vae\proprio_128
    epochs: 5
    exp_run_sys_dep_file: G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\sensorprocessing_conv_vae\proprio_128_sysdep.yaml
    exp_run_sys_indep_file: C:\Users\lboloni\Documents\Code\_Checkouts\BerryPicker\src\experiment_configs\sensorprocessing_conv_vae\proprio_128.yaml
    experiment_name: sensorprocessing_conv_vae
    json_template_name: conv-vae-config-default.json
    latent_size: 128
    model_checkpoint: checkpoint-epoch5.pth
    model_dir: models
    model_name: VAE_Robot
    model_subdir: 0517_195655
    run_name: proprio_128
    save_period: 5
    subrun_name: null
    time_started: '2025-05-17 19:59:54'
    training_data:
    - - freeform
      - '2024_12_26__16_40_20'
    - - freeform
      

In [3]:
print(exp["model_subdir"])
print(exp["model_checkpoint"])

0517_195655
checkpoint-epoch5.pth


## Verify the Conv-VAE by visual reconstruction
We can verify a Conv-VAE model visually based on its ability to recover the input image from the encoding. The intuition here would be that information that is lost during the recovery is not present in the encoding, and thus it won't be usable by the algorithms using this encoding either.

In [None]:
def visualize_VAE(sp, picture_name, axoriginal, axreconstr):
    """Helper function to show the original and the reconstruction in fields of a picture."""
    transform = sp_helper.get_transform_to_sp()
    input, image = sp_helper.load_picturefile_to_tensor(picture_name, transform)
    # Running the input on the output
    output, mu, logvar = sp.model(input)
    # Output: the visual reconstruction
    output_for_pic = output[0].cpu().permute(1, 2, 0).detach().numpy()
    # Showing the input and the reconstruction    
    axoriginal.imshow(image)
    axoriginal.set_title("Original")
    axreconstr.imshow(output_for_pic)
    axreconstr.set_title("Reconstruct")
    return output, mu, logvar

In [20]:
demos = []
# load the demonstrations specified in the experiment validation data
for val in exp["validation_data"]:
    run = val[0]
    demo_name = val[1]
    exp_demo = Config().get_experiment("demonstration", run)
    demo = Demonstration(exp_demo, demo_name)
    demos.append(demo)

# Choose n pictures from the validation set
n = 6
demo = demos[0]
images = []
imagefiles = []
for i in range(demo.metadata["maxsteps"]):
    rnd = random.randint(0, demo.metadata["maxsteps"] - 1)
    imagefiles.append(demo.get_image_path(rnd))
    image, _ = demo.get_image(rnd)
    images.append(image)


***ExpRun**: Experiment default config C:\Users\lboloni\Documents\Code\_Checkouts\BerryPicker\src\experiment_configs\demonstration\_demonstration.yaml was empty, ok.
***ExpRun**: No system dependent experiment file
	 G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\demonstration\freeform_sysdep.yaml,
	 that is ok, proceeding.
***ExpRun**: Configuration for exp/run: demonstration/freeform successfully loaded
***ExpRun**: Experiment default config C:\Users\lboloni\Documents\Code\_Checkouts\BerryPicker\src\experiment_configs\demonstration\_demonstration.yaml was empty, ok.
***ExpRun**: No system dependent experiment file
	 G:\My Drive\LotziStudy\Code\PackageTracking\BerryPicker\settings\experiment-config\LotziYoga\demonstration\freeform_sysdep.yaml,
	 that is ok, proceeding.
***ExpRun**: Configuration for exp/run: demonstration/freeform successfully loaded


In [None]:
from sensorprocessing.sp_helper import get_transform_to_sp, load_picturefile_to_tensor


for imagefile in imagefiles:
    print(imagefile)
    z = sp.process_file(imagefile)

    print(z)


    #sensor_readings, _ = load_picturefile_to_tensor(imagefile, self.transform)
    from torchvision import transforms

    #transform = transforms.Compose([
    #      transforms.ToTensor(),
    #    ])

    # FIXME: the reason why the SP process is not working directly is because 
    # it needs a specific transform, the get_transform_to_robot

# THIS NEEDS TO BE CLEANED UP

    transform = get_transform_to_sp()
    sensor_readings, _ = load_picturefile_to_tensor(imagefile, transform)
    output = sp.process(sensor_readings)
    print(output)

    # image, _ = demo.get_image(rnd)
    # images.append(image)

c:\Users\lboloni\Documents\Code\_TempData\BerryPicker-experiments\demonstration\freeform\2024_12_26__16_40_20\00355_dev2.jpg
[-0.08704153 -0.36291322 -0.3669641  -0.17649347 -0.26547924 -0.17691427
 -0.11555507  0.03489242  0.03677555 -0.21899742  0.18221554  0.01800795
 -0.01776381  0.04714919 -0.27215204  0.1145191  -0.14728534 -0.11139791
 -0.01535051 -0.05899965 -0.28400347  0.27098855 -0.23778377  0.11740203
  0.23907788  0.21675077  0.00949249  0.25590214  0.16167751 -0.43627515
  0.18450563  0.16417779 -0.16075853  0.43970835  0.0644059   0.15498716
 -0.35716334 -0.12747672 -0.08817744 -0.07776044 -0.22514933  0.17943376
 -0.24929316 -0.07954757 -0.14556015 -0.14822924 -0.24588606 -0.1868952
  0.03632686 -0.14902642 -0.01698648 -0.14394058  0.02881269  0.05762321
 -0.17272857  0.02346824  0.10411291 -0.13534006 -0.60246736 -0.12045183
  0.02377321  0.06713808 -0.0374112  -0.28059065 -0.08207196 -0.18089506
  0.26154625  0.04676932  0.22142416 -0.2678932  -0.04356472 -0.02599456


In [None]:
# This cell simply verifies whether we can run the sensorprocessing on files
for image in images:


    z = sp.process(image)
    print(f"The encoding is\n {z}")

ValueError: expected 4D input (got 3D input)

In [None]:
# This cell visualizes the original and reconstructed pictures by going inside 
# the sensorprocessing object and accessing the model
fig, axs = plt.subplots(2, n, figsize=(10, 5))
for i in range(n):
    output, mu, logvar = visualize_VAE(sp, randomjpg[i], axs[0,i], axs[1,i])
    print(f"Pictures{i}\nmu={mu}\nlogvar={logvar}")

## Reconstruction from noisy latent encoding


In [None]:
# Visualize the original and reconstructed pictures 
fig, axs = plt.subplots(3, n, figsize=(10, 5))
for i in range(n):
    output, mu, logvar = visualize_VAE(sp, randomjpg[i], axs[0,i], axs[1,i])
    # print(f"Pictures{i}\nmu={mu}\nlogvar={logvar}")
    # this samples a new z with its logvar
    z2 = sp.model.reparameterize(mu, logvar)
    # adding some noise to the encoding (FIXME: add random noise)
    for j in range(exp["latent_size"]):
        z2[0][j] = z2[0][j] + 0.001
    #output2 = sp.model.decode(z2)
    output2 = sp.model.decode(mu)
    output_for_pic2 = output2[0].cpu().permute(1, 2, 0).detach().numpy()
    axs[2,i].imshow(output_for_pic2)
    axs[2,i].set_title("Noised")


# Generating random samples from the model

In [None]:

# this can be actually done by just calling sp.model.sample!
samples = sp.model.sample(num_samples = 25, current_device=device)
fig, axs = plt.subplots(5, 5, figsize=(10, 10))
i = 0
for x in range(0, 5):
    for y in range(0, 5):
        output_for_pic = samples[i].cpu().permute(1, 2, 0).detach().numpy()
        axs[x][y].imshow(output_for_pic)
        i += 1
