In [1]:
%load_ext autoreload
%autoreload 2
import sys  
sys.path.insert(1, '../')
# from utils.dataset_utils import OriginalDataset
import torch
import torch.nn as nn
# Report which accelerator this kernel will use. Guarded because
# torch.cuda.current_device() raises on a machine without a usable CUDA device,
# which would abort the very first cell of the notebook.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("CUDA is not available; running on CPU")

NVIDIA GeForce RTX 4080


In [6]:
import os
from utils.dataset_utils import read_img, get_storage
from torchvision.datasets import VisionDataset
import numpy as np

def norm_value(s):
    """Scale 8-bit intensities onto the unit interval.

    Divides ``s`` by 255, so an input in ``[0, 255]`` comes back in ``[0, 1]``.
    Works for any operand that supports true division by an int (the dataset
    in this notebook passes a float32 NumPy image).
    """
    full_scale = 255
    normalised = s / full_scale
    return normalised

def denorm_value(s):
    """Convert unit-interval values back to 8-bit pixel intensities.

    Inverse of ``norm_value``: scales by 255, rounds to the nearest integer and
    clamps to ``[0, 255]`` before casting.

    Args:
        s: tensor, NumPy array or scalar holding normalised values.

    Returns:
        A ``torch.uint8`` tensor with the same shape as ``s``.
    """
    # as_tensor avoids the copy-construct warning torch.tensor() emits when it is
    # handed an existing tensor; detach() drops any autograd history.
    scaled = torch.as_tensor(s, dtype=torch.float32).detach() * 255
    # uint8, not int8: int8 tops out at 127, so intensities 128..255 could not be
    # represented. Round first (float error can leave 199.99998 for 200) and
    # clamp (a decoder may overshoot [0, 1]) so the cast neither truncates nor wraps.
    return scaled.round().clamp(0, 255).to(torch.uint8)

class OriginalDataset(VisionDataset):
    """Dataset over a flat directory of frames named ``idx_<n>.png``.

    Each item is a single image, read with ``read_img``, converted to a float32
    NumPy array and scaled with ``norm_value``.

    Args:
        data_path: directory holding the ``idx_<n>.png`` files.
        color: forwarded to ``read_img`` for every image.
    """

    def __init__(self, data_path: str, color: bool = True):
        # Hand VisionDataset the directory as its ``root``. The previous call
        # passed ``self`` here, storing the dataset object itself as the root.
        super().__init__(data_path)
        self.data_path = data_path
        self.color = color

    def __getitem__(self, idx: int):
        """Return the normalised image stored in ``idx_<idx>.png``.

        Raises:
            IndexError: if ``idx`` is negative or not below ``len(self)``.
            AssertionError: if the expected image file does not exist.
        """
        if idx >= len(self) or idx < 0:
            # needed for iterator stop condition
            raise IndexError
        # the img_file_path existence check
        img_path = f'{self.data_path}/idx_{idx}.png'
        assert os.path.exists(img_path), f"Invalid img_path: {img_path} in {self.data_path}"
        img = np.array(read_img(img_path, self.color), dtype=np.float32)
        return norm_value(img)

    def __len__(self) -> int:
        """Count the non-directory entries of ``data_path`` whose name lacks "resize"."""
        # os.scandir raises FileNotFoundError for a missing directory. The earlier
        # next(os.walk(...)) raised StopIteration in that case, which iteration
        # machinery can mistake for a normal end of sequence.
        with os.scandir(self.data_path) as entries:
            return sum(
                1
                for entry in entries
                if not entry.is_dir() and "resize" not in entry.name
            )

    def __str__(self):
        return f"OriginalDataset({self.data_path})"

    def get_storage_size(self, num_images):
        "returns the summed get_storage() size of the first num_images images"
        # Stream one image at a time instead of materialising all of them first.
        return sum(get_storage(self[i]) for i in range(num_images))

In [7]:


# sample_feature = torch.randn(1, 3, 37, 72)
# next_vector_predictor = NextVectorPredictor()
# pred = next_vector_predictor(sample_feature)
# print(f"{pred.shape = }")
# torch.save(next_vector_predictor.state_dict(), "./transformer_encoder.pt")
        

In [8]:
# Point the dataset at the sample frames and report how many images it exposes.
original_dataset = OriginalDataset('../datasets/droid_100_sample_pictures')
# Use the built-in len() rather than calling the dunder method directly.
len_ = len(original_dataset)
print(len_)

166


In [9]:
# Sanity check: shape of the first sample as returned by the dataset.
original_dataset[0].shape

(180, 320, 3)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import math
from autoencoder_predictor_combined import CNNAutoencoder, OriginalDataset


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/suman97/utn-msc/f5eea314a0c8498398d70e95568ea7bd



Training the model

In [None]:
# Build a fresh CNNAutoencoder and train it on the frames loaded above.
# fit() prints one "Epoch [n/200], Loss: ..." line per epoch (see output below).
model = CNNAutoencoder()
model.fit(original_dataset)

Epoch [ 1/200], Loss: 0.096654
Epoch [ 2/200], Loss: 0.039727
Epoch [ 3/200], Loss: 0.030326
Epoch [ 4/200], Loss: 0.025328
Epoch [ 5/200], Loss: 0.022478
Epoch [ 6/200], Loss: 0.019687
Epoch [ 7/200], Loss: 0.017365
Epoch [ 8/200], Loss: 0.015677
Epoch [ 9/200], Loss: 0.014368
Epoch [10/200], Loss: 0.013124
Epoch [11/200], Loss: 0.012379
Epoch [12/200], Loss: 0.011810
Epoch [13/200], Loss: 0.011329
Epoch [14/200], Loss: 0.010694


In [None]:
# Persist the trained sub-modules through the model's own save helpers.
# NOTE(review): presumably these write ./next_vector_predictor.pt and ./decoder.pt,
# which a later cell loads — confirm against CNNAutoencoder.
model.save_next_model_predictor()
model.save_decoder()

In [None]:
from autoencoder import load_model
import matplotlib.pyplot as plt

# Reload the checkpointed autoencoder; this rebinds `model`, replacing the
# instance trained above.
model = load_model(checkpoint="../checkpoints/lowest_loss_ae.pt")
# Unshuffled loader, used here only to fetch the first batch.
train_loader = torch.utils.data.DataLoader(original_dataset, batch_size=4, shuffle=False)
# Keep just the first image of that batch; the [:1] slice retains the batch axis.
test_image = next(iter(train_loader))[:1]
print(f"{test_image.shape = }")
with torch.no_grad():
    # The dataset yields channels-last images (see the (180, 320, 3) shape above),
    # so move the last axis to position 1 before calling the model ...
    feature, next_feature, reconstructed = model.test(test_image.permute(0, 3, 1, 2))
    # ... and move it back to the end so the output lines up with test_image.
    reconstructed = reconstructed.permute(0, 2, 3, 1)
print(f"{reconstructed.shape = }")
# Spot check: denormalised values at index [0][0][0] of the input and the output.
print(denorm_value(test_image[0][0][0]))
print(denorm_value(reconstructed[0][0][0]))
# Side-by-side view: input on the left, reconstruction on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 3))
axes[0].imshow(denorm_value(test_image.squeeze()))
axes[0].axis('off')
axes[1].imshow(denorm_value(reconstructed.squeeze()))
axes[1].axis('off')
# `feature` is reused by the rollout cell further down.
print(f"{feature.shape= }, {next_feature.shape = }")

In [None]:
from autoencoder import NextVectorPredictor
next_vector_predictor = NextVectorPredictor()
next_vector_predictor.load_state_dict(torch.load("./next_vector_predictor.pt"))
# NOTE(review): this un-pickles a complete module object rather than a state_dict.
# torch.load can execute arbitrary code while unpickling, so only load decoder.pt
# from a trusted source.
decoder = torch.load("./decoder.pt")
print(next_vector_predictor)

# Autoregressive rollout: start from the feature of the first frame (computed in
# the preview cell above) and feed each prediction back in as the next input,
# producing one feature per dataset frame.
# This is pure inference, so switch the module to eval mode and disable autograd.
# Without no_grad every step extends one autograd graph across the whole rollout,
# and the stacked result would keep that graph alive.
next_vector_predictor.eval()
next_feature = feature
features = [next_feature]
with torch.no_grad():
    for _ in range(len(original_dataset) - 1):
        next_feature = next_vector_predictor(next_feature)
        features.append(next_feature)
features = torch.stack(features)

In [None]:
import json
import os
from comet_ml import Experiment

# Location of the Comet settings file. The original absolute path stays as the
# default so existing setups keep working; set COMET_CONFIG_PATH to run the
# notebook from another machine or checkout.
COMET_CONFIG_PATH = os.environ.get(
    "COMET_CONFIG_PATH",
    "/var/lit2425/jenga/suman/UTN/AIR/semester3/common/common_utils/comet_config.json",
)
with open(COMET_CONFIG_PATH, "r") as f:
    comet_config = json.load(f)

# Always bind `exp`, even in local-debug mode, so later cells can test
# `exp is not None` instead of hitting a NameError.
exp = None
if not comet_config["local_debug"]:
    exp = Experiment(**comet_config.get("comet_cfg"))
    exp.set_name(comet_config["experiment_name"])

In [None]:
from torchvision.utils import make_grid
import torch
import numpy as np
import matplotlib.pyplot as plt

import torchvision.transforms.functional as F


plt.rcParams["savefig.bbox"] = 'tight'


def show(imgs):
    """Draw one image tensor, or a list of them, side by side in a single row.

    Every entry is detached, converted with ``F.to_pil_image`` and rendered on
    its own axis with ticks and tick labels removed.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    _, axs = plt.subplots(ncols=len(images), squeeze=False)
    for col, tensor in enumerate(images):
        pil_img = F.to_pil_image(tensor.detach())
        axis = axs[0, col]
        axis.imshow(np.asarray(pil_img))
        axis.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

######
# Decode every predicted feature back to image space and stack the results into
# one batch. This is inference only, so autograd is disabled: otherwise each
# decoded image would carry a graph and the tensors would require grad.
with torch.no_grad():
    decoded_imgs = torch.stack([decoder(feat).squeeze(dim=0) for feat in features])
print(decoded_imgs.shape)
grid = make_grid(decoded_imgs)

# Move the channel axis of the grid to the end before handing it to the logger.
img_grid = grid.permute(1, 2, 0)
print(f"{img_grid.shape = }")
import json

name = "next vector prediction images"
# `exp` only exists (and is non-None) when the Comet cell created an experiment.
# Look it up defensively so a local-debug run still renders the grid below
# instead of stopping on a NameError.
comet_exp = globals().get("exp")
if comet_exp is not None:
    comet_exp.log_image(img_grid, name=name)
else:
    print("The comet experiment could not be logged in!")
# Restore channel-first layout, which show() passes on to to_pil_image.
img_grid = img_grid.permute(2, 0, 1)
show(img_grid)

In [None]:
# Shape of the first decoded frame.
decoded_imgs[0].shape

# @todo Billy can you do this?
We need to encode all 166 images with the encoder, save the encoded vectors, compute the residual, and write a function that returns the total size of the models plus the encoded vectors.

In [5]:
import sys  
sys.path.insert(1, '../')
from autoencoder import load_model, denorm_value, norm_value
from utils.dataset_utils import OriginalDataset
import matplotlib.pyplot as plt
import torch
import numpy as np

# NOTE(review): the recorded output of this cell is a RuntimeError (size mismatch
# in load_state_dict): the checkpoint was saved from a different CNNAutoencoder
# layout than the one currently defined. Retrain or point at a matching checkpoint.
model = load_model(checkpoint="../checkpoints/lowest_loss_ae.pt")
original_dataset = OriginalDataset('../datasets/droid_100_sample_pictures')
len_ = len(original_dataset)
print("start")
# Gather every frame, then stack once. Building a tensor directly from a Python
# list of ndarrays converts element by element and is very slow.
frames = [original_dataset[i] for i in range(len_)]
print(len(frames))
all_images = torch.as_tensor(np.stack(frames), dtype=torch.float)
print(all_images.shape)
# NOTE(review): if utils.dataset_utils.OriginalDataset already returns normalised
# values (the class defined earlier in this notebook does), this presumably
# scales the data a second time — confirm.
all_images = norm_value(all_images)
with torch.no_grad():
    # Channels-last -> channels-first for the model, then back again for display.
    feature, next_feature, reconstructed = model.test(all_images.permute(0, 3, 1, 2))
    reconstructed = reconstructed.permute(0, 2, 3, 1)
print(feature.shape)
# Spot check: denormalised values at index [0][0][0] of the input and the output.
print(denorm_value(all_images[0][0][0]))
print(denorm_value(reconstructed[0][0][0]))
# Side-by-side view of the first frame: input left, reconstruction right.
fig, axes = plt.subplots(1, 2, figsize=(15, 3))
axes[0].imshow(denorm_value(all_images[0].squeeze()))
axes[0].axis('off')
axes[1].imshow(denorm_value(reconstructed[0].squeeze()))
axes[1].axis('off')

  return self.fget.__get__(instance, owner)()


RuntimeError: Error(s) in loading state_dict for CNNAutoencoder:
	size mismatch for encoder.0.weight: copying a param with shape torch.Size([8, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 3, 3, 3]).
	size mismatch for encoder.0.bias: copying a param with shape torch.Size([8]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for encoder.2.weight: copying a param with shape torch.Size([16, 8, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 16, 7, 7]).
	size mismatch for encoder.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for encoder.4.weight: copying a param with shape torch.Size([8, 16, 7, 7]) from checkpoint, the shape in current model is torch.Size([3, 32, 7, 7]).
	size mismatch for encoder.4.bias: copying a param with shape torch.Size([8]) from checkpoint, the shape in current model is torch.Size([3]).
	size mismatch for decoder.0.weight: copying a param with shape torch.Size([8, 16, 7, 7]) from checkpoint, the shape in current model is torch.Size([3, 32, 7, 7]).
	size mismatch for decoder.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for decoder.2.weight: copying a param with shape torch.Size([16, 8, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 16, 7, 7]).
	size mismatch for decoder.2.bias: copying a param with shape torch.Size([8]) from checkpoint, the shape in current model is torch.Size([16]).
	size mismatch for decoder.4.weight: copying a param with shape torch.Size([8, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 3, 3, 3]).

In [None]:
# Uncompressed size of the latent tensor: element count times bytes per element.
# Asking the tensor for both avoids hard-coding a 4-D shape and a 4-byte dtype.
feature_pixel_channel = feature.numel()
feature_bytes = feature_pixel_channel * feature.element_size()
print(f"data size: {feature_bytes / 1024 / 1024} MB")

In [None]:
# Denormalise into fresh int16 arrays under new names. Overwriting `reconstructed`
# and `all_images` made this cell non-idempotent: a second run would denormalise
# data that had already been denormalised. int16 leaves room for negative
# differences between two 8-bit images.
reconstructed_px = np.array(denorm_value(reconstructed), dtype=np.int16)
all_images_px = np.array(denorm_value(all_images), dtype=np.int16)
# Residual between the original frames and their reconstructions.
diff = all_images_px - reconstructed_px
# Total element count, independent of the array's rank.
pixel_channel = diff.size
zero_cnt = np.sum(diff == 0)
# Largest absolute residual counted as "close enough" in the report below.
threshold = 8
threshold_cnt = np.sum(np.abs(diff) <= threshold)
print(f"total number of pixels: {pixel_channel}")
print(f"zero pixels: {zero_cnt}")
print(f"zero pixels ratio: {zero_cnt / pixel_channel}")
print(f"threshold pixels: {threshold_cnt}")
print(f"threshold pixels ratio: {threshold_cnt / pixel_channel}")



In [None]:
import sys  
sys.path.insert(1, '../')
from utils.huffman_encoding import (get_freq_dict,
                                    build_huffman_tree,
                                    generate_huffman_codes)
# Build a Huffman code over the residual values.
freq = get_freq_dict(diff)
root = build_huffman_tree(freq)
huffmanCode = generate_huffman_codes(root)

# Symbols ordered from most to least frequent.
sorted_freq = sorted(freq.items(), key=lambda item: item[1], reverse=True)
# Payload size: each occurrence of a symbol costs the length of its code in bits.
encoded_bits = sum(len(huffmanCode[symbol]) * count for symbol, count in sorted_freq)
# Table overhead: two fields per distinct symbol, costed at 4 bytes of 8 bits each
# (presumably the symbol and its count — confirm against the intended file format).
dict_bits = len(freq) * 2 * 4 * 8
total_bits = encoded_bits + dict_bits
print(f"{len(freq) = }")
print(f"{total_bits = }")
print(f"In Bytes  = {total_bits / 8}")
print(f"In KB     = {total_bits / 8 / 1024}")
print(f"In MB     = {total_bits / 8 / 1024 / 1024}")

In [None]:
# Write only the decoder's weights (its state_dict) to ../checkpoints/test.pt.
# NOTE(review): presumably done to measure the decoder's on-disk size — confirm.
torch.save(model.decoder.state_dict(), "../checkpoints/test.pt")

In [None]:
import fpzip
import os

# Copy the latent features into a float32 NumPy array, the input format for fpzip.
data = np.array(feature, dtype=np.float32) # up to 4d float or double array
print(data.shape)
# Compress data losslessly, interpreting the underlying buffer in C (default) or F order.
compressed_bytes = fpzip.compress(data, precision=0, order='C') # returns byte string
# Write the compressed stream to disk so its size can be read from the file system.
with open("test.fpzip", "wb") as f:
    f.write(compressed_bytes)

# Report the on-disk size of the compressed features in MB.
fpsize = os.path.getsize("test.fpzip")
print(f"fpsize: {fpsize / 1024 / 1024} MB")

# Read the file back and decompress it to exercise the full round trip.
with open("test.fpzip", "rb") as f:
    compressed_bytes = f.read()
# Back to 3d or 4d float or double array, decode as C (default) or F order.
data_again = fpzip.decompress(compressed_bytes, order='C') 
# NOTE(review): only the shape is printed; the values are not compared with `data`.
print(data_again.shape)