<a href="https://colab.research.google.com/github/rpimaster/OAI/blob/main/02-autoencoder/decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider
from IPython.display import display, clear_output

In [2]:
# architecture
class Autoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel images.

    The encoder applies padded 3x3 convolutions with two 2x max-pools, then
    two unpadded 3x3 convolutions, a sigmoid and a flatten. For a 28x28 input
    this yields 8 * 3 * 3 = 72 latent values, each in (0, 1).

    The decoder reverses the path: it reshapes the flat code to (8, 3, 3),
    grows it with two transposed convolutions, upsamples twice by a factor of
    two, and ends with a sigmoid so output pixels lie in [0, 1].
    """

    def __init__(self):
        super().__init__()
        # Each layer's position inside its nn.Sequential determines its
        # state_dict key (e.g. "encoder.0.weight", "decoder.13.bias"), so the
        # order of these lists must match the checkpoint being loaded.
        encoder_layers = [
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 8, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(8, 8, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            # No padding here: each of these convolutions trims 2 pixels per axis.
            nn.Conv2d(8, 8, kernel_size=3), nn.ReLU(),
            nn.Conv2d(8, 8, kernel_size=3), nn.Sigmoid(),
            nn.Flatten(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Unflatten(1, (8, 3, 3)),
            # Unpadded transposed convolutions add 2 pixels per axis each.
            nn.ConvTranspose2d(8, 8, kernel_size=3), nn.ReLU(),
            nn.ConvTranspose2d(8, 8, kernel_size=3), nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.Conv2d(8, 8, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.Conv2d(16, 16, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 1, kernel_size=3, padding=1), nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the flat latent code and decode it back to an image."""
        return self.decoder(self.encoder(x))

    def __str__(self):
        """Return the encoder description immediately followed by the decoder's."""
        return f"{self.encoder}{self.decoder}"

In [4]:
# Build the network; its trained weights are loaded further down via load_state_dict.
autoencoder = Autoencoder()

In [5]:
!wget http://agentspace.org/download/pytorch_mnist_autoencoder_model.pth
model_name = 'pytorch_mnist_autoencoder_model.pth'
#from google.colab import files
#print('upload',model_name)
#files.upload()
autoencoder.load_state_dict(torch.load(model_name))

--2025-11-05 15:32:15--  http://agentspace.org/download/pytorch_mnist_autoencoder_model.pth
Resolving agentspace.org (agentspace.org)... 62.168.101.9
Connecting to agentspace.org (agentspace.org)|62.168.101.9|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.agentspace.org/download/pytorch_mnist_autoencoder_model.pth [following]
--2025-11-05 15:32:15--  https://www.agentspace.org/download/pytorch_mnist_autoencoder_model.pth
Resolving www.agentspace.org (www.agentspace.org)... 62.168.101.9
Connecting to www.agentspace.org (www.agentspace.org)|62.168.101.9|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 52651 (51K)
Saving to: ‘pytorch_mnist_autoencoder_model.pth.1’


2025-11-05 15:32:16 (156 KB/s) - ‘pytorch_mnist_autoencoder_model.pth.1’ saved [52651/52651]



<All keys matched successfully>

In [6]:
# Take the decoder half of the model and switch it to inference mode.
# Module.eval() returns the module itself, so the assignment and the mode
# switch can be chained; the bare name below displays the layer summary.
decoder = autoencoder.decoder.eval()
decoder

Sequential(
  (0): Unflatten(dim=1, unflattened_size=(8, 3, 3))
  (1): ConvTranspose2d(8, 8, kernel_size=(3, 3), stride=(1, 1))
  (2): ReLU()
  (3): ConvTranspose2d(8, 8, kernel_size=(3, 3), stride=(1, 1))
  (4): ReLU()
  (5): Upsample(scale_factor=2.0, mode='nearest')
  (6): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU()
  (10): Upsample(scale_factor=2.0, mode='nearest')
  (11): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): ReLU()
  (13): Conv2d(16, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (14): Sigmoid()
)

In [7]:
# Colab-only helper: opens a browser file picker so the sample image can be
# placed in the runtime's working directory. This import fails outside Colab.
from google.colab import files
print('upload inp00009.png')
# Bind the return value to `_` so the cell does not display it.
_ = files.upload()

upload inp00009.png


Saving inp00009.png to inp00009.png


In [8]:
# Read the uploaded sample as a single-channel image and encode it.
sample_image = cv2.imread('inp00009.png', cv2.IMREAD_GRAYSCALE)
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of deeper down.
if sample_image is None:
    raise FileNotFoundError("could not read 'inp00009.png'; upload it to the working directory first")
# Scale pixel values by 1/255 and wrap the image as a batched blob for the network.
sample_blob = cv2.dnn.blobFromImage(sample_image, 1.0/255.0)
# Inference only: no autograd graph is needed for the encoder pass.
with torch.no_grad():
    sample_features = autoencoder.encoder(torch.tensor(sample_blob))
# NumPy copy of the latent vector with the batch dimension removed.
features = sample_features.squeeze(0).detach().cpu().numpy()

In [9]:
# Show the dimensions of the encoded sample (batch size x latent length).
print(sample_features.size())

torch.Size([1, 72])


In [10]:
# Show the latent values of the first (only) sample, without autograd info.
print(sample_features.detach()[0])

tensor([0.5754, 0.6163, 0.5524, 0.6218, 0.4943, 0.4683, 0.4881, 0.2794, 0.3320,
        0.4780, 0.4828, 0.3566, 0.5752, 0.5284, 0.4273, 0.4236, 0.4648, 0.5302,
        0.3240, 0.3628, 0.5027, 0.3343, 0.4841, 0.6230, 0.3785, 0.6230, 0.5662,
        0.4372, 0.3531, 0.3809, 0.6386, 0.6662, 0.4974, 0.5390, 0.6289, 0.4678,
        0.3126, 0.4544, 0.4570, 0.3673, 0.4026, 0.4549, 0.3876, 0.5876, 0.5772,
        0.5572, 0.5634, 0.3381, 0.6944, 0.5923, 0.3163, 0.4991, 0.5512, 0.3083,
        0.4340, 0.6226, 0.5650, 0.4831, 0.4854, 0.3728, 0.4570, 0.3826, 0.3475,
        0.3725, 0.4741, 0.4663, 0.5897, 0.6974, 0.6091, 0.4557, 0.5075, 0.3970])


In [11]:
# Editable working copy of the sample's latent vector: detached from autograd
# and cloned so that in-place edits never touch sample_features.
features = sample_features[0].detach().clone()
# Sentinel meaning "no latent index has been selected yet".
last_index = -1

In [12]:
# Function to update and display the generated image
def update_latent(index=0, value=64):
    """Edit one latent component and redraw the decoded image.

    When ``index`` differs from the previously handled one, the value slider
    is moved to show the component currently stored at ``index`` and ``value``
    is ignored. Otherwise ``value`` (an integer on the 0..127 slider scale) is
    written into ``features[index]`` as ``value / 127``.

    In both cases the global ``features`` vector is decoded with ``decoder``
    and the result is displayed as a grayscale image.

    Args:
        index: Position in the latent vector to inspect or edit.
        value: Slider position to store at ``index``.
    """
    global features, last_index
    slider_max = 127  # upper bound of the integer scale used by value_slider

    if last_index != index:
        last_index = index
        # round(), not int(): a component stored as value/127 in float32 may
        # read back as value - epsilon, and truncation would then move the
        # slider down by one every time the index is revisited.
        # NOTE(review): if value_slider is wired to this function through
        # interact, this assignment presumably triggers a second call that
        # takes the else-branch with the value set here.
        value_slider.value = round(features[index].item() * slider_max)
    else:
        features[index] = value / slider_max

    with torch.no_grad():
        # Add the batch dimension for the decoder, then strip the batch and
        # channel dimensions from its output.
        decoded = decoder(features.unsqueeze(0)).squeeze(0).squeeze(0)

    # Convert the decoder output to 8-bit pixels and enlarge without smoothing.
    pixels = (decoded.numpy() * 255).astype(np.uint8)
    enlarged = cv2.resize(pixels, (420, 420), interpolation=cv2.INTER_NEAREST)

    # Replace the previous frame instead of stacking outputs.
    clear_output(wait=True)
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.imshow(enlarged, cmap='gray')
    ax.axis('off')
    display(fig)
    # Close the figure so repeated slider moves do not accumulate open figures.
    plt.close(fig)

In [13]:
# Interactive sliders for exploring the latent space
# Number of latent components available for editing.
latent_dim = features.size(0)
print(latent_dim)
# Index slider selects which latent component to edit.
index_slider = IntSlider(
    value=0,
    min=0,
    max=latent_dim-1,
    step=1,
    description='Index',
)
# Value slider works on an integer 0..127 scale; it starts at the scaled
# value of the first latent component.
value_slider = IntSlider(
    value=int(features[0].item()*127),
    min=0,
    max=127,
    step=1,
    description='Value',
)

72


In [14]:
def interactive_update(index, value):
    """Forward the two slider values to update_latent."""
    update_latent(index=index, value=value)

In [15]:
# Bind the two sliders to interactive_update so it is invoked with their current values.
interact(interactive_update, index=index_slider, value=value_slider)

interactive(children=(IntSlider(value=0, description='Index', max=71), IntSlider(value=73, description='Value'…