In [6]:
import torch
from pathlib import Path
from torchvision import transforms
from utils import ids_to_tokens, img_transformation
from modelArchitecture.modelCustomCNN import Encoder, Decoder
from GUIconverter.GUIconverter import GUIconverter
from IPython.display import display, HTML, Image
from vocab import Vocab
from PIL import Image

In [7]:
# Configuration parameters
model_file_path = "./ED--epoch-85--loss-0.01651.pth"
img_crop_size = 224
# NOTE(review): `seed` is defined here but no visible cell applies it to an RNG.
seed = 42

# Load the saved checkpoint.
# map_location="cpu" lets a checkpoint written from a CUDA device load on a
# CPU-only machine; the models and the input tensor in this notebook are
# never moved to another device, so CPU is the right target.
# SECURITY: torch.load unpickles arbitrary Python objects (the checkpoint
# stores a `vocab` object next to the weights) -- only load trusted files.
loaded_model = torch.load(model_file_path, map_location="cpu")
vocab = loaded_model['vocab']

# Architecture hyperparameters. load_state_dict below is strict by default,
# so these must produce layers whose shapes match the stored weights.
embed_size = 64
hidden_size = 256
num_layers = 2

encoder = Encoder(embed_size)
decoder = Decoder(embed_size, hidden_size, len(vocab), num_layers)

# Load model weights. The final call is left as the cell's last expression so
# its key-matching report is displayed as the cell output.
encoder.load_state_dict(loaded_model["encoder_model_state_dict"])
decoder.load_state_dict(loaded_model["decoder_model_state_dict"])

<All keys matched successfully>

In [8]:
# Put the encoder into evaluation mode before inference (this changes how its
# Dropout and BatchNorm layers behave). eval() returns the module itself, so
# the cell output is the encoder's architecture summary.
encoder.eval()

Encoder(
  (custom_cnn): CustomCNN(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout1): Dropout(p=0.15, inplace=False)
    (dropout2): Dropout(p=0.25, inplace=False)
    (dropout3): Dropout(p=0.35, inplace=False)
    (activation): LeakyReLU(negative_slope=0.01)
    (fc1): Linear(in_features=50176, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=64, bias=True)
  )
  (BatchNorm): BatchNorm1d(64, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
)

In [9]:
# Put the decoder into evaluation mode before inference. eval() returns the
# module itself, so the cell output is the decoder's architecture summary.
decoder.eval()

Decoder(
  (embed): Embedding(17, 64)
  (lstm): LSTM(64, 256, num_layers=2, batch_first=True)
  (linear): Linear(in_features=256, out_features=17, bias=True)
)

In [10]:
# Load the image
image_path = './viewer2.png'  # Change to your image's path

# `Image` is PIL's module here: the `from PIL import Image` import comes after
# (and therefore shadows) `IPython.display.Image`.
# Open inside a context manager so the underlying file handle is closed as
# soon as the pixels are read; convert() returns a new in-memory RGB image
# that stays valid after the file is closed.
with Image.open(image_path) as source_image:
    image = source_image.convert('RGB')

# Build the project's preprocessing pipeline for the configured crop size and
# apply it to the RGB image.
transform = img_transformation(img_crop_size)
transformed_image = transform(image)

In [11]:
# Model prediction
# Inference only: disable autograd so no computation graph is built or kept
# alive for the forward pass.
with torch.no_grad():
    # Unsqueeze to add the batch dimension the encoder expects.
    features = encoder(transformed_image.unsqueeze(0))
    # detach() replaces the legacy `.data` accessor before the NumPy conversion.
    predicted_ids = decoder.sample(features).detach().cpu().numpy()

# Presumably maps the predicted ids back to vocabulary tokens -- confirm in utils.
prediction = ids_to_tokens(vocab, predicted_ids)

# Convert to HTML
transpiler = GUIconverter(style='style6')
predicted_html_string = transpiler.transpile(prediction, insert_random_text=True)

In [12]:
def display_html_string(html_string):
    """Render an HTML string inline as notebook output.

    Args:
        html_string: Markup to wrap in ``IPython.display.HTML`` and display.

    Returns:
        None. The rendered page appears as the cell's display output.
    """
    display(HTML(html_string))

In [13]:
# Print the generated HTML source as plain text so the markup can be inspected.
print(predicted_html_string)

<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
<style>
.body { background-color: #0597F2; margin: 0px; }
.header { background-color: #3321A6; padding-left: 25px; }
nav ul { list-style: none; padding: 10px; display: flex; border-radius: 4px; margin: 0px; }
nav ul li { padding: 10px; flex: 1; text-align: center; margin: 0px; }
.flex-container { display: flex; flex-wrap: wrap; justify-content: space-between; margin: 10px 30px; }
.flex-item { padding: 20px; background-color: #F2D8EB; border-radius: 5px; margin: 10px; box-sizing: border-box; }
.half { flex: 0 0 48%; }
.full { flex: 0 0 99%; }
.quarter { flex: 0 0 23%; }
a.button { padding: 10px 15px; background-color: #0583F2; color: white; border-radius: 4px; display: inline-block; text-decoration: none; transition: transform 0.3s; }
a.button:hover { transform: scale(1.05); }
a.button-header { padding: 10px 15px; margin: 10px 30px 10px 5px; background-colo

In [14]:
# Render the generated page inline in the notebook output.
display_html_string(predicted_html_string)