In [27]:
# Project-local helper, called before any other import in this notebook.
# NOTE(review): presumably this adjusts the import path / working directory so that
# the `src.*` imports in the next cell resolve -- confirm in jupyter_fix. If so,
# this cell has to stay first.
import jupyter_fix

jupyter_fix.fix_jupyter_path()

### Import packages

In [28]:
import datetime
import os
from pathlib import Path

import einops
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from tqdm import tqdm

import src.utility.symbols_loader as sl
from src.architectures.course_autoencoder import Autoencoder, Encoder, Decoder
from src.architectures.letters_dataset import LettersDataset


### Configure PyTorch

In [29]:
# Run on the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Should improve speed as long as the input sizes don't change between batches.
torch.backends.cudnn.benchmark = True

### Set constants

In [30]:
# Directory the model checkpoints are read from. The trailing slash matters:
# file names are appended to it by plain string concatenation.
path = "data/models/"

# Width handed to both Encoder and Decoder, and the column count of the encoded arrays.
LATENT_WIDTH = 32

In [31]:


# One autoencoder per dataset, both built with the same latent width.
emnist_Autoencoder = Autoencoder(Encoder(LATENT_WIDTH), Decoder(LATENT_WIDTH))
kuzushiji_Autoencoder = Autoencoder(Encoder(LATENT_WIDTH), Decoder(LATENT_WIDTH))

# NOTE: `path` must still be "data/models/" here. A later cell rebinds it to the
# encoded-data directory, so re-run the constants cell before re-running this one.
#
# map_location="cpu": the freshly constructed models live on the CPU, and this also
# lets checkpoints that were saved from a GPU load on a machine without CUDA.
emnist_state = torch.load(
    path + '1672367363_emnist_course_autoencoder_32.pth', map_location="cpu"
)
kuzushiji_state = torch.load(
    path + "1672368015_kuzushiji_course_autoencoder_32.pth", map_location="cpu"
)

emnist_Autoencoder.load_state_dict(emnist_state)
# Kept as the last expression so the notebook still displays the key-matching report.
kuzushiji_Autoencoder.load_state_dict(kuzushiji_state)

<All keys matched successfully>

In [32]:
# Only the encoder halves are needed to produce latent codes.
# Both are moved to the target device here (instead of one now and one later) and
# switched to evaluation mode, so that any dropout / batch-norm layers the
# architecture may contain behave deterministically during encoding.
emnist_Encoder = emnist_Autoencoder.encoder.to(device).eval()
kuzushiji_Encoder = kuzushiji_Autoencoder.encoder.to(device).eval()

In [33]:
# Load the EMNIST pages once and reuse the same array for the shape report and the
# dataset, instead of calling the loader twice with identical arguments.
emnist_pages = sl.load_emnist_pages(5, trial='trial_4')
print(emnist_pages.shape)
mnist_dataset = LettersDataset(emnist_pages)

# shuffle=False keeps the encoded rows in dataset order; batch_size=1 yields one
# sample per step.
emnist_val_loader = DataLoader(
    mnist_dataset,
    batch_size=1,
    num_workers=4,
    shuffle=False,
    pin_memory=True,
)

(11688, 32, 32)


In [34]:
# Load the Kuzushiji pages once and reuse the same array for the shape report and
# the dataset, instead of calling the loader twice with identical arguments.
kuzushiji_pages = sl.load_kuzushiji_pages(5, trial='trial_4')
print(kuzushiji_pages.shape)
kuzushiji_dataset = LettersDataset(kuzushiji_pages)

# shuffle=False keeps the encoded rows in dataset order; batch_size=1 yields one
# sample per step.
kuzushiji_val_loader = DataLoader(
    kuzushiji_dataset,
    batch_size=1,
    num_workers=4,
    shuffle=False,
    pin_memory=True,
)

(11750, 32, 32)


In [35]:
# Encode every EMNIST sample; one tensor per batch is collected, in loader order.
emnist_preds = []

# Gradients are not needed for encoding. The context manager is not bound to a
# name: torch.no_grad() yields nothing useful.
with torch.no_grad():
    for batch in tqdm(emnist_val_loader):
        images = batch.to(device)
        predictions = emnist_Encoder(images)

        # Bring each result back to the CPU right away so outputs do not pile up
        # on the device.
        emnist_preds.append(predictions.cpu())

100%|██████████| 11688/11688 [00:19<00:00, 589.24it/s]


In [36]:
# The Kuzushiji encoder has to be on the same device as the batches fed to it.
kuzushiji_Encoder.to(device)

# Encode every Kuzushiji sample; one tensor per batch is collected, in loader order.
kuzushiji_preds = []

# Gradients are not needed for encoding. The context manager is not bound to a
# name: torch.no_grad() yields nothing useful.
with torch.no_grad():
    for batch in tqdm(kuzushiji_val_loader):
        images = batch.to(device)
        predictions = kuzushiji_Encoder(images)

        # Bring each result back to the CPU right away so outputs do not pile up
        # on the device.
        kuzushiji_preds.append(predictions.cpu())

100%|██████████| 11750/11750 [00:19<00:00, 614.77it/s]


In [37]:
# Stack the per-batch encoder outputs into one (n_samples, LATENT_WIDTH) array.
# Concatenating along the batch axis works for any batch size, whereas sizing the
# array by len(emnist_preds) (the number of batches) is only right for batch_size=1.
if emnist_preds:
    # Cast to float64 to keep the dtype of the previous np.zeros-based result.
    emnist_preds_numpy = torch.cat(emnist_preds, dim=0).numpy().astype(np.float64)
else:
    # No predictions at all: keep the empty (0, LATENT_WIDTH) result.
    emnist_preds_numpy = np.zeros((0, LATENT_WIDTH))

assert emnist_preds_numpy.ndim == 2 and emnist_preds_numpy.shape[1] == LATENT_WIDTH


In [38]:
# Sanity check on the array built in the previous cell:
# expect (number of encoded EMNIST samples, LATENT_WIDTH).
emnist_preds_numpy.shape

(11688, 32)

In [39]:
# NOTE: this rebinds `path`, which earlier pointed at the model directory. The
# Kuzushiji save cell further down relies on the new value, and the model-loading
# cell needs the constants cell re-run before it can be executed again.
path = "data/encoded_data/"

# np.savez_compressed does not create missing directories, so make sure it exists.
os.makedirs(path, exist_ok=True)

# Passed positionally, so the array is stored under NumPy's default key "arr_0".
np.savez_compressed(path + "emnist_preds.npz", emnist_preds_numpy)

In [40]:
# Stack the per-batch encoder outputs into one (n_samples, LATENT_WIDTH) array.
# Concatenating along the batch axis works for any batch size, whereas sizing the
# array by len(kuzushiji_preds) (the number of batches) is only right for batch_size=1.
if kuzushiji_preds:
    # Cast to float64 to keep the dtype of the previous np.zeros-based result.
    kuzushiji_preds_numpy = torch.cat(kuzushiji_preds, dim=0).numpy().astype(np.float64)
else:
    # No predictions at all: keep the empty (0, LATENT_WIDTH) result.
    kuzushiji_preds_numpy = np.zeros((0, LATENT_WIDTH))

assert kuzushiji_preds_numpy.ndim == 2 and kuzushiji_preds_numpy.shape[1] == LATENT_WIDTH

In [41]:
# Sanity check on the array built in the previous cell:
# expect (number of encoded Kuzushiji samples, LATENT_WIDTH).
kuzushiji_preds_numpy.shape

(11750, 32)

In [42]:
# Written next to the EMNIST encodings: relies on `path` still pointing at the
# encoded-data directory set in the EMNIST save cell.
kuzushiji_out_file = path + "kuzushiji_preds.npz"
np.savez_compressed(kuzushiji_out_file, kuzushiji_preds_numpy)