In [None]:
import json
import os
from glob import glob

import cv2
import numpy as np
import torch
from tqdm import tqdm

from model import VAE

# BSI dictionary: maps a mechanism-type string (e.g. 'PRPR') to its stored value.
# A forward slash resolves on Windows, Linux and macOS alike; the previous
# backslash form relied on an invalid string escape and only worked on Windows.
file_path = './KV_468.json'

# Open and read the JSON file
with open(file_path, 'r') as file:
    KVdict = json.load(file)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device, torch.cuda.is_available())

checkpoint_path = "./weights/lat_10.ckpt"
# checkpoint_path = "./anar_lat_25_052324.ckpt"
# map_location remaps tensors saved on a GPU so the checkpoint also loads on a
# CPU-only machine.
checkpoint = torch.load(checkpoint_path, map_location=device)
# NOTE(review): `checkpoint` is not applied to `model` anywhere in this notebook,
# so unless VAE.__init__ restores weights itself the encoder below runs with
# freshly initialised parameters. Presumably something like
# `model.load_state_dict(checkpoint["state_dict"])` is intended -- TODO confirm
# the checkpoint's key layout before enabling it.
model = VAE(10).eval().to(device)

image_folder = './outputs-4bar/'
mechType = 'PRPR'
value = KVdict[mechType]
z_folder = './outputs-z/'
e_folder = './outputs-encoded/'
# glob returns files in an OS-dependent order; sorting makes the subset that is
# later processed (and the row order of the saved arrays) reproducible.
imgStrings = sorted(glob(image_folder + mechType + '/*'))



In [None]:
# Read data from string 
def process_string_mech(dir, toNpy = True):
    """Parse the numbers and mechanism type encoded in an image file name.

    The file name (directory removed, everything from the first '.j' onward
    dropped) is read as whitespace-separated tokens: a run of floats, one
    non-numeric token such as ``RRRP``, then another run of floats. Example:

        ./outputs-4bar/-0.001 2.728 5.504 -1.565 ... 3.601 RRRP  0.42 0.026 ... 0. 0. 1. .jpg

    Parameters
    ----------
    dir : str
        Path of the image file. Both forward slashes and backslashes are
        accepted as directory separators, so paths produced by glob on
        Windows, Linux and macOS all parse the same way.
    toNpy : bool, default True
        When True, convert the two float runs to NumPy containers (see Returns).

    Returns
    -------
    floats_before : list[float] or numpy.ndarray
        Floats found before the non-numeric token. With ``toNpy`` an array
        reshaped to ``(-1, 2)``.
    letter_string : str or None
        The last token that could not be parsed as a float, or None when
        every token is numeric.
    floats_after : list[float] or numpy.matrix
        Floats found after the non-numeric token. With ``toNpy`` a 3x3
        ``numpy.matrix``.

    Raises
    ------
    ValueError
        With ``toNpy=True`` when the floats before the token cannot be paired
        up, or the floats after it are not exactly nine values.
    """
    # Normalise separators first: splitting on the backslash alone left the
    # directory prefix glued to the first number for '/'-separated paths.
    file_name = dir.replace('\\', '/').rsplit('/', 1)[-1]
    # Drop the extension ('.jpg', '.jpeg', ...); the floats in the name never
    # contain '.j', so the first occurrence is the extension.
    input_string = file_name.split('.j')[0]

    floats_before = []
    floats_after = []
    letter_string = None

    for part in input_string.split():
        try:
            num = float(part)
        except ValueError:
            # Not a number: this token is the mechanism-type string.
            letter_string = part
            continue
        # Numbers seen before the type string go to the first run, the rest to the second.
        if letter_string is None:
            floats_before.append(num)
        else:
            floats_after.append(num)

    if toNpy:
        floats_before = np.array(floats_before).reshape((-1, 2))
        # np.matrix is kept (rather than ndarray) so existing callers that rely
        # on matrix semantics keep working.
        floats_after = np.matrix(floats_after).reshape((3, 3))

    # No length check is done on floats_before here; add one that matches the
    # mechanism being processed if malformed file names need to be detected.
    return floats_before, letter_string, floats_after

In [None]:
# Process at most 5000 images, but never more than glob actually found, so the
# index loop below cannot run past the end of imgStrings.
setSize = min(5000, len(imgStrings))
batchSize = 1000
batchImg = []
result_zSet = []
result_featSet = []

if setSize == 0:
    raise ValueError("imgStrings is empty - no images to encode")


def encode_batch(batch):
    """Run a list of grayscale images through the encoder and return latent samples.

    Parameters
    ----------
    batch : list of 2-D numpy arrays, all of the same shape.

    Returns
    -------
    numpy.ndarray
        Output of ``model.reparameterize`` for the batch, moved to the CPU.
    """
    # (B, H, W) -> (B, 1, H, W): add the single channel axis the encoder is fed with.
    images = torch.from_numpy(np.stack(batch)[:, np.newaxis]).float().to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        x = model.encoder(images)
        # The encoder output is split column-wise into the two halves passed to reparameterize.
        mean, logvar = x[:, : model.latent_dim], x[:, model.latent_dim :]
        # NOTE(review): if reparameterize draws random noise, saved latents differ
        # from run to run; seed torch or use `mean` if determinism is required.
        z = model.reparameterize(mean, logvar)
    return z.cpu().numpy()


for i in tqdm(range(setSize)):
    img = cv2.imread(imgStrings[i], cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"cv2.imread could not decode image: {imgStrings[i]!r}")
    batchImg.append(img / 255)  # This /255 works better than not doing it
    floats_before, letter_string, floats_after = process_string_mech(imgStrings[i], toNpy = False)
    # One flat feature row per image: floats before, the KVdict entry for the type, floats after.
    result_featSet.append(np.array(floats_before + [KVdict[letter_string]] + floats_after, dtype= float).flatten().tolist())
    if len(batchImg) >= batchSize:
        result_zSet.append(encode_batch(batchImg))
        batchImg = []

# Encode whatever is left over when setSize is not a multiple of batchSize.
if len(batchImg) > 0:
    result_zSet.append(encode_batch(batchImg))
    batchImg = []

result_zSet = np.concatenate(result_zSet)



In [None]:
# Tag used in the output file names.
date = '06202024'

# Numeric id per mechanism type used in the output file names.
# NOTE(review): the meaning of these numbers is not visible here -- presumably
# they match the entries of KVdict; confirm before adding new types.
indices = {
    "RRRR":49, 
    "RRRP":64, 
    "RRPR":155, 
    "PRPR":175
}

# np.save does not create missing directories; make sure both output folders
# exist so a long encoding run is not lost to a FileNotFoundError.
os.makedirs(z_folder, exist_ok=True)
os.makedirs(e_folder, exist_ok=True)

batchZname = z_folder + date + '-z-' + str(indices[mechType])
batchEname = e_folder + date + '-encoded-' + str(indices[mechType])

# np.save appends the '.npy' extension because the names do not carry one.
np.save(batchZname, np.array(result_zSet))
np.save(batchEname, np.array(result_featSet))
