In [1]:
import sys

# Make the project checkout importable from this notebook.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook anywhere else.
_PROJECT_ROOT = "/home/jupyter-group3/reconstruction/reconstruction-deep-network"
# Guard the append so re-running this cell does not pile up duplicate entries.
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

In [2]:
import numpy as np
import os
import torch
from argparse import ArgumentParser
import yaml
import re

import reconstruction_deep_network
from reconstruction_deep_network.trainer.trainer import ModelTrainer
from reconstruction_deep_network.data_loader.custom_loader import CustomDataLoader

In [3]:
# Derive every data location from where the package itself lives.
module_dir = reconstruction_deep_network.__path__[0]
root_dir = os.path.dirname(module_dir)
data_dir = os.path.join(root_dir, "data", "v1")
scans_dir = os.path.join(data_dir, "scans")

# Every entry of scans_dir counts as a scan, except Jupyter's checkpoint folder.
all_scans = list(
    filter(
        lambda entry: not entry.endswith('.ipynb_checkpoints'),
        os.listdir(scans_dir),
    )
)

In [4]:
# Display the scan IDs discovered under scans_dir.
all_scans

['8WUmhLawc2A',
 '29hnd4uzFmX',
 'EDJbREhghzL',
 'EU6Fwq7SyZv',
 'D7N2EKCX4Sj',
 'dhjEzFoUFzH',
 'e9zR4mvMWw7',
 '1pXnuDYAj8r',
 'E9uDoFAP3SH',
 'ARNzJeq3xxb',
 '8194nk5LbLH',
 'b8cTxDM8gDG',
 'B6ByNegPMKs',
 '2azQ1b91cZZ',
 '2t7WUuJeko7',
 'fzynW3qQPVF',
 '1LXtFkjw3qL',
 '5ZKStnWn8Zo',
 '5LpN3gDmAk7',
 '17DRP5sb8fy',
 '759xd9YjKW5',
 'ac26ZMwG7aT',
 '7y3sRwLe3Va',
 'D7G3Y4RVNrH',
 'aayBHfsNo7d',
 'cV4RVeZvu5T',
 'GdvgFV5R1Z5',
 '5q7pvUzZiYa',
 '2n8kARJN3HM',
 '82sE5b5pLXE']

In [5]:
def save_text_encoding(text_encoding: np.ndarray, scan_id: str, img_name: str, text_encoding_dir: str) -> None:
    """Write one text encoding to a compressed ``.npz`` archive.

    The archive is written to ``<text_encoding_dir>/<scan_id>/<img_name>.npz``
    and the array is stored under the key ``latent``.

    Args:
        text_encoding: Array to persist.
        scan_id: Name of the per-scan subdirectory; created if it is missing.
        img_name: File stem of the archive (``.npz`` is appended).
        text_encoding_dir: Root directory that holds the per-scan subdirectories.
    """
    dir_name = os.path.join(text_encoding_dir, scan_id)
    # exist_ok=True replaces the separate isdir() check, which could race with
    # another process creating the same directory between check and create.
    os.makedirs(dir_name, exist_ok=True)

    file_name = os.path.join(dir_name, f"{img_name}.npz")
    np.savez_compressed(file_name, latent=text_encoding)

In [6]:
def parse_args(args=None):
    """Parse the options that control the text-encoding run.

    Args:
        args: Sequence of argument strings. When ``None``, argparse falls back
            to ``sys.argv[1:]``, so pass an explicit list from a notebook.

    Returns:
        Namespace with the attributes ``data_path``, ``debug`` and ``id``.
    """
    cli = ArgumentParser()

    # (flag, default) pairs; every option is parsed as a plain string.
    option_defaults = (
        ("--data_path", data_dir),
        ("--debug", "true"),
        ("--id", None),
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, dest=flag.lstrip("-"), type=str, default=fallback)

    return cli.parse_args(args)

In [7]:
def _select_house_scans(args):
    """Return the scan IDs to process for the given parsed arguments."""
    if args.debug == "true":
        # Debug runs are pinned to a single fixed scan.
        return ["17DRP5sb8fy"]
    if args.id is None:
        # Without this check a missing --id would be processed as the scan `None`.
        raise ValueError("--id is required when --debug is not 'true' (a scan id or 'ALL')")
    if args.id == "ALL":
        return all_scans
    return [args.id]


def main(args):
    """Encode the prompts of every selected scene and save them to disk.

    For each selected scan, the dataset is limited to that scan and every scene
    without a stored text embedding has its prompts encoded, stacked into one
    array and saved via ``save_text_encoding`` under
    ``<args.data_path>/text_embeddings``.

    Args:
        args: Parsed arguments providing ``data_path``, ``debug`` and ``id``.
            With ``debug == "true"`` a single fixed scan is processed; otherwise
            ``id`` is either one scan ID or ``"ALL"`` for every entry of
            ``all_scans``.

    Raises:
        ValueError: If debug mode is off and no ``id`` was supplied.
    """
    txt_latents_dir = os.path.join(args.data_path, "text_embeddings")
    os.makedirs(txt_latents_dir, exist_ok=True)

    # Resolve the work list first so bad arguments fail before the model loads.
    house_scans = _select_house_scans(args)

    # initialize model trainer to encode text prompts
    model_trainer = ModelTrainer()

    for house_scan in house_scans:

        # A fresh loader per scan, then restricted to this scan's metadata.
        dataset = CustomDataLoader(debug=False)
        dataset.metadata = dataset._limit_dataset(scan_id=house_scan)

        print(f"House Scan: {house_scan}, Number of Scenes: {len(dataset)}")

        for scene_idx in range(len(dataset)):

            scene_dict = dataset[scene_idx]
            # Scenes that already carry an embedding need no work.
            if scene_dict["text_embedding"] is not None:
                continue

            # The scene name is the part of the first image's file name before
            # the first underscore.
            first_image = os.path.basename(scene_dict["images_paths"][0])
            img_name = first_image.split("_")[0]

            # One encoding per prompt; squeeze() drops singleton dimensions
            # (presumably a leading batch axis - confirm against encode_text).
            prompt_embedding = np.stack(
                [
                    model_trainer.encode_text(prompt, "cpu")[0].numpy().squeeze()
                    for prompt in scene_dict["prompt"]
                ],
                axis=0,
            )

            save_text_encoding(prompt_embedding, house_scan, img_name, txt_latents_dir)
            print(f"Saved encoding: {house_scan}, scene: {img_name}")

In [8]:
# Full run: debug mode off, and "ALL" selects every scan in all_scans.
args = parse_args(
    [
        "--data_path", data_dir,
        "--debug", "false",
        "--id", "ALL",
    ]
)

In [9]:
# Run the encoding pass for the scans selected by `args`; scenes that already
# have a text embedding are skipped inside main().
main(args)

Loading text embedders...
House Scan: 8WUmhLawc2A, Number of Scenes: 115
House Scan: 29hnd4uzFmX, Number of Scenes: 143
Saved encoding: 29hnd4uzFmX, scene: ee1a56ee68544ff9a09fbb3044bf1cb3
Saved encoding: 29hnd4uzFmX, scene: ef12b1ecc3be4693b327e30468697a9b
Saved encoding: 29hnd4uzFmX, scene: efcb9e4c410841ca8665f6cab94e943b
Saved encoding: 29hnd4uzFmX, scene: f461fdc9b5c54386bcb1350bf4072015
Saved encoding: 29hnd4uzFmX, scene: f64f236a92db4eb981cf6af366d900c4
Saved encoding: 29hnd4uzFmX, scene: f68715fa4e134a89958dad587b6cebbc
Saved encoding: 29hnd4uzFmX, scene: f7dfad43a3a6462cb96145242bd8bdd0
Saved encoding: 29hnd4uzFmX, scene: f868b98ee5334981a93aa1d3c8441f52
Saved encoding: 29hnd4uzFmX, scene: f8f72f32f6594620b17161a621b9c872
Saved encoding: 29hnd4uzFmX, scene: f9ea869fc8a04b05bef902bed791696d
Saved encoding: 29hnd4uzFmX, scene: fa30e6859ca249a78879975b0b659bfb
Saved encoding: 29hnd4uzFmX, scene: fb1eef3a8c1b457985484ed1208a2f5f
Saved encoding: 29hnd4uzFmX, scene: fd934e994f6144b3

In [21]:
# Display the resolved data directory.
data_dir

'/home/jupyter-group3/reconstruction/reconstruction-deep-network/data/v1'