In [1]:
from alignment import *
import time
from dataset import *

  from .autonotebook import tqdm as notebook_tqdm


In [35]:
# Load the DINOv2 and CLIP encoders through the project helpers and put both
# into eval mode. A failure is reported instead of raised, so the notebook
# keeps running; later cells that use the encoders will fail if this did.
try:
    dinov2_encoder = load_dinov2()
    clip_encoder = load_clip()
    dinov2_encoder.eval()
    clip_encoder.eval()
    print('All Models loaded succesfully and set to eval mode')
except Exception as exc:
    # Catch Exception (not a bare except) so KeyboardInterrupt / SystemExit
    # still propagate, and surface the actual cause instead of hiding it.
    print('Error in Loading Models')
    print(f'{type(exc).__name__}: {exc}')

Dinov2 Loaded Successfully!
CLIP Model Loaded Successfully!
All Models loaded succesfully and set to eval mode


In [8]:
class ChoiceEmbeddingDataset(Dataset):
    """Paired-modality dataset over precomputed text, image and point-cloud embeddings.

    Every CSV row names a mesh through its 'fullId' column, and the embeddings
    of that mesh are read from '<embd_dir>/<fullId>.pt'. Each lookup draws one
    random embedding per modality from the candidates stored in that file.

    Args:
        dataset_path: Path of the CSV file; it must provide a 'fullId' column.
        embd_dir: Directory holding one '<fullId>.pt' file per mesh.
    """

    # Layout of a '<fullId>.pt' dictionary as noted by the original author
    # (the sizes are examples and are not enforced by this class):
    #     "text_emb": [3, 768]
    #     "img_emb":  [4, 384]
    #     "pc_emb":   [8, 768]
    # The author's note also lists a "mesh_id" entry; it is never read here.

    def __init__(self, dataset_path, embd_dir):
        super().__init__()
        self.embed_dir = embd_dir
        # One row per mesh; only the 'fullId' column is used by this class.
        self.dataframe = pd.read_csv(dataset_path)

    def __len__(self):
        """Number of rows (meshes) listed in the CSV."""
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        """Fetch one randomly chosen embedding per modality for row ``idx``.

        Returns:
            tuple: ``(idx, text_embedding, img_embedding, pc_embedding)`` where
            each embedding is a single row picked at random (via ``random``)
            from the matching entry of the mesh's '.pt' dictionary.
        """
        mesh_name = self.dataframe.loc[idx, 'fullId']
        stored = torch.load(os.path.join(self.embed_dir, f'{mesh_name}.pt'))

        # Resolve all three modalities before drawing any random number; the
        # draw order (text, image, point cloud) is kept fixed.
        candidates = (stored['text_emb'], stored['img_emb'], stored['pc_emb'])
        picked = [emb[random.randint(0, emb.shape[0] - 1)] for emb in candidates]

        return (idx, *picked)

In [9]:
# Input locations, relative to the notebook's working directory.
dataset_path = "Data/ShapeNetSem/Datasets/final_template_1k.csv"
image_dir = "Data/ShapeNetSem/Images/final_template_1k/"
depth_dir = "Data/ProcessedData/final_template_1k_dmaps/"
embd_dir = "Embeddings/PRETRAINED/final_template_1k/"

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

dataset = ChoiceEmbeddingDataset(dataset_path, embd_dir)
dataloader = DataLoader(dataset, shuffle=True, batch_size=8)

# Smoke test: pull a single shuffled batch and show the row indices followed
# by the shape of each modality's embedding tensor.
for i, batch in enumerate(dataloader):
    idx, text_embd, img_embd, pc_embd = batch
    print(idx, *(emb.shape for emb in (text_embd, img_embd, pc_embd)))
    break

tensor([598, 319, 361, 891, 540, 329, 555, 734]) torch.Size([8, 768]) torch.Size([8, 384]) torch.Size([8, 768])
