In [7]:
import os
import json
from pathlib import Path

def fix_clip_feats_ids():
    # Load the existing CLIP features
    with open('data/CLIP_feats.json', 'r') as f:
        clip_feats = json.load(f)
    
    # Create backup of original file
    with open('data/CLIP_feats.json.backup', 'w') as f:
        json.dump(clip_feats, f)
    
    # Process each split
    for split in ['train', 'validation', 'test']:
        split_dir = f'data/img_{split}_subset'
        if not os.path.exists(split_dir):
            continue
            
        # Get all unique model IDs from the directory structure
        unique_models = set()
        for root, _, files in os.walk(split_dir):
            if any(f.endswith('.jpg') for f in files):
                rel_path = os.path.relpath(root, split_dir)
                unique_models.add(rel_path.replace("\\", "/"))
        
        # Create a new list for this split with unique models
        new_features = []
        processed_ids = set()
        
        # Process each feature
        for feat in clip_feats[split]:
            old_id = feat['id']
            # Convert to forward slashes for consistency
            old_id = old_id.replace("\\", "/")
            
            # Only add if we haven't processed this ID before
            if old_id in unique_models and old_id not in processed_ids:
                processed_ids.add(old_id)
                new_features.append({"id": old_id, "CLIP_feat": feat['CLIP_feat']})
        
        # Update the split with deduplicated features
        clip_feats[split] = new_features
        
        print(f"Split {split}:")
        print(f"Original features: {len(clip_feats[split])}")
        print(f"Unique models found: {len(unique_models)}")
        print(f"Features after deduplication: {len(new_features)}")
    
    # Save the corrected features
    with open('data/CLIP_feats.json', 'w') as f:
        json.dump(clip_feats, f)
    
    print("\nFixed IDs in CLIP_feats.json")
    print("Original file backed up as CLIP_feats.json.backup")


fix_clip_feats_ids()



Split train:
Original features: 100
Unique models found: 5985
Features after deduplication: 100
Split validation:
Original features: 100
Unique models found: 998
Features after deduplication: 100
Split test:
Original features: 100
Unique models found: 500
Features after deduplication: 100

Fixed IDs in CLIP_feats.json
Original file backed up as CLIP_feats.json.backup


In [29]:
# Create backup of original file
with open('data/CLIP_feats.json', 'r') as f:
    clip_feats = json.load(f)

In [1]:
from config.config_adaptor import ConfigAdaptor
from dataset.clip_latent_dataset import get_dataloader

# Create config
cfg = ConfigAdaptor('train')

# Get dataloaders
train_loader = get_dataloader('train', cfg)
val_loader = get_dataloader('validation', cfg)
test_loader = get_dataloader('test', cfg)

# Example of using the dataloader
for batch in train_loader:
    clip_features = batch['clip_feature']    # Shape: (batch_size, 512)
    latent_codes = batch['latent']          # Shape: (batch_size, 256)
    model_ids = batch['id']   
    print(clip_features.shape)
    print(latent_codes.shape)
    print(model_ids)
    break



Loading CLIP features from data/CLIP_feats.json
Loading latent codes from proj_log/newDeepCAD/results/all_zs_ckpt1000.h5
Loaded 5985 samples for train
CLIP feature shape: torch.Size([24, 512])
Latent shape: torch.Size([6000, 256])
Loading CLIP features from data/CLIP_feats.json
Loading latent codes from proj_log/newDeepCAD/results/all_zs_ckpt1000.h5
Loaded 998 samples for validation
CLIP feature shape: torch.Size([24, 512])
Latent shape: torch.Size([1000, 256])
Loading CLIP features from data/CLIP_feats.json
Loading latent codes from proj_log/newDeepCAD/results/all_zs_ckpt1000.h5
Loaded 500 samples for test
CLIP feature shape: torch.Size([24, 512])
Latent shape: torch.Size([500, 256])
torch.Size([32, 512])
torch.Size([32, 256])
['0005/00058814', '0093/00937422', '0074/00744352', '0056/00569593', '0048/00487621', '0016/00164758', '0005/00054510', '0068/00684486', '0080/00802651', '0092/00928534', '0044/00449508', '0085/00852082', '0016/00163753', '0098/00984023', '0052/00520847', '0048/

In [11]:
import json
import os 
# with open('data/CLIP_feats.json', 'r') as f:
#         clip_feats = json.load(f)
test_data = clip_feats['test']
for data in test_data:
    print(data['id'])

with open('data/CLIP_feats.json', 'w') as f:
        json.dump(clip_feats, f)


0000\00006807
0000\00006807
0000\00006807
0001\00017284
0001\00017284
0001\00017284
0001\00017284
0002\00029675
0002\00029675
0002\00029675
0003\00037008
0003\00037008
0003\00037008
0003\00037008
0003\00037008
0004\00047984
0004\00047984
0005\00056405
0005\00056405
0005\00056405
0005\00056405
0006\00069198
0006\00069198
0006\00069198
0006\00069198
0006\00069198
0007\00074551
0008\00083216
0008\00083216
0009\00096364
0009\00096364
0009\00096364
0010\00107201
0010\00107201
0010\00107201
0010\00107201
0010\00107201
0010\00107201
0010\00107201
0010\00107201
0011\00118368
0011\00118368
0011\00118368
0012\00126039
0012\00126039
0012\00126039
0012\00126039
0012\00126039
0012\00126039
0013\00138202
0013\00138202
0013\00138202
0013\00138202
0013\00138202
0014\00145936
0014\00145936
0014\00145936
0014\00145936
0015\00158940
0015\00158940
0015\00158940
0015\00158940
0015\00158940
0016\00160940
0016\00160940
0017\00177495
0017\00177495
0017\00177495
0017\00177495
0017\00177495
0018\00189501
0018\0

In [4]:
import json
path = 'data/train_val_test_split.json'

with open(path, 'r') as f:
    data = json.load(f)

print(data.keys())
print(len(data['train']))
print(len(data['validation']))
print(len(data['test']))







dict_keys(['train', 'validation', 'test'])
161240
8946
8052


In [1]:
import h5py

# Open the HDF5 file
with h5py.File('proj_log/newDeepCAD/results/all_zs_ckpt1000.h5', 'r') as f:
    # Print the keys (datasets) in the file
    print("Keys:", list(f.keys()))

    # Print the shape of each dataset
    print("Shape of train_zs:", f['train_zs'].shape)
    print("Shape of validation_zs:", f['validation_zs'].shape)
    print("Shape of test_zs:", f['test_zs'].shape)

Keys: ['test_zs', 'train_zs', 'validation_zs']
Shape of train_zs: (6000, 256)
Shape of validation_zs: (1000, 256)
Shape of test_zs: (500, 256)
