In [10]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from chamferdist import ChamferDistance
tqdm.pandas()

In [11]:
# Metadata table used by uid_to_wnlemma; only its "fullId" and "wnlemmas"
# columns are read here.
wn_df = pd.read_csv("/scratch/noam/ShapeNetCore.v1/03001627.csv")


def uid_to_wnlemma(uid):
    """Return the comma-joined ``wnlemmas`` of the metadata rows matching ``uid``.

    The last ``/``-separated component of ``uid`` is prefixed with ``3dw.``
    and compared against ``wn_df["fullId"]``. When no row matches, the
    generic lemma ``"chair"`` is returned instead.
    """
    full_id = f"3dw.{uid.split('/')[-1]}"
    lemmas = wn_df.loc[wn_df["fullId"] == full_id, "wnlemmas"]
    if lemmas.empty:
        return "chair"
    return ",".join(lemmas.tolist())

# Load the training pairs and attach lemma information to both shapes.
df = pd.read_csv('/scratch/noam/shapetalk/language/chair_train.csv')
# The stored distance columns are discarded (chamfer_distance is recomputed
# further down in this notebook).
df = df.drop(columns=['l2_distance', 'chamfer_distance'])
# Strip a trailing period and fix the misspelled column name in one pass.
df['llama3_uttarance'] = df['llama3_uttarance'].str.removesuffix('.')
df = df.rename(columns={'llama3_uttarance': 'llama3_utterance'})
df["source_wnlemmas"] = df["source_uid"].apply(uid_to_wnlemma)
df["target_wnlemmas"] = df["target_uid"].apply(uid_to_wnlemma)


def _shared_wnlemmas(row):
    """Comma-join the lemmas present in both the source and target lists."""
    common = set(row["source_wnlemmas"].split(",")) & set(row["target_wnlemmas"].split(","))
    # Iteration order of a set of strings can differ between interpreter runs
    # (string hash randomization), so sort to keep the saved column stable.
    return ",".join(sorted(common))


df["intersect_wnlemmas"] = df.apply(_shared_wnlemmas, axis=1)

  df = pd.read_csv('/scratch/noam/shapetalk/language/chair_train.csv')


In [12]:
def build_prompt(wnlemmas, llama3_utterance):
    """Swap "a chair" in an utterance for a randomly chosen, more specific lemma.

    Args:
        wnlemmas: Comma-separated lemma string (may be NaN/None/empty).
        llama3_utterance: The utterance text to rewrite.

    Returns:
        "Unknown" when no lemmas are available; the utterance unchanged when
        it is itself "Unknown" or the only lemma is "chair"; otherwise the
        utterance with every "a chair" replaced by "a <lemma>" / "an <lemma>".
    """
    no_lemmas = pd.isna(wnlemmas) or wnlemmas == ""
    if no_lemmas:
        return "Unknown"

    nothing_to_specialise = llama3_utterance == "Unknown" or wnlemmas == "chair"
    if nothing_to_specialise:
        return llama3_utterance

    candidates = wnlemmas.split(",")
    # Drop (the first occurrence of) the generic lemma so a specific one is picked.
    try:
        candidates.remove("chair")
    except ValueError:
        pass

    chosen = random.choice(candidates)
    # Article is picked from the first letter only (vowel -> "an").
    article = "an" if chosen[0].lower() in 'aeiou' else "a"
    return llama3_utterance.replace("a chair", f"{article} {chosen}")

# build_prompt picks a lemma with the global `random` generator; seed it so
# the picks are repeatable for a given intersect_wnlemmas column.
random.seed(0)
df["llama3_wnlemma_utterance"] = df.progress_apply(
    lambda row: build_prompt(row["intersect_wnlemmas"], row["llama3_utterance"]),
    axis=1,
)

100%|██████████| 62392/62392 [00:00<00:00, 180140.88it/s]


In [13]:
# (Re-)register tqdm's progress_apply on pandas objects.
tqdm.pandas()

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Chamfer-distance module, moved to the selected device.
chamfer_distance = ChamferDistance().to(device)

def _load_sampled_cloud(uid, num_points):
    """Load the stored point cloud for ``uid`` onto ``device`` and keep a random subset of at most ``num_points`` points."""
    path = f'/scratch/noam/shapetalk/point_clouds/scaled_to_align_rendering/{uid}.npz'
    # np.load on an .npz returns an archive object that keeps the file open;
    # the context manager closes it as soon as the array has been read.
    with np.load(path) as archive:
        points = torch.tensor(archive['pointcloud']).to(device)
    # Random permutation truncated to num_points (fewer if the cloud is smaller).
    keep = torch.randperm(points.size(0))[:num_points]
    return points[keep]


def eval_chamfer_distance(uid1, uid2, num_points=2048):
    """Compute the chamfer distance between two stored point clouds.

    Args:
        uid1: Identifier of the first shape; used to build its ``.npz`` path.
        uid2: Identifier of the second shape.
        num_points: Maximum number of randomly selected points kept per cloud.

    Returns:
        The result of ``chamfer_distance`` on the two subsampled clouds (each
        given a leading batch dimension of 1), as a Python float.

    NOTE(review): ``chamfer_distance`` is called with its default arguments;
    confirm the library's default directionality/reduction is the intended one.
    """
    # Sample the first cloud before the second to keep the RNG draw order fixed.
    pc1 = _load_sampled_cloud(uid1, num_points)
    pc2 = _load_sampled_cloud(uid2, num_points)
    return chamfer_distance(pc1.unsqueeze(0), pc2.unsqueeze(0)).item()

# eval_chamfer_distance subsamples points with torch.randperm; seed torch so
# the stored distances can be reproduced on a re-run.
torch.manual_seed(0)
df["chamfer_distance"] = df.progress_apply(
    lambda row: eval_chamfer_distance(row["source_uid"], row["target_uid"]),
    axis=1,
)

100%|██████████| 62392/62392 [04:10<00:00, 249.46it/s]


In [14]:
# Write the augmented training table to disk, without the index column.
df.to_csv(
    "/scratch/noam/control_point_e/datasets/chair/train.csv",
    index=False,
)

In [17]:
# Number of rows whose rewritten prompt is anything other than "Unknown".
int(df["llama3_wnlemma_utterance"].ne("Unknown").sum())

7576