In [21]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from chamferdist import ChamferDistance
tqdm.pandas()

In [22]:
# Metadata table for synset 03001627; its "fullId" and "wnlemmas" columns are
# the ones read by uid_to_wnlemma.
wn_df = pd.read_csv("/scratch/noam/ShapeNetCore.v1/03001627.csv")

def uid_to_wnlemma(uid):
    """Return the comma-joined ``wnlemmas`` entries recorded for a shape uid.

    The last ``/``-separated component of ``uid`` is prefixed with ``3dw.``
    and matched against ``wn_df["fullId"]``. When several rows match, their
    ``wnlemmas`` values are concatenated with commas in table order. When no
    row matches, the generic lemma ``"chair"`` is returned.
    """
    model_id = uid.split('/')[-1]
    matches = wn_df[wn_df["fullId"] == f"3dw.{model_id}"]
    if matches.empty:
        # Shape is missing from the metadata table: fall back to the class name.
        return "chair"
    return ",".join(matches["wnlemmas"])

# Load the utterance table and attach WordNet lemma columns for both shapes.
df = pd.read_csv('/scratch/noam/shapetalk/language/chair_test.csv')
# The CSV column is spelled 'llama3_uttarance'; strip a trailing period from
# each value and expose the column under the corrected name.
df['llama3_uttarance'] = df['llama3_uttarance'].str.removesuffix('.')
df = df.rename(columns={'llama3_uttarance': 'llama3_utterance'})
df["source_wnlemmas"] = df["source_uid"].apply(uid_to_wnlemma)
df["target_wnlemmas"] = df["target_uid"].apply(uid_to_wnlemma)
# Lemmas shared by source and target. The intersection is sorted before
# joining because the iteration order of a set of strings can differ between
# interpreter runs (string hash randomization), which would make the stored
# column, and anything sampled from it, differ from run to run.
df["intersect_wnlemmas"] = [
    ",".join(sorted(set(source_lemmas.split(",")) & set(target_lemmas.split(","))))
    for source_lemmas, target_lemmas in zip(df["source_wnlemmas"], df["target_wnlemmas"])
]

In [23]:
def build_random_wnlemma(wnlemmas):
    """Pick one lemma from a comma-separated list and prefix it with "a"/"an".

    Missing (NaN) or empty input is returned unchanged. When the list holds
    more than one lemma, one occurrence of the generic "chair" is removed
    before sampling so that a more specific lemma is chosen. The article is
    "an" when the chosen lemma starts with a vowel letter (case-insensitive),
    otherwise "a".
    """
    if pd.isna(wnlemmas) or wnlemmas == "":
        return wnlemmas

    candidates = wnlemmas.split(",")
    if len(candidates) > 1 and "chair" in candidates:
        candidates.remove("chair")

    chosen = random.choice(candidates)
    article = "an" if chosen[0].lower() in 'aeiou' else "a"
    return f"{article} {chosen}"

def build_prompt(wnlemma, llama3_utterance):
    """Substitute ``wnlemma`` for every "a chair" in ``llama3_utterance``.

    Returns the sentinel "Unknown" when the lemma is missing (NaN) or empty,
    or when the utterance itself is already "Unknown".
    """
    lemma_missing = pd.isna(wnlemma) or wnlemma == ""
    if lemma_missing or llama3_utterance == "Unknown":
        return "Unknown"
    return llama3_utterance.replace("a chair", wnlemma)


# build_random_wnlemma samples with random.choice; seed Python's RNG first so
# that re-running this cell on the same input draws the same lemmas.
random.seed(0)

# One article-prefixed lemma per row, sampled from the lemmas shared by the
# source and target shapes.
df["random_wnlemma"] = df.progress_apply(lambda x: build_random_wnlemma(x["intersect_wnlemmas"]), axis=1)
# Prompt with "a chair" replaced by the sampled lemma ("Unknown" when the row
# has no lemma or no usable utterance).
df["llama3_wnlemma_utterance"] = df.progress_apply(lambda x: build_prompt(x["random_wnlemma"], x["llama3_utterance"]), axis=1)

100%|██████████| 7540/7540 [00:00<00:00, 217136.43it/s]
100%|██████████| 7540/7540 [00:00<00:00, 183761.88it/s]


In [25]:
# Register tqdm's pandas integration (progress_apply); the import cell already
# calls this, so repeating it here only matters if this cell is run on its own.
tqdm.pandas()
# Use the GPU when one is visible (the import cell sets CUDA_VISIBLE_DEVICES
# to "1"), otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Shared metric module used by eval_chamfer_distance.
chamfer_distance = ChamferDistance().to(device)

def _load_point_cloud(uid, num_points):
    """Load the stored point cloud for ``uid`` and randomly subsample it.

    Reads the ``'pointcloud'`` array from the shape's ``.npz`` archive, moves
    it to ``device`` and returns at most ``num_points`` entries chosen by a
    random permutation along the first axis.
    """
    path = f'/scratch/noam/shapetalk/point_clouds/scaled_to_align_rendering/{uid}.npz'
    # np.load on an .npz returns an NpzFile that keeps the archive open until
    # it is closed. This runs twice per dataframe row, so close it
    # deterministically instead of relying on garbage collection.
    with np.load(path) as archive:
        points = torch.tensor(archive['pointcloud']).to(device)
    return points[torch.randperm(points.size(0))[:num_points]]


def eval_chamfer_distance(uid1, uid2, num_points=2048):
    """Return the Chamfer distance between the point clouds of two shapes.

    Args:
        uid1: uid of the first shape; used to build the path of its .npz file.
        uid2: uid of the second shape.
        num_points: maximum number of points randomly sampled from each cloud
            before the distance is computed.

    Returns:
        The value produced by the module-level ``chamfer_distance`` metric for
        the two subsampled clouds, as a Python float.

    Raises:
        FileNotFoundError: if either shape has no .npz file on disk.
    """
    # Subsample the first cloud before loading the second so the random draws
    # happen in the same order as before (first shape, then second shape).
    pc1 = _load_point_cloud(uid1, num_points)
    pc2 = _load_point_cloud(uid2, num_points)
    # The metric is called on batched input, hence the leading batch axis of 1.
    return chamfer_distance(pc1.unsqueeze(0), pc2.unsqueeze(0)).item()

# Chamfer distance between each row's source and target point clouds; every
# row reads two .npz files, so this is the slow step of the notebook.
df["chamfer_distance"] = df.progress_apply(lambda x: eval_chamfer_distance(x["source_uid"], x["target_uid"]), axis=1)

100%|██████████| 62392/62392 [04:28<00:00, 232.03it/s]


In [26]:
# Total number of rows in df.
len(df)

62392

In [27]:
# Number of rows with a usable prompt, i.e. whose llama3_wnlemma_utterance is
# not the "Unknown" sentinel returned by build_prompt.
len(df[df["llama3_wnlemma_utterance"] != "Unknown"])

7576

In [28]:
# Column labels of df, including the lemma, prompt and distance columns added
# by this notebook.
df.columns

Index(['workerid', 'prompt', 'utterance', 'assignmentid', 'worktimeinseconds',
       'source_model_name', 'source_object_class', 'source_dataset',
       'target_model_name', 'target_object_class', 'target_dataset',
       'is_patched', 'target_uid', 'source_uid', 'hard_context',
       'target_original_object_class', 'source_original_object_class',
       'saliency', 'tokens', 'tokens_len', 'utterance_spelled',
       'target_unary_split', 'source_unary_split', 'listening_split',
       'changeit_split', 'tokens_encoded', 'llama3_utterance',
       'source_wnlemmas', 'target_wnlemmas', 'intersect_wnlemmas',
       'random_wnlemma', 'llama3_wnlemma_utterance', 'chamfer_distance'],
      dtype='object')

In [29]:
# Distinct sampled lemmas; an empty string means the source and target shapes
# had no lemma in common.
df["random_wnlemma"].unique()

array(['a lawn chair', 'a chair', 'a side chair', '', 'a straight chair',
       'an armchair', 'a club chair', 'a swivel chair', 'a Windsor chair',
       'a lounge chair', 'a rocker', 'a vis-a-vis', 'a rex chair',
       'a bench', 'an easy chair', 'a rocking chair',
       'an overstuffed chair', 'a cantilever chair', 'a ball chair',
       'a wing chair', 'a folding chair', 'a sofa', 'a zigzag chair',
       'a lounge', 'a NO. 14 chair', 'a tete-a-tete', 'a couch',
       'a deck chair', 'a bean chair', 'a love seat', 'a lounger',
       'a park bench', 'a barcelona chair', 'a wassily chair', 'a daybed',
       'a Morris chair', 'a chaise', 'a garden chair', 'a loveseat',
       'a tulip chair', 'a wheelchair', 'a camp chair',
       'a reclining chair', 'a chaise longue', 'a beach chair',
       'a X chair', 'an Eames chair', 'a recliner'], dtype=object)

In [30]:
# Distinct prompts after "a chair" has been replaced by the sampled lemma.
df["llama3_wnlemma_utterance"].unique()

array(['Unknown', 'a chair with a spindle backrest',
       'a straight chair with a thick seat', 'a chair with short legs',
       'a chair with long legs', 'a chair with a curved backrest',
       'a chair with two legs', 'a chair without armrests',
       'an armchair with four legs', 'a chair with armrests',
       'a chair with four legs', 'a chair with thick legs',
       'a lounge chair without armrests', 'a chair with thin legs',
       'a chair with a short backrest', 'a straight chair with two legs',
       'an armchair with a short backrest',
       'a straight chair with thick legs',
       'a chair without a legs strecher', 'a chair with a thick seat',
       'a chair with a wide seat',
       'a straight chair with a solid backrest',
       'a side chair without armrests', 'a chair with a thin seat',
       'a vis-a-vis with thin legs', 'an armchair with a solid backrest',
       'a chair with a solid backrest',
       'a side chair with a rounded backrest',
       'an ar

In [31]:
# Persist the enriched table.
# NOTE(review): the visible code reads `df` from chair_test.csv but saves it
# here as train.csv, and the recorded progress bars show two different row
# counts (7540 vs 62392), so the cells appear to have been run against
# different input files. Confirm that the input and output split names match
# before re-running the notebook top to bottom.
df.to_csv("/scratch/noam/control_point_e/datasets/chair/train.csv", index=False)

In [10]:
# Distinct utterances before lemma substitution.
# NOTE(review): this cell's execution count (In [10]) is lower than that of
# the cells around it, so its output comes from an earlier run — re-run in order.
df['llama3_utterance'].unique()

array(['a chair with short legs', 'a chair with a long backrest',
       'Unknown', 'a chair with four legs', 'a chair without armrests',
       'a chair with thick legs', 'a chair with a rounded backrest',
       'a chair with a solid backrest',
       'a chair with a straight backrest', 'a chair with a thick seat',
       'a chair with a thin seat', 'a chair with armrests',
       'a chair with thin legs', 'a chair with a short backrest',
       'a chair with a curved backrest', 'a chair with long legs',
       'a chair with two legs', 'a chair with a spindle backrest',
       'a chair with a wide seat', 'a chair with a legs strecher',
       'a chair with a squared backrest',
       'a chair without a legs strecher', 'a chair with a narrow seat'],
      dtype=object)

In [36]:
# Keep only the rows whose utterance is one of the two armrest statements.
new_df = df[df['llama3_utterance'].isin(['a chair without armrests', 'a chair with armrests'])]

In [37]:
# Sanity check: only the two armrest utterances should remain after filtering.
new_df['llama3_utterance'].unique()

array(['a chair without armrests', 'a chair with armrests'], dtype=object)

In [38]:
# Build (negative, positive) shape pairs for the armrest attribute and save them.
#
# The rows are filtered from `df` inside this cell rather than read from
# `new_df`, because this cell rebinds `new_df` to the pair table below. Reading
# `new_df` here made a second run of the cell fail with a KeyError on
# 'llama3_utterance'.
headers = ['negative_uid', 'positive_uid', 'wnlemma', 'negative_utterance', 'positive_utterance']
armrest_df = df[df['llama3_utterance'].isin(['a chair without armrests', 'a chair with armrests'])]

rows = []
for _, row in armrest_df.iterrows():
    wnlemma = row['random_wnlemma']
    prompt = row['llama3_wnlemma_utterance']
    if row['llama3_utterance'] == 'a chair without armrests':
        # The target is taken as the shape without armrests.
        # NOTE(review): presumably the utterance describes the target relative
        # to the source — confirm against the dataset definition.
        negative_uid, positive_uid = row['target_uid'], row['source_uid']
        negative_utterance = prompt
        positive_utterance = prompt.replace("without armrests", "with armrests")
    else:
        # Only 'a chair with armrests' can reach this branch (see the filter
        # above): the target is taken as the shape with armrests.
        negative_uid, positive_uid = row['source_uid'], row['target_uid']
        negative_utterance = prompt.replace("with armrests", "without armrests")
        positive_utterance = prompt
    rows.append([negative_uid, positive_uid, wnlemma, negative_utterance, positive_utterance])

new_df = pd.DataFrame(rows, columns=headers)
# Rows whose prompt is the "Unknown" sentinel pass through .replace unchanged;
# drop them so only real prompt pairs are written.
new_df = new_df[new_df["negative_utterance"] != "Unknown"]
new_df.to_csv("/scratch/noam/control_point_e/datasets/chair_armrests_lora/val.csv", index=False)



In [41]:
# Distinct lemmas present in the armrest pair table.
new_df['wnlemma'].unique()

array(['a chair', 'a swivel chair', 'a rocker', 'an armchair',
       'a club chair', 'a side chair', 'an easy chair', 'a garden chair',
       'a rex chair', 'a zigzag chair', 'a lounge chair', 'a ladder-back'],
      dtype=object)