# Run through all training data 

In [1]:
import json 
import pandas as pd 
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '1'

with open('prompts.json') as f:
    prompts = json.load(f)

In [2]:
attr_df = pd.read_parquet('category_attributes.parquet')



In [3]:
cat_attr_dict = {k:v for k,v in zip(attr_df['Category'].tolist(), attr_df['Attribute_list'].tolist())}

In [4]:
test_df = pd.read_csv('test_template.csv')

In [5]:
cat_list = attr_df['Category'].tolist()

In [6]:
from transformers import AutoModel, AutoProcessor

processor = AutoProcessor.from_pretrained('patrickjohncyh/fashion-clip') 
model = AutoModel.from_pretrained('./clip-finetuned_patrick_b512_lowlowlr/checkpoint-8870/').cuda()




# Test 

In [9]:
from tqdm import tqdm 
import torch 
import numpy as np 
from PIL import Image
import torch.nn.functional as F 
import time

test_df_pred = test_df.copy(deep=True)
cat_list = attr_df['Category'].tolist()

total_time = 0 

for i in tqdm(range(len(test_df))):

    type = test_df.iloc[i,1]
    model_id = cat_list.index(type)
    attr_list = cat_attr_dict[type]


    inputs_im = processor(images=Image.open('test_images/' + str(test_df.iloc[i,0]).zfill(6) + '.jpg'), return_tensors="pt") 
    inputs_im['pixel_values'] = inputs_im['pixel_values'].cuda()

    torch.cuda.synchronize()  
    start_time = time.perf_counter()

    with torch.no_grad():
        im_feats = model.get_image_features(**inputs_im)
        im_feats = F.normalize(im_feats, p=2, dim=-1)

    torch.cuda.synchronize()  
    end_time = time.perf_counter()

    total_time += (end_time-start_time)
    
    for idx, attr in enumerate(attr_list):
        
        if attr == 'ocassion':
            attr = 'occasion'
        classes = list(prompts[type][attr].keys())
        text_prompts = prompts[type][attr]
        text_prompts_list = list(text_prompts.values())
        
        inputs = processor(text=text_prompts_list, return_tensors="pt", padding=True)
        for k in inputs.keys():
            inputs[k] = inputs[k].cuda()

        torch.cuda.synchronize()  
        start_time = time.perf_counter()
    
        with torch.no_grad():
            text_feats = model.get_text_features(**inputs)
            text_feats = F.normalize(text_feats, p=2, dim=-1)

        logits_per_image = im_feats @ text_feats.T
        probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
        pred = classes[torch.argmax(probs)]

        torch.cuda.synchronize()  
        end_time = time.perf_counter()
    
        total_time += (end_time-start_time)

        test_df_pred.loc[i, f'attr_{idx+1}'] = pred 

    if i==1000:
        break

  3%|███▏                                                                                            | 1000/30205 [00:50<24:42, 19.69it/s]


In [11]:
inference_time = 1000*total_time/1000
print(f"Inference time: {inference_time:.6f} ms")


Inference time: 40.008846 ms


In [17]:
test_df_pred.to_csv('out.csv', index=None)
