In [17]:
import torch
import sys
sys.path.insert(0, '..')
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path+"/model")
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from sklearn.metrics import top_k_accuracy_score
from sklearn.model_selection import train_test_split
import pickle
from tqdm import tqdm
import gc
import os
import pickle
import cv2
import faiss
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
import time
import os
from models.blip_itm import blip_itm
from PIL import Image


In [18]:
class CONFIG:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size  = 84
    test_batch_size = 64
    epoch = 5
    learning_rate = 5e-6
    betas = (0.9,0.98)
    epsilion = 1e-6
    weight_decay = 0.05
    image_data_dir = "/home/ubuntu/Downloads/Auto-retail-syndata-release"
    image_data_folder = "syn_image_train"
    df_path = "/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/eda/train_data_v1.csv"
    test_image_data_dir = "../../"
    test_image_data_folder = "val_imgs"
    test_df_path = "/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/data/val_data_map.csv"
    image_col = "full_path"
    label_col = "caption"
    test_image_col = "product"
    test_label_col = "caption"  
    global_random_state = 101
    model_name = "ViT-B/32"
    group = model_name+'-'+str(int(time.time()))
    wandb_api_key = os.environ.get("WANDB_API_KEY")
    

In [19]:
# col_to_test =  [c for c in clean_df.columns.values if "caption" in c] + [c for c in clean_df.columns.values if "step" in c]
col_to_test =  "caption"

In [20]:
col_to_test

'caption'

In [21]:
image_size = 384
preprocess = transforms.Compose([
        transforms.Resize((image_size,image_size),interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
        ]) 


In [22]:



class ALIPRODUCT_DATASET():
    def __init__(self,images,texts,dir,folder,preprocess,tokenize=True,test=False):
        self.images = images
        self.texts = texts
        self.dir = dir
        self.folder = folder
        self.preprocess = preprocess
        self.test = test
        self.tokenize = tokenize
        # self.z_shot_labels,self.z_shot_label_map = self.create_z_shot_labels(self.df["label_name"].unique(),self.df["label"].unique())
    def __len__(self):
        return(len(self.images))
    def __getitem__(self,idx):
        image_name = self.images[idx]
        text = self.texts[idx]
        if self.test:
            image = cv2.imread(f"{self.dir}/{self.folder}/{image_name}")
        else:
            image = cv2.imread(image_name)
        image = Image.fromarray(image).convert("RGB")
        image = self.preprocess(image)
        if self.tokenize:
             text_caption = clip.tokenize(text,truncate=True)
        else:
            text_caption = text
        return image,text_caption
        
def prepare_data(df_path,image_data_dir,image_data_folder,image_col,label_col,batch_size,preprocess,random_state,split_size=0.2,test=False,use_all=False,tokenize=True):
    df = pd.read_csv(df_path)
    images = df[image_col].values.tolist()
    texts = df[label_col].values.tolist()
    if test:
        test_data = ALIPRODUCT_DATASET(images,texts,image_data_dir,image_data_folder,preprocess,test=test,tokenize=tokenize)
        test_dataloader = DataLoader(test_data,batch_size,num_workers=12,pin_memory=True)
        return test_dataloader,df

    else:
        if use_all:
            train_data =  ALIPRODUCT_DATASET(images,texts,image_data_dir,image_data_folder,preprocess,test=test,tokenize=tokenize)
            train_dataloader = DataLoader(train_data,batch_size,shuffle=True,num_workers=12,pin_memory=True)
            return train_dataloader,None

        else:
            train_image,val_image ,train_text,val_text = train_test_split(images,texts,random_state= random_state,test_size=split_size)
            train_data =  ALIPRODUCT_DATASET(train_image,train_text,image_data_dir,image_data_folder,preprocess,test=test,tokenize=tokenize)
            val_data = ALIPRODUCT_DATASET(val_image,val_text,image_data_dir,image_data_folder,preprocess,test=test,tokenize=tokenize)
            train_dataloader = DataLoader(train_data,batch_size,shuffle=True,num_workers=12,pin_memory=True)
            val_dataloader = DataLoader(val_data,batch_size,shuffle=False,num_workers=12)


            return torch.tensor(train_dataloader,val_dataloader)

        





In [23]:
class ALIPRODUCT_BLIP(pl.LightningModule):
    def __init__(self,pretrained, image_size, vit='base'):
        super().__init__()
        self.med_config_path = "/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model/BLIP/configs/med_config.json"
        self.model = blip_itm(med_config= self.med_config_path,pretrained=pretrained, image_size=image_size, vit=vit)
    def forward(self,img,label):
        image_features,text_features = self.model(img,label,match_head='itc')
        return image_features,text_features 

    def predict_step(self,batch,batch_idx):
        with torch.no_grad():
            self.model.eval()
            image,text = batch
            image_features,text_features = self(image,text)
            image_features = image_features.detach().cpu()
            text_features = text_features.detach().cpu()
            return image_features,text_features


In [24]:
def test_feature(caption_col,clip_model):
    test_loader,df = prepare_data("/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/data/val_data_prompt_clean.csv",
    CONFIG.test_image_data_dir,CONFIG.test_image_data_folder
    ,CONFIG.test_image_col,caption_col,CONFIG.test_batch_size,preprocess,CONFIG.global_random_state,test=True,tokenize=False)
    trainer = Trainer(gpus=1)
    pred = trainer.predict(clip_model,test_loader)
    full_pred = tuple(map(torch.concat, zip(*pred)))
    image_embed,text_embed = full_pred
    print(image_embed.size())
    print(text_embed.size())
    faiss_index = faiss.IndexFlatIP(256)
    print(image_embed.numpy().astype(np.float32).shape)
    faiss_index.add(image_embed.numpy().astype(np.float32))
    top5_k_e,top5_k_y_pred = faiss_index.search(text_embed.numpy().astype(np.float32),5)
    top10_k_e,top10_k_y_pred = faiss_index.search(text_embed.numpy().astype(np.float32),10)
    top5_preds = np.array([1 if y_true in y_pred else 0 for y_true,y_pred in zip(df.index.values,top5_k_y_pred)])
    print(caption_col,"num correct pred",sum(top5_preds))
    top10_preds = np.array([1 if y_true in y_pred else 0 for y_true,y_pred in zip(df.index.values,top10_k_y_pred)])
    print(caption_col,"num correct pred",sum(top10_preds))
    top5_acc = top5_preds[top5_preds ==1].shape[0] / top5_preds.shape[0]
    top10_acc = top10_preds[top10_preds ==1].shape[0] / top10_preds.shape[0]
    return top5_acc,top10_acc

In [25]:
checkpoints = {"base":"https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth",
"epoch_1":"/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model/BLIP/output/save_checkpoint_1_sat.pth",
"epoch_2":"/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model/BLIP/output/save_checkpoint_2_sun.pth",
"epoch_3":"/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model/BLIP/output/save_checkpoint_3_mon.pth",
"epoch_4":"/home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model/BLIP/output/save_checkpoint_4_mon.pth"}
top5_acc_preds = []
top10_acc_preds = []

for checkpoint in checkpoints.keys():
    clip_model =ALIPRODUCT_BLIP(checkpoints[checkpoint],image_size)
    top5_acc,top10_acc = test_feature(col_to_test,clip_model)
    top5_acc_preds.append(top5_acc)
    top10_acc_preds.append(top10_acc)


load checkpoint from https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Predicting: 0it [00:00, ?it/s]

[ WARN:0@909.754] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../..//val_imgs/O1CN01GbMUGP1eUnMqcqyrP_!!661293875.jpg'): can't open/read file: check file path/integrity
[ WARN:0@909.755] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../..//val_imgs/O1CN01Zo7ryX1qzjUmiOrGu_!!2939425567.jpg'): can't open/read file: check file path/integrity
[ WARN:0@909.755] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../..//val_imgs/O1CN01cSoTwD1spJos7ZSF6_!!0-item_pic.jpg'): can't open/read file: check file path/integrity
[ WARN:0@909.755] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../..//val_imgs/O1CN01iAE0Ph1z5FHu6pHeO_!!0-item_pic.jpg'): can't open/read file: check file path/integrity
[ WARN:0@909.756] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../..//val_imgs/O1CN01k1kldB1spJonBuM0c_!!0-item_pic.jpg'): can't open/read fil

AttributeError: Caught AttributeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_21397/4277652911.py", line 20, in __getitem__
    image = Image.fromarray(image).convert("RGB")
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/PIL/Image.py", line 2825, in fromarray
    arr = obj.__array_interface__
AttributeError: 'NoneType' object has no attribute '__array_interface__'


In [None]:
pred_df = {"model_checkpoint":checkpoints.keys(),"top_5":top5_acc_preds,"top_10":top10_acc_preds}

In [None]:
top5_acc_preds

[0.1106, 0.2961, 0.32496, 0.3381, 0.36436]

In [None]:
pred_df = pd.DataFrame(pred_df)

In [None]:
pred_df

Unnamed: 0,model_checkpoint,top_5,top_10
0,base,0.1106,0.15852
1,epoch_1,0.2961,0.40596
2,epoch_2,0.32496,0.44076
3,epoch_3,0.3381,0.45302
4,epoch_4,0.36436,0.48788
