In [1]:
!pip install ftfy regex tqdm torch
!pip install git+https://github.com/openai/CLIP.git
!pip install pytorch-lightning

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-j4e5pmuk
  Running command git clone -q https://github.com/openai/CLIP.git /tmp/pip-req-build-j4e5pmuk
  Resolved https://github.com/openai/CLIP.git to commit b4ae44927b78d0093b556e3ce43cbdcff422017a


In [2]:
import clip
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from sklearn.metrics import top_k_accuracy_score
from sklearn.model_selection import train_test_split
import pickle

In [3]:
!nvidia-smi

Wed Apr 27 12:40:57 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.60.02    Driver Version: 510.60.02    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN X ...  Off  | 00000000:02:00.0 Off |                  N/A |
| 23%   32C    P8     9W / 250W |      8MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN X ...  Off  | 00000000:03:00.0 Off |                  N/A |
| 23%   27C    P8     9W / 250W |      8MiB / 12288MiB |      0%      Default |
|       

In [4]:
class CONFIG:
    device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {CONFIG.device}")

Using device: cuda


In [5]:
image_data_dir = "../../"
image_data_folder = "val_imgs"
df = pd.read_csv("../data/val_data_map.csv")
df.head()

Unnamed: 0,caption,product,label
0,Tanabata Valentine's Day Shenzhen Bao'an M Nan...,O1CN01cSoTwD1spJos7ZSF6_!!0-item_pic.jpg,32820
1,Children's Toys Little Girl over 6 Years Old G...,O1CN01iI5sGv1vIkV5dfICu_!!0-item_pic.jpg,8326
2,sm qing qu Alternative Sex between Men and Wom...,O1CN01xnnyaz248W0n5le2q_!!131027346.jpg,47598
3,Traditional Chinese Painting Burnin' Up Yingke...,O1CN01lfHuuA1D3K2MEM63p_!!160-0-lubanu.jpg,36608
4,lgnace Lee Men's Jeans Thick Section Distresse...,TB23FV3afBNTKJjy0FdXXcPpVXa_!!1944606990.jpg,45566


In [6]:
x_train,x_test = train_test_split(df,random_state= 101,test_size=0.2)

In [7]:
class AI_CITY_DATASET():
    def __init__(self,df,dir,folder,preprocess):
        self.df = df
        self.dir = dir
        self.folder = folder
        self.preprocess = preprocess
        self.z_shot_labels,self.z_shot_label_map = self.create_z_shot_labels(self.df["caption"].unique(),self.df["label"].unique())
    def __len__(self):
        return(len(self.df))
    def __getitem__(self,idx):
        image_name = self.df.iloc[idx,:]["product"]
        image = Image.open(f"{self.dir}/{self.folder}/{image_name}").convert("RGB")
        image = self.preprocess(image)
        return image,z_shot_labels
    def create_z_shot_labels(self,label_names,label):
        z_shot_labels = []
        z_shot_label_map  = {}
        for label_name, label, idx in zip(label_names, label, range(len(label))):
            z_shot_label = f"this is a {label_name}"
            z_shot_labels.append(z_shot_label)
            z_shot_label_map[idx] = {"label_name": label_name, "label": label}
        z_shot_labels = clip.tokenize(z_shot_labels,truncate=True)
        return z_shot_labels,z_shot_label_map

            
        

In [8]:
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14']

In [9]:
model,preprocess = clip.load('RN50',CONFIG.device,jit=False)

In [10]:
data = AI_CITY_DATASET(x_test,image_data_dir,image_data_folder,preprocess)
pred_dataloader = DataLoader(data,1,shuffle=False,num_workers=12)
z_shot_labels = data.z_shot_labels
z_shot_label_map = data.z_shot_label_map

In [16]:
z_shot_labels.size()

tensor([[49406,   589,   533,  ...,     0,     0,     0],
        [49406,   589,   533,  ...,     0,     0,     0],
        [49406,   589,   533,  ...,     0,     0,     0],
        ...,
        [49406,   589,   533,  ...,     0,     0,     0],
        [49406,   589,   533,  ...,     0,     0,     0],
        [49406,   589,   533,  ...,     0,     0,     0]])

In [12]:
z_shot_label_map

{0: {'label_name': 'I Customized Middle/High School Girls Gift Movie Star Peng 500/1000 Pieces of Wood Fun Puzzle',
  'label': 17726},
 1: {'label_name': 'Nordic Art Basin One Color Washbasin Small Pillar Column Courtyard X Landing Triangle Basin Basin Feet',
  'label': 24830},
 2: {'label_name': 'Commercial Leshan bobo chicken Condiment Sichuan Spicy zanthoxylum armatum dc. Taste Cold Pot Served chuanchuanxiang Primer Red Oil Getting Flavoring Bags',
  'label': 9445},
 3: {'label_name': "Children Hat Scarf Two Winter Plush Ear Protection Siamese One Cap Boys Warm Baby Women's Hat",
  'label': 7229},
 4: {'label_name': 'dong bei lao shi lv dou gao lv dou gao gao dian pi fa chuan tong shou gong lv dou gao zheng zong dong bei lv dou gao 450g',
  'label': 43156},
 5: {'label_name': 'Summer 4 Girls Skirt Suits 5 er tong zhuang 6 Little Girl 7 Summer F8 xia kuan 412-Year-Old 9 Parent-Child Dresses',
  'label': 31285},
 6: {'label_name': "Children's Sleepwear Girls Cotton Summer Thin mu nv z

In [13]:
filename = "label_map.pkl"
file = open(filename,"wb")
pickle.dump(z_shot_label_map,file)
file.close()

In [14]:
class AI_CITY_CLIP(pl.LightningModule):
    def __init__(self,model):
        super().__init__()
        self.model = model

    def forward(self,batch):
        img,label = batch
        image_features = self.model.encode_image(img)
        text_features = self.model.encode_text(label[0])
        

        return image_features,text_features
    def predict_step(self,batch,batch_idx):
        with torch.no_grad():

            image_features,text_features = self(batch)
            image_features= image_features.detach().cpu()
            text_features= text_features.detach().cpu()
            image_features /= image_features.norm(dim=-1, keepdim=True)
            text_features /= text_features.norm(dim=-1, keepdim=True)
            similarity = (100.0 * image_features @ text_features.T)
            pred = similarity.softmax(dim=-1)
            return pred


In [15]:
clip_model = AI_CITY_CLIP(model)
trainer = Trainer(gpus=1)
pred = trainer.predict(clip_model,pred_dataloader)


  rank_zero_warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/ubuntu/Desktop/CVPR 2022 AliProducts Challenge/code/model_utils/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Predicting DataLoader 0:   0%|          | 0/10000 [00:00<?, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 2.93 GiB (GPU 0; 11.91 GiB total capacity; 8.31 GiB already allocated; 2.52 GiB free; 8.58 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
full_pred = torch.concat(pred)

In [None]:
full_pred.size()

torch.Size([23300, 116])

In [None]:
full_pred_np = full_pred.numpy()
y_true = x_test["label"].values.tolist()

In [None]:
def topk_accuracy(y_true,pred,label_map,k):
    pred_count = []
    remap = np.vectorize(lambda x:label_map[x]["label"])
    #get top k values 
    top_k_pred = remap(pred.argsort(axis=1)[:,-k:][:,::-1])
    # append 1 if true label is in top k values else append 0
    for i in range(len(y_true)):
        if y_true[i] in top_k_pred[i]:
            pred_count.append(1)
        else:
            pred_count.append(0)
    return sum(pred_count)/len(pred_count)


In [None]:
print("top 1 accuracy: "+str(topk_accuracy(y_true,full_pred_np,z_shot_label_map,k=1)*100)+"%")
print("top 5 accuracy: "+str(topk_accuracy(y_true,full_pred_np,z_shot_label_map,k=5)*100)+"%")
print("top 10 accuracy: "+str(topk_accuracy(y_true,full_pred_np,z_shot_label_map,k=10)*100)+"%")

top 1 accuracy: 99.5107296137339%
top 5 accuracy: 99.98283261802575%
top 10 accuracy: 99.99570815450643%


In [None]:
top1 = full_pred.type(torch.float32).topk(1,dim=1)
top5 = full_pred.type(torch.float32).topk(5,dim=1)
top10 = full_pred.type(torch.float32).topk(10,dim=1)
remap = np.vectorize(lambda x:z_shot_label_map[x]["label_name"])
final_preds =list({l:p} for l,p in zip(remap(top1[1].flatten().tolist()),top1[0].flatten().tolist()))
final_preds_top5 =list(dict(zip(remap(l),p)) for l,p in zip(top5[1].tolist(),top5[0].tolist()))
final_preds_top10 =list(dict(zip(remap(l),p)) for l,p in zip(top10[1].tolist(),top10[0].tolist()))


In [None]:
top5[1]

tensor([[  0,  85,  32,  20,  81],
        [  1, 109,  48, 108,  23],
        [  2,  11,  24, 108,  79],
        ...,
        [  8,  75,  31,  70,  65],
        [ 55,   6,  23, 109,  48],
        [ 44,  25,  93,  27,  14]])

In [None]:
preds_df = x_test
preds_df["preds"] = final_preds
preds_df["preds_top_5"] = final_preds_top5
preds_df["preds_top_10"] = final_preds_top10
preds_df.drop("Unnamed: 0",axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  preds_df["preds"] = final_preds
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  preds_df["preds_top_5"] = final_preds_top5
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  preds_df["preds_top_10"] = final_preds_top10
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in

In [None]:
preds_df.head()

Unnamed: 0,syn_image,label,label_name,preds,preds_top_5,preds_top_10
2583,00096_4236.jpg,96,Sour Patch Kids,{'Sour Patch Kids': 0.984375},"{'Sour Patch Kids': 0.984375, 'Raisinets': 0.0...","{'Sour Patch Kids': 0.984375, 'Raisinets': 0.0..."
20947,00020_81893.jpg,20,Cane Sugar,{'Cane Sugar': 0.99609375},"{'Cane Sugar': 0.99609375, 'Benadryl Allergy T...","{'Cane Sugar': 0.99609375, 'Benadryl Allergy T..."
21674,00111_94461.jpg,111,Vick's Pure Zzz's,{'Vick's Pure Zzz's': 0.998046875},"{'Vick's Pure Zzz's': 0.998046875, 'Children_s...","{'Vick's Pure Zzz's': 0.998046875, 'Children_s..."
94206,00100_36853.jpg,100,Sunmaid Raisins,{'Sunmaid Raisins': 0.94140625},"{'Sunmaid Raisins': 0.94140625, 'Barnums Anima...","{'Sunmaid Raisins': 0.94140625, 'Barnums Anima..."
63404,00103_80236.jpg,103,Tide Pods,{'Tide Pods': 0.72119140625},"{'Tide Pods': 0.72119140625, 'All Free and Cle...","{'Tide Pods': 0.72119140625, 'All Free and Cle..."


In [None]:
preds_df.to_csv("aicity4_all_pred.csv")

In [None]:
filename = "al_pred.pkl"
file = open(filename,"wb")
pickle.dump(full_pred,file)
file.close()

In [None]:
#test laod pickle 
file = open(filename,"rb")
pkl_preds = pickle.load(file)
file.close()

In [None]:
pkl_preds

tensor([[9.8438e-01, 0.0000e+00, 0.0000e+00,  ..., 1.4305e-06, 6.5029e-05,
         1.2338e-04],
        [0.0000e+00, 9.9609e-01, 3.3975e-06,  ..., 2.2054e-06, 0.0000e+00,
         2.9802e-07],
        [0.0000e+00, 1.1921e-07, 9.9805e-01,  ..., 0.0000e+00, 2.9802e-07,
         0.0000e+00],
        ...,
        [0.0000e+00, 4.1723e-07, 1.0788e-05,  ..., 1.1921e-07, 4.6492e-06,
         0.0000e+00],
        [0.0000e+00, 2.7275e-04, 1.7333e-04,  ..., 0.0000e+00, 0.0000e+00,
         4.7684e-07],
        [5.7638e-05, 0.0000e+00, 0.0000e+00,  ..., 5.9605e-08, 5.9605e-08,
         4.7088e-06]], dtype=torch.float16)