<a href="https://colab.research.google.com/github/mukulsn/Machine-Learning/blob/main/Projects/CI/Model%20and%20results/Image_matching_with_RAPIDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Date: 15 March 2024

Source notebook: https://www.kaggle.com/code/mukul001/part-2-rapids-tfidfvectorizer-cv-0-700/edit

Source for the RAPIDS installation steps:
https://colab.research.google.com/drive/13sspqiEZwso4NYTbsflpPyNFaVAAxUgr#scrollTo=B0C8IV5TQnjN

In [1]:
# This fetches the RAPIDS-Colab install files and checks your GPU.  Run this and the next cell only.
# Please read the output of this cell.  If your Colab instance is not RAPIDS compatible, it will warn you and give you remediation steps.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

Cloning into 'rapidsai-csp-utils'...
remote: Enumerating objects: 460, done.[K
remote: Counting objects: 100% (191/191), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 460 (delta 131), reused 124 (delta 91), pack-reused 269[K
Receiving objects: 100% (460/460), 126.19 KiB | 1.03 MiB/s, done.
Resolving deltas: 100% (233/233), done.
Collecting pynvml
  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 53.1/53.1 kB 1.7 MB/s eta 0:00:00
Installing collected packages: pynvml
Successfully installed pynvml-11.5.0
***********************************************************************
Woo! Your instance has a Tesla T4 GPU!
We will install the latest stable RAPIDS via pip 24.2.*!  Please stand by, should be quick...
***********************************************************************

Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting cudf-cu12==24.2.*
  Downloading https://pypi.nvidia.

In [2]:
# Load Libraries

import numpy as np, pandas as pd, gc
import cv2, matplotlib.pyplot as plt
import cudf, cuml, cupy
from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
print('RAPIDS',cuml.__version__)
print('TF',tf.__version__)

# Cap TensorFlow at LIMIT GB of GPU memory so that the rest of the card
# stays available to RAPIDS.
LIMIT = 1
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # The cap is passed as 1024*LIMIT (TensorFlow documents memory_limit in MB).
        memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*LIMIT)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [memory_cap])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # The error is reported but not re-raised, so the notebook keeps running.
        print(err)
print('We will restrict TensorFlow to max %iGB GPU RAM'%LIMIT)
# NOTE(review): 16 is hard-coded as the total GPU memory in GB - confirm for other cards.
print('then RAPIDS can use %iGB GPU RAM'%(16-LIMIT))



RAPIDS 24.02.00
TF 2.15.0
1 Physical GPUs, 1 Logical GPUs
We will restrict TensorFlow to max 1GB GPU RAM
then RAPIDS can use 15GB GPU RAM


In [6]:
# getting data from google drive
# The commented-out commands below are the one-time setup steps: copy the Data
# folder and the zipped EfficientNetB0 weights from Drive, then unzip both.

# !cp -r '/content/drive/MyDrive/Colab Notebooks/ML projects/March 2024/CI project extension/Data/' .
#!cp '/content/drive/MyDrive/Colab Notebooks/ML projects/March 2024/CI project extension/Model/efficientnetb0_notop.h5.zip' .
#!unzip '/content/efficientnetb0_notop.h5.zip'
#!unzip '/content/Data/ajio_dress_material_images.zip'
# Copy the extracted image folder to ./abc (the embedding cell reads images from /content/abc/).
!cp -r '/content/content/ajio_dress_material_images/' ./abc

Archive:  /content/Data/ajio_dress_material_images.zip
   creating: content/ajio_dress_material_images/
  inflating: content/ajio_dress_material_images/AJIO_464928602_purple.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466606457_pink.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466611581_pink.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466337251_orange.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466798382_grey.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466378857_navy.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466955994_red.jpg  
  inflating: content/ajio_dress_material_images/AJIO_469492347_multi.jpg  
  inflating: content/ajio_dress_material_images/AJIO_469493333_khaki.jpg  
  inflating: content/ajio_dress_material_images/AJIO_464767242_green.jpg  
  inflating: content/ajio_dress_material_images/AJIO_466388944_maroon.jpg  
  inflating: content/ajio_dress_material_images/AJIO_467020432_blue.jpg  


In [None]:
# Scratch check: positional index array of the same form DataGenerator builds.
# NOTE(review): `test` is only created in the next cell, so this fails on a fresh Run All.
np.arange( len(test) )

In [None]:

# Load the product listing, keep only rows that have an image URL, derive the
# local image file name from the product id, and work on the first N_ROWS rows.
N_ROWS = 200
test = (
    pd.read_csv('/content/Data/data ajio.csv')
    .dropna(subset=['product_image_url'])   # list form works on every pandas version
    .assign(image=lambda df: 'AJIO_' + df['PID'] + '.jpg')
    .head(N_ROWS)
    .copy()   # own the data: later cells add columns to `test`
)
# GPU copy of the same rows, used for the text (TF-IDF) step.
test_gf = cudf.DataFrame(test)
print('Using train as test to compute CV (since commit notebook). Shape is', test_gf.shape )

test_gf.head()

In [40]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of resized images read from disk.

    Each batch is a float32 array of shape ``(n, img_size, img_size, 3)``
    holding raw pixel values (no rescaling), with ``n <= batch_size``. Only
    inputs are returned (no labels).

    Args:
        df: pandas DataFrame with an ``image`` column of file names.
        img_size: side length, in pixels, every image is resized to.
        batch_size: maximum number of images per batch.
        path: prefix prepended to each file name (include the trailing slash).
        to_rgb: if True, convert each image from OpenCV's BGR channel order to
            RGB before resizing. Defaults to False, which keeps the original
            behaviour of this notebook.
            NOTE(review): ImageNet-pretrained Keras models are normally fed RGB;
            consider enabling this and re-tuning the distance threshold.
    """
    def __init__(self, df, img_size=256, batch_size=32, path='', to_rgb=False):
        self.df = df
        self.img_size = img_size
        self.batch_size = batch_size
        self.path = path
        self.to_rgb = to_rgb
        # Positional row numbers; batches are sliced out of this array.
        self.indexes = np.arange( len(self.df) )

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Ceiling division: a final partial batch still counts as one batch.
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    # model.predict() drives this Sequence: it calls __getitem__ with the batch
    # index (0, 1, ...). With batch_size=32, a generator wrapping 64 rows
    # therefore serves two batches.
    def __getitem__(self, index):
        'Generate one batch of data'
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        X = self.__data_generation(indexes)
        return X

    def __data_generation(self, indexes):
        """Read and resize the images at the given positional row indexes.

        Raises:
            FileNotFoundError: if an image file is missing or cannot be decoded.
        """
        X = np.zeros((len(indexes),self.img_size,self.img_size,3),dtype='float32')
        df = self.df.iloc[indexes]
        for i, file_name in enumerate(df['image']):
            img_path = self.path + file_name
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError('Could not read image: ' + img_path)
            if self.to_rgb:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            X[i,] = cv2.resize(img,(self.img_size,self.img_size)) #/128.0 - 1.0
        return X

In [41]:
# Embed every image with EfficientNetB0 (no classification head, global
# average pooling), processing the rows in chunks of CHUNK.
BASE = '/content/abc/'
WGT = '/content/efficientnetb0_notop.h5'
model = EfficientNetB0(weights=WGT,include_top=False, pooling='avg', input_shape=None)

embeds = []
CHUNK = 64

print('Computing image embeddings...')
for a in range(0, len(test), CHUNK):
    b = min(a + CHUNK, len(test))
    print('chunk',a,'to',b)
    test_gen = DataGenerator(test.iloc[a:b], batch_size=32, path=BASE)
    image_embeddings = model.predict(test_gen,verbose=1,use_multiprocessing=True, workers=4)
    embeds.append(image_embeddings)

# The Keras model is no longer needed once all chunks are embedded.
del model
_ = gc.collect()
image_embeddings = np.concatenate(embeds)
print('image embeddings shape',image_embeddings.shape)

Computing image embeddings...
for loop activate 0
chunk 0 to 64
for loop activate 1
chunk 64 to 128
for loop activate 2
chunk 128 to 192
for loop activate 3
chunk 192 to 200
image embeddings shape (200, 1280)


In [7]:
# Fit a nearest-neighbour index on the image embeddings.
KNN = 50
if len(test)==3: KNN = 2
# Never ask for more neighbours than there are fitted samples; otherwise the
# later kneighbors() query is expected to fail on small subsets.
KNN = min(KNN, len(image_embeddings))
model = NearestNeighbors(n_neighbors=KNN)
model.fit(image_embeddings)

In [11]:
preds = []
CHUNK = 64*4

print('Finding similar images...')
n_images = len(image_embeddings)
for a in range(0, n_images, CHUNK):
    b = min(a + CHUNK, n_images)
    print('chunk',a,'to',b)
    distances, indices = model.kneighbors(image_embeddings[a:b,])

    # For each query row keep only the neighbours whose distance is below 6.0
    # and record their product ids.
    for k in range(b - a):
        close = np.where(distances[k,]<6.0)[0]
        neighbour_rows = indices[k,close]
        preds.append(test.iloc[neighbour_rows].PID.values)

# Release the index and the embeddings.
del model, distances, indices, image_embeddings, embeds
_ = gc.collect()

Finding similar images...
chunk 0 to 256
chunk 256 to 512
chunk 512 to 768
chunk 768 to 1024
chunk 1024 to 1280
chunk 1280 to 1310


In [12]:
# Store the per-row match lists currently held in `preds` as column `preds2`
# (later combined with the `preds` column by combine_for_cv / combine_for_sub).
test['preds2'] = preds
test.head()

Unnamed: 0.1,Unnamed: 0,Brand,Link,PID,Product,Dp,Sp,offer_price_AJIO,Exclusive_AJIO,Product_discount_percentage,product_image_url,total_items,image,preds2
0,0,Ethnic Junction,https://www.ajio.com/ethnic-junction-embroider...,465471146_pink,Embroidered 3-Piece Unstitched Dress Material,799,3196.0,,,75%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465471146_pink.jpg,"[465471146_pink, 466127599_peach]"
1,1,GRIVA DESIGNER,https://www.ajio.com/griva-designer-embroidere...,465589788_pink,Embroidered Unstiched Dress Material,840,5999.0,,,86%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465589788_pink.jpg,[465589788_pink]
2,2,Peachmode,https://www.ajio.com/peachmode-women-embroider...,466827194_white,Women Embroidered 3-Piece Unstitched Dress Mat...,1054,2899.0,986.0,,64%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466827194_white.jpg,[466827194_white]
3,3,LEELI PEERI DESIGNER,https://www.ajio.com/leeli-peeri-designer-embr...,466266885_pink,Embroidered Unstitched Dress Material,840,3999.0,,,79%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466266885_pink.jpg,"[466266885_pink, 466675535_pink, 466706056_pin..."
4,4,Zonfab,https://www.ajio.com/zonfab-women-embroidered-...,466854574_black,Women Embroidered Semi-Stitched Straight Dress...,1480,3699.0,1258.0,,60%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466854574_black.jpg,"[466854574_black, 466854574_pink]"


In [13]:
# Display the first rows of `test` again.
test.head()

Unnamed: 0.1,Unnamed: 0,Brand,Link,PID,Product,Dp,Sp,offer_price_AJIO,Exclusive_AJIO,Product_discount_percentage,product_image_url,total_items,image,preds2
0,0,Ethnic Junction,https://www.ajio.com/ethnic-junction-embroider...,465471146_pink,Embroidered 3-Piece Unstitched Dress Material,799,3196.0,,,75%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465471146_pink.jpg,"[465471146_pink, 466127599_peach]"
1,1,GRIVA DESIGNER,https://www.ajio.com/griva-designer-embroidere...,465589788_pink,Embroidered Unstiched Dress Material,840,5999.0,,,86%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465589788_pink.jpg,[465589788_pink]
2,2,Peachmode,https://www.ajio.com/peachmode-women-embroider...,466827194_white,Women Embroidered 3-Piece Unstitched Dress Mat...,1054,2899.0,986.0,,64%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466827194_white.jpg,[466827194_white]
3,3,LEELI PEERI DESIGNER,https://www.ajio.com/leeli-peeri-designer-embr...,466266885_pink,Embroidered Unstitched Dress Material,840,3999.0,,,79%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466266885_pink.jpg,"[466266885_pink, 466675535_pink, 466706056_pin..."
4,4,Zonfab,https://www.ajio.com/zonfab-women-embroidered-...,466854574_black,Women Embroidered Semi-Stitched Straight Dress...,1480,3699.0,1258.0,,60%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466854574_black.jpg,"[466854574_black, 466854574_pink]"





Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [14]:
print('Computing text embeddings...')
# Binary TF-IDF over the product titles, English stop words removed and the
# vocabulary capped at 25,000 terms; the result is converted to a dense array.
tfidf_options = dict(stop_words='english', binary=True, max_features=25_000)
model = TfidfVectorizer(**tfidf_options)
text_embeddings = model.fit_transform(test_gf.Product).toarray()
print('text embeddings shape',text_embeddings.shape)

Computing text embeddings...
text embeddings shape (1310, 105)


In [16]:
preds = []
CHUNK = 64*4

print('Finding similar titles...')
n_rows = len(test)
for a in range(0, n_rows, CHUNK):
    b = min(a + CHUNK, n_rows)
    print('chunk',a,'to',b)

    # Dot product of this chunk's rows against every title; after the final
    # transpose, row k holds the scores of chunk row k versus all titles.
    # NOTE(review): this equals cosine similarity only if the TF-IDF rows are
    # L2-normalised - confirm the vectorizer's norm setting.
    cts = cupy.matmul( text_embeddings, text_embeddings[a:b].T).T

    for k in range(b - a):
        similar = cupy.where(cts[k,]>0.7)[0]
        # Move the indices to host memory before indexing the pandas frame.
        preds.append(test.iloc[cupy.asnumpy(similar)].PID.values)

del model, text_embeddings
_ = gc.collect()

Finding similar titles...
chunk 0 to 256
chunk 256 to 512
chunk 512 to 768
chunk 768 to 1024
chunk 1024 to 1280
chunk 1280 to 1310


In [17]:
# Store the per-row match lists currently held in `preds` as column `preds`.
test['preds'] = preds
test.head()

Unnamed: 0.1,Unnamed: 0,Brand,Link,PID,Product,Dp,Sp,offer_price_AJIO,Exclusive_AJIO,Product_discount_percentage,product_image_url,total_items,image,preds2,preds
0,0,Ethnic Junction,https://www.ajio.com/ethnic-junction-embroider...,465471146_pink,Embroidered 3-Piece Unstitched Dress Material,799,3196.0,,,75%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465471146_pink.jpg,"[465471146_pink, 466127599_peach]","[465471146_pink, 466827194_white, 466266885_pi..."
1,1,GRIVA DESIGNER,https://www.ajio.com/griva-designer-embroidere...,465589788_pink,Embroidered Unstiched Dress Material,840,5999.0,,,86%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_465589788_pink.jpg,[465589788_pink],[465589788_pink]
2,2,Peachmode,https://www.ajio.com/peachmode-women-embroider...,466827194_white,Women Embroidered 3-Piece Unstitched Dress Mat...,1054,2899.0,986.0,,64%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466827194_white.jpg,[466827194_white],"[465471146_pink, 466827194_white, 466266885_pi..."
3,3,LEELI PEERI DESIGNER,https://www.ajio.com/leeli-peeri-designer-embr...,466266885_pink,Embroidered Unstitched Dress Material,840,3999.0,,,79%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466266885_pink.jpg,"[466266885_pink, 466675535_pink, 466706056_pin...","[465471146_pink, 466827194_white, 466266885_pi..."
4,4,Zonfab,https://www.ajio.com/zonfab-women-embroidered-...,466854574_black,Women Embroidered Semi-Stitched Straight Dress...,1480,3699.0,1258.0,,60%,https://assets.ajio.com/medias/sys_master/root...,"22,769 Items Found",AJIO_466854574_black.jpg,"[466854574_black, 466854574_pink]","[466854574_black, 466798382_grey, 466937820_gr..."


In [25]:
def combine_for_sub(row):
    """Return the row's combined matches as one space-separated string.

    ``row.preds2`` and ``row.preds`` are concatenated, then de-duplicated and
    sorted by ``np.unique`` before being joined with single spaces.
    """
    merged = np.unique(np.concatenate((row.preds2, row.preds)))
    return ' '.join(merged)

def combine_for_cv(row):
    """Return the sorted, de-duplicated union of ``row.preds2`` and ``row.preds``.

    The result is the array produced by ``np.unique``.
    """
    merged = np.concatenate((row.preds2, row.preds))
    return np.unique(merged)

def getMetric(col):
    """Build a row-wise F1 scorer for the predictions stored in column ``col``.

    The returned function takes a row exposing ``row.target`` and ``row[col]``
    and returns ``2*n / (len(target) + len(prediction))``, where ``n`` is the
    number of distinct values the two have in common. When both are empty the
    score is 0.0 instead of a division-by-zero error.
    """
    def f1score(row):
        predicted = row[col]
        total = len(row.target) + len(predicted)
        if total == 0:
            # Nothing expected and nothing predicted: avoid dividing by zero.
            return 0.0
        n = len( np.intersect1d(row.target, predicted) )
        return 2*n / total
    return f1score

In [19]:
# Use each row's own PID as its group label; the CV cell groups by this column
# to build the ground-truth `target` lists.
test['label_group'] = test['PID']

In [26]:
# Ground truth per row: every PID that shares the row's label_group.
group_to_pids = test.groupby('label_group')['PID'].unique().to_dict()
test['target'] = test['label_group'].map(group_to_pids)

# Union of both match columns per row, scored row by row with F1 against the target.
test['oof'] = test.apply(combine_for_cv, axis=1)
score_row = getMetric('oof')
test['f1'] = test.apply(score_row, axis=1)
print('CV Score =', test['f1'].mean() )

# Space-separated version of the combined matches.
test['matches'] = test.apply(combine_for_sub,axis=1)

CV Score = 0.08479628911735816


In [27]:
# Write the full `test` frame, with whatever columns have been added so far,
# to an Excel file in the current working directory.
test.to_excel('final_result.xlsx')

In [2]:
# Attempt to show the en0 network interface. The relative path does not exist
# on this runtime, so the command fails (see the output below).
!../sbin/ifconfig en0

/bin/bash: line 1: ../sbin/ifconfig: No such file or directory


In [8]:
# Print the address returned by ipecho.net (the runtime's public IP).
# NOTE(review): the saved output exposes that IP - clear it before sharing the notebook.
!curl ipecho.net/plain

34.106.147.45

In [None]:
There are other services like ifconfig.me or api.ipify.org that you can use with curl to achieve the same result.