In [1]:
from PIL import Image
from io import BytesIO
from transformers import CLIPProcessor, CLIPModel, SegformerImageProcessor, AutoModelForSemanticSegmentation , AutoFeatureExtractor
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import os
import weaviate
import numpy as np
from numpy.linalg import norm
import json
import cv2
import base64
from time import sleep

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [2]:
checkpoint = "patrickjohncyh/fashion-clip"
model = CLIPModel.from_pretrained(checkpoint)
processor = CLIPProcessor.from_pretrained(checkpoint)
seg_processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes")
seg_model = AutoModelForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes")

def getTextEmbeddings(text):
	inputs = processor(text=text , images=Image.new('RGB' , (72 , 72)), return_tensors="pt", padding=True)
	outputs = model(**inputs , return_dict=True)
	return outputs["text_embeds"]

def getImageEmbeddingsFromPath(image_path):
	image = Image.open(image_path)
	inputs = processor(text=["dummy"] , images=image, return_tensors="pt", padding=True)
	outputs = model(**inputs , return_dict=True)	
	return outputs["image_embeds"]

def getImageEmbeddings(image):
	inputs = processor(text=["dummy"] , images=image, return_tensors="pt", padding=True)
	outputs = model(**inputs , return_dict=True)
	return outputs["image_embeds"]

def applyMask(image, mask):
	image = np.array(image)
	mask = np.array(mask)
	mask = np.stack((mask,)*3, axis=-1)
	resultant = image*mask
	resultant[mask == 0] = 255
	return resultant

def cropImage(image):
	temp = image[:, :, ::-1].copy() 
	temp = temp.astype('uint8')
	gray = cv2.cvtColor(temp, cv2.COLOR_BGR2GRAY)
	thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
	contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
	contours = sorted(contours, key=lambda x: cv2.contourArea(x), reverse=True)
	x,y,w,h = cv2.boundingRect(contours[0])
	crop = image[y:y+h, x:x+w]
	return crop

def segment(image, to_mask):
	inputs = seg_processor(images=image, return_tensors="pt")
	outputs = seg_model(**inputs)
	logits = outputs.logits.cpu()
	upsampled_logits = nn.functional.interpolate(
		logits,
		size=image.size[::-1],
		mode="bilinear",
		align_corners=False,
	)
	pred_seg = upsampled_logits.argmax(dim=1)[0]
	result = []
	for i in to_mask:
		mask = pred_seg.numpy().copy()
		mask[mask != i] = 0
		mask[mask == i] = 1
		item = applyMask(image, mask)
		result.append(item)
	return result

def segmentAndEmbed(image_path, to_mask):
	result = {}
	image = Image.open(image_path)
	fullImageEmbedding = getImageEmbeddings(image)
	buffered = BytesIO()
	image.save(buffered, format="PNG")
	fullImageBase64 = base64.b64encode(buffered.getvalue()).decode()
	result["fullImageBase64"] = fullImageBase64
	result["fullImageEmbedding"] = fullImageEmbedding
	segments = segment(image, to_mask)
	for i in range(len(to_mask)):
		segmentEmbedding = getImageEmbeddings(segments[i])
		segments[i] = Image.fromarray(np.uint8(segments[i]))
		buffered = BytesIO()
		segments[i].save(buffered, format="PNG")
		segmentBase64 = base64.b64encode(buffered.getvalue()).decode()
		result[f"segmentBase64_{i}"] = segmentBase64
		result[f"segmentEmbedding_{i}"] = segmentEmbedding
	return result



In [3]:
def max_pooling(vector_list):
    vectors_array = np.array(vector_list)
    max_pooled_vector = np.max(vectors_array, axis=0) 
    return max_pooled_vector

def mean_pooling(vector_list):
    vectors_array = np.array(vector_list)
    mean_pooled_vector = np.mean(vectors_array, axis=0)
    return mean_pooled_vector

def weighted_mean_pooling(vector_list, weight_list):
    vectors_array = np.array(vector_list)
    weights_array = np.array(weight_list)
    weighted_sum = np.sum(vectors_array * weights_array[:, np.newaxis], axis=0)
    sum_of_weights = np.sum(weights_array)
    weighted_mean = weighted_sum / sum_of_weights
    return weighted_mean

def cosine_similarity(vector1, vector2):
    vector1 = np.array(vector1)
    vector1 = vector1.reshape(1, -1)
    vector2 = np.array(vector2)
    vector2 = vector2.reshape(-1, 1)
    dot_product = np.dot(vector1, vector2)
    norm1 = np.linalg.norm(vector1)
    norm2 = np.linalg.norm(vector2)
    return dot_product / (norm1 * norm2)

In [4]:
client = weaviate.Client(url="http://localhost:8080")

In [5]:
embeddings = getTextEmbeddings("brown oversized top")

In [6]:
where_filter1 = {
    "path" : ["category"],
    "operator" : "Equal",
    "valueText" : "girls_collegewear"
}

In [11]:
response1 = (
    client.query
    # .get("FlipkartProducts",["uRL", "brand", "category", "product", "price", "rating", "numberRatings", "colour", "row"])
    .get("PinterestImages", ["image", "top {... on PinterestTop { image, _additional {vector} }}"])
    .with_near_vector({"vector" : embeddings.tolist()[0]})
    .with_where(where_filter1)
    # .with_near_vector({"vector" : top_embedding})
    .with_additional(["vector", "id", "distance"])
    .with_limit(5)
    .do()
)

In [12]:
len(response1["data"]["Get"]["PinterestImages"])

5

In [13]:
for i in range(15):
    try:
        # image = response["data"]["Get"]["FlipkartProducts"][i]['image']
        # image = response1["data"]["Get"]["PinterestImages"][i]["top"][0]['image']
        image = response1["data"]["Get"]["PinterestImages"][i]['image']
        image = Image.open(BytesIO(base64.b64decode(image.split(",",1)[0])))
        image.show()
    except:
        pass

In [14]:
list_of_embeddings = []
for i in range(len(response1["data"]["Get"]["PinterestImages"])):
    top_embedding = response1["data"]["Get"]["PinterestImages"][i]["top"][0]['_additional']["vector"]
    list_of_embeddings.append(top_embedding)

list_of_embeddings.append(embeddings.tolist()[0])

In [23]:
weights = [0.07, 0.07, 0.07, 0.07, 0.07, 0.65]
# weights = [0.2, 0.15, 0.15, 0.5]

In [None]:
top_mean = mean_pooling(list_of_embeddings).tolist()

In [24]:
top_weighted_mean = weighted_mean_pooling(list_of_embeddings, weights)

In [17]:
where_filter = {
    "path": ["category"],
    "operator": "Equal",
    "valueText" : "Girls Tops"
}

In [25]:
response = (
    client.query
    .get("FlipkartNoSegProducts",["uRL", "brand", "category", "product", "price", "rating", "numberRatings", "colour", "brand", "image", "fit", "type"])
    # .with_where(where_filter)
    # .with_near_vector({"vector" : top_embedding})
    # .with_near_vector({"vector" : top_mean})
    .with_near_vector({"vector" : top_weighted_mean})
    .with_additional(["vector", "id", "distance"])
    .with_limit(12)
    .do()
)

print(response)

{'data': {'Get': {'FlipkartNoSegProducts': [{'_additional': {'distance': 0.49978888, 'id': 'ae3460dd-5a3a-495b-9d77-a3d8535e6acc', 'vector': [-0.027965361, 0.115867205, 0.01883872, 0.0217216, 0.016838035, -0.027129859, 0.008937089, -0.0077494667, 0.010913903, -0.055598896, 0.01986974, 0.008858246, -0.055860333, 0.028380143, 0.0054174224, 0.0018870361, -0.065638684, -0.016780227, -0.0034686085, -0.024192704, 0.013678164, -0.035016295, 0.009138139, 0.007725975, -0.040336568, -0.0009240866, 0.04339369, -0.004306975, -0.11076145, -0.012860272, -0.0705012, -0.003958907, 0.01340423, -0.02459074, -0.0031065473, -0.009771645, -0.03401911, 0.09644933, -0.0034583954, 0.0014810372, 0.039852813, -0.029053506, 0.041629095, 0.005457225, -0.039100654, 0.018955939, -0.020111483, 0.017664058, 0.047694605, -0.0063638473, 0.018374175, -0.0032253806, 0.013548924, 0.03265265, -0.1649677, -0.0107026, -0.007843134, 0.0106125865, 0.022610191, 0.011734272, -0.08000252, -0.020676805, 0.03238125, 0.0037216605, -

In [30]:
def text_based_rank(original_vector, products):
    product_text_embeddings = {}
    for i, product in enumerate(products):
        product_text_embedding = getTextEmbeddings(f"{product['product']} {product['colour']} {product['fit']} {product['type']}")
        product_text_embeddings[i] = product_text_embedding.tolist()[0]
    cosine_similarities = {}
    for i, embedding in product_text_embeddings.items():
        cosine_similarities[i] = cosine_similarity(original_vector, embedding).tolist()[0][0]
    cosine_similarity_ranking = sorted(cosine_similarities.items(), key=lambda x: x[1], reverse=True)
    rank_tuples = []
    for i, (rank, _) in enumerate(cosine_similarity_ranking):
        rank_tuples.append((i, rank))
    return rank_tuples
    
def reciprocal_rank_fusion(rank_tuples, weights):
    fused = {}
    for i, rank in rank_tuples:
        fused_score = (weights[0] * (1 / (i + 1))) + weights[1] * (1 / (rank + 1))
        fused[i] = fused_score
    fused_ranking = sorted(fused.items(), key=lambda x: x[1], reverse=True)
    rrf_rank = {}
    for i, (rank, _) in enumerate(fused_ranking):
        rrf_rank[i] = rank
    return rrf_rank


In [27]:
text_rank = text_based_rank(embeddings.tolist()[0], response["data"]["Get"]["FlipkartNoSegProducts"])

In [31]:
new_rank = reciprocal_rank_fusion(text_rank, (0.35, 0.65))

In [32]:
new_rank

{0: 0, 1: 1, 2: 9, 3: 3, 4: 4, 5: 2, 6: 10, 7: 5, 8: 8, 9: 6, 10: 7, 11: 11}

In [26]:
for i in range(15):
    try:
        image = response["data"]["Get"]["FlipkartNoSegProducts"][i]['image']
        # image = response1["data"]["Get"]["PinterestImages"][i]["top"][0]['image']
        # image = response1["data"]["Get"]["PinterestImages"][i]['image']
        image = Image.open(BytesIO(base64.b64decode(image.split(",",1)[0])))
        image.show()
    except:
        pass

In [33]:
for key, value in new_rank.items():
    try:
        image = response["data"]["Get"]["FlipkartNoSegProducts"][value]['image']
        image = Image.open(BytesIO(base64.b64decode(image.split(",",1)[0])))
        image.show()
    except:
        pass