In [2]:
from PIL import Image
from io import BytesIO
from transformers import CLIPProcessor, CLIPModel, SegformerImageProcessor, AutoModelForSemanticSegmentation , AutoFeatureExtractor
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import os
import weaviate
import numpy as np
import json
import cv2
import base64

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [3]:
# Root folder that holds the scraped Pinterest CSV files and image directories.
# Set the PINTEREST_DATA_DIR environment variable to point somewhere else; when
# it is unset the original hard-coded location is used.
PATH = os.environ.get("PINTEREST_DATA_DIR", "D:\\Codes\\ziggy-ai\\scraping\\Pinterest")

In [4]:
# Handle to the Weaviate instance used by every schema and batch call below.
client = weaviate.Client(
    url="http://localhost:8080",
)

In [5]:
# Read the three class definitions. The `with` block closes every file even
# when one of the JSON documents fails to parse.
with open("pinterest.json", "r") as pinterest, \
        open("pinterest_top.json", "r") as pinterest_top, \
        open("pinterest_bottom.json", "r") as pinterest_bottom:
    pinterest_class = json.load(pinterest)
    pinterest_top_class = json.load(pinterest_top)
    pinterest_bottom_class = json.load(pinterest_bottom)

# WARNING: destructive. This wipes the whole schema of the connected Weaviate
# instance before the classes are recreated, so re-running this cell starts
# from an empty database.
client.schema.delete_all()
# The main class is created last; presumably its reference properties point at
# the top/bottom classes, which therefore have to exist first -- confirm
# against the JSON definitions.
client.schema.create_class(pinterest_top_class)
client.schema.create_class(pinterest_bottom_class)
client.schema.create_class(pinterest_class)

In [6]:
# CLIP model and its processor, both loaded from the same checkpoint; they
# produce the image embeddings stored as vectors.
checkpoint = "patrickjohncyh/fashion-clip"
processor = CLIPProcessor.from_pretrained(checkpoint)
model = CLIPModel.from_pretrained(checkpoint)

# Semantic-segmentation model and its image processor; they produce the
# per-pixel label map used to cut individual segments out of an image.
seg_model = AutoModelForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes")
seg_processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes")



In [7]:
def getImageEmbeddingsFromPath(image_path):
	"""Embed the image stored at ``image_path`` with the CLIP model.

	Args:
		image_path: Path of an image file that ``PIL.Image.open`` can read.

	Returns:
		The ``image_embeds`` entry of the CLIP model output for that image.
	"""
	# The context manager releases the file handle even if preprocessing fails;
	# the processor has already copied the pixels into tensors by then.
	with Image.open(image_path) as image:
		# The full CLIP forward pass is used, so a placeholder text is supplied
		# alongside the image; only the image embedding is kept.
		inputs = processor(text=["dummy"] , images=image, return_tensors="pt", padding=True)
	# Inference only: without no_grad the returned tensor would keep the whole
	# autograd graph alive for as long as the caller holds on to it.
	with torch.no_grad():
		outputs = model(**inputs , return_dict=True)
	return outputs["image_embeds"]

def getImageEmbeddings(image):
	"""Embed an already-loaded image with the CLIP model.

	Args:
		image: Image in any form the CLIP processor accepts; callers in this
			notebook pass PIL images and NumPy arrays.

	Returns:
		The ``image_embeds`` entry of the CLIP model output for that image.
	"""
	# The full CLIP forward pass is used, so a placeholder text is supplied
	# alongside the image; only the image embedding is kept.
	inputs = processor(text=["dummy"] , images=image, return_tensors="pt", padding=True)
	# Inference only: without no_grad the returned tensor would keep the whole
	# autograd graph alive for as long as the caller holds on to it.
	with torch.no_grad():
		outputs = model(**inputs , return_dict=True)
	return outputs["image_embeds"]

def applyMask(image, mask):
	"""Keep the pixels selected by ``mask`` and paint everything else 255.

	Args:
		image: Array-like image, either 2-D (single channel) or with a trailing
			channel axis of any size.
		mask: Array-like with the image's spatial shape. Pixels where it is 0
			become 255 in every channel; elsewhere the pixel is multiplied by
			the mask value (1 leaves it unchanged).

	Returns:
		A new array with the image's shape; the inputs are not modified. The
		dtype is whatever ``image * mask`` promotes to.
	"""
	pixels = np.asarray(image)
	mask = np.asarray(mask)
	# Add a channel axis to the mask only when the image has one, so the mask
	# broadcasts over any number of channels instead of a hard-coded three.
	if pixels.ndim == mask.ndim + 1:
		mask = mask[..., np.newaxis]
	# The product is always a fresh array, so writing into it is safe.
	resultant = pixels * mask
	resultant[np.broadcast_to(mask == 0, resultant.shape)] = 255
	return resultant

def cropImage(image):
	"""Crop ``image`` to the bounding box of its largest external contour.

	Args:
		image: NumPy array indexed as ``[row, column, channel]``. The channel
			axis is reversed before the BGR-to-gray conversion, i.e. the input
			is treated as RGB.

	Returns:
		The slice of ``image`` inside the bounding rectangle of the contour with
		the largest area, or ``image`` unchanged when no contour is found.
	"""
	temp = image[:, :, ::-1].copy()
	temp = temp.astype('uint8')
	gray = cv2.cvtColor(temp, cv2.COLOR_BGR2GRAY)
	# Inverted Otsu threshold: darker-than-threshold pixels become foreground.
	thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
	# findContours returns (contours, hierarchy) on OpenCV 4.x but
	# (image, contours, hierarchy) on 3.x; [-2] is the contour list on both.
	contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
	if len(contours) == 0:
		# Nothing to crop to (e.g. a uniform image); the old code raised IndexError.
		return image
	largest = max(contours, key=cv2.contourArea)
	x, y, w, h = cv2.boundingRect(largest)
	return image[y:y+h, x:x+w]

def segment(image, to_mask):
	"""Cut the requested segmentation labels out of ``image``.

	Args:
		image: PIL image; its ``size`` attribute is used to upsample the logits.
		to_mask: Iterable of label ids to extract.

	Returns:
		A list with one ``applyMask`` result per entry of ``to_mask``, in the
		same order: pixels predicted as that label are kept and the rest are
		set to 255.
	"""
	inputs = seg_processor(images=image, return_tensors="pt")
	# Inference only: no autograd graph is needed.
	with torch.no_grad():
		outputs = seg_model(**inputs)
	logits = outputs.logits.cpu()
	upsampled_logits = nn.functional.interpolate(
		logits,
		# PIL reports (width, height); interpolate expects (height, width).
		size=image.size[::-1],
		mode="bilinear",
		align_corners=False,
	)
	# Per-pixel label map of the single image in the batch, converted once.
	labels = upsampled_logits.argmax(dim=1)[0].numpy()
	result = []
	for i in to_mask:
		# One comparison builds the 0/1 mask. The previous two-step in-place
		# assignment produced an all-ones mask when the label was 0.
		mask = (labels == i).astype(labels.dtype)
		result.append(applyMask(image, mask))
	return result

def _encodePngBase64(image):
	"""Serialize a PIL image as PNG and return the bytes as base64 text."""
	buffered = BytesIO()
	image.save(buffered, format="PNG")
	return base64.b64encode(buffered.getvalue()).decode()

def segmentAndEmbed(image_path, to_mask):
	"""Embed an image and each of its requested segments.

	Args:
		image_path: Path of the image file to process.
		to_mask: Segmentation label ids to extract, forwarded to ``segment``.

	Returns:
		A dict with ``fullImageBase64`` / ``fullImageEmbedding`` for the whole
		image and, for the i-th entry of ``to_mask``, ``segmentBase64_{i}`` /
		``segmentEmbedding_{i}``. Base64 values are PNG-encoded images.
	"""
	result = {}
	# The context manager closes the file even if a model call raises.
	with Image.open(image_path) as source:
		# The masking step assumes three colour channels, so other modes
		# (grayscale, CMYK, palette, ...) are converted; RGB images pass
		# through untouched.
		image = source if source.mode == "RGB" else source.convert("RGB")
		fullImageEmbedding = getImageEmbeddings(image)
		result["fullImageBase64"] = _encodePngBase64(image)
		result["fullImageEmbedding"] = fullImageEmbedding
		segments = segment(image, to_mask)
	for i, item in enumerate(segments):
		# The embedding is computed from the raw masked array, as before; the
		# uint8 conversion is only applied for the stored PNG.
		segmentEmbedding = getImageEmbeddings(item)
		result[f"segmentBase64_{i}"] = _encodePngBase64(Image.fromarray(np.uint8(item)))
		result[f"segmentEmbedding_{i}"] = segmentEmbedding
	return result

In [8]:
# One name drives the CSV file, the image folder and the stored category.
# NOTE(review): the stored category used to be "mens_traditional" although the
# data is read from mens_formal; that looked like a copy-paste leftover --
# confirm before re-ingesting.
CATEGORY = "mens_formal"
# Segmentation label ids to extract: the first feeds PinterestTop, the second
# PinterestBottom (presumably the upper-clothes and pants labels of the
# segmentation model -- confirm against its config).
SEGMENT_LABELS = [4, 6]

df = pd.read_csv(os.path.join(PATH, f"{CATEGORY}.csv"))
with client.batch(batch_size=100, num_workers=2) as batch:
    for index, row in df.iterrows():
        image_path = os.path.join(PATH, CATEGORY, f"image_{index}.jpg")
        # Rows whose image file is missing are skipped silently.
        if not os.path.exists(image_path):
            continue
        try:
            output = segmentAndEmbed(image_path, SEGMENT_LABELS)
            # Empty CSV cells are read as NaN, which is not valid JSON for the
            # batch request; store an empty description instead.
            description = row["Image Alt Text"]
            if pd.isna(description):
                description = ""
            pinterest_obj = {
                "category" : CATEGORY,
                "description" : description,
                "image" : output["fullImageBase64"]
            }
            pinterest_uuid = batch.add_data_object(pinterest_obj, "PinterestImages", vector=output["fullImageEmbedding"])
            pinterest_top_uuid = batch.add_data_object({"image" : output["segmentBase64_0"]}, "PinterestTop", vector=output["segmentEmbedding_0"])
            pinterest_bottom_uuid = batch.add_data_object({"image" : output["segmentBase64_1"]}, "PinterestBottom", vector=output["segmentEmbedding_1"])
            # Link the full image to its two segments.
            batch.add_reference(from_object_class_name="PinterestImages", to_object_class_name="PinterestTop", from_object_uuid=pinterest_uuid, to_object_uuid=pinterest_top_uuid, from_property_name="top")
            batch.add_reference(from_object_class_name="PinterestImages", to_object_class_name="PinterestBottom", from_object_uuid=pinterest_uuid, to_object_uuid=pinterest_bottom_uuid, from_property_name="bottom")
            print(f"Added {index} to Weaviate")
        except Exception as error:
            # Best-effort ingestion: report the cause and move on. Unlike a bare
            # `except:`, this still lets Ctrl-C / kernel interrupt stop the loop.
            print(f"Error adding {index} to Weaviate: {error!r}")

Added 1 to Weaviate
Added 2 to Weaviate
Added 3 to Weaviate
Added 4 to Weaviate
Added 6 to Weaviate
Added 10 to Weaviate
Added 12 to Weaviate
Added 14 to Weaviate
Added 15 to Weaviate
Added 17 to Weaviate
Added 18 to Weaviate
Added 19 to Weaviate
Added 20 to Weaviate
Added 28 to Weaviate
Added 31 to Weaviate
Added 32 to Weaviate
Added 33 to Weaviate
Added 36 to Weaviate
Added 38 to Weaviate
Added 39 to Weaviate
Added 40 to Weaviate
Added 43 to Weaviate
Added 44 to Weaviate
Added 46 to Weaviate
Added 52 to Weaviate
Added 61 to Weaviate
Added 63 to Weaviate
Added 65 to Weaviate
Added 71 to Weaviate
Added 72 to Weaviate
Error adding 77 to Weaviate
Error adding 80 to Weaviate
Added 84 to Weaviate
Added 88 to Weaviate
Added 89 to Weaviate
Added 91 to Weaviate
Added 93 to Weaviate
Added 97 to Weaviate
Added 99 to Weaviate
Added 100 to Weaviate
Added 104 to Weaviate
Added 105 to Weaviate
Added 107 to Weaviate
Added 113 to Weaviate
Added 115 to Weaviate
Added 120 to Weaviate
Added 121 to Weavi