In [52]:
from sentence_transformers import SentenceTransformer
from PIL import Image

import numpy as np
import openai
import pandas as pd
import pickle
import uuid
import pinecone
#from tqdm.auto import tqdm
import datetime
from time import sleep
import traceback
from dotenv import load_dotenv
import os
from matplotlib import pyplot as plt
from matplotlib import image as mpimg

In [54]:
load_dotenv()

IMAGE_PINECONE_API_KEY = os.getenv("IMAGE_PINECONE_API_KEY")
IMAGE_PINECONE_ENV = os.getenv("IMAGE_PINECONE_ENV")

In [53]:
# initialize connection (get API key at app.pinecone.io)
def initPinecone(index_name:str, pinecone_api_key:str, pinecone_env:str, dimension_len:int = 1536) -> pinecone.Index:
    pinecone.init(
        api_key=pinecone_api_key,
        #environment="asia-southeast1-gcp"  # find next to API key  #us-central1-gcp #asia-southeast1-gcp
        environment=pinecone_env
    )
        #environment="us-central1-gcp"  # find next to API key
    # check if index already exists (it shouldn't if this is first time)
    if index_name not in pinecone.list_indexes():
    # if does not exist, create index
        pinecone.create_index(
            index_name,
            dimension=dimension_len,
            metric='cosine',
            metadata_config={
                'indexed': ['title', 'heading']
            }
        )

    # connect to index
    return pinecone.Index(index_name)

In [55]:
def generate_vector(id, values, metadata):
    return {
        'id': id,
        'values': values,
        'metadata': { 'image_name': metadata }
    }

In [42]:
def seed(index: pinecone.Index):
    # Load CLIP model
    model = SentenceTransformer('clip-ViT-B-32')

    img_data = []

    img_data.append("001.png")
    img_data.append("002.png")
    img_data.append("003.png")
    img_data.append("004.png")

    to_upsert = []
    for item in img_data:
        uid = str(uuid.uuid4())
        embedding = model.encode(Image.open('./images/' + item))
        vector = generate_vector(id=uid,values=embedding,metadata=item)
        to_upsert.append(vector)
        print(vector)
   
    # print(to_upsert)
    # view index stats
    """
    
    """
    print(index.describe_index_stats())

    #index.upsert(vectors=to_upsert)

    # view index stats
    print(index.describe_index_stats())
    print(to_upsert)




In [63]:
def search(index: pinecone.Index, query:str):
    model = SentenceTransformer('clip-ViT-B-32')

    # Encode text query
    query_string = query
    text_emb = model.encode(query_string)

    print(text_emb)

    results = index.query(vector=text_emb,top_k=1)
    """
        result = results[0]
    print(result)
    plt.title(result)
    image = mpimg.imread('./images/' + result)
    plt.imshow(image)
    plt.show()
    """





In [44]:
index_name = "osha-images"

index = initPinecone(index_name=index_name,pinecone_api_key=IMAGE_PINECONE_API_KEY,pinecone_env=IMAGE_PINECONE_ENV,dimension_len=512)
seed(index)

ad64a8a2-1940-41dc-ac69-9e89080db291
a9eafbd3-82fe-45d8-85a6-67d4b748c7df
5dc06539-5b27-4a94-ac0e-367ef6556901
72e2caab-1756-465a-966f-17d62ef000ad
{'dimension': 512,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}




{'dimension': 512,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4}
[{'id': 'ad64a8a2-1940-41dc-ac69-9e89080db291', 'values': [-0.31336188316345215, -0.38391417264938354, 0.15383079648017883, 0.03478362411260605, 0.47814828157424927, -0.29281383752822876, -0.1940252184867859, -0.29776641726493835, -0.04024136811494827, 0.10269062221050262, 0.08939395844936371, -0.07256637513637543, 1.0968012809753418, 0.2611045241355896, 0.5043377876281738, -0.17639824748039246, -0.4583081603050232, -0.1256105899810791, 0.03440503776073456, -0.4322054982185364, -0.030094891786575317, 0.4630137085914612, 0.27084869146347046, -0.027037717401981354, 0.01496441662311554, 0.22882162034511566, 0.084541454911232, 0.30591723322868347, -0.17535710334777832, 0.08979474008083344, 0.10639926791191101, -0.020261839032173157, -0.25363534688949585, -0.027110956609249115, 0.6251537203788757, -0.07743923366069794, 0.29269373416900635, 0.24571958184242249, 0.37346699833869934, 

In [64]:
index_name = "osha-images"

index = initPinecone(index_name=index_name,pinecone_api_key=IMAGE_PINECONE_API_KEY,pinecone_env=IMAGE_PINECONE_ENV,dimension_len=512)

search(index=index, query="open field")

[-5.87108694e-02 -1.53861865e-01  2.09312707e-01  2.11406693e-01
  1.51969284e-01  4.78777736e-02 -5.60188591e-02 -1.15776598e+00
  8.98227841e-02  4.88662899e-01 -2.84218639e-02 -2.44533435e-01
 -3.72980475e-01 -2.78696448e-01  4.27829862e-01 -4.72462326e-02
  1.90005869e-01  1.76985294e-01 -5.54078221e-02  8.87627378e-02
  8.87869895e-02  3.06002945e-01  2.99914539e-01 -1.83031887e-01
 -2.94887722e-01  3.90726104e-02 -1.29930243e-01  3.08872938e-01
 -1.51497573e-01 -2.26556823e-01 -2.23324075e-01 -1.31659329e-01
 -3.11659068e-01 -1.29265666e-01 -2.08148375e-01  2.93743730e-01
  1.46948099e-01  3.49780619e-02  4.68947142e-02  2.41101384e-02
 -7.40833730e-02 -2.58562446e-01  3.19743872e-01  1.81040391e-02
  3.14081371e-01 -8.69113207e-03 -1.11226730e-01 -1.72764957e-01
  2.27828920e-01  8.59902427e-02 -6.99984729e-02 -2.31381074e-01
  3.33244205e-01  1.53519183e-01  5.78380525e-02 -1.22549742e-01
 -9.35357809e-03  1.04955889e-01 -2.90258408e-01  2.89213836e-01
  1.21103227e-01 -1.87780

ApiValueError: Unable to prepare type ndarray for serialization