Connect to Weaviate

In [89]:
import weaviate

# Open a client connection to the Weaviate instance running on this machine
client = weaviate.connect_to_local()

# Last expression of the cell, so the readiness flag is displayed as the cell output
client.is_ready()

True

In [106]:
import weaviate.classes.config as wc

collection_name = "Test"

# Start from a clean slate: drop a leftover collection of the same name, if any
if client.collections.exists(collection_name):
    client.collections.delete(collection_name)

# Schema: two text properties plus one blob property for the image
test_properties = [
    wc.Property(name="name", data_type=wc.DataType.TEXT),
    wc.Property(name="image", data_type=wc.DataType.BLOB),
    wc.Property(name="text", data_type=wc.DataType.TEXT),
]

# multi2vec-clip vectorizer over all three fields, without explicit weights
# (a per-field weight can be given as wc.Multi2VecField(name=..., weight=...))
test_vectorizer = wc.Configure.Vectorizer.multi2vec_clip(
    image_fields=[wc.Multi2VecField(name="image")],
    text_fields=[
        wc.Multi2VecField(name="name"),
        wc.Multi2VecField(name="text"),
    ],
)

client.collections.create(
    name=collection_name,
    properties=test_properties,
    vectorizer_config=test_vectorizer,
)

import base64
import os
import requests

# Helper function to convert file to base64 representation
def to_base64(url_or_path, timeout=30):
    """Return the base64 text encoding of a file given by URL or local path.

    Args:
        url_or_path: An ``http://`` / ``https://`` URL, or the path of a file
            on the local filesystem.
        timeout: Seconds to wait for the HTTP server before giving up
            (only used for URLs).

    Returns:
        The raw file content, base64-encoded and decoded to a UTF-8 ``str``.

    Raises:
        ValueError: If the argument is neither an HTTP(S) URL nor an existing
            regular file.
        requests.HTTPError: If the URL answers with an error status.
    """
    if url_or_path.startswith(('http://', 'https://')):
        # Handle URL
        image_response = requests.get(url_or_path, timeout=timeout)
        # Fail loudly instead of base64-encoding an HTML error page as if it were the image
        image_response.raise_for_status()
        content = image_response.content
    elif os.path.isfile(url_or_path):
        # Handle local file path (directories are rejected below rather than failing in open())
        with open(url_or_path, 'rb') as image_file:
            content = image_file.read()
    else:
        raise ValueError("The provided string is neither a valid URL nor a local file path.")

    return base64.b64encode(content).decode("utf-8")

collection = client.collections.get(collection_name)

directory = "./simple_images/test/"
source = os.listdir(directory)
contextual_info = ["cat", "dog"]

with collection.batch.dynamic() as batch:
    for src_obj in source:
        path = os.path.join(directory, src_obj)
        name = os.path.splitext(src_obj)[0]
        poster_b64 = to_base64(path)
        # Label for the optional "text" field: the file named "pic1" gets
        # contextual_info[1], every other file gets contextual_info[0].
        # It is only stored if the "text" line below is enabled.
        desc = contextual_info[1] if name == "pic1" else contextual_info[0]
        weaviate_obj = {
            "name": name,
            "image": poster_b64,  # Add the image in base64 encoding
            # "text": desc # Optional text field for image metadata
        }

        # The model provider integration will automatically vectorize the object
        batch.add_object(
            properties=weaviate_obj,
            # vector=vector  # Optionally provide a pre-obtained vector
        )

# Batch imports do not raise on per-object errors, so report them explicitly
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} object(s) failed to import")
    print(f"First failed object: {failed_objects[0]}")


In [111]:
import weaviate.classes as wvc
from weaviate.classes.query import Move

collection = client.collections.get(collection_name)

# Ask for at most two objects within distance 0.6 of the text "cat"
# and have the distance reported back in the metadata
distance_metadata = wvc.query.MetadataQuery(distance=True)
response = collection.query.near_text(
    query="cat",
    limit=2,
    distance=0.6,
    return_metadata=distance_metadata,
)

for hit in response.objects:
    print(hit.properties)
    print(hit.metadata)

{'text': None, 'name': 'pic1'}
MetadataReturn(creation_time=None, last_update_time=None, distance=0.29831671714782715, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
{'text': None, 'name': 'pic2'}
MetadataReturn(creation_time=None, last_update_time=None, distance=0.33747434616088867, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)


Creating a collection in Weaviate that supports both image and text data. <br>
The collection has 3 fields; the percentage is the share of the vector each field is meant to contribute: <br>
"name" (10%): name of the image file <br>
"image" (70%): base64 encoding of the image itself <br>
"text" (20%): any textual data or image metadata <br>

In [114]:
# Create the demo collections
# (objects are vectorized by the configured multi2vec-clip module when they are inserted later)

import weaviate.classes.config as wc

collection_name = "DemoCollection"  # Replace collection name here
collection_name_2 = "DemoCollection2"

# Drop both collections if they already exist so this cell can be re-run
for stale_name in (collection_name, collection_name_2):
    if client.collections.exists(stale_name):
        client.collections.delete(stale_name)

# First collection: field weights are image 0, name 0, text 1.0
client.collections.create(
    name=collection_name,
    properties=[
        wc.Property(name="name", data_type=wc.DataType.TEXT),
        wc.Property(name="image", data_type=wc.DataType.BLOB),
        wc.Property(name="text", data_type=wc.DataType.TEXT),
    ],
    # Define & configure the vectorizer module
    vectorizer_config=wc.Configure.Vectorizer.multi2vec_clip(
        image_fields=[wc.Multi2VecField(name="image", weight=0)],  # image weight: 0
        text_fields=[
            wc.Multi2VecField(name="name", weight=0),              # name weight: 0
            wc.Multi2VecField(name="text", weight=1.0),            # text weight: 1.0
        ],
    ),
)

# Second collection: same layout with "2"-suffixed property names;
# field weights are image 0.1, name 0.1, text 0.8
client.collections.create(
    name=collection_name_2,
    properties=[
        wc.Property(name="name2", data_type=wc.DataType.TEXT),
        wc.Property(name="image2", data_type=wc.DataType.BLOB),
        wc.Property(name="text2", data_type=wc.DataType.TEXT),
    ],
    # Define & configure the vectorizer module
    vectorizer_config=wc.Configure.Vectorizer.multi2vec_clip(
        image_fields=[wc.Multi2VecField(name="image2", weight=0.1)],  # image weight: 0.1
        text_fields=[
            wc.Multi2VecField(name="name2", weight=0.1),              # name weight: 0.1
            wc.Multi2VecField(name="text2", weight=0.8),              # text weight: 0.8
        ],
    ),
)

print(collection_name + " created")
print(collection_name_2 + " created")


AttributeError: module 'weaviate.classes.config' has no attribute 'Multi2VecqField'

In [50]:
import base64
import os
import requests

# Helper function to convert file to base64 representation
def to_base64(url_or_path, timeout=30):
    """Return the base64 text encoding of a file given by URL or local path.

    Args:
        url_or_path: An ``http://`` / ``https://`` URL, or the path of a file
            on the local filesystem.
        timeout: Seconds to wait for the HTTP server before giving up
            (only used for URLs).

    Returns:
        The raw file content, base64-encoded and decoded to a UTF-8 ``str``.

    Raises:
        ValueError: If the argument is neither an HTTP(S) URL nor an existing
            regular file.
        requests.HTTPError: If the URL answers with an error status.
    """
    if url_or_path.startswith(('http://', 'https://')):
        # Handle URL
        image_response = requests.get(url_or_path, timeout=timeout)
        # Fail loudly instead of base64-encoding an HTML error page as if it were the image
        image_response.raise_for_status()
        content = image_response.content
    elif os.path.isfile(url_or_path):
        # Handle local file path (directories are rejected below rather than failing in open())
        with open(url_or_path, 'rb') as image_file:
            content = image_file.read()
    else:
        raise ValueError("The provided string is neither a valid URL nor a local file path.")

    return base64.b64encode(content).decode("utf-8")


Downloading images using simple image search

In [4]:
import argparse
from simple_image_download import simple_image_download as simp
from PIL import Image
import os

def image_download(query, number=5):
    """Download images for a search query using simple_image_download.

    Args:
        query: Search query for images.
        number: Number of images to download (default: 5).

    Returns:
        None. The files are written by the library itself; presumably under
        ``./simple_images/`` (the directory other cells read from) — confirm
        against the library's documentation.
    """
    # Pass the arguments straight through. Routing them through argparse added
    # nothing and made any query starting with "-" abort with SystemExit.
    num_images = int(number)

    # Create the downloader and fetch the images for the search query
    downloader = simp.simple_image_download()
    downloader.download(query, num_images)

# Example usage: request 10 images for the search query "cat"
image_download("cat", 10)




Insert images into Weaviate

In [75]:
collection = client.collections.get(collection_name)
collection_2 = client.collections.get(collection_name_2)

directory = "./simple_images/test_data/"
source = os.listdir(directory)
contextual_info = ["line graph, seasonal rainfall, pre-monsoon, monsoon, post-monsoon, 1971-2001",
                   "bar graph, mobile accesories orders analysis, pouches, holsters, shells, skins, fitted cases, bumpers, flip cases, sleeves",
                   "yearly expenses, insurance, utilities, groceries",
                   "regional review north america, wesley foods international, underlying revenue, organic revenue growth, underlying operating profit, underlying operating margin",
                   "ethnicity of students, asian, white, hispanic, other, black, filipino, pacific islander, native american",
                   ]

# Descriptions are matched to files purely by position in the directory listing.
# NOTE(review): os.listdir() returns entries in arbitrary order, so verify that
# each description still lines up with its file whenever the directory changes.
# Fail before inserting anything if there are more files than descriptions,
# instead of hitting an IndexError halfway through a batch.
if len(source) > len(contextual_info):
    raise ValueError(
        f"{len(source)} files found in {directory} but only "
        f"{len(contextual_info)} descriptions are defined"
    )


def _insert_images(target, name_key, image_key, text_key):
    """Batch-insert every listed file into `target` under the given property names."""
    with target.batch.dynamic() as batch:
        for src_obj, description in zip(source, contextual_info):
            path = os.path.join(directory, src_obj)
            weaviate_obj = {
                name_key: os.path.splitext(src_obj)[0],
                image_key: to_base64(path),  # Add the image in base64 encoding
                text_key: description,  # Optional text field for image metadata
            }

            # The model provider integration will automatically vectorize the object
            batch.add_object(properties=weaviate_obj)

    # Batch imports do not raise on per-object errors, so report them explicitly
    failed = target.batch.failed_objects
    if failed:
        print(f"{len(failed)} object(s) failed to import")
        print(f"First failed object: {failed[0]}")


_insert_images(collection, "name", "image", "text")
_insert_images(collection_2, "name2", "image2", "text2")
print("images added")

images added


Insert text into Weaviate

In [21]:
collection = client.collections.get(collection_name)

text_list = [
    "Cats in fact do not like dogs because they are enemies",
    "Eggplant is a natural enemy of the cat",
    "Cats love fish",
    "Monsters are enemies of battle cats",
    "cats consider other cats to be their friends",
    "cats like hamsters",
]

# Each sentence becomes one object that carries only a "text" property
text_objects = [{"text": sentence} for sentence in text_list]

with collection.batch.dynamic() as batch:
    for weaviate_obj in text_objects:
        # The model provider integration will automatically vectorize the object
        batch.add_object(properties=weaviate_obj)

Show objects in collection using Weaviate API

In [87]:
import requests
import json

# Weaviate REST endpoint for listing objects, plus the query parameters to send
class_name = collection_name
limit = 10
url = "http://localhost:8080/v1/objects"

# Send a GET request to fetch the objects.
# `params` lets requests build and URL-encode the query string, and `timeout`
# keeps the cell from hanging forever if the server does not answer.
response = requests.get(url, params={"class": class_name, "limit": limit}, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    objects = response.json()

    # Pretty-print the JSON response
    print(json.dumps(objects, indent=4))
else:
    # Print an error message if the request was not successful
    print(f"Failed to retrieve objects. Status code: {response.status_code}")
    print(response.text)

{
    "deprecations": [],
    "objects": [
        {
            "class": "Test",
            "creationTimeUnix": 1720504578103,
            "id": "ca429962-dc26-4f35-af9f-4f22ac60a5ed",
            "lastUpdateTimeUnix": 1720504578103,
            "properties": {
                "image": "/9j/4AAQSkZJRgABAQEAYABgAAD//gA7Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcgSlBFRyB2NjIpLCBxdWFsaXR5ID0gODIK/9sAQwAGBAQFBAQGBQUFBgYGBwkOCQkICAkSDQ0KDhUSFhYVEhQUFxohHBcYHxkUFB0nHR8iIyUlJRYcKSwoJCshJCUk/9sAQwEGBgYJCAkRCQkRJBgUGCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQk/8AAEQgBkAJYAwEiAAIRAQMRAf/EAB8AAAEFAQEBAQEBAAAAAAAAAAABAgMEBQYHCAkKC//EALUQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+v/EAB8BAAMBAQEBAQEBAQEAAAAAAAABAgMEBQYHCAkKC//EALURAAIBAgQEAwQHBQQEAAECdwABAgMRBAUhMQYSQVEHYXETIjKBCBRCka

Iterate through collection and print items

In [113]:
def _print_all_items(coll):
    """Print the properties and the vector(s) of every object in `coll`."""
    # include_vector=True returns all vectors; with named vectors a list of
    # names (e.g. ['title', 'body']) can be passed instead
    for entry in coll.iterator(include_vector=True):
        print(entry.properties)
        print(entry.vector)


collection = client.collections.get(collection_name)
_print_all_items(collection)

print("------")
collection_2 = client.collections.get(collection_name_2)
_print_all_items(collection_2)


{'text': None, 'name': 'pic1'}
{'default': [-0.14694121479988098, -0.08753138780593872, 0.04560935124754906, 0.022989943623542786, -0.0721621960401535, -0.1612728238105774, 0.036386746913194656, -0.25621387362480164, 0.1698022335767746, 0.09545524418354034, 0.09133419394493103, -0.25675517320632935, 0.16637466847896576, -0.2970828711986542, 0.3923320770263672, -0.11703824996948242, -0.17914043366909027, 0.008902385830879211, 0.09785392135381699, 0.3329892158508301, -0.22933603823184967, 0.1738475263118744, 0.3152844309806824, -0.256892591714859, -0.20772366225719452, 0.04933950677514076, 0.060491710901260376, 0.07387083023786545, -0.07413393259048462, 0.14567531645298004, -0.07975950837135315, -0.10739270597696304, 0.14132781326770782, -0.08863770961761475, -0.302604615688324, 0.0985693484544754, 0.05533578246831894, 0.01259779091924429, 0.21580664813518524, 0.2996918261051178, -0.31450700759887695, -0.3523692190647125, -0.1742962747812271, -0.08411388844251633, -0.05883108824491501, -

Sample nearText query

In [83]:
import weaviate.classes as wvc
from weaviate.classes.query import Move

def _print_hits(result):
    """Print the properties and metadata of every object in a query result."""
    for hit in result.objects:
        print(hit.properties)
        print(hit.metadata)


# Same nearText query ("bar graph", distance <= 0.6, top 2) against both collections
collection = client.collections.get(collection_name)
response = collection.query.near_text(
    query="bar graph",
    limit=2,
    distance=0.6,
    return_metadata=wvc.query.MetadataQuery(distance=True),
)
_print_hits(response)

print("----------")
collection_2 = client.collections.get(collection_name_2)
response = collection_2.query.near_text(
    query="bar graph",
    limit=2,
    distance=0.6,
    return_metadata=wvc.query.MetadataQuery(distance=True),
)
_print_hits(response)

QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('7155cb5f-9f3c-441f-a99d-1efa45e1617b'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.5686601400375366, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'text': 'bar graph, mobile accesories orders analysis, pouches, holsters, shells, skins, fitted cases, bumpers, flip cases, sleeves', 'name': 'Mobile Accessories Orders Analysis'}, references=None, vector={}, collection='DemoCollection'), Object(uuid=_WeaviateUUIDInt('4be11d39-ac16-4ae8-9ca0-786f64d2594a'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.5763142704963684, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'text': 'yearly expenses, insurance, utilities, groceries', 'name': 'yearly expenses'}, references=None, vector={}, collection='DemoCollection')])
[Object(uuid=_WeaviateUUIDInt('7155cb5f-9f3c-441f-a99d-1efa45

In [31]:
import weaviate.classes as wvc
from weaviate.classes.query import Move

collection = client.collections.get(collection_name)

# Only the distance is requested back as metadata
distance_only = wvc.query.MetadataQuery(distance=True)

response = collection.query.near_text(
    query="pie chart",
    distance=0.6,
    # Optional steering of the query vector, currently disabled:
    # move_to=Move(force=0.85, concepts="enemy"),
    # move_away=Move(force=0.45, concepts="friends"),
    return_metadata=distance_only,
    limit=2,
)

for hit in response.objects:
    print(hit.properties)
    print(hit.metadata)

{'text': None, 'name': 'chart2.png'}
MetadataReturn(creation_time=None, last_update_time=None, distance=0.3431311845779419, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
{'text': None, 'name': 'chart1.png'}
MetadataReturn(creation_time=None, last_update_time=None, distance=0.37204670906066895, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None)
