In [None]:
!pip install weaviate-client

## Deployment options
* [Weaviate Cloud Services (WCS)](https://weaviate.io/developers/wcs)
* [Local deployment with Docker](https://weaviate.io/developers/weaviate/installation/docker-compose)

### Saas Deployment

Head to [WCS](https://console.weaviate.cloud/), create an account and create a Weaviate instance.

This might take a few minutes. Once you create an instance, click on `details` to get your `Cluster URL`.

### Local Deployment

For local deployment, you can use Docker, see [docs](https://weaviate.io/developers/weaviate/installation/docker-compose).

Get the docker config, with the following command:

```
curl -o docker-compose.yml "https://configuration.weaviate.io/v2/docker-compose/docker-compose.yml?modules=standalone&runtime=docker-compose&weaviate_version=v1.18.2"
```

Then start it with:

```
docker-compose up -d
```

Your cluster url should be
```
url="http://localhost:8080",
```

In [2]:
## Connect to a WCS instance

import os
import weaviate

auth_config = weaviate.AuthBearerToken(
  access_token = os.getenv("AUTH_WRITE_KEY"),
  expires_in=3600, # in seconds, by default 60s
)
client = weaviate.Client(
    url="https://jbf9ymswtcqn0niqsdnobg.gcp.weaviate.cloud",
    auth_client_secret=auth_config
)

client.is_ready()


            Access to your weaviate instance is not possible after expiration and this client will return an
            authentication exception.

            Things to try:
            - You might need to enable refresh tokens in the settings of your authentication provider
            - You might need to send the correct scope. For some providers it needs to include "offline_access"
            


True

In [None]:
# Connect to a local instance

import os
import weaviate

client = weaviate.Client(
    url="http://localhost:8080"
)

client.is_ready()

In [10]:
# Only run this when you want to delete the collection and recreate it from scratch
# This will delete all data in the Images collection

# client.schema.delete_class("Images")

In [3]:
images_schema = {
    "class": "Images",
    "description": "Wiki Article",
    "vectorizer": "none",

    "vectorIndexConfig": {
        # I am not 100% what distance metrics Clip uses, I guess it is cosine
        # but if not, check the docs to select the right distance metric
        # https://weaviate.io/developers/weaviate/configuration/distances
        "distance": "cosine" 
    },
    "properties": [
        {
            "name": "my_id",
            "dataType": [ "int" ],
        },
        {
            "name": "name",
            "dataType": [ "string" ],
        },
        {
            "name": "url",
            "dataType": [ "string" ],
        },
    ]
}

# add the schema
client.schema.create_class(images_schema)
print("The collection schema has been created")

The collection schema has been created


In [None]:
### Step 1 - configure Weaviate Batch, which optimizes CRUD operations in bulk
# - batch size of 100 – you can increase this value to speed up the process, but how much depends on the size of vectors and the server specs
# feel free to experiment and increase
# - add timeout retries if something goes wrong

client.batch.configure(
    batch_size=100,
    timeout_retries=3,
)

In [8]:
print("Uploading data with vectors to Images collection...")

# here should be the code to load the data from a file, S3 bucket, etc

collection = [
    {
        "img_id": 1,
        "img_name": "Dancing dog",
        "img_url": "/img/dd.png",
        "img_vector": [0.1, 0.2, 0.3, 0.4]
    },{
        "img_id": 2,
        "img_name": "House with a tree",
        "img_url": "/img/hwat.png",
        "img_vector": [0.3, 0.5, 0.2, 0.3]
    }
]

counter=0

with client.batch as batch:
    for item in collection:
        
        # print update message every 100 objects        
        if (counter %100 == 0):
            print(f"Import {counter} / {len(collection)} ")
        
        properties = {
            "my_id": item["img_id"],
            "name": item["img_name"],
            "url": item["img_url"],
        }
        
        vector = item["img_vector"] # grab the vector embedding from your object
        
        batch.add_data_object(properties, "Images", None, vector)
        counter = counter+1

print(f"Importing ({len(collection)}) Images complete")  

Uploading data with vectors to Images collection...
Import 0 / 2 
Importing (2) Images complete


In [9]:
# Display the number of imported objects
result = (
    client.query.aggregate("Images")
    .with_meta_count()
    .do()
)
print("Object count: ", result["data"]["Aggregate"]["Images"])

Object count:  [{'meta': {'count': 2}}]
