Read here https://towardsdatascience.com/getting-started-with-weaviate-a-beginners-guide-to-search-with-vector-databases-14bbb9285839

In [53]:
pip install weaviate-client

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [44]:
import weaviate
auth_config = weaviate.AuthApiKey(api_key="CywsEHTFXp0kSNMlWi3LPU3hPdPtQrbQebcE")  # Replace w/ your Weaviate instance API key

# Instantiate the client
client = weaviate.Client(
    url="https://thomas-weaviate-learn-16v2jei3.weaviate.network", # Replace w/ your Weaviate cluster URL
    auth_client_secret=auth_config,
    additional_headers={
        "X-OpenAI-Api-Key": "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", # Replace with your OpenAI key
        }
)

print(client.is_ready())


            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


True


In [45]:
import pandas as pd

df = pd.read_csv("jeopardy_questions.csv", nrows = 100)

In [46]:
class_obj = {
    # Class definition
    "class": "JeopardyQuestion",

    # Property definitions
    "properties": [
        {
            "name": "category",
            "dataType": ["text"],
        },
        {
            "name": "question",
            "dataType": ["text"],
        },
        {
            "name": "answer",
            "dataType": ["text"],
        },
    ],

    # Specify a vectorizer
    "vectorizer": "text2vec-openai",

    # Module settings
    "moduleConfig": {
        "text2vec-openai": {
            "vectorizeClassName": False,
            "model": "ada",
            "modelVersion": "002",
            "type": "text"
        },
        "qna-openai": {
          "model": "text-davinci-003"
        },
         "generative-openai": {
          "model": "gpt-3.5-turbo"
        }
    },
}

if client.schema.exists("JeopardyQuestion"):
    client.schema.delete_class("JeopardyQuestion")


client.schema.create_class(class_obj)
print(client.schema.get("JeopardyQuestion"))



{'class': 'JeopardyQuestion', 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2}, 'cleanupIntervalSeconds': 60, 'stopwords': {'additions': None, 'preset': 'en', 'removals': None}}, 'moduleConfig': {'generative-openai': {'model': 'gpt-3.5-turbo'}, 'qna-openai': {'model': 'text-davinci-003'}, 'text2vec-openai': {'baseURL': 'https://api.openai.com', 'model': 'ada', 'modelVersion': '002', 'type': 'text', 'vectorizeClassName': False}}, 'multiTenancyConfig': {'enabled': False}, 'properties': [{'dataType': ['text'], 'indexFilterable': True, 'indexSearchable': True, 'moduleConfig': {'text2vec-openai': {'skip': False, 'vectorizePropertyName': False}}, 'name': 'category', 'tokenization': 'word'}, {'dataType': ['text'], 'indexFilterable': True, 'indexSearchable': True, 'moduleConfig': {'text2vec-openai': {'skip': False, 'vectorizePropertyName': False}}, 'name': 'question', 'tokenization': 'word'}, {'dataType': ['text'], 'indexFilterable': True, 'indexSearchable': True, 'moduleConfig': {'text2

In [47]:
from weaviate.util import generate_uuid5

with client.batch(
    batch_size=200,  # Specify batch size
    num_workers=2,   # Parallelize the process
) as batch:
    for _, row in df.iterrows():
        question_object = {
            "category": row.category,
            "question": row.question,
            "answer": row.answer,
        }
        batch.add_data_object(
            question_object,
            class_name="JeopardyQuestion",
            uuid=generate_uuid5(question_object)
        )
        

            Please instead use the `client.batch.configure()` method to configure your batch and `client.batch` to enter the context manager.
            See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [48]:
count = client.query.aggregate("JeopardyQuestion").with_meta_count().do()
print(count)

{'data': {'Aggregate': {'JeopardyQuestion': [{'meta': {'count': 100}}]}}}


In [49]:
import json

res = client.query.get("JeopardyQuestion", 
                      ["question", "answer", "category"])\
                  .with_additional(["id", "vector"])\
                  .with_limit(2)\
                  .do()

print(json.dumps(res, indent=4))

{
    "data": {
        "Get": {
            "JeopardyQuestion": [
                {
                    "_additional": {
                        "id": "038a5b5c-4b36-5ac3-9bb1-87f02d38a2e1",
                        "vector": [
                            -0.024166431,
                            0.009845334,
                            0.010571133,
                            -0.0036625986,
                            -0.012741811,
                            0.019314328,
                            -0.022983646,
                            0.004774819,
                            -0.017956814,
                            -0.030671744,
                            0.040107135,
                            0.0136826625,
                            0.003430746,
                            0.013050948,
                            -0.023198698,
                            0.0034811487,
                            0.0125670815,
                            0.015591245,
                       

In [50]:
# Vector search test

res = client.query.get(
    "JeopardyQuestion",
    ["question", "answer", "category"])\
    .with_near_text({"concepts": "animals"})\
    .with_limit(2)\
    .do()
print(json.dumps(res, indent=2))


{
  "data": {
    "Get": {
      "JeopardyQuestion": [
        {
          "answer": "an octopus",
          "category": "SEE & SAY",
          "question": "Say the name of <a href=\"http://www.j-archive.com/media/2010-07-06_DJ_26.jpg\" target=\"_blank\">this</a> type of mollusk you see"
        },
        {
          "answer": "the ant",
          "category": "3-LETTER WORDS",
          "question": "In the title of an Aesop fable, this insect shared billing with a grasshopper"
        }
      ]
    }
  }
}


In [51]:
# Question and anwesering

ask = {
  "question": "Which animal was mentioned in the title of the Aesop fable?",
  "properties": ["answer"]
}

res = (
  client.query
  .get("JeopardyQuestion", [
      "question",
      "_additional {answer {hasAnswer property result} }"
  ])
  .with_ask(ask)
  .with_limit(1)
  .do()
)

print(json.dumps(res, indent=2))

{
  "data": {
    "Get": {
      "JeopardyQuestion": null
    }
  },
  "errors": [
    {
      "locations": [
        {
          "column": 6,
          "line": 1
        }
      ],
      "message": "explorer: get class: extend: extend answer: connection to: OpenAI API failed with status: 404 error: The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations",
      "path": [
        "Get",
        "JeopardyQuestion"
      ]
    }
  ]
}


In [52]:
## Generative AI

res = client.query.get(
    "JeopardyQuestion", 
    ["question", "answer"])\
  .with_near_text({"concepts": ["animals"]})\
  .with_limit(1)\
  .with_generate(single_prompt= "Generate a question to which the answer is {answer}")\
  .do()

print(json.dumps(res, indent=2))

{
  "data": {
    "Get": {
      "JeopardyQuestion": [
        {
          "_additional": {
            "generate": {
              "error": null,
              "singleResult": "What sea creature has eight arms and can change color to blend in with its surroundings?"
            }
          },
          "answer": "an octopus",
          "question": "Say the name of <a href=\"http://www.j-archive.com/media/2010-07-06_DJ_26.jpg\" target=\"_blank\">this</a> type of mollusk you see"
        }
      ]
    }
  }
}
