## Dependencies

In [8]:
from dotenv import load_dotenv
import os
load_dotenv(verbose=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
cohere_api_key = os.getenv('COHERE_API_KEY')
weaviate_api_key = os.getenv('weaviate_api_key')
weaviate_url = os.getenv('weaviate_url')

In [9]:
import weaviate
import json

client = weaviate.Client(
  url=weaviate_url,  # URL of your Weaviate instance
  auth_client_secret=weaviate.AuthApiKey(api_key=weaviate_api_key), # (Optional) If the Weaviate instance requires authentication
  additional_headers={
    "X-OpenAI-Api-Key": openai_api_key, # Replace with your OpenAI key
  }
)

client.schema.get()  # Get the schema to test connection

{'classes': [{'class': 'JeopardyQuestion',
   'description': 'List of jeopardy questions',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'generative-openai': {'model': 'gpt-3.5-turbo'},
    'text2vec-openai': {'model': 'ada',
     'modelVersion': '002',
     'type': 'text',
     'vectorizeClassName': True}},
   'properties': [{'dataType': ['text'],
     'description': 'Category of the question',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'category',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'description': 'The question',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'question',
  

## Schema

In [10]:
# resetting the schema. CAUTION: THIS WILL DELETE YOUR DATA 
client.schema.delete_all()

schema = {
   "classes": [
       {
           "class": "JeopardyQuestion",
           "description": "List of jeopardy questions",
           "vectorizer": "text2vec-openai",
           "moduleConfig": { # specify the model you want to use
               "generative-openai": { 
                    "model": "gpt-3.5-turbo",  # Optional - Defaults to `gpt-3.5-turbo`
                }
           },
           "properties": [
               {
                  "name": "Category",
                  "dataType": ["text"],
                  "description": "Category of the question",
               },
               {
                  "name": "Question",
                  "dataType": ["text"],
                  "description": "The question",
               },
               {
                  "name": "Answer",
                  "dataType": ["text"],
                  "description": "The answer",
                }
            ]
        }
    ]
}

client.schema.create(schema)

print("Successfully created the schema.")

Successfully created the schema.


## Import the Data

In [15]:
import requests
url = 'https://raw.githubusercontent.com/weaviate/weaviate-examples/main/jeopardy_small_dataset/jeopardy_tiny.json'
resp = requests.get(url)
data = json.loads(resp.text)

if client.is_ready():

# Configure a batch process
  with client.batch as batch:
      batch.batch_size=100
      # Batch import all Questions
      for i, d in enumerate(data):
          print(f"importing question: {i+1}")

          properties = {
              "answer": d["Answer"],
              "question": d["Question"],
              "category": d["Category"],
          }

          client.batch.add_data_object(properties, "JeopardyQuestion")
else:
  print("The Weaviate cluster is not connected.")

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


## Generative Search Queries

### Single Result

Single Result makes a generation for each individual search result. 

In the below example, I want to create a Facebook ad from the Jeopardy question about Elephants. 

In [16]:
generatePrompt = "Turn the following Jeogrady question into a Facebook Ad: {question}"

result = (
  client.query
  .get("JeopardyQuestion", ["question"])
  .with_generate(single_prompt = generatePrompt)
  .with_near_text({
    "concepts": ["Elephants"]
  })
  .with_limit(1)
).do()

print(json.dumps(result, indent=1))

{
 "data": {
  "Get": {
   "JeopardyQuestion": [
    {
     "_additional": {
      "generate": {
       "error": null,
       "singleResult": "Attention animal lovers! Did you know that there is only one living mammal in the order Proboseidea? Discover more fascinating facts about this unique creature on Jeogrady. Click now to learn more! \ud83d\udc18\ud83c\udf0d #Jeogrady #AnimalFacts #Proboseidea #Mammals #Wildlife #NatureLovers"
      }
     },
     "question": "It's the only living mammal in the order Proboseidea"
    }
   ]
  }
 }
}


### Grouped Result

Grouped Result generates a single response from all the search results. 

The below example is creating a Facebook ad from the 3 retrieved Jeoprady questions about animals. 

In [17]:
generateTask = "Explain why these Jeopardy questions are under the Animals category."

result = (
  client.query
  .get("JeopardyQuestion", ["question"])
  .with_generate(grouped_task = generateTask)
  .with_near_text({
    "concepts": ["Animals"]
  })
  .with_limit(3)
).do()

print(json.dumps(result, indent=1))

{
 "data": {
  "Get": {
   "JeopardyQuestion": [
    {
     "_additional": {
      "generate": {
       "error": null,
       "groupedResult": "These Jeopardy questions are under the Animals category because they all relate to different types of animals. The first two questions specifically mention the elephant, which is a well-known animal that falls under the category of mammals. The third question mentions the gavial, which is a type of crocodile, and asks about a specific bodily feature, which is also related to the study of animals. Therefore, all three questions are related to the study of animals and are appropriately categorized under the Animals category."
      }
     },
     "question": "It's the only living mammal in the order Proboseidea"
    },
    {
     "_additional": {
      "generate": null
     },
     "question": "It's the only living mammal in the order Proboseidea"
    },
    {
     "_additional": {
      "generate": null
     },
     "question": "The gavial looks