In [1]:
import weaviate

client = weaviate.connect_to_local()

# Close the connection even if the readiness check raises, so a failed run
# does not leave an open connection behind in the kernel.
try:
    print(client.is_ready())  # Should print: `True`
finally:
    client.close()  # Free up resources

True


In [2]:
import weaviate
from weaviate.classes.config import Configure

client = weaviate.connect_to_local()

try:
    # Guard the creation so this cell can be re-run (e.g. Restart & Run All)
    # without failing because the "Question" collection already exists.
    # Note: an existing collection is reused as-is; delete it first if its
    # vectorizer/generative configuration needs to change.
    if client.collections.exists("Question"):
        questions = client.collections.get("Question")
    else:
        questions = client.collections.create(
            name="Question",
            vectorizer_config=Configure.Vectorizer.text2vec_ollama(     # Configure the Ollama embedding integration
                api_endpoint="http://host.docker.internal:11434",       # Allow Weaviate from within a Docker container to contact your Ollama instance
                model="nomic-embed-text",                               # The model to use
            ),
            generative_config=Configure.Generative.ollama(              # Configure the Ollama generative integration
                api_endpoint="http://host.docker.internal:11434",       # Allow Weaviate from within a Docker container to contact your Ollama instance
                model="llama3.2",                                       # The model to use
            )
        )
finally:
    client.close()  # Free up resources, even if creation fails

/Users/moraish/Desktop/ams691/project_llm/.venv/lib/python3.9/site-packages/weaviate/collections/classes/config.py:1950: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  for cls_field in self.model_fields:


In [4]:
import weaviate
import requests, json

client = weaviate.connect_to_local()

try:
    # Download the sample dataset. The timeout keeps the cell from hanging on a
    # stalled connection, and raise_for_status() stops here on an HTTP error
    # instead of trying to parse an error page as JSON.
    resp = requests.get(
        "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json",
        timeout=30,
    )
    resp.raise_for_status()
    data = json.loads(resp.text)

    questions = client.collections.get("Question")

    # Import the records through a rate-limited batch.
    with questions.batch.rate_limit(requests_per_minute=200) as batch:
        for d in data:
            batch.add_object(
                {
                    "answer": d["Answer"],
                    "question": d["Question"],
                    "category": d["Category"],
                }
            )
            # Abort early once more than 10 errors have accumulated.
            if batch.number_errors > 10:
                print("Batch import stopped due to excessive errors.")
                break

    # Report any objects that failed to import.
    failed_objects = questions.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
finally:
    client.close()  # Free up resources, even if the download or import fails

In [6]:
import weaviate
import json

client = weaviate.connect_to_local()

try:
    questions = client.collections.get("Question")

    # near_text query for "air", returning at most 2 objects.
    response = questions.query.near_text(
        query="air",
        limit=2
    )

    # Pretty-print the properties of each returned object.
    for obj in response.objects:
        print(json.dumps(obj.properties, indent=2))
finally:
    client.close()  # Free up resources, even if the query fails

{
  "answer": "Sound barrier",
  "question": "In 70-degree air, a plane traveling at about 1,130 feet per second breaks it",
  "category": "SCIENCE"
}
{
  "answer": "the atmosphere",
  "question": "Changes in the tropospheric layer of this are what gives us weather",
  "category": "SCIENCE"
}


In [1]:
import weaviate

client = weaviate.connect_to_local()

try:
    questions = client.collections.get("Question")

    # Retrieve at most 2 objects near "biology" and pass them together to the
    # generative integration with a single grouped prompt.
    response = questions.generate.near_text(
        query="biology",
        limit=2,
        grouped_task="Write a tweet with emojis about these facts."
    )

    print(response.generated)  # Inspect the generated text
finally:
    client.close()  # Free up resources, even if generation fails

"Did you know? 🤔🧬 DNA is the molecule that holds our genes! 💡 And, did you know that the liver 👍 plays a crucial role in regulating blood sugar levels by removing excess glucose and storing it as glycogen? 🔄💪 Mind. Blown. #ScienceFacts #DNA #Liver"


In [1]:
# Testing

# 1. View the schema for a collection

import weaviate
import json

client = weaviate.connect_to_local()

try:
    questions = client.collections.get("Question")
    config = questions.config.get()

    # Dump the collection configuration as indented JSON.
    print(json.dumps(config.to_dict(), indent=4))
finally:
    client.close()  # Free up resources, even if fetching the config fails

{
    "invertedIndexConfig": {
        "bm25": {
            "b": 0.75,
            "k1": 1.2
        },
        "cleanupIntervalSeconds": 60,
        "indexNullState": false,
        "indexPropertyLength": false,
        "indexTimestamps": false,
        "stopwords": {
            "preset": "en"
        }
    },
    "multiTenancyConfig": {
        "enabled": false,
        "autoTenantCreation": false,
        "autoTenantActivation": false
    },
    "properties": [
        {
            "name": "category",
            "description": "This property was generated by Weaviate's auto-schema feature on Thu Apr 24 01:42:36 2025",
            "dataType": [
                "text"
            ],
            "indexFilterable": true,
            "indexSearchable": true,
            "indexRangeFilters": false,
            "tokenization": "word",
            "moduleConfig": {
                "text2vec-ollama": {
                    "skip": false,
                    "vectorizePropertyName": false


In [None]:
import weaviate, json

# Connect to the local Weaviate instance and look up the "ResearchPapers" collection.
# NOTE(review): unlike the earlier cells, this one never calls client.close();
# the later cells that use `client` and `response` rely on that leftover state.
client = weaviate.connect_to_local()
papers = client.collections.get("ResearchPapers")

# near_text query for "atlas", returning at most 2 objects.
response = papers.query.near_text(
    query="atlas",
    limit=2,
    return_metadata=["distance"],      # also request the "distance" metadata for each result
    return_properties=["paper_title", "chunk_text"]  # only return these two properties
)

# The recorded output for this cell is `QueryReturn(objects=[])`, i.e. no objects came back.
# NOTE(review): presumably nothing had been imported into the collection yet — confirm.
print(response)


QueryReturn(objects=[])


In [9]:
# Re-print `response`, which is not defined in this cell; it must already exist
# in the kernel from a previously executed cell.
print(response)

QueryReturn(objects=[])


In [None]:
# Fetch one object by UUID and print the first few values of each requested vector.
# Relies on `client` still being open from an earlier cell.
# NOTE(review): `obj_uuid` is not defined in any cell visible here — it must be
# set before this cell runs, otherwise this raises NameError.
papers = client.collections.get('ResearchPapers')
# NOTE(review): these are the same strings used as property names in the
# near_text query cell; confirm the collection defines vectors with these names.
vector_names = ['paper_title', 'chunk_text']

data_object = papers.query.fetch_object_by_id(
    uuid=obj_uuid,  # Object UUID
    include_vector=vector_names  # Specify names of the vectors to include
)

# The vectors are returned in the `vector` property as a dictionary
for n in vector_names:
    # Only the first 5 components are printed to keep the output short.
    print(f"Vector '{n}': {data_object.vector[n][:5]}...")

In [12]:
# List every collection on the instance. Relies on `client` still being open
# from an earlier cell. In the recorded output the result is a dict mapping the
# collection name to a `_CollectionConfig` object.
# NOTE(review): this rebinds `response`, which earlier held the near_text query result.
response = client.collections.list_all(simple=False)

print(response)

{'ResearchPapers': _CollectionConfig(name='ResearchPapers', description=None, generative_config=_GenerativeConfig(generative=<GenerativeSearches.OLLAMA: 'generative-ollama'>, model={'apiEndpoint': 'http://host.docker.internal:11434', 'model': 'llama3'}), inverted_index_config=_InvertedIndexConfig(bm25=_BM25Config(b=0.75, k1=1.2), cleanup_interval_seconds=60, index_null_state=False, index_property_length=False, index_timestamps=False, stopwords=_StopwordsConfig(preset=<StopwordsPreset.EN: 'en'>, additions=None, removals=None)), multi_tenancy_config=_MultiTenancyConfig(enabled=False, auto_tenant_creation=False, auto_tenant_activation=False), properties=[_Property(name='chunk_text', description='The text content of the paper chunk', data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=False)