In [3]:
import os

import weaviate

# Connect to a local Weaviate instance with default host/port settings.
# The OpenAI key is forwarded as a request header and is read from the
# environment so the secret never lands in the notebook or its saved outputs;
# a KeyError here means OPENAI_API_KEY is not set.
# NOTE(review): a literal key used to be committed on this line -- treat it as
# compromised and rotate it.
client = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
)

WeaviateConnectionError: Connection to Weaviate failed. Details: Error: All connection attempts failed. 
Is Weaviate running and reachable at http://localhost:8080?

In [None]:
import os

def load_text_files_from_folder(folder_path):
    """Read every ``.txt`` file directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list of ``{"filename": str, "content": str}`` dicts, ordered by
        filename so repeated runs yield the same document order.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    docs = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, filename)
        # Skip non-.txt entries and anything that is not a regular file
        # (e.g. a directory whose name happens to end in ".txt").
        if not filename.endswith(".txt") or not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            docs.append({"filename": filename, "content": f.read()})
    return docs

def chunk_text(text, max_chunk_size=512):
    """Split ``text`` into consecutive chunks of at most ``max_chunk_size`` words.

    Words are whatever ``str.split()`` yields (whitespace-delimited), and each
    chunk is re-joined with single spaces, so original whitespace is not kept.

    Args:
        text: The string to split.
        max_chunk_size: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings; empty if ``text`` contains no words.

    Raises:
        ValueError: If ``max_chunk_size`` is less than 1.
    """
    # A negative step would make range() yield nothing and silently drop all
    # of the text; zero would fail with an unrelated-looking range() error.
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]


In [None]:
class_name = "DocumentChunk"

# `client` is created with weaviate.connect_to_local(), i.e. the v4 Python
# client, which manages schema through `client.collections` rather than the
# v3-style `client.schema`. create_from_dict() accepts the same class
# definition dict as before, so the schema itself is unchanged.
# Create the collection only if it does not exist yet, so re-running the cell
# is safe.
if not client.collections.exists(class_name):
    client.collections.create_from_dict({
        "class": class_name,
        "vectorizer": "text2vec-openai",  # Or "none" if you manually embed
        "properties": [
            {"name": "content", "dataType": ["text"]},
            {"name": "filename", "dataType": ["text"]}
        ]
    })


In [None]:
from openai import OpenAI
# Keep credentials out of the notebook: with `api_key` omitted, the OpenAI
# client reads the key from the OPENAI_API_KEY environment variable.
openai_client = OpenAI()

def embed_text(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Args:
        text: The input passed to the embeddings endpoint.
        model: Embedding model name. Stored vectors and query vectors must be
            produced with the same model to be comparable, so keep this
            consistent between ingestion and querying.

    Returns:
        The embedding of the first (only) item in the API response.
    """
    response = openai_client.embeddings.create(model=model, input=text)
    # A single input produces a single entry in `data`.
    return response.data[0].embedding


In [None]:
def store_chunks_in_weaviate(docs):
    """Chunk, embed and insert each document into the Weaviate collection.

    Args:
        docs: Iterable of dicts with ``"filename"`` and ``"content"`` keys,
            as produced by ``load_text_files_from_folder``.

    Every chunk becomes one object holding the chunk text and its source
    filename. Calling this again inserts the same chunks again (no
    de-duplication is performed).
    """
    # `client` is a v4 client (weaviate.connect_to_local), so objects are
    # written through a collection handle instead of v3's client.data_object.
    collection = client.collections.get(class_name)
    for doc in docs:
        for chunk in chunk_text(doc["content"]):
            vector = embed_text(chunk)  # Skip if using internal vectorizer
            collection.data.insert(
                properties={
                    "content": chunk,
                    "filename": doc["filename"],
                },
                vector=vector,  # omit if using Weaviate's vectorizer
            )


In [None]:
def query_weaviate(query_text, top_k=5):
    """Return the ``top_k`` stored chunks nearest to ``query_text``.

    Args:
        query_text: Natural-language query; it is embedded with ``embed_text``.
        top_k: Maximum number of results to return.

    Returns:
        A list of dicts, each with ``"content"`` and ``"filename"`` keys,
        in the order returned by Weaviate.
    """
    query_vector = embed_text(query_text)  # again, skip if using Weaviate's vectorizer
    # `client` is a v4 client (weaviate.connect_to_local); vector search goes
    # through the collection's query namespace instead of v3's
    # client.query.get(...).do().
    collection = client.collections.get(class_name)
    response = collection.query.near_vector(
        near_vector=query_vector,
        limit=top_k,
        return_properties=["content", "filename"],
    )
    # Expose plain property dicts so callers can keep indexing by field name.
    return [obj.properties for obj in response.objects]


In [None]:
# End-to-end run: ingest every .txt file from the folder, then issue a sample
# query and print each matching chunk with its source file.
try:
    docs = load_text_files_from_folder("path/to/your/folder")
    store_chunks_in_weaviate(docs)

    results = query_weaviate("What is the refund policy?")
    for res in results:
        print(f"\n📄 File: {res['filename']}\n---\n{res['content']}")
finally:
    # The client returned by weaviate.connect_to_local() holds open
    # connections; release them even if ingestion or the query fails.
    # Re-run the connection cell before running this cell again.
    client.close()
