In [1]:
import chromadb

# Creating Vector Database
# PersistentClient is called with no arguments, so the storage location and
# settings are whatever chromadb defaults to (presumably an on-disk store
# relative to the current working directory — confirm for the installed version).
client = chromadb.PersistentClient()

In [2]:
# Reuse the collection if it already exists in the store, otherwise create it.
# The "hnsw:space" metadata entry asks for cosine distance in the index.
collection = client.get_or_create_collection(
    name="vdb_collection",
    metadata={"hnsw:space": "cosine"},
)

In [5]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Loading and Chunking
loader = PyMuPDFLoader("./zfc_guide.pdf")
pages = loader.load()

# Concatenate the text of every page, in order and with no separator, so the
# splitter works on one continuous document rather than page by page.
document = "".join(page.page_content for page in pages)

# Splitter built via the tiktoken-based constructor for the "gpt-4" model:
# chunk_size=800 with chunk_overlap=400 (i.e. overlap of up to half a chunk).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name="gpt-4", chunk_size=800, chunk_overlap=400
)
chunks = text_splitter.split_text(document)

In [8]:
# Insert Chunks into ChromaDB Collection
# Ids are positional ("chunk_0", "chunk_1", ...), so re-running this cell reuses
# the same ids. upsert (rather than add) overwrites the stored text for an
# existing id, which keeps the persistent collection in sync with `chunks`
# when the cell is re-run after the PDF or the splitter settings change.
# NOTE: ids beyond len(chunks) left over from an earlier, longer run are not removed.
INSERT_BATCH_SIZE = 100  # documents per call; avoids one round-trip per chunk

chunk_ids = [f"chunk_{i}" for i in range(len(chunks))]
for start in range(0, len(chunks), INSERT_BATCH_SIZE):
    stop = start + INSERT_BATCH_SIZE
    # An empty `chunks` list never reaches this call (the range is empty).
    collection.upsert(
        documents=chunks[start:stop],
        ids=chunk_ids[start:stop],
    )

In [3]:
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

app = FastAPI()

# CORS: allow any origin, method and header so a browser front end served from
# elsewhere can call this API. Credentials stay disabled: a wildcard origin
# should not be combined with credentialed requests, and the /query endpoint
# defined in this notebook reads only the JSON body (no cookies or auth headers).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust to restrict origins if needed
    allow_credentials=False,  # set True only together with an explicit origin list
    allow_methods=["*"],
    allow_headers=["*"],
)

# Define a request model
# JSON body accepted by POST /query, e.g. {"query": "lenses"}.
class QueryRequest(BaseModel):
    # Search text; the endpoint passes it to the collection as its single query text.
    query: str

# Define the query endpoint
@app.post("/query")
async def query_chroma(request: QueryRequest):
    """Return the stored chunks that best match the submitted query text.

    Args:
        request: Parsed request body; only ``request.query`` is used.

    Returns:
        A dict ``{"results": [...]}`` whose list holds the documents the
        collection returned for the query (``n_results=3`` is requested).
    """
    # collection.query is a synchronous call; awaiting it via the thread pool
    # keeps the event loop free to serve other requests while it runs.
    results = await run_in_threadpool(
        collection.query, query_texts=[request.query], n_results=3
    )
    # Exactly one query text was sent, so its matches are the first entry.
    return {"results": results["documents"][0]}

In [4]:
import uvicorn
import threading

def run_api():
    """Serve ``app`` with uvicorn on host 0.0.0.0, port 8000."""
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)


# Run the FastAPI app in a background thread so later cells can still execute
# while the server is up.
thread = threading.Thread(target=run_api)
thread.start()

INFO:     Started server process [58046]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:53001 - "OPTIONS /query HTTP/1.1" 200 OK
INFO:     127.0.0.1:53001 - "POST /query HTTP/1.1" 200 OK
INFO:     127.0.0.1:53287 - "POST /query HTTP/1.1" 200 OK


In [5]:
import requests

# Smoke-test the running API with a sample query.
# The timeout stops the cell from hanging forever if the server is not up;
# raise_for_status surfaces HTTP errors instead of printing an error payload.
response = requests.post(
    "http://localhost:8000/query",
    json={"query": "lenses"},
    timeout=30,
)
response.raise_for_status()
print(response.json())

INFO:     127.0.0.1:52750 - "POST /query HTTP/1.1" 200 OK
INFO:     127.0.0.1:53499 - "OPTIONS /query HTTP/1.1" 405 Method Not Allowed
INFO:     127.0.0.1:53683 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:53683 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:53723 - "POST /query HTTP/1.1" 200 OK
