In [1]:
# Dependencies — run once in the kernel's environment if they are missing:
# %pip install fastapi uvicorn pymongo sentence-transformers

In [2]:
# %pip install tf_keras  # NOTE(review): not imported directly in the visible cells — confirm it is still required

In [4]:
import os

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer
app = FastAPI()

# Initialize MongoDB client and connect to the database.
# The connection string can be overridden with the MONGODB_URI environment
# variable; when it is unset the original local URI is used.
MONGODB_URI = os.environ.get("MONGODB_URI", "mongodb://localhost:27017/documentsearch")
client = MongoClient(MONGODB_URI)
# NOTE(review): the default URI names "documentsearch", but documents are read
# from and written to the "document_db" database below — confirm which one is intended.
db = client.document_db
collection = db.documents

# Load the sentence-transformer model used to embed both documents and queries
model = SentenceTransformer('all-MiniLM-L6-v2')

class Document(BaseModel):
    # Request body accepted by POST /documents/.
    title: str  # stored unchanged alongside the content
    content: str  # text that is embedded and stored

class SearchQuery(BaseModel):
    # Request body accepted by POST /search/.
    query: str  # free text that is embedded and compared against stored documents
# Helper function to calculate cosine similarity
def cosine_similarity(a, b):
    """Return the cosine similarity of two 1-D numeric vectors.

    Args:
        a: First vector (list, tuple or NumPy array of numbers).
        b: Second vector, with the same length as ``a``.

    Returns:
        float: ``dot(a, b) / (norm(a) * norm(b))`` as a built-in ``float``.
        If either vector has zero norm the similarity is undefined, so
        ``0.0`` is returned instead of ``nan``.

    Raises:
        ValueError: If the vectors have different lengths (raised by ``np.dot``).
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        # 0/0 would produce nan (plus a RuntimeWarning); nan also breaks
        # sorting and strict JSON encoding further down the line.
        return 0.0

    # Cast so callers always receive a plain Python float, not a NumPy scalar.
    return float(np.dot(vec_a, vec_b) / denominator)

# Add a document to the database
@app.post("/documents/")
async def add_document(doc: Document):
    """Embed a document's content and store the document in MongoDB.

    Args:
        doc: Request body with the document's ``title`` and ``content``.

    Returns:
        dict: A confirmation ``message`` plus the ``id`` of the inserted
        record (the MongoDB ``_id`` rendered as a string).
    """
    # Imported here so the handler is self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Generate the embedding for the content; .tolist() turns it into a plain
    # list that can be stored in the database.
    # NOTE(review): encode() is synchronous and still runs on the event loop.
    embedding = model.encode(doc.content).tolist()

    record = {
        "title": doc.title,
        "content": doc.content,
        "embedding": embedding,
    }

    # insert_one is a blocking network call; run it in the thread pool so the
    # event loop stays free to serve other requests while MongoDB responds.
    result = await run_in_threadpool(collection.insert_one, record)

    return {
        "message": "Document added successfully",
        "id": str(result.inserted_id),
    }

# Search for documents based on a query
@app.post("/search/")
async def search_documents(query: SearchQuery):
    """Rank stored documents by cosine similarity to the query text.

    Args:
        query: Request body carrying the free-text search string.

    Returns:
        list[dict]: At most 10 entries with ``title``, ``content`` and
        ``similarity`` keys, ordered from most to least similar.

    Raises:
        HTTPException: 404 when the collection contains no documents. Note
        that this 404 means "empty collection", not "unknown route".
    """
    # Imported here so the handler is self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Generate embedding for the search query
    # NOTE(review): encode() is synchronous and still runs on the event loop.
    query_embedding = model.encode(query.query).tolist()

    # Fetch only the fields used below (leaves out the unused _id). The cursor
    # is drained in the thread pool because the driver call is blocking.
    projection = {"_id": 0, "title": 1, "content": 1, "embedding": 1}
    documents = await run_in_threadpool(
        lambda: list(collection.find({}, projection))
    )

    if not documents:
        raise HTTPException(status_code=404, detail="No documents found")

    # Calculate similarity scores for each document
    results = []
    for doc in documents:
        embedding = doc.get("embedding")
        if not embedding:
            # A record stored without an embedding cannot be ranked; skip it
            # rather than failing the whole search with a KeyError.
            continue
        try:
            similarity = float(cosine_similarity(query_embedding, embedding))
        except (TypeError, ValueError):
            # Malformed or wrong-dimension embedding: skip this record only.
            continue
        if not np.isfinite(similarity):
            # nan/inf cannot be sorted meaningfully and is rejected by strict
            # JSON encoding; treat it as "no similarity".
            similarity = 0.0
        results.append({
            "title": doc.get("title"),
            "content": doc.get("content"),
            "similarity": similarity,
        })

    # Sort the results by similarity, best match first
    results.sort(key=lambda item: item["similarity"], reverse=True)

    return results[:10]  # Return top 10 matching documents




In [None]:
import asyncio
import uvicorn

if __name__ == "__main__":
    #uvicorn.run(app, host="127.0.0.1", port=8005)
    config = uvicorn.Config(app, host="127.0.0.1", port=8005)
    server = uvicorn.Server(config)
    await server.serve()

INFO:     Started server process [21480]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8005 (Press CTRL+C to quit)


INFO:     127.0.0.1:65472 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:65472 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:65519 - "GET /search/test HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:65532 - "GET /search/query/test HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:65532 - "GET /search/query/test HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49191 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49280 - "POST /search/ HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49280 - "GET /search/ HTTP/1.1" 405 Method Not Allowed
INFO:     127.0.0.1:49294 - "GET /search/query/FastAPI HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49313 - "POST /search/query/FastAPI HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49317 - "POST /search HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:49317 - "POST /search/ HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:49319 - "POST /search HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:49319 - "POST /search/ HTTP/1.1" 422 Unprocessable E