<a href="https://colab.research.google.com/github/mohan22iitk/Giva_Assignment/blob/main/app_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install faiss-cpu



In [15]:
!pip install fastapi



In [16]:
!pip install uvicorn



In [17]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from fastapi import FastAPI, Query
from pydantic import BaseModel
import uvicorn

In [18]:
# Dataset of news articles.
# The CSV is read as ISO-8859-1. `documents` holds the non-null values of the
# "Article" column, each converted to str; it is the list the API endpoints
# look results up in by position.
df = pd.read_csv("Articles.csv", encoding="ISO-8859-1")
documents = df["Article"].dropna().astype(str).tolist()

In [19]:
# Hugging Face sentence-transformers model used to produce the embeddings.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Convert every document into an embedding vector, one per entry of
# `documents` and in the same order.
# NOTE(review): with convert_to_tensor=False the result is presumably a NumPy
# array -- confirm against the installed sentence-transformers version.
document_embeddings = model.encode(documents, convert_to_tensor=False)

In [20]:
# FAISS index (IndexFlatL2, i.e. vectors are compared by L2 distance).
# The vector dimension is read from the first embedding, so this cell assumes
# at least one document was loaded.
dimension = len(document_embeddings[0])
index = faiss.IndexFlatL2(dimension)
# Vectors are added in `documents` order, so a position returned by the index
# can be used directly as a position in `documents`.
index.add(np.array(document_embeddings))

In [21]:
# FastAPI application object; the route decorators in this file register
# their handlers on it.
app = FastAPI()

# Names of the supported similarity options.
# NOTE(review): nothing in the visible code reads this list -- the search
# endpoint compares the requested metric against string literals instead.
SIMILARITY_METRICS = ["l2", "cosine", "dot"]

# Request body accepted by the POST /api/search endpoint.
class QueryInput(BaseModel):
    # Free-text query that is embedded and matched against the documents.
    q: str
    # Name of the similarity metric to use; "cosine" when omitted. The field
    # is a plain str, so no validation against a fixed set happens here.
    metric: str = "cosine"

@app.get("/")
def home():
    # Landing route: answers with a fixed greeting wrapped in a one-key dict.
    greeting = "Welcome to the Document Similarity API!"
    payload = {"message": greeting}
    return payload

In [22]:
# Maximum number of documents returned per search request.
_TOP_K = 5

# Inner-product indexes derived from `index`, keyed by whether the stored
# vectors were L2-normalised first (True -> cosine, False -> raw dot product).
_ip_index_cache = {}


def _inner_product_index(normalized):
    """Return an inner-product FAISS index over every vector held in `index`.

    Args:
        normalized: When True the vectors are L2-normalised before being
            indexed, so inner product equals cosine similarity (given a
            normalised query). When False the raw vectors are indexed.

    Returns:
        A `faiss.IndexFlatIP` whose row order matches `index` (and therefore
        `documents`).
    """
    total = index.ntotal
    cached = _ip_index_cache.get(normalized)
    # `index` is only ever appended to, so an equal vector count means the
    # cached copy is still current.
    if cached is not None and cached.ntotal == total:
        return cached

    # Read the vectors back out of the L2 index so that anything added to it
    # at runtime is searchable with every metric, and so that result positions
    # always line up with `documents`.
    vectors = np.ascontiguousarray(index.reconstruct_n(0, total), dtype=np.float32)
    if normalized:
        # In-place, but on this private copy only: the shared embeddings must
        # stay unnormalised or later "dot" queries would silently turn into
        # cosine queries.
        faiss.normalize_L2(vectors)

    ip_index = faiss.IndexFlatIP(dimension)
    ip_index.add(vectors)
    _ip_index_cache[normalized] = ip_index
    return ip_index


@app.post("/api/search")
def search(request: QueryInput):
    """Search for similar documents with different similarity metrics.

    `metric` may be "cosine", "dot" or "l2"; any other value falls back to
    L2 distance. Up to five documents are returned, fewer when fewer are
    indexed.
    """
    # FAISS rejects k == 0 and pads missing hits with -1, so clamp k to the
    # number of indexed vectors and answer an empty index directly.
    k = min(_TOP_K, index.ntotal)
    if k == 0:
        return {"query": request.q, "metric": request.metric, "results": []}

    # FAISS expects a contiguous float32 matrix of shape (n_queries, dimension).
    query_embedding = np.ascontiguousarray(
        model.encode([request.q]), dtype=np.float32
    )

    if request.metric == "cosine":
        faiss.normalize_L2(query_embedding)  # Normalize for cosine similarity
        _, indices = _inner_product_index(True).search(query_embedding, k)
    elif request.metric == "dot":
        _, indices = _inner_product_index(False).search(query_embedding, k)
    else:  # Default: L2 (also used for unrecognised metric names)
        _, indices = index.search(query_embedding, k)

    # Skip any padding (-1) or out-of-range position instead of letting a
    # negative index wrap around to the wrong document.
    results = [documents[i] for i in indices[0] if 0 <= i < len(documents)]
    return {"query": request.q, "metric": request.metric, "results": results}

@app.post("/api/add_document")
def add_document(text: str):
    """Real-time indexing: Add a new document to the vector database.

    The text is embedded, added to the search index and appended to the
    document list; the response reports the new total number of documents.
    """
    global document_embeddings

    # Do all the work that can fail (encoding, array building) before touching
    # shared state, so `documents`, `index` and `document_embeddings` cannot
    # end up with different lengths if embedding the text raises.
    new_embedding = np.ascontiguousarray(model.encode([text]), dtype=np.float32)
    updated_embeddings = np.vstack([document_embeddings, new_embedding])

    index.add(new_embedding)
    # Keep the embedding matrix in step with the index so that any code which
    # builds from `document_embeddings` also sees the new document at the same
    # position it has in `documents`.
    document_embeddings = updated_embeddings
    documents.append(text)
    return {"message": "Document added successfully!", "total_documents": len(documents)}

# Start the API server from within this notebook/script.
import nest_asyncio
import uvicorn

# Patch asyncio so that event loops may be nested; this must run before
# uvicorn.run below.
# NOTE(review): presumably needed because the notebook kernel already has a
# running event loop -- confirm it is still required outside a notebook.
nest_asyncio.apply()

# Serve `app` on port 8000, listening on all network interfaces. The call
# blocks until the server is shut down.
uvicorn.run(app, host="0.0.0.0", port=8000)


INFO:     Started server process [740]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [740]
