In [2]:
#%%capture
#%pip install langchain langchain-community qdrant-client openai langchain-core tqdm pandas -U langchain-qdrant langchain-openai

In [3]:
import sys, os
SRC_PATH = os.path.abspath(os.path.join(os.getcwd(), ".."))
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)
print(SRC_PATH)

/home/prashant-agrawal/Netflix_Project/src


In [5]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
import pandas as pd
import re
import os
from utils.qdrant_client_loader import get_qdrant_client, get_qdrant_collection_name
from utils.path_config import get_base_dir, get_data_path, get_qdrant_store_path

# 📁 Paths
BASE_DIR = get_base_dir()
print(f"Base Directory: {BASE_DIR}")

DATA_PATH = get_data_path()
print(f"CSV Path: {DATA_PATH}")

qdrant_store_path = get_qdrant_store_path()
print(f"Qdrant Local Path: {qdrant_store_path}")

COLLECTION_NAME = get_qdrant_collection_name()
print(f"Qdrant Collection Name: {COLLECTION_NAME}")
# Qdrant Client



Base Directory: /home/prashant-agrawal/Netflix_Project/src
CSV Path: /home/prashant-agrawal/Netflix_Project/src/Data/Enriched_Indian_Startup_Dataset.csv
Qdrant Local Path: /home/prashant-agrawal/Netflix_Project/src/database/qdrant_store_local_db/collection
Qdrant Collection Name: indian_startups


In [6]:

# 📊 Load and clean CSV
# Check if the file exists
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"The file at path '{DATA_PATH}' does not exist. Please verify the file path.")

df = pd.read_csv(DATA_PATH)
df.columns = [
    re.sub(r"[^a-zA-Z0-9_]", "", col.strip().lower().replace(" ", "_"))
    for col in df.columns
]
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)

# 🧠 Prepare documents
columns_to_embed = df.columns.tolist()
docs = []
for _, row in df.iterrows():
    base_meta = row.to_dict()
    content = "\n".join(f"{col}: {row[col]}" for col in columns_to_embed if pd.notna(row[col]))
    docs.append(Document(page_content=content, metadata=base_meta))

# 🔐 Embedding model
embedding_model = OpenAIEmbeddings()



# 🔄 Load into Qdrant
qdrant_store = Qdrant.from_documents(
    documents=docs,
    embedding=embedding_model,
    path=qdrant_store_path,  # ✅ Don't pass `client=`
    collection_name=COLLECTION_NAME,
    distance_func="Cosine"
)

print(f"✅ Stored {len(docs)} documents in Qdrant collection: '{COLLECTION_NAME}'")


✅ Stored 500 documents in Qdrant collection: 'indian_startups'


In [7]:
## -- Query Qdrant Collection -- ##

from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from utils.qdrant_client_loader import get_qdrant_client
import os

query = "Top funded fintech startups in Bengaluru"

# Create embedding model
embedding_model = OpenAIEmbeddings()

# Initialize the client if not already defined

client = get_qdrant_client()

# 🔁 Load vector store from existing collection
qdrant_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embedding_model,
    
)


# Perform search with embedding
results = qdrant_store.similarity_search_with_score(
    query=query,
    k=5,
)

# Print results
for doc, score in results:
    print(f"🧠 Score: {score:.4f}")
    print(doc.page_content)
    print("-" * 80)


[Qdrant Client] Initializing client with path: /home/prashant-agrawal/Netflix_Project/src/database/qdrant_store_local_db/collection


RuntimeError: Storage folder /home/prashant-agrawal/Netflix_Project/src/database/qdrant_store_local_db/collection is already accessed by another instance of Qdrant client. If you require concurrent access, use Qdrant server instead.