In [12]:
%%capture
%pip install langchain langchain-community qdrant-client openai langchain-core tqdm pandas -U langchain-qdrant

In [None]:
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
import pandas as pd
import re
import os

# 📁 Paths
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
#print(f"Base Directory: {BASE_DIR}")

DATA_PATH = os.path.join(BASE_DIR, "Data", "Enriched_Indian_Startup_Dataset.csv")
#print(f"CSV Path: {DATA_PATH}")

QDRANT_LOCAL_PATH = os.path.join(BASE_DIR,"database", "qdrant_store_local_db")
#print(f"Qdrant Local Path: {QDRANT_LOCAL_PATH}")

COLLECTION_NAME = "indian_startups"

# 📊 Load and clean CSV
# Check if the file exists
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"The file at path '{DATA_PATH}' does not exist. Please verify the file path.")

df = pd.read_csv(DATA_PATH)
df.columns = [
    re.sub(r"[^a-zA-Z0-9_]", "", col.strip().lower().replace(" ", "_"))
    for col in df.columns
]
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)

# 🧠 Prepare documents
columns_to_embed = df.columns.tolist()
docs = []
for _, row in df.iterrows():
    base_meta = row.to_dict()
    content = "\n".join(f"{col}: {row[col]}" for col in columns_to_embed if pd.notna(row[col]))
    docs.append(Document(page_content=content, metadata=base_meta))

# 🔐 Embedding model
embedding_model = OpenAIEmbeddings()



# 🔄 Load into Qdrant
qdrant_store = Qdrant.from_documents(
    documents=docs,
    embedding=embedding_model,
    path=QDRANT_LOCAL_PATH,  # ✅ Don't pass `client=`
    collection_name=COLLECTION_NAME,
    distance_func="Cosine"
)

print(f"✅ Stored {len(docs)} documents in Qdrant collection: '{COLLECTION_NAME}'")


Base Directory: /home/prashant-agrawal/Netflix_Project/src
CSV Path: /home/prashant-agrawal/Netflix_Project/src/Data/Enriched_Indian_Startup_Dataset.csv
Qdrant Local Path: /home/prashant-agrawal/Netflix_Project/src/database/qdrant_store_local_db
✅ Stored 500 documents in Qdrant collection: 'indian_startups'


In [5]:
## -- Query Qdrant Collection -- ##

from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
import os

# Define paths
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
QDRANT_LOCAL_PATH = os.path.join(BASE_DIR,"database", "qdrant_store_local_db")
#print(f"Qdrant Local Path: {QDRANT_LOCAL_PATH}")

COLLECTION_NAME = "indian_startups"

query = "Top funded fintech startups in Bengaluru"

# Create embedding model
embedding_model = OpenAIEmbeddings()

# Use the existing client instance
if client is None:
    raise ValueError("The 'client' variable is not defined. Please ensure it is initialized.")

# 🔁 Load vector store from existing collection
qdrant_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embedding_model,
    path=QDRANT_LOCAL_PATH,
)


# Perform search with embedding
results = qdrant_store.similarity_search_with_score(
    query=query,
    k=5,
)

# Print results
for doc, score in results:
    print(f"🧠 Score: {score:.4f}")
    print(doc.page_content)
    print("-" * 80)


TypeError: QdrantVectorStore.__init__() got an unexpected keyword argument 'embeddings'

In [7]:
import pandas as pd
from qdrant_client import QdrantClient
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings

# Define paths
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
QDRANT_LOCAL_PATH = os.path.join(BASE_DIR,"database", "qdrant_store_local_db")
#print(f"Qdrant Local Path: {QDRANT_LOCAL_PATH}")

COLLECTION_NAME = "indian_startups"

# Embeddings model
embedding_model = OpenAIEmbeddings()

# Sample query
query = "Tell me the names and funding of SaaS companies in Bengaluru with over ₹1000 Cr funding"

# Safe Qdrant setup
try:
    client.close()  # Close old session if exists
except:
    pass

# Initialize new client
client = QdrantClient(path=QDRANT_LOCAL_PATH)

# Load vector store
vectorstore = Qdrant(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embedding_model,
)

# Run semantic search
results = vectorstore.similarity_search_with_score(query, k=10)

# Print results
for i, (doc, score) in enumerate(results):
    print(f"\n🔹 Result #{i+1}")
    print(f"📄 Content:\n{doc.page_content}")
    print(f"📏 Score: {score:.4f}")

# Close client
client.close()
print("\n✅ Qdrant session closed.")



🔹 Result #1
📄 Content:
company_name: Freshworks
legal_entity_type: Pvt Ltd
state: Karnataka
headquarters_city: Bengaluru
year_founded: 2023
company_website: https://davisplc.in
logo_url: https://logo.clearbit.com/davisplc.in
company_description_short: Synchronized cohesive application
company_description_long: Size effect part long almost. Evening yet total economy southern family table set.
Adult such seem.
None artist morning hotel mouth. Stand kid case guy toward industry.
Camera specific far field skin mean. Save hard management. Official range key. Eight everything join.
industry_sector: AgriTech
total_funding_raised_inr: ₹299 Cr
number_of_funding_rounds: 2
latest_funding_round_type: Pre-seed
latest_funding_date: 2021-09-06
lead_investors: Morgan Ltd
revenue_estimate_annual: ₹276 Cr
valuation_estimate_if_available: ₹921 Cr
number_of_employees_current: 32
number_of_employees_estimate_range: 319-932
key_people: CEO: Jose Blanchard, CTO: Michael Kennedy
founders: Isaac Smith (https: