<a href="https://colab.research.google.com/github/kvijaysrivas/DataSciencce/blob/main/Hartex_Product.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# STEP 1: Install necessary packages
!pip install pandas langchain faiss-cpu sentence-transformers --quiet
!pip install langchain_community --quiet

# STEP 2: Import Libraries
import re

import pandas as pd
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# STEP 3: Load CSV
# The path is relative, so product.csv must be in the current working directory.
df = pd.read_csv("product.csv")

# Fail early with a readable message if a column used below is absent,
# instead of a bare KeyError from deep inside the cleaning steps.
required_columns = ["ID", "SKU_ID", "Name", "Price", "Unit", "Uom_Qty"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"product.csv is missing required column(s): {missing_columns}")

# STEP 4: Clean and convert data types
df["Price"] = df["Price"].replace(",", "", regex=True)  # Remove commas in prices
# Anything that still is not a number becomes NaN and is dropped just below.
df["Price"] = pd.to_numeric(df["Price"], errors='coerce')
df["Uom_Qty"] = pd.to_numeric(df["Uom_Qty"], errors='coerce')
df = df.dropna(subset=["SKU_ID", "Name", "Price", "Unit", "Uom_Qty"])
# Keep only positive quantities so the division in STEP 5 is well defined.
# .copy() makes the filtered frame independent, so adding columns below does
# not raise SettingWithCopyWarning.
df = df[df["Uom_Qty"] > 0].copy()

# STEP 5: Compute Effective Price and build text column
# Effective price = price per single unit of quantity; used to rank "cheapest".
df["Effective_Price"] = df["Price"] / df["Uom_Qty"]
# Every piece is cast to str so a numeric Name/Unit column cannot break the
# string concatenation.
df["text"] = (
    "Product ID: " + df["ID"].astype(str) +
    "\nSKU ID: " + df["SKU_ID"].astype(str) +
    "\nName: " + df["Name"].astype(str) +
    "\nPrice: ₹" + df["Price"].round(2).astype(str) +
    "\nUnit: " + df["Unit"].astype(str) +
    "\nQuantity per Unit: " + df["Uom_Qty"].astype(str)
)

# STEP 6: Create embeddings for semantic search
# Each product's text description becomes one Document; the documents are
# embedded with a MiniLM sentence-transformer and indexed in FAISS.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
documents = [Document(page_content=product_text) for product_text in df["text"]]
db = FAISS.from_documents(documents, embedding)
# The retriever is what the chat loop queries for free-form questions.
retriever = db.as_retriever()

# STEP 7: Chat loop
# Each query is routed to the first handler that applies:
#   1. contains "cheapest"           -> row with the lowest Effective_Price
#                                       (restricted to tyres if "tyre" is mentioned)
#   2. "sku id <n>" / "product id <n>" -> exact lookup in the dataframe
#   3. anything else                 -> semantic search through the retriever
# Requires `re` and `pd` to be imported at the top of the file.

# Compiled once, outside the loop. The earlier `id?` made only the "d"
# optional (so "sku i 12" matched); the whole word "id" is now required.
_SKU_ID_RE = re.compile(r"\bsku\s*id\s*(\d+)")
_PRODUCT_ID_RE = re.compile(r"\bproduct\s*id\s*(\d+)")

# Numeric views of the ID columns. If the CSV columns were read as strings,
# comparing them directly with an int would silently never match.
_sku_ids = pd.to_numeric(df["SKU_ID"], errors="coerce")
_product_ids = pd.to_numeric(df["ID"], errors="coerce")

while True:
    query = input("\n🔎 Ask about a product (type 'exit' to stop): ").strip()
    if not query:
        # Nothing to search for; ask again instead of querying the retriever.
        continue

    query_lc = query.lower()
    if query_lc == 'exit':
        break

    # ---- Cheapest product logic ----
    if "cheapest" in query_lc:
        candidates = df
        if "tyre" in query_lc:
            is_tyre = df["Name"].astype(str).str.contains(
                "tyre", case=False, regex=False, na=False
            )
            candidates = df[is_tyre]
        if candidates.empty:
            print("❌ No matching product found.")
        else:
            # Ranked by price per unit of quantity, not by sticker price.
            cheapest = candidates.sort_values(by="Effective_Price").iloc[0]
            print("\n💰 Cheapest product:")
            print(cheapest["text"])
        continue

    # ---- SKU ID or Product ID logic ----
    sku_match = _SKU_ID_RE.search(query_lc)
    prod_match = _PRODUCT_ID_RE.search(query_lc)

    if sku_match:
        val = int(sku_match.group(1))
        match_df = df[_sku_ids == val]
        if not match_df.empty:
            print(f"\n📦 Found {len(match_df)} products with SKU ID {val}:")
            for product_text in match_df["text"]:
                print("\n" + product_text)
        else:
            print(f"❌ No product found with SKU ID: {val}")
        continue

    if prod_match:
        val = int(prod_match.group(1))
        match_df = df[_product_ids == val]
        if not match_df.empty:
            print(f"\n📦 Product found with Product ID {val}:")
            for product_text in match_df["text"]:
                print("\n" + product_text)
        else:
            print(f"❌ No product found with Product ID: {val}")
        continue

    # ---- Default: use semantic retriever ----
    # Broad catch is deliberate: this is the top-level interactive boundary,
    # and one failed lookup should be reported without ending the session.
    try:
        docs = retriever.invoke(query)
    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
    else:
        if docs:
            print("\n✅ Match found:\n", docs[0].page_content)
        else:
            print("❌ No relevant result found.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

FileNotFoundError: [Errno 2] No such file or directory: 'product.csv'