In [1]:
pip install farm-haystack sentence-transformers transformers faiss-cpu gradio


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install SQLAlchemy==1.4.49


Note: you may need to restart the kernel to use updated packages.


In [None]:
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever, FARMReader
from haystack.pipelines import ExtractiveQAPipeline
from haystack.schema import Document
import pandas as pd
import os
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

# Step 1: Load the dataset
# The file name contains a space before the extension. Keeping it in a single
# constant guarantees that the path being read and the path reported on
# failure are the same (the old message named a different file).
CSV_PATH = 'nutrients .csv'  # Make sure the file is in your working directory
try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError as err:
    # Raise instead of print + exit(): raising stops the cell right here with a
    # clear traceback, so no later step can run against an undefined `df`.
    raise FileNotFoundError(
        f"Could not find {CSV_PATH!r}. Please ensure the file exists in your working directory."
    ) from err

# Step 2: Turn every CSV row into a Haystack Document
# The text body lists nutrient, percentage and category ('N/A' when the
# 'Category' column is absent); nutrient and percentage are mirrored in `meta`.
docs = [
    Document(
        content=(
            f"Nutrient: {record['Nutrient']}\n"
            f"Percentage: {record['%']}\n"
            f"Category: {record.get('Category', 'N/A')}"
        ),
        meta={"nutrient": record['Nutrient'], "percentage": record['%']},
    )
    for _, record in df.iterrows()
]

# Step 3: Build a fresh FAISS document store
# Remove whatever an earlier run left on disk before creating the store.
faiss_files = ['faiss_document_store.db', 'faiss_index.faiss', 'faiss_index.json']
for stale_file in filter(os.path.exists, faiss_files):
    os.remove(stale_file)

document_store = FAISSDocumentStore(
    embedding_dim=384,  # This matches the all-MiniLM-L6-v2 model
    faiss_index_factory_str="Flat",
    sql_url="sqlite:///faiss_document_store.db",
)

# Step 4: Persist the prepared documents in the store
document_store.write_documents(docs)

# Step 5: Dense retriever backed by a free sentence-transformers model (CPU only)
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model=embedding_model_name,
    model_format="sentence_transformers",
    use_gpu=False,
)

# Step 6: Compute embeddings for the stored documents using that retriever
document_store.update_embeddings(retriever)

# Step 7: Extractive QA reader (CPU only)
reader_model_name = "deepset/roberta-base-squad2"
reader = FARMReader(model_name_or_path=reader_model_name, use_gpu=False)

# Step 8: Chain retriever and reader into one question-answering pipeline
pipeline = ExtractiveQAPipeline(retriever=retriever, reader=reader)

# Chatbot interaction loop
# Reads questions until the user types 'exit' (any case, surrounding
# whitespace ignored) or interrupts/closes the prompt.
print("Nutrient Information Chatbot")
print("Type 'exit' to quit the chatbot\n")

while True:
    try:
        query = input("Please enter your question about nutrients: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Treat an interrupted or closed prompt like 'exit' so the clean-up
        # code after the loop still runs.
        print()
        break

    if query.lower() == 'exit':
        break
    if not query:
        # Nothing to ask - prompt again instead of running the pipeline on an empty string.
        continue

    # Get prediction
    prediction = pipeline.run(
        query=query,
        params={
            "Retriever": {"top_k": 5},
            "Reader": {"top_k": 3}
        }
    )

    # Keep only answers that actually carry text, so an all-empty result falls
    # through to the "couldn't find" message and the numbering has no gaps.
    found_answers = [candidate for candidate in prediction['answers'] if candidate.answer]

    # Display answers
    if found_answers:
        print("\nHere's what I found:")
        for idx, answer in enumerate(found_answers, 1):
            print(f"{idx}. {answer.answer} (confidence: {answer.score:.2f})")
            print(f"   Source: {answer.context}\n")
    else:
        print("I couldn't find any relevant information about that nutrient.")

    print("---\n")

# Clean up (optional - comment out if you want to persist the index)
# delete_index() takes the name of the index to drop; pass the store's own
# index name explicitly rather than calling it with no argument.
document_store.delete_index(document_store.index)
# Remove the on-disk store files listed in `faiss_files`, if present.
for file in faiss_files:
    if os.path.exists(file):
        os.remove(file)

Writing Documents: 10000it [00:00, 155748.38it/s]                               


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Updating Embedding:   0%|                              | 0/8 [00:00<?, ? docs/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Documents Processed: 10000 docs [00:00, 111364.93 docs/s]                       


Nutrient Information Chatbot
Type 'exit' to quit the chatbot



Please enter your question about nutrients:  what is the percentage of calcium


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Inferencing Samples: 100%|██████████████████| 1/1 [00:01<00:00,  1.94s/ Batches]



Here's what I found:
1. 3.0 (confidence: 0.59)
   Source: Nutrient: Ca
Percentage: 3.0
Category: N/A

2. 2.5 (confidence: 0.02)
   Source: Nutrient: P
Percentage: 2.5
Category: N/A

3. 18.0 (confidence: 0.01)
   Source: Nutrient: CF
Percentage: 18.0
Category: N/A

---



Please enter your question about nutrients:  p


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Inferencing Samples: 100%|██████████████████| 1/1 [00:01<00:00,  1.73s/ Batches]



Here's what I found:
1. 2.5 (confidence: 0.01)
   Source: Nutrient: P
Percentage: 2.5
Category: N/A

2. 6.0 (confidence: 0.00)
   Source: Nutrient: Fat
Percentage: 6.0
Category: N/A

3. 32.0 (confidence: 0.00)
   Source: Nutrient: NFE
Percentage: 32.0
Category: N/A

---



In [None]:
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever, FARMReader
from haystack.pipelines import ExtractiveQAPipeline
from haystack.schema import Document
import pandas as pd
import os
import warnings
from tqdm import tqdm

# Suppress Python warnings and switch off Hugging Face tokenizers parallelism.
# (Progress bars are disabled separately via progress_bar=False further down.)
warnings.filterwarnings('ignore')
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Step 1: Load the dataset
# The file name contains a space before the extension. A single constant keeps
# the path that is read and the path reported on failure in agreement.
CSV_PATH = 'nutrients .csv'
try:
    df = pd.read_csv(CSV_PATH)
except FileNotFoundError as err:
    # Raise instead of print + exit(): raising stops the cell right here with a
    # clear traceback, so no later step can run against an undefined `df`.
    raise FileNotFoundError(
        f"Could not find {CSV_PATH!r}. Please ensure the file exists."
    ) from err
else:
    print(f"Loaded dataset with {len(df)} records")

# Step 2: Turn every CSV row into a Haystack Document (with a progress bar)
# The text body lists nutrient and percentage; both are mirrored in `meta`.
print("\nCreating documents...")
docs = [
    Document(
        content=f"Nutrient: {record['Nutrient']}\nPercentage: {record['%']}",
        meta={"nutrient": record['Nutrient'], "percentage": record['%']},
    )
    for _, record in tqdm(df.iterrows(), total=len(df), desc="Processing")
]

# Step 3: Build a fresh FAISS document store
print("\nInitializing document store...")
# Remove whatever an earlier run left on disk before creating the store.
faiss_files = ['faiss_document_store.db', 'faiss_index.faiss', 'faiss_index.json']
for stale_file in filter(os.path.exists, faiss_files):
    os.remove(stale_file)

document_store = FAISSDocumentStore(
    embedding_dim=384,
    faiss_index_factory_str="Flat",
    sql_url="sqlite:///faiss_document_store.db",
    progress_bar=False,  # Disable internal progress bars
)

# Step 4: Persist the prepared documents, 100 per batch
print("\nWriting documents to store...")
document_store.write_documents(docs, batch_size=100)

# Step 5: Dense retriever backed by a sentence-transformers model (CPU only)
print("\nInitializing retriever...")
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model=embedding_model_name,
    model_format="sentence_transformers",
    progress_bar=False,  # Disable progress bars
    use_gpu=False,
)

# Step 6: (Re)compute embeddings for every stored document, 32 per batch
print("\nGenerating embeddings...")
document_store.update_embeddings(
    retriever,
    update_existing_embeddings=True,
    batch_size=32,
)

# Step 7: Extractive QA reader (CPU only, single process)
print("\nLoading QA model...")
reader_model_name = "deepset/roberta-base-squad2"
reader = FARMReader(
    model_name_or_path=reader_model_name,
    num_processes=1,
    use_gpu=False,
)

# Step 8: Chain retriever and reader into one question-answering pipeline
pipeline = ExtractiveQAPipeline(retriever=retriever, reader=reader)

# Chatbot interaction
# Reads questions until the user types 'exit' (any case, surrounding
# whitespace ignored) or interrupts/closes the prompt. Only the single best
# answer is shown for each question.
print("\nNutrient Information Chatbot (type 'exit' to quit)")
while True:
    try:
        query = input("\nYour question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Treat an interrupted or closed prompt like 'exit' so the cleanup
        # code after the loop still runs.
        print()
        break
    if query.lower() == 'exit':
        break
    if not query:
        # Nothing to ask - prompt again instead of running the pipeline on an empty string.
        continue

    try:
        prediction = pipeline.run(
            query=query,
            params={
                "Retriever": {"top_k": 3},
                "Reader": {"top_k": 1}
            }
        )

        answers = prediction['answers']
        best = answers[0] if answers else None

        # An answer object with empty text is treated the same as no answer.
        if best is not None and best.answer:
            context = best.context or ""
            # Only mark the source as truncated when it really was cut.
            snippet = context[:100] + ("..." if len(context) > 100 else "")
            print(f"\nAnswer: {best.answer}")
            print(f"Confidence: {best.score:.2f}")
            print(f"Source: {snippet}")
        else:
            print("No relevant information found.")

    except Exception as e:
        # Best effort: report the failure and keep the chat session alive.
        print(f"Error processing query: {str(e)}")

# Cleanup
# delete_index() takes the name of the index to drop; pass the store's own
# index name explicitly rather than calling it with no argument.
document_store.delete_index(document_store.index)
# Also remove the on-disk store files listed in `faiss_files`, so the
# "cleaned up" message below is accurate.
for file in faiss_files:
    if os.path.exists(file):
        try:
            os.remove(file)
        except OSError as err:
            # NOTE(review): the SQLite file may still be held open by the store
            # on some platforms - report and carry on rather than failing the cell.
            print(f"Could not remove {file}: {err}")
print("\nSession ended. Resources cleaned up.")