In [None]:
#importing libraries

import pandas as pd
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import pipeline
import warnings

warnings.filterwarnings("ignore")

In [19]:
#Loading dataset
def load_and_preprocess_data(url):
    df = pd.read_csv(url, sep=";").dropna().sample(200, random_state=42)
    documents = []

    summary_text = (
        f"Wine Quality Dataset Summary:\n"
        f"Samples: {len(df)}\n"
        f"Avg quality: {df['quality'].mean():.2f} (range {df['quality'].min()}-{df['quality'].max()})\n"
        f"Avg pH: {df['pH'].mean():.2f} (range {df['pH'].min():.2f}-{df['pH'].max():.2f})\n"
        f"High quality wines (7-8) often have alcohol > 11 and pH < 3.5."
    )
    documents.append(Document(page_content=summary_text, metadata={"type": "summary"}))

    for idx, row in df.iterrows():
        text = (
            f"Wine Sample #{idx+1}\n"
            f"Quality: {row['quality']}\n"
            f"Alcohol: {row['alcohol']}%\n"
            f"pH: {row['pH']}\n"
            f"Sulphates: {row['sulphates']}\n"
            f"Residual Sugar: {row['residual sugar']} g/L"
        )
        documents.append(Document(page_content=text, metadata={"index": idx}))
    return documents, df

In [20]:
#Setting up PIPELINE
def setup_rag_pipeline(documents):
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Using flan-t5-base
    qa_pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        tokenizer="google/flan-t5-base",
        max_length=128,
        truncation=True
    )
    llm = HuggingFacePipeline(pipeline=qa_pipe)

    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""
        You are a helpful wine assistant. Use the context to answer the user's question in a short, friendly way.

        Context:
        {context}

        Question: {question}

        Answer:
        """
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt_template}
    )
    return qa_chain

In [21]:
#Chatbot
def run_chatbot(rag_chain):
    print("🍷 Welcome to the Smart Wine Chatbot!")
    print("Ask anything about wine quality (type 'exit' to quit)\n")

    while True:
        query = input("You: ").strip()
        if query.lower() in ["exit", "quit", "bye"]:
            print("👋 Goodbye! Hope you enjoyed the wine insights.")
            break
        try:
            print("\n🍷 Assistant:", result['result'].strip(), "\n")

        except Exception as e:
            print(f"⚠️ Error: {e}")

In [None]:
#Deployment

def main():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    documents, df = load_and_preprocess_data(url)
    rag_chain = setup_rag_pipeline(documents)
    run_chatbot(rag_chain)

if __name__ == "__main__":
    main()

Device set to use cpu


🍷 Welcome to the Smart Wine Chatbot!
Ask anything about wine quality (type 'exit' to quit)



In [14]:
# improved_rag_wine_chatbot.py
"""
Improved Wine Chatbot using flan-t5-base with contextual and diverse answers.
Fixes repetitive response issues by adjusting retrieval and prompt design.
"""

import pandas as pd
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import pipeline
import warnings

warnings.filterwarnings("ignore")

def load_and_preprocess_data(url):
    df = pd.read_csv(url, sep=";").dropna().sample(200, random_state=42)
    documents = []

    # Create sample-based documents first
    for idx, row in df.iterrows():
        text = (
            f"Wine Sample #{idx+1}\n"
            f"Quality: {row['quality']}\n"
            f"Alcohol: {row['alcohol']}%\n"
            f"pH: {row['pH']}\n"
            f"Sulphates: {row['sulphates']}\n"
            f"Residual Sugar: {row['residual sugar']} g/L"
        )
        documents.append(Document(page_content=text, metadata={"index": idx}))

    # Add summary last so it doesn’t dominate vector retrieval
    summary_text = (
        f"Wine Quality Dataset Summary:\n"
        f"Samples: {len(df)}\n"
        f"Avg quality: {df['quality'].mean():.2f} (range {df['quality'].min()}-{df['quality'].max()})\n"
        f"Avg pH: {df['pH'].mean():.2f} (range {df['pH'].min():.2f}-{df['pH'].max():.2f})\n"
        f"High quality wines (7-8) often have alcohol > 11 and pH < 3.5."
    )
    documents.append(Document(page_content=summary_text, metadata={"type": "summary"}))

    return documents, df

def setup_rag_pipeline(documents):
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    # Use flan-t5-base
    qa_pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        tokenizer="google/flan-t5-base",
        max_length=128,
        truncation=True
    )
    llm = HuggingFacePipeline(pipeline=qa_pipe)

    # Improved prompt
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""
You are an expert wine assistant helping users explore a dataset of red wine samples.

Use the context to answer the question clearly and helpfully, drawing from real wine sample data. Be concise but informative. Avoid repeating general statements.

Context:
{context}

Question: {question}

Answer:
"""
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt_template}
    )
    return qa_chain

def run_chatbot(rag_chain):
    print("🍷 Welcome to the Smart Wine Chatbot!")
    print("Ask anything about wine quality (type 'exit' to quit)\n")

    while True:
        query = input("You: ").strip()
        if query.lower() in ["exit", "quit", "bye"]:
            print("👋 Goodbye! Hope you enjoyed the wine insights.")
            break
        try:
            result_dict = rag_chain.invoke({"query": query})
            answer = result_dict['result'].strip()
            print("\n🍷 Assistant:", answer, "\n")
        except Exception as e:
            print(f"⚠️ Error: {e}")

def main():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    documents, df = load_and_preprocess_data(url)
    rag_chain = setup_rag_pipeline(documents)
    run_chatbot(rag_chain)

if __name__ == "__main__":
    main()

Device set to use cpu


🍷 Welcome to the Smart Wine Chatbot!
Ask anything about wine quality (type 'exit' to quit)

You: Describe a high-quality wine

🍷 Assistant: High quality wines (7-8) often have alcohol > 11 and pH  3.5. 

You: Tell me about wines with low pH

🍷 Assistant: High quality wines (7-8) often have alcohol > 11 and pH  3.5. 



KeyboardInterrupt: Interrupted by user

In [15]:
# fixed_improved_rag_wine_chatbot.py
"""
Improved RAG-based Chatbot for Wine Quality Dataset
This script implements a chatbot that answers diverse questions about the wine quality dataset
with concise, personalized responses using LangChain and flan-t5-base on CPU.

Assignment Tasks:
1. Load the wine quality dataset and preprocess it into LangChain Documents.
2. Set up a RAG pipeline with a prompt for distinct, engaging answers.
3. Build an interactive chatbot that saves responses to a file.

Citations:
- Wine Quality Dataset: UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Wine+Quality)
- LangChain Documentation: https://python.langchain.com/docs
- HuggingFace Transformers: https://huggingface.co/docs/transformers
- Used GitHub Copilot for code suggestions and debugging.
"""

# Import necessary libraries
import pandas as pd
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import os
import warnings
import re

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

def load_and_preprocess_data(url):
    """
    Load the wine quality dataset and create LangChain Documents with detailed metadata.

    Args:
        url (str): URL to the CSV file.

    Returns:
        tuple: List of LangChain Document objects and the original DataFrame.
    """
    df = pd.read_csv(url, sep=";").dropna().sample(200, random_state=42)
    documents = []

    # Create sample-based documents with metadata for filtering
    for idx, row in df.iterrows():
        text = (
            f"Wine Sample #{idx+1}\n"
            f"Quality: {row['quality']}\n"
            f"Alcohol: {row['alcohol']}%\n"
            f"pH: {row['pH']} ({'low' if row['pH'] < 3.3 else 'moderate' if row['pH'] < 3.5 else 'high'} acidity)\n"
            f"Sulphates: {row['sulphates']} g/L\n"
            f"Residual Sugar: {row['residual sugar']} g/L\n"
            f"Description: This wine has a quality of {row['quality']} and {row['alcohol']}% alcohol."
        )
        metadata = {
            "index": idx,
            "type": "sample",
            "quality": row['quality'],
            "pH": row['pH'],
            "alcohol": row['alcohol']
        }
        documents.append(Document(page_content=text, metadata=metadata))

    # Add summary document
    summary_text = (
        f"Wine Quality Dataset Summary:\n"
        f"Samples: {len(df)}\n"
        f"Average quality: {df['quality'].mean():.2f} (range: {df['quality'].min()}-{df['quality'].max()})\n"
        f"Average pH: {df['pH'].mean():.2f} (range: {df['pH'].min():.2f}-{df['pH'].max():.2f})\n"
        f"pH Info: Low pH (<3.3) wines are crisp and acidic, often rated 5–6. High pH (>3.5) wines are softer.\n"
        f"Quality Info: High-quality wines (7-8) have alcohol >11% and pH 3.3-3.5.\n"
        f"Attributes: {', '.join(df.columns)}"
    )
    documents.append(Document(page_content=summary_text, metadata={"type": "summary"}))

    return documents, df

def setup_rag_pipeline(documents):
    """
    Set up a RAG pipeline with a prompt for concise, personalized answers on CPU.

    Args:
        documents (list): List of LangChain Document objects.

    Returns:
        RetrievalQA: Configured RAG chain for question answering.
    """
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embeddings)

    # Custom retriever with metadata filtering
    def custom_retriever(query, k=3):
        if "high-quality" in query.lower():
            results = vectorstore.similarity_search_with_score(query, k=k, filter={"type": "sample", "quality": {"$gte": 7}})
        elif "low pH" in query.lower():
            results = vectorstore.similarity_search_with_score(query, k=k, filter={"type": "sample", "pH": {"$lte": 3.3}})
        else:
            results = vectorstore.similarity_search_with_score(query, k=k)
        return [doc for doc, _ in results]

    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    retriever.search_kwargs["fetch_k"] = 10  # Fetch more candidates for filtering

    # Use flan-t5-base
    model_name = "google/flan-t5-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    qa_pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=50,
        truncation=True,
        device=-1  # CPU
    )
    llm = HuggingFacePipeline(pipeline=qa_pipe)

    # Prompt for concise, personalized responses
    prompt_template = """
You are a friendly wine expert answering about a wine quality dataset.
Using the context, give a concise (1-2 sentences, max 25 words), accurate, engaging answer.
Add a personal touch (e.g., "Love..."). Be specific to the question.

Context:
{context}

Question: {question}

Answer:
"""
    prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt}
    )
    return qa_chain

def clean_response(text):
    """
    Clean the response to ensure coherence and brevity.

    Args:
        text (str): Raw response from the model.

    Returns:
        str: Cleaned response.
    """
    # Extract answer
    text = text.split("Answer:")[-1].strip() if "Answer:" in text else text
    text = re.sub(r'[^\w\s%.]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    # Truncate to 1-2 sentences, max 25 words
    sentences = text.split(". ")
    text = ". ".join(sentences[:2]).strip()
    if text and not text.endswith("."):
        text += "."
    words = text.split()
    if len(words) > 25:
        text = " ".join(words[:25]) + "."

    # Fallback for incoherent responses
    if not text or len(text.split()) < 3 or any(word.lower() in text.lower() for word in ["what", "where", "?"]):
        return "Can’t find that—try a specific wine question!"

    return text

def run_chatbot(rag_chain, sample_questions=None):
    """
    Run the chatbot interactively and save responses to a file.

    Args:
        rag_chain (RetrievalQA): Configured RAG pipeline.
        sample_questions (list, optional): List of predefined questions.

    Returns:
        list: List of question-response pairs.
    """
    responses = []

    # Answer sample questions
    if sample_questions:
        for query in sample_questions:
            try:
                result = rag_chain.invoke({"query": query})
                answer = clean_response(result["result"])
                responses.append({"question": query, "answer": answer})
            except Exception as e:
                responses.append({"question": query, "answer": f"Error: {e}"})

    # Interactive loop
    print("🍷 Welcome to the Smart Wine Chatbot!")
    print("Ask anything about wine quality (type 'exit' to quit)\n")

    while True:
        query = input("You: ").strip()
        if query.lower() in ["exit", "quit", "bye"]:
            print("👋 Goodbye! Hope you enjoyed the wine insights.")
            break
        try:
            result = rag_chain.invoke({"query": query})
            answer = clean_response(result["result"])
            responses.append({"question": query, "answer": answer})
            print("\n🍷 Assistant:", answer, "\n")
        except Exception as e:
            print(f"⚠️ Error: {e}")
            responses.append({"question": query, "answer": f"Error: {e}"})

    # Save responses
    with open("chatbot_responses.txt", "w") as f:
        for item in responses:
            f.write(f"Question: {item['question']}\n")
            f.write(f"Answer: {item['answer']}\n")
            f.write("-" * 50 + "\n")

    return responses

def main():
    """
    Main function to execute the RAG Chatbot pipeline.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    documents, df = load_and_preprocess_data(url)
    rag_chain = setup_rag_pipeline(documents)
    sample_questions = [
        "What is the average quality of the wines?",
        "Tell me about wines with low pH",
        "Describe a high-quality wine",
        "What does pH affect in wine?"
    ]
    run_chatbot(rag_chain, sample_questions)
    print("Responses saved to 'chatbot_responses.txt'")

if __name__ == "__main__":
    main()

Device set to use cpu


🍷 Welcome to the Smart Wine Chatbot!
Ask anything about wine quality (type 'exit' to quit)

You: Describe a high-quality wine

🍷 Assistant: Highquality wines 78 have alcohol 11% and pH 3.33.5. 

You: Tell me about wines with low pH

🍷 Assistant: Crisp and acidic. 

You: describe the wine with high ph

🍷 Assistant: Can’t find that—try a specific wine question! 

You: What is the average quality of the wines?

🍷 Assistant: Can’t find that—try a specific wine question! 

You: What does pH affect in wine?

🍷 Assistant: Can’t find that—try a specific wine question! 



KeyboardInterrupt: Interrupted by user

In [16]:
# robust_rag_wine_chatbot.py
"""
Robust RAG-based Chatbot for Wine Quality Dataset
This script implements a chatbot that answers diverse questions about the wine quality dataset
with concise, personalized responses using LangChain and flan-t5-base on CPU.

Assignment Tasks:
1. Load the wine quality dataset and preprocess it into LangChain Documents.
2. Set up a RAG pipeline with a prompt for distinct, engaging answers.
3. Build an interactive chatbot that saves responses to a file.

Citations:
- Wine Quality Dataset: UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Wine+Quality)
- LangChain Documentation: https://python.langchain.com/docs
- HuggingFace Transformers: https://huggingface.co/docs/transformers
- Used GitHub Copilot for code suggestions and debugging.
"""

# Import necessary libraries
import pandas as pd
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import os
import warnings
import re

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

def load_and_preprocess_data(url):
    """
    Load the wine quality dataset and create LangChain Documents with detailed metadata.

    Args:
        url (str): URL to the CSV file.

    Returns:
        tuple: List of LangChain Document objects and the original DataFrame.
    """
    df = pd.read_csv(url, sep=";").dropna().sample(200, random_state=42)
    documents = []

    # Create sample-based documents
    for idx, row in df.iterrows():
        pH_category = "low" if row['pH'] < 3.3 else "moderate" if row['pH'] < 3.5 else "high"
        quality_category = "high" if row['quality'] >= 7 else "moderate" if row['quality'] >= 5 else "low"
        text = (
            f"Wine Sample #{idx+1}\n"
            f"Quality: {row['quality']} ({quality_category})\n"
            f"Alcohol: {row['alcohol']}%\n"
            f"pH: {row['pH']} ({pH_category} acidity)\n"
            f"Sulphates: {row['sulphates']} g/L\n"
            f"Residual Sugar: {row['residual sugar']} g/L\n"
            f"Description: A {quality_category}-quality wine with {row['alcohol']}% alcohol and {pH_category} acidity (pH {row['pH']})."
        )
        metadata = {
            "index": idx,
            "type": "sample",
            "quality": row['quality'],
            "pH": row['pH'],
            "pH_category": pH_category,
            "quality_category": quality_category
        }
        documents.append(Document(page_content=text, metadata=metadata))

    # Add summary document
    summary_text = (
        f"Wine Quality Dataset Summary:\n"
        f"Samples: {len(df)}\n"
        f"Average quality: {df['quality'].mean():.2f} (range: {df['quality'].min()}-{df['quality'].max()})\n"
        f"Average pH: {df['pH'].mean():.2f} (range: {df['pH'].min():.2f}-{df['pH'].max():.2f})\n"
        f"pH Info: Low pH (<3.3) wines are crisp, acidic, often rated 5–6. High pH (>3.5) wines are softer, smoother.\n"
        f"Quality Info: High-quality wines (7-8) have alcohol >11% and pH 3.3-3.5.\n"
        f"pH Impact: pH affects taste and stability; low pH adds crispness, high pH softness.\n"
        f"Attributes: {', '.join(df.columns)}"
    )
    documents.append(Document(page_content=summary_text, metadata={"type": "summary"}))

    return documents, df

def setup_rag_pipeline(documents):
    """
    Set up a RAG pipeline with a prompt for concise, personalized answers on CPU.

    Args:
        documents (list): List of LangChain Document objects.

    Returns:
        RetrievalQA: Configured RAG chain for question answering.
    """
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4, "fetch_k": 10})

    # Use flan-t5-base
    model_name = "google/flan-t5-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    qa_pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=60,
        truncation=True,
        device=-1  # CPU
    )
    llm = HuggingFacePipeline(pipeline=qa_pipe)

    # Prompt for concise, personalized responses
    prompt_template = """
You are a friendly wine expert answering about a wine quality dataset.
Using the context, give a concise (1 sentence, max 25 words), accurate, engaging answer specific to the question.
Add a personal touch (e.g., "Love...").

Context:
{context}

Question: {question}

Answer:
"""
    prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt}
    )
    return qa_chain

def clean_response(text):
    """
    Clean the response to ensure coherence and brevity.

    Args:
        text (str): Raw response from the model.

    Returns:
        str: Cleaned response.
    """
    # Extract answer
    text = text.split("Answer:")[-1].strip() if "Answer:" in text else text
    text = re.sub(r'[^\w\s%.]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    # Truncate to 1 sentence, max 25 words
    sentences = text.split(". ")
    text = sentences[0].strip() + "." if sentences else text
    words = text.split()
    if len(words) > 25:
        text = " ".join(words[:25]) + "."

    # Fallback for incoherent or empty responses
    if not text or len(text.split()) < 3 or any(word.lower() in text.lower() for word in ["what", "where", "?"]):
        return "Can’t find that—try a specific wine question!"

    return text

def run_chatbot(rag_chain, sample_questions=None):
    """
    Run the chatbot interactively and save responses to a file.

    Args:
        rag_chain (RetrievalQA): Configured RAG pipeline.
        sample_questions (list, optional): List of predefined questions.

    Returns:
        list: List of question-response pairs.
    """
    responses = []

    # Answer sample questions
    if sample_questions:
        for query in sample_questions:
            try:
                result = rag_chain.invoke({"query": query})
                answer = clean_response(result["result"])
                responses.append({"question": query, "answer": answer})
            except Exception as e:
                responses.append({"question": query, "answer": f"Error: {e}"})

    # Interactive loop
    print("🍷 Welcome to the Smart Wine Chatbot!")
    print("Ask anything about wine quality (type 'exit' to quit)\n")

    while True:
        query = input("You: ").strip()
        if query.lower() in ["exit", "quit", "bye"]:
            print("👋 Goodbye! Hope you enjoyed the wine insights.")
            break
        try:
            result = rag_chain.invoke({"query": query})
            answer = clean_response(result["result"])
            responses.append({"question": query, "answer": answer})
            print("\n🍷 Assistant:", answer, "\n")
        except Exception as e:
            print(f"⚠️ Error: {e}")
            responses.append({"question": query, "answer": f"Error: {e}"})

    # Save responses
    with open("chatbot_responses.txt", "w") as f:
        for item in responses:
            f.write(f"Question: {item['question']}\n")
            f.write(f"Answer: {item['answer']}\n")
            f.write("-" * 50 + "\n")

    return responses

def main():
    """
    Main function to execute the RAG Chatbot pipeline.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    documents, df = load_and_preprocess_data(url)
    rag_chain = setup_rag_pipeline(documents)
    sample_questions = [
        "What is the average quality of the wines?",
        "Tell me about wines with low pH",
        "Describe a high-quality wine",
        "Describe the wine with high pH",
        "What does pH affect in wine?"
    ]
    run_chatbot(rag_chain, sample_questions)
    print("Responses saved to 'chatbot_responses.txt'")

if __name__ == "__main__":
    main()

Device set to use cpu


🍷 Welcome to the Smart Wine Chatbot!
Ask anything about wine quality (type 'exit' to quit)

You: What does pH affect in wine?

🍷 Assistant: taste and stability. 

You: Describe a high-quality wine

🍷 Assistant: 7.0 high Alcohol 10.6% pH 3.17 low acidity Sulphates 0.66 gL Residual Sugar 2.4 gL Description A highquality wine with 10.5% alcohol and low acidity. 

You: Tell me about wines with low pH

🍷 Assistant: Crisp acidic often rated 56. 



KeyboardInterrupt: Interrupted by user