## 1. Web Scraping (example with “Books to Scrape”)

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import time

def scrape_books(base_url="https://books.toscrape.com/", timeout=10):
    """
    Extracts book data (title, price, category, description) from the first
    catalogue page of the BooksToScrape site.

    Args:
        base_url: Root URL of the site; a trailing slash is optional.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dictionaries with the keys "title", "price", "category"
        and "description". The list is empty if the catalogue page cannot
        be fetched.
    """
    # Imported locally so this function stays self-contained.
    from urllib.parse import urljoin

    all_books = []

    # Normalise the root so both "https://host" and "https://host/" work.
    root_url = base_url.rstrip("/") + "/"

    # We'll scrape the first page as an example
    page_url = urljoin(root_url, "catalogue/page-1.html")
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error accessing {page_url}: {exc}")
        return all_books
    if response.status_code != 200:
        print(f"Error accessing {page_url}")
        return all_books

    # Parse the raw bytes rather than response.text: when the server sends no
    # charset, requests falls back to ISO-8859-1 and the pound sign in prices
    # is decoded as "Â£". Given bytes, BeautifulSoup detects the encoding itself.
    soup = BeautifulSoup(response.content, "html.parser")

    for book in soup.select("article.product_pod"):
        title_element = book.select_one("h3 a")
        if title_element is None:
            # Malformed entry without a title link: nothing useful to record.
            continue
        price_element = book.select_one("p.price_color")

        title = title_element.get("title", "No Title")
        price = price_element.text if price_element else "No Price"

        category = "No Category"
        description = "Description not available."

        # Link to the individual book page (relative to the listing page)
        book_link = title_element.get("href")
        if book_link:
            full_link = urljoin(page_url, book_link)
            category, description = _fetch_book_details(full_link, timeout)

            # Brief pause to avoid sending too many requests in a short time
            time.sleep(1)

        all_books.append({
            "title": title,
            "price": price,
            "category": category,
            "description": description,
        })

    return all_books


def _fetch_book_details(book_url, timeout=10):
    """
    Fetches one book page and returns a (category, description) tuple.

    Placeholder values are returned when the page cannot be fetched or the
    expected elements are missing, so a single bad page never aborts the run.
    """
    category = "No Category"
    description = "Description not available."

    try:
        book_resp = requests.get(book_url, timeout=timeout)
    except requests.RequestException:
        return category, description
    if book_resp.status_code != 200:
        return category, description

    book_soup = BeautifulSoup(book_resp.content, "html.parser")

    # The third breadcrumb link is taken as the category.
    breadcrumb_links = book_soup.select("ul.breadcrumb li a")
    if len(breadcrumb_links) >= 3:
        category = breadcrumb_links[2].text.strip()

    # The description is the <p> sibling following the #product_description header.
    description_element = book_soup.select_one("#product_description ~ p")
    if description_element:
        description = description_element.text.strip()

    return category, description

def save_data(data, filename="books_data.json"):
    """
    Writes `data` to `filename` as UTF-8 JSON, indented by two spaces and
    with non-ASCII characters kept as-is (not escaped).
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    with open(filename, mode="w", encoding="utf-8") as out_file:
        json.dump(data, out_file, **dump_options)

# Script entry point for the scraping step: scrape the books, report how many
# were collected, then persist them to disk.
if __name__ == "__main__":
    data = scrape_books()
    print(f"Scraped {len(data)} books.")
    # No filename is passed, so save_data writes to its default output file.
    save_data(data)


## 2. Loading the Data and Creating the Virtual Assistant

In [None]:
import requests
import json

def load_products(filepath="books_data.json"):
    """
    Reads the UTF-8 JSON file at `filepath` and returns its parsed content
    (the data previously written by the scraping step).
    """
    with open(filepath, mode="r", encoding="utf-8") as source:
        return json.load(source)

def search_products(query, products):
    """
    Returns the products whose title, category or description contains at
    least one of the whitespace-separated words of `query`.

    Matching is case-insensitive and substring-based; the input order of
    `products` is preserved in the result.
    """
    searched_fields = ("title", "category", "description")
    terms = query.lower().split()

    def is_hit(product):
        # Build the searchable text first so missing keys fail the same way
        # regardless of the query.
        haystack = " ".join(product[field].lower() for field in searched_fields)
        return any(term in haystack for term in terms)

    return [product for product in products if is_hit(product)]

def query_ollama(context, question, model="llama3",
                 url="http://localhost:11434/api/generate", timeout=120):
    """
    Sends the question + context to Ollama to generate a natural-language answer.

    Args:
        context: Product information the model is allowed to use.
        question: The user's question.
        model: Name of a model available on the Ollama server. The generate
            endpoint requires one; adjust to a model you have pulled.
        url: Full URL of Ollama's generate endpoint. Adjust if Ollama is
            running on a different port or address.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The generated answer, or a human-readable error message if the
        server could not be reached or returned an unusable response.
    """
    prompt = f"""
You are a virtual customer service assistant. Answer in English using ONLY the following product information:
{context}

Question: {question}

If you do not have enough information in the context, say: "I'm sorry, I don't have that information."
"""

    payload = {
        "model": model,
        "prompt": prompt,
        # Ask for one JSON object instead of a stream of JSON lines, so that
        # resp.json() can parse the body.
        "stream": False,
    }

    try:
        resp = requests.post(url, json=payload, timeout=timeout)
        if resp.status_code != 200:
            return f"Error {resp.status_code}: Could not contact Ollama server."
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return f"Error connecting to Ollama: {str(e)}"

    if not isinstance(data, dict):
        return ""
    return data.get("response", "")

def main():
    """
    Runs the interactive assistant: loads the scraped products, then loops
    reading questions, searching for matching products and asking Ollama
    for an answer based on the first three matches.

    The loop ends when the user types "quit", "exit" or "bye", or when input
    is closed/interrupted (EOF or Ctrl-C).
    """
    print("=== Virtual Assistant (BooksToScrape) ===")
    # 1. Load data
    try:
        products = load_products()
    except FileNotFoundError as exc:
        # Typical when the scraping step has not been run yet.
        print(f"Data file not found: {exc.filename}. Run the scraping step first.")
        return

    # 2. Chat loop
    while True:
        try:
            user_question = input("\nUser: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave cleanly instead of a traceback.
            print("\nGoodbye.")
            break
        if not user_question:
            continue
        if user_question.lower() in ["quit", "exit", "bye"]:
            print("Goodbye.")
            break

        # 2a. Search relevant products
        matches = search_products(user_question, products)

        # 2b. Build a 'context' from the first 3 results
        if matches:
            context_text = "".join(_format_product(p) for p in matches[:3])
        else:
            context_text = "No relevant products found."

        # 2c. Query Ollama
        answer = query_ollama(context_text, user_question)

        # 2d. Display the answer
        print(f"Assistant: {answer}")


def _format_product(product, max_description=200):
    """
    Formats one product as a context entry for the prompt, truncating the
    description to `max_description` characters.
    """
    description = product['description']
    if len(description) > max_description:
        # Only mark the text as shortened when it actually was.
        description = description[:max_description] + "..."
    return (
        f"- Title: {product['title']}\n"
        f"  Category: {product['category']}\n"
        f"  Price: {product['price']}\n"
        f"  Description: {description}\n\n"
    )

# Start the interactive assistant only when this code is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

