In [1]:
# Cell 1: Install Minimal Dependencies
!pip install transformers torch flask pyngrok requests

print("✅ Dependencies installed")


✅ Dependencies installed


In [2]:
# Cell 2: Import Libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from flask import Flask, request, jsonify
from pyngrok import ngrok
import threading
import time
import requests

print("✅ Libraries imported")


✅ Libraries imported


In [3]:
# Cell 3: Load Your YAML Prompts (Embedded directly)
prompts_config = {
    "version": "1.0",
    "created": "2025-09-23",
    "author": "Oriane Barakat Rahmeh",

    "base_retrieval_prompt": {
        "description": "Standard prompt for straightforward questions with clear context",
        "role": "You are a precise Shoplite customer service assistant with access to official documentation.",
        "goal": "Provide accurate, concise answers using only the provided context from Shoplite documentation.",
        "context_guidelines": [
            "Use ONLY information from the provided document snippets",
            "If the context doesn't contain relevant information, politely decline to answer",
            "Cite specific document titles when referencing information",
            "Be factual and avoid speculation",
            "Maintain a helpful and professional tone"
        ],
        "response_format": "Answer: [Your response based strictly on the provided context]\nSources: [List the document titles referenced]\nConfidence: [High/Medium/Low based on context relevance]"
    },

    "complex_question_prompt": {
        "description": "For multi-faceted questions requiring synthesis across documents",
        "role": "You are an expert Shoplite support specialist trained in multi-document analysis and synthesis.",
        "goal": "Solve complex customer questions by reasoning step-by-step and synthesizing information from multiple document sources.",
        "context_guidelines": [
            "First, break down the question into sub-questions",
            "For each sub-question, identify relevant document sections",
            "Compare information across documents for consistency",
            "Synthesize a comprehensive answer addressing all aspects",
            "Acknowledge when information comes from multiple sources",
            "If documents conflict, note the discrepancy and provide the most common policy"
        ],
        "reasoning_format": "Thinking Process:\n1. Question Analysis: [Break down the complex question into components]\n2. Document Mapping: [Which documents address each part of the question]\n3. Information Synthesis: [How the pieces fit together logically]\n4. Consistency Check: [Verify no contradictions between sources]\n\nFinal Answer: [Comprehensive synthesized response]\nSources Used: [List all document titles referenced]\nSynthesis Notes: [Any important caveats or integration points]"
    },

    "no_context_prompt": {
        "description": "Handles queries where no relevant context is available",
        "role": "You are a cautious Shoplite assistant who avoids providing unverified information.",
        "goal": "Politely decline to answer when the provided context doesn't contain relevant information, while maintaining helpfulness.",
        "context_guidelines": [
            "Do not make up information or use external knowledge",
            "Clearly state that the information isn't available in Shoplite documentation",
            "Suggest alternative ways the user might find the information",
            "Maintain helpful tone while being firm about limitations",
            "Offer to help with related topics that are covered in the knowledge base"
        ],
        "response_format": "I'm unable to answer this question based on the available Shoplite documentation.\n\nSuggestion: [Alternative action like contacting support or checking specific resources]\nAvailable Topics: [List 2-3 related topics that are covered in the knowledge base]\n\nPlease rephrase your question or contact Shoplite support for further assistance."
    },

    "clarification_prompt": {
        "description": "Requests clarification for ambiguous or incomplete questions",
        "role": "You are a detail-oriented Shoplite assistant who asks clarifying questions to provide accurate information.",
        "goal": "Identify ambiguous or incomplete questions and request specific clarification to provide better answers.",
        "context_guidelines": [
            "Analyze the question for ambiguity, missing details, or multiple interpretations",
            "Ask specific, helpful clarifying questions",
            "Provide examples of what information would help",
            "Suggest related topics from the knowledge base to guide the user",
            "Maintain patience and professionalism throughout"
        ],
        "response_format": "I want to provide the most accurate information for your question about [topic].\n\nTo help you better, could you clarify: [Specific clarifying question]?\n\nFor example: [Example of the type of detail needed]\n\nRelated topics that might help: [List 2-3 related topics from available documentation]"
    },

    "chain_of_thought_prompt": {
        "description": "Demonstrates step-by-step reasoning for complex queries",
        "role": "You are a methodical Shoplite analyst who reasons through problems step by step using native thinking.",
        "goal": "Demonstrate clear reasoning processes for complex questions, showing your thought process before providing final answers.",
        "context_guidelines": [
            "Think through the problem step by step in natural language",
            "Explicitly state your reasoning at each step",
            "Reference specific context snippets when making conclusions",
            "Check for logical consistency throughout your reasoning",
            "Acknowledge uncertainties or limitations in the available information"
        ],
        "reasoning_chain": "Let me think through this step by step:\n\nStep 1: Understanding the question - [Restate and analyze the question]\nStep 2: Identifying relevant information - [Which parts of the context apply]\nStep 3: Connecting the dots - [How the information answers the question]\nStep 4: Verification - [Checking for completeness and accuracy]\n\nBased on this reasoning:\nAnswer: [Final comprehensive answer]\nSources: [Referenced documents with specific relevance]"
    },

    "self_consistency_prompt": {
        "description": "Validates answers through multiple reasoning paths",
        "role": "You are a thorough Shoplite validator who checks answers for consistency across multiple reasoning paths.",
        "goal": "Generate multiple reasoning paths for complex questions and verify they lead to consistent conclusions.",
        "context_guidelines": [
            "Generate at least two different reasoning approaches",
            "Compare conclusions from different reasoning paths",
            "Resolve any inconsistencies between approaches",
            "Provide the most consistent and well-supported answer",
            "Note if different interpretations are possible"
        ],
        "consistency_check": "Reasoning Path 1: [First approach to solving the problem]\nConclusion 1: [Answer from first approach]\n\nReasoning Path 2: [Second approach to solving the problem]\nConclusion 2: [Answer from second approach]\n\nConsistency Analysis: [Compare conclusions and resolve differences]\n\nFinal Verified Answer: [Most consistent and well-supported conclusion]\nConfidence Level: [Based on consistency across approaches]"
    },

    "evaluation_ready_prompt": {
        "description": "Optimized for systematic testing and validation",
        "role": "You are a test-ready Shoplite assistant optimized for systematic evaluation.",
        "goal": "Provide responses that are easily evaluatable against ground truth criteria with clear structure.",
        "context_guidelines": [
            "Structure responses to match evaluation checklist formats",
            "Explicitly include required keywords when present in context",
            "Avoid forbidden terms and content",
            "Provide clear source attribution for each piece of information",
            "Maintain consistent response formatting for automated testing"
        ],
        "response_format": "Answer: [Response containing required keywords and avoiding forbidden terms]\n\nSource Documents: [List of referenced documents with relevance explanation]\n\nKeyword Check: [Explicit mention of required keywords from context]\n\nConfidence: [High/Medium/Low based on context match]\n\nEvaluation Ready: Yes"
    }
}

print("✅ Loaded", len(prompts_config) - 3, "prompt configurations")  # Subtract version, created, author
print("Available prompt types:")
for key in prompts_config.keys():
    if key not in ['version', 'created', 'author']:
        print(f"  - {key}: {prompts_config[key]['description']}")


✅ Loaded 7 prompt configurations
Available prompt types:
  - base_retrieval_prompt: Standard prompt for straightforward questions with clear context
  - complex_question_prompt: For multi-faceted questions requiring synthesis across documents
  - no_context_prompt: Handles queries where no relevant context is available
  - clarification_prompt: Requests clarification for ambiguous or incomplete questions
  - chain_of_thought_prompt: Demonstrates step-by-step reasoning for complex queries
  - self_consistency_prompt: Validates answers through multiple reasoning paths
  - evaluation_ready_prompt: Optimized for systematic testing and validation


In [4]:
# Cell 4: Complete Knowledge Base (Embedded directly from your documents)
knowledge_base = [
    {
        "id": "doc1",
        "title": "Shoplite User Registration Process",
        "content": "Creating a Shoplite account is designed to be a seamless experience that balances security with user convenience. The registration process begins when users visit the Shoplite homepage or mobile app and click the 'Register' button. Prospective users must provide a valid email address, create a secure password meeting our complexity requirements, and enter basic personal information including first name, last name, and date of birth for age verification purposes. Email verification constitutes a critical security step in our registration workflow. Within minutes of submitting registration details, users receive a verification email containing a unique six-digit code and a verification link. This code must be entered on the verification page or the link clicked to activate the account. Accounts remain in pending status for up to 24 hours awaiting verification, after which unverified registrations are automatically purged from our systems for security and data protection reasons. Users face a fundamental account type decision during registration: buyer accounts versus seller accounts. Standard buyer accounts provide immediate access to Shoplite's comprehensive shopping features upon email verification. These free accounts enable product browsing, purchasing, wishlist creation, and review writing. Seller accounts, conversely, require additional business verification steps including tax identification number validation, business registration document submission, and in some cases, video verification for high-risk categories. The seller verification process typically requires 2-3 business days for completion, during which our dedicated verification team examines submitted documents for authenticity and compliance with marketplace standards."
    },
    {
        "id": "doc2",
        "title": "Product Search and Filtering Features",
        "content": "Shoplite's sophisticated search infrastructure combines multiple technologies to deliver highly relevant product discoveries across our extensive marketplace. The primary search interface accepts natural language queries, understanding conversational phrases like 'comfortable running shoes under $100' and translating them into precise product matches. Our search algorithm incorporates multiple ranking signals including product relevance, seller reputation, customer ratings, inventory availability, and promotional status to surface optimal results. Advanced filtering capabilities empower users to narrow search results across numerous dimensions. Price range filters allow setting minimum and maximum thresholds with real-time result updates. Brand filtering supports multi-select functionality, enabling users to include or exclude specific manufacturers. Seller rating filters restrict results to merchants meeting minimum feedback scores, ensuring quality shopping experiences. Product condition filters differentiate between new, refurbished, pre-owned, and open-box items with clear condition descriptions for each category. Specialized filtering options cater to specific product categories and user preferences. Electronics searches can filter by technical specifications like screen size, storage capacity, or processor type. Fashion searches support filtering by size, color, material composition, and clothing style. Home goods searches include filters for dimensions, assembly requirements, and eco-certifications."
    },
    {
        "id": "doc3",
        "title": "Shopping Cart and Checkout Process",
        "content": "The Shoplite shopping cart serves as the central hub where users compile potential purchases from multiple sellers within a unified interface. This multi-seller cart technology enables seamless cross-merchant shopping experiences, allowing customers to add products from different vendors while maintaining separate order tracking and shipping arrangements. The cart interface provides comprehensive item details including product images, variant selections, unit pricing, seller information, and estimated delivery timeframes. Cart management features empower users with flexible control over their prospective purchases. Quantity adjustment tools allow modifying item counts with real-time subtotal updates. The 'Save for Later' functionality moves items to a separate wishlist section while preserving selection details for future consideration. The checkout process transitions users from cart compilation to order completion through a streamlined three-step workflow. Step one collects essential shipping information, including address selection from saved profiles or new entry, delivery method preferences balancing speed against cost, and gift wrapping options with personalized message capabilities. Payment method selection constitutes step two of the checkout journey. Users choose from saved payment instruments or add new options, with security indicators reinforcing our PCI DSS compliance. Order review forms the final step, presenting comprehensive purchase summaries including item details, cost breakdowns, delivery estimates, and applied promotions."
    },
    {
        "id": "doc4",
        "title": "Payment Methods and Security",
        "content": "Shoplite supports an extensive array of payment options designed to accommodate diverse customer preferences and regional financial ecosystems. Traditional payment instruments include major credit cards (Visa, MasterCard, American Express, Discover), debit cards with credit network logos, and prepaid gift cards. Digital wallet integrations encompass PayPal, Apple Pay, Google Pay, and Samsung Pay, providing streamlined checkout experiences through tokenized payment data. Security constitutes our foundational priority in payment processing. All transactions benefit from PCI DSS Level 1 compliance, the highest certification in payment security standards. Tokenization technology replaces sensitive payment data with unique identification symbols during transmission and storage, ensuring actual card numbers never reside on Shoplite systems. 3D Secure authentication adds an additional verification layer for card-not-present transactions, requiring password entry or biometric confirmation through issuer banking platforms. Encryption protocols protect data throughout the payment lifecycle. Transport Layer Security (TLS) 1.3 encryption secures data in transit between user devices and our payment processors. At rest, payment information receives AES-256 encryption, the same standard used by government agencies for classified information."
    },
    {
        "id": "doc5",
        "title": "Order Tracking and Delivery",
        "content": "Order tracking within the Shoplite ecosystem provides comprehensive visibility into purchase journeys from warehouse selection to final delivery. The tracking system initiates upon order confirmation, when automated emails dispatch containing order summaries, estimated delivery timelines, and preliminary tracking information. Within 24 hours of order processing, active tracking numbers become available through the order management interface, connecting to carrier systems for real-time status updates. The order tracking dashboard presents consolidated information through intuitive visual timelines. Key milestones include order confirmation, payment processing, inventory sourcing, packaging completion, carrier handoff, transit initiation, regional facility processing, out-for-delivery status, and successful delivery confirmation. Delivery options span multiple service levels balancing speed against cost considerations. Standard delivery typically completes within 3-7 business days, utilizing ground transportation networks. Express delivery guarantees 1-2 business day delivery through premium carrier services. Same-day delivery options exist in eligible metropolitan areas for orders placed before cutoff times. International delivery timelines extend to 7-14 business days accounting for customs processing, with detailed import duty estimates provided during checkout."
    },
    {
        "id": "doc6",
        "title": "Return and Refund Policies",
        "content": "Shoplite's comprehensive return and refund framework balances customer satisfaction with marketplace sustainability. Our standard return policy grants customers a 30-day window from the delivery date to initiate returns for most product categories. This timeframe allows adequate product evaluation while ensuring sellers receive timely inventory returns. To qualify for return, items must maintain original condition with all manufacturer packaging, tags, accessories, and documentation intact. Products showing signs of use, damage, or missing components may receive partial refunds or be ineligible for return based on seller assessment. Category-specific return restrictions protect sellers from abuse while maintaining fair customer treatment. Personalized items including monogrammed products, custom configurations, and made-to-order merchandise typically qualify as final sales. Digital products such as software licenses, e-books, and digital downloads become ineligible once accessed or downloaded. Perishable goods including food, flowers, and certain health supplements cannot be returned for safety reasons. The return initiation process begins through the order history section of user accounts. Customers select the specific items for return, specify reasons from standardized categories, and provide detailed descriptions of any product issues. Refund processing follows strict timelines once returned items pass inspection. Standard refunds typically process within 5-7 business days, though banking institution variations may extend actual fund availability."
    },
    {
        "id": "doc7",
        "title": "Product Reviews and Ratings System",
        "content": "Shoplite's review ecosystem cultivates authentic customer feedback while maintaining marketplace integrity. The dual-component rating system combines quantitative star ratings with qualitative written reviews, providing comprehensive product assessments. Star ratings employ a 1-5 scale with clear definitions: 1 star indicates severe dissatisfaction, 2 stars reflects significant issues, 3 stars represents adequate performance, 4 stars denotes above-average quality, and 5 stars signifies exceptional excellence. Review verification mechanisms maintain authenticity throughout the feedback process. The purchase verification system restricts reviews to customers who have actually bought the product through Shoplite, preventing fabricated testimonials. Order confirmation and delivery verification precede review eligibility, typically allowing feedback submission 7-14 days after delivery to ensure adequate product testing. Verified purchase badges distinguish genuine customer reviews from other feedback types, providing credibility indicators for prospective buyers. Multimedia enrichment options enhance review usefulness beyond textual descriptions. Image upload capabilities allow customers to showcase product actual appearance, highlighting details often missing from professional marketing photography. Video reviews demonstrate product functionality, size proportions, and real-world usage scenarios."
    },
    {
        "id": "doc8",
        "title": "Seller Account Setup and Management",
        "content": "Seller account establishment on Shoplite involves a comprehensive verification process ensuring marketplace integrity and consumer protection. The initial registration collects fundamental business information including legal business name, physical address, contact details, and tax identification numbers. Business structure classification differentiates between individual proprietors, partnerships, limited liability companies, and corporations, with documentation requirements varying accordingly. Documentation requirements escalate based on business scale and product categories. All sellers must provide government-issued identification matching business registration details. Business registration certificates validate legal entity status, while tax documentation confirms compliance with jurisdictional requirements. High-volume sellers or those in regulated categories (electronics, luxury goods, health products) undergo enhanced verification including bank statements, business location verification, and in some cases, video conference interviews with verification specialists. The verification timeline typically spans 2-3 business days for standard applications, though complex cases or incomplete documentation can extend this period. Account management occurs through the comprehensive seller dashboard, providing centralized control over all business operations. Profile management sections handle business information updates, branding elements, and communication preferences."
    },
    {
        "id": "doc9",
        "title": "Inventory Management for Sellers",
        "content": "Shoplite's inventory management system provides sellers with sophisticated tools for stock control across multiple sales channels and fulfillment locations. The centralized inventory dashboard offers real-time visibility into stock levels, with color-coded indicators differentiating adequate inventory (green), low stock warnings (yellow), and out-of-stock situations (red). Automated low stock alerts trigger at customizable thresholds, sending notifications through dashboard, email, and mobile app based on seller preferences. Inventory tracking methodologies accommodate diverse business models and product types. Single SKU tracking suits simple products with no variations, while matrix inventory management handles products with multiple options like size, color, or configuration. Bundle products enable grouping individual SKUs into packaged offerings, with automatic component deduction upon sale. Multi-location inventory management supports sellers operating across warehouses, retail stores, or fulfillment centers. Location-specific stock levels prevent overselling by reserving inventory at the fulfillment point closest to customers. Stock transfer functionality facilitates movement between locations with automated updating of availability estimates. Inventory synchronization maintains consistency across Shoplite and external sales channels through robust API connections."
    },
    {
        "id": "doc10",
        "title": "Commission and Fee Structure",
        "content": "Shoplite's revenue model uses tiered commissions varying by product category and seller volume. Base rates range from 8-15% of final sale prices, excluding taxes and shipping. Category specifics include Electronics (8%), Fashion (12%), Home Goods (10%), and Luxury Items (15%). These percentages cover payment processing, platform maintenance, and customer service infrastructure. Seller tiers provide commission reductions based on performance. Starter tier (0-50 monthly orders) pays standard rates. Professional tier (51-500 orders) receives 1% reduction, while Enterprise tier (500+ orders) gets 2% reduction. Monthly tier reviews automatically adjust commission rates for qualifying sellers. Additional fees include payment processing (2.9% + $0.30 per transaction for credit cards) and subscription fees for advanced tiers ($29.99 monthly for Professional, $79.99 for Enterprise). Fee transparency mechanisms ensure sellers understand costs before transactions. Real-time fee calculators during product listing provide commission estimates. Order detail pages break down exact fee calculations post-transaction. Monthly statements consolidate all charges with clear categorization."
    },
    {
        "id": "doc11",
        "title": "Customer Support Procedures",
        "content": "Shoplite's multi-channel support infrastructure ensures comprehensive assistance across customer preferences. The tiered model begins with self-service options including knowledge bases, troubleshooting guides, and video tutorials addressing 60% of inquiries without agent intervention. Live chat serves as the primary real-time channel, available 24/7 through website and mobile apps. Automated chatbots handle straightforward queries, escalating to human agents for complex issues with under 2-minute wait times. Chat sessions handle order modifications, basic account issues, and policy questions, with transcripts automatically emailed. Email support manages less urgent inquiries through a ticketing system. Categorization algorithms route messages to specialized teams, with service level agreements guaranteeing responses within 4 hours for standard issues and 1 hour for urgent matters. Telephone support reserves voice communication for complex scenarios requiring detailed discussion. Verified account ownership through multi-factor authentication precedes account-specific discussions. Call routing directs customers to specialized departments with average hold times under 3 minutes."
    },
    {
        "id": "doc12",
        "title": "Mobile App Features",
        "content": "The Shoplite mobile application delivers optimized shopping experiences through iOS and Android platforms, leveraging device capabilities unavailable through traditional web interfaces. The app's authentication system incorporates biometric login options including Touch ID, Face ID, and fingerprint recognition, eliminating password memorization while enhancing security. Enhanced search functionality capitalizes on mobile-specific capabilities beyond traditional text input. Visual search technology enables product discovery through camera captures or existing photos, with computer vision algorithms identifying items and finding similar products within our catalog. Barcode scanning provides instant product identification and price comparison, particularly useful when shopping in physical retail environments. Voice search understands natural language queries, processing spoken requests through advanced speech recognition technology. Personalization engines within the mobile app deliver context-aware shopping experiences. Location-based services trigger relevant notifications when users approach physical stores carrying products from their wishlists or recent searches. Push notification systems maintain customer engagement through carefully timed, relevant messaging."
    },
    {
        "id": "doc13",
        "title": "API Documentation for Developers",
        "content": "The Shoplite API ecosystem provides comprehensive programmatic access to marketplace functionality through RESTful architectural principles. All API endpoints utilize HTTPS encryption with JSON request/response formatting, ensuring standardized communication across diverse programming environments. Authentication employs OAuth 2.0 protocol with both client credentials flow for server-to-server communication and authorization code flow for user-facing applications. Rate limiting policies balance system protection with developer needs through tiered allocation structures. Basic developer accounts receive 1,000 requests per hour across all endpoints, sufficient for small-scale integrations and development testing. Professional tier increases to 10,000 requests hourly with additional endpoints access, suitable for production applications with moderate traffic. Core API categories encompass the complete Shoplite platform functionality. Product API endpoints provide access to catalog information including detailed product data, inventory levels, pricing history, and customer reviews. Order API methods enable order creation, status retrieval, and management operations for authorized sellers."
    },
    {
        "id": "doc14",
        "title": "Security and Privacy Policies",
        "content": "Shoplite's security framework implements defense-in-depth principles across application, infrastructure, and organizational layers. Data encryption protects information both in transit and at rest, utilizing TLS 1.3 for network transmission and AES-256 encryption for stored data. Key management follows industry best practices with regular rotation and secure storage in hardware security modules. Access control systems enforce least-privilege principles through role-based permissions and mandatory multi-factor authentication for administrative accounts. User authentication incorporates adaptive risk assessment, evaluating login attempts based on device recognition, geographic patterns, and behavioral biometrics. Vulnerability management programs proactively identify and address potential security weaknesses. Automated scanning tools continuously assess applications and infrastructure for known vulnerabilities, with severity-based prioritization guiding remediation efforts. Privacy controls empower users with granular data management capabilities through comprehensive preference centers. Data export functionality enables users to download complete personal information archives in standardized formats."
    },
    {
        "id": "doc15",
        "title": "Promotional Codes and Discounts",
        "content": "Shoplite's promotional engine supports diverse discount strategies through flexible code configuration and targeted application rules. Promotion types include percentage discounts (10% off, 25% off), fixed amount reductions ($10 off, $50 off), free shipping offers, and bundle deals (buy one get one). Each promotion type serves specific business objectives, from inventory clearance to customer acquisition to average order value increase. Code generation options accommodate various marketing channel requirements. Public codes distribute broadly through email campaigns, social media, and advertising with minimal restrictions. Private codes target specific customer segments through personalized communications with usage limits per recipient. Single-use codes provide exclusive offers for high-value customers or complaint resolution. Eligibility rules ensure promotional effectiveness while controlling costs. Minimum purchase requirements prevent discount application to small orders where processing costs outweigh margin. Category restrictions focus discounts on strategic product groups while excluding low-margin or already discounted items."
    },
    {
        "id": "doc16",
        "title": "Shipping and Logistics Management",
        "content": "Shoplite's shipping infrastructure integrates multiple carrier solutions with customizable seller configurations to balance cost, speed, and reliability. Carrier partnerships span national providers (FedEx, UPS, USPS), regional specialists, and local delivery services, ensuring comprehensive coverage across diverse geographic markets. Real-time rate calculation during checkout considers package dimensions, weight, destination, and service level, presenting customers with multiple options balancing delivery speed against cost considerations. Shipping rule configurations empower sellers with granular control over delivery options and costs. Flat rate shipping sets consistent prices regardless of order characteristics, simplifying customer expectations. Calculated shipping dynamically prices based on actual carrier rates with optional markup percentages or fixed surcharges. Free shipping thresholds trigger waived delivery costs when order values exceed configured minimums, effectively increasing average order values. International shipping management handles cross-border complexities through automated documentation and compliance checks."
    },
    {
        "id": "doc17",
        "title": "Customer Loyalty Program",
        "content": "The Shoplite Rewards program structures member benefits through tiered progression systems that increase value with customer engagement. Membership begins at Silver tier (0-999 points), progressing to Gold (1,000-4,999 points), and culminating at Platinum (5,000+ points). Each tier unlocks additional benefits including earning multipliers, exclusive promotions, and enhanced services. Point accumulation follows multifaceted earning strategies across customer engagement activities. Base earning rates provide 1 point per dollar spent on all purchases, with category bonuses during promotional periods offering 2-5 points per dollar in targeted departments. Non-purchase activities generate additional points, including product review submission (50 points), social media sharing (25 points), referral conversions (500 points), and profile completion (100 points). Redemption options provide flexibility in reward utilization across different customer preference segments. Discount conversion allows point redemption for shopping credits at standardized rates (100 points = $1), applicable toward future purchases. Charitable donation options enable point conversion to monetary contributions for partnered nonprofit organizations."
    },
    {
        "id": "doc18",
        "title": "Marketplace Quality Standards",
        "content": "Shoplite's marketplace quality framework establishes minimum performance thresholds maintaining customer experience consistency across diverse sellers. Seller performance metrics encompass order fulfillment rates, cancellation percentages, and customer satisfaction scores. The fulfillment rate requirement mandates 95% successful order completion within promised timeframes, measuring reliability in delivering purchased items. Cancellation rate thresholds maintain below 2% except for documented inventory discrepancies or customer-requested cancellations, ensuring order commitment integrity. Product listing standards enforce accuracy and completeness requirements across all marketplace offerings. Title formatting prohibits excessive keyword stuffing while requiring clear product identification. Image requirements mandate minimum resolution standards, multiple angle coverage, and scale context through lifestyle shots or size comparison objects. Description completeness ensures comprehensive feature enumeration, accurate dimension reporting, and honest condition assessments for non-new items. Prohibited items policies delineate products unsuitable for marketplace distribution due to legal restrictions, safety concerns, or brand protection requirements."
    }
]

print(f"✅ Loaded {len(knowledge_base)} documents into knowledge base")
print("Document titles:")
for doc in knowledge_base:
    print(f"  - {doc['id']}: {doc['title']}")


✅ Loaded 18 documents into knowledge base
Document titles:
  - doc1: Shoplite User Registration Process
  - doc2: Product Search and Filtering Features
  - doc3: Shopping Cart and Checkout Process
  - doc4: Payment Methods and Security
  - doc5: Order Tracking and Delivery
  - doc6: Return and Refund Policies
  - doc7: Product Reviews and Ratings System
  - doc8: Seller Account Setup and Management
  - doc9: Inventory Management for Sellers
  - doc10: Commission and Fee Structure
  - doc11: Customer Support Procedures
  - doc12: Mobile App Features
  - doc13: API Documentation for Developers
  - doc14: Security and Privacy Policies
  - doc15: Promotional Codes and Discounts
  - doc16: Shipping and Logistics Management
  - doc17: Customer Loyalty Program
  - doc18: Marketplace Quality Standards


In [5]:
# Cell 5: Load a Proven, Reliable Model
print("🔄 Loading a reliable model...")

# Use distilgpt2 - it's small, fast, and reliable
model_name = "distilgpt2"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Set padding token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Quick test to verify model works
    test_input = tokenizer("Hello, ", return_tensors="pt")
    test_output = model.generate(
        test_input.input_ids,
        max_new_tokens=10,
        temperature=0.7
    )
    test_response = tokenizer.decode(test_output[0], skip_special_tokens=True)

    print(f"✅ Model '{model_name}' loaded successfully")
    print(f"   Test response: '{test_response}'")

except Exception as e:
    print(f"❌ Model loading failed: {e}")
    # We'll handle this if it happens


🔄 Loading a reliable model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to

✅ Model 'distilgpt2' loaded successfully
   Test response: 'Hello,  








'


In [6]:
# Cell 6: Simple Keyword Retrieval
def simple_retrieve(query, docs=knowledge_base, top_k=2):
    query_lower = query.lower()
    relevant_docs = []

    for doc in docs:
        content = doc['content'].lower()
        title = doc['title'].lower()

        # Simple keyword matching
        score = sum(1 for word in query_lower.split() if word in content or word in title)
        if score > 0:
            relevant_docs.append(doc)
            if len(relevant_docs) >= top_k:
                break

    return relevant_docs

print("✅ Retrieval function defined")


✅ Retrieval function defined


In [7]:
# Cell 7: Flask App with Robust Endpoints
app = Flask(__name__)

@app.route('/health', methods=['GET'])
def health():
    return jsonify({
        "status": "healthy",
        "model": model_name,
        "knowledge_base_size": len(knowledge_base),
        "message": "Week 3 LLM endpoint ready for Week 5"
    })

@app.route('/generate', methods=['POST', 'GET'])
def generate():
    """Robust text generation for Week 5"""
    try:
        # If it's a GET request, return a simple test response
        if request.method == 'GET':
            return jsonify({
                "message": "Generate endpoint is working",
                "usage": "Send POST request with {'prompt':'your text','max_tokens':50}"
            })
        data = request.json
        prompt = data.get('prompt', 'Hello, how can I help you?')
        max_tokens = data.get('max_tokens', 100)

        # Ensure we have a proper prompt
        if not prompt.strip():
            prompt = "Please provide a helpful response:"

        # Tokenize
        inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True, padding=True)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_new_tokens=max_tokens,
                temperature=0.8,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract response after prompt
        if full_text.startswith(prompt):
            response = full_text[len(prompt):].strip()
        else:
            response = full_text.strip()

        # Fallback for empty responses
        if not response:
            response = "I'm here to help with Shoplite questions. How can I assist you?"

        return jsonify({
            "text": response,
            "model": model_name,
            "tokens_generated": max_tokens
        })

    except Exception as e:
        return jsonify({
            "error": str(e),
            "fallback": "Technical issue - please try again"
        }), 500

print("✅ Flask app routes defined")


✅ Flask app routes defined


In [8]:
# Cell 8: Start Flask Server in Background
def run_flask():
    app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)

print("Starting Flask server...")
flask_thread = threading.Thread(target=run_flask)
flask_thread.daemon = True
flask_thread.start()

# Wait for server to start
time.sleep(3)
print("✅ Flask server running on port 5000")


Starting Flask server...
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


✅ Flask server running on port 5000


In [9]:
# Cell 9: Setup Ngrok Tunnel
print("Setting up ngrok tunnel...")

# You'll need to enter your ngrok token here
try:
    ngrok_token = input("Enter your ngrok auth token: ").strip()
    ngrok.set_auth_token(ngrok_token)

    public_url = ngrok.connect(5000).public_url
    print(f"🎯 Public URL: {public_url}")
    print(f"🔧 Health endpoint: {public_url}/health")
    print(f"⚡ Generate endpoint: {public_url}/generate")

except Exception as e:
    print(f"❌ Ngrok setup failed: {e}")
    public_url = "https://your-ngrok-url.ngrok.io"  # Fallback


Setting up ngrok tunnel...
Enter your ngrok auth token: 33VpwGbuVEllhpbTSo5Nq4C7Zza_4HBbwbpJo4uRHwnfjEERk
🎯 Public URL: https://matilda-nonallelic-malaysia.ngrok-free.dev
🔧 Health endpoint: https://matilda-nonallelic-malaysia.ngrok-free.dev/health
⚡ Generate endpoint: https://matilda-nonallelic-malaysia.ngrok-free.dev/generate


In [10]:
# Cell 10: Comprehensive Testing
print("🧪 Testing all endpoints...")
print("=" * 50)

# Use the public URL from previous cell
url = public_url

# Test 1: Health endpoint
print("1. Testing /health...")
try:
    health = requests.get(f"{url}/health")
    print(f"   Status: {health.status_code}")
    if health.status_code == 200:
        print("   ✅ Health endpoint working!")
    else:
        print(f"   ❌ Health failed: {health.text}")
except Exception as e:
    print(f"   ❌ Health error: {e}")

# Test 2: Generate endpoint
print("\n2. Testing /generate...")
test_prompts = [
    "Hello! What is Shoplite?",
    "What is your return policy?",
    "How do I track my order?"
]

for i, prompt in enumerate(test_prompts, 1):
    print(f"   Test {i}: '{prompt}'")
    try:
        data = {"prompt": prompt, "max_tokens": 50}
        response = requests.post(f"{url}/generate", json=data, timeout=10)

        if response.status_code == 200:
            result = response.json()
            text = result.get('text', 'No text')
            print(f"      ✅ Response: '{text}'")
        else:
            print(f"      ❌ Failed: {response.status_code} - {response.text}")

    except Exception as e:
        print(f"      ❌ Error: {e}")

print("\n" + "=" * 50)
print("🎉 Testing complete! Your Week 3 endpoint is ready for Week 5!")


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:11] "GET /health HTTP/1.1" 200 -


🧪 Testing all endpoints...
1. Testing /health...
   Status: 200
   ✅ Health endpoint working!

2. Testing /generate...
   Test 1: 'Hello! What is Shoplite?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:15] "POST /generate HTTP/1.1" 200 -


      ✅ Response: 'The shoplifter is a game with a simple level system. It is a way to connect to the community, to buy items, and to do so through the system. It is an alternative game where you could just buy items from the shops,'
   Test 2: 'What is your return policy?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:19] "POST /generate HTTP/1.1" 200 -


      ✅ Response: 'Is your current policy changed?'
   Test 3: 'How do I track my order?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:23] "POST /generate HTTP/1.1" 200 -


      ✅ Response: '›
The order is in the range of 12-16, and I'll try to do exactly that, but I can't tell you its a straight line. It's not an easy one to get around, and I'm not sure if'

🎉 Testing complete! Your Week 3 endpoint is ready for Week 5!


In [11]:
# Check if Flask is running
import requests
try:
    response = requests.get("http://localhost:5000/health", timeout=5)
    print("✅ Flask is running locally:", response.json())
except:
    print("❌ Flask is NOT running locally")

# Check ngrok status
try:
    from pyngrok import ngrok
    tunnels = ngrok.get_tunnels()
    print("✅ Ngrok tunnels:", tunnels)
except Exception as e:
    print("❌ Ngrok issue:", e)


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:23] "GET /health HTTP/1.1" 200 -


✅ Flask is running locally: {'knowledge_base_size': 18, 'message': 'Week 3 LLM endpoint ready for Week 5', 'model': 'distilgpt2', 'status': 'healthy'}
✅ Ngrok tunnels: [<NgrokTunnel: "https://matilda-nonallelic-malaysia.ngrok-free.dev" -> "http://localhost:5000">]


In [12]:
# Test the endpoint from within Colab
import requests
url = "http://localhost:5000"  # Local test

try:
    # Test health
    health = requests.get(f"{url}/health")
    print("Health:", health.status_code, health.json())

    # Test generate
    data = {"prompt": "Test prompt", "max_tokens": 30}
    gen = requests.post(f"{url}/generate", json=data)
    print("Generate:", gen.status_code, gen.json())

except Exception as e:
    print("Error:", e)


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:23] "GET /health HTTP/1.1" 200 -


Health: 200 {'knowledge_base_size': 18, 'message': 'Week 3 LLM endpoint ready for Week 5', 'model': 'distilgpt2', 'status': 'healthy'}


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:28] "POST /generate HTTP/1.1" 200 -


Generate: 200 {'model': 'distilgpt2', 'text': "I'm here to help with Shoplite questions. How can I assist you?", 'tokens_generated': 30}


In [13]:
# Phase 3 Local Test
import requests

def test_phase_3_locally():
    base_url = "http://localhost:5000"  # Local Flask

    # Simulate Phase 3 assistant calls
    test_prompts = [
        "What is Shoplite's return policy?",
        "How do I create an account?",
        "Can you help me track my order?"
    ]

    for prompt in test_prompts:
        print(f"🧠 Testing: '{prompt}'")

        # Build a proper grounded prompt (like Phase 3 would)
        grounded_prompt = f"""
        You are a Shoplite customer service assistant. Answer the following question helpfully:

        Question: {prompt}

        Answer: """

        data = {"prompt": grounded_prompt, "max_tokens": 100}
        response = requests.post(f"{base_url}/generate", json=data)

        if response.status_code == 200:
            result = response.json()
            print(f"✅ Response: {result['text']}")
        else:
            print(f"❌ Failed: {response.status_code}")
        print("-" * 50)

test_phase_3_locally()


🧠 Testing: 'What is Shoplite's return policy?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:53] "POST /generate HTTP/1.1" 200 -


✅ Response: I'm here to help with Shoplite questions. How can I assist you?
--------------------------------------------------
🧠 Testing: 'How do I create an account?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:34:57] "POST /generate HTTP/1.1" 200 -


✅ Response: ? Answer:
--------------------------------------------------
🧠 Testing: 'Can you help me track my order?'


INFO:werkzeug:127.0.0.1 - - [21/Oct/2025 16:35:03] "POST /generate HTTP/1.1" 200 -


✅ Response: I'm here to help with Shoplite questions. How can I assist you?
--------------------------------------------------
