In [None]:
from fastcore.utils import *
import fastcore.all as fc, re, math, itertools, functools, numpy as np, types, typing, dataclasses, matplotlib.pyplot as plt, collections, regex
from regex import search
from collections import Counter
from collections.abc import Iterable
# Print NumPy arrays up to 150 characters per line and suppress scientific notation for small floats.
np.set_printoptions(linewidth=150, suppress=True)
# Lower the default figure DPI so inline matplotlib figures render with fewer pixels.
plt.rcParams['figure.dpi'] = 50

In [None]:
!ls static

Understanding-ISO-17025-for-Laser-Power-and-Energy-Measurement-043520ed-a855-43f6-b05c-a9c83afded79.pdf
enhanced_laser_nodes.pkl
labmax-touch-ds.pdf
laser_nodes.db
laser_qdrant.pkl
multipurpose-energymax-sensors-usb-rs232-ds.pdf
parsed_lmc_docs.pkl
pm10k-plus-ds.pdf
requirements.txt


In [None]:
from fastcore.utils import *
# Directory holding the data files listed by `!ls static` above (PDFs, pickles, SQLite DB).
path = Path('static')

In [None]:
!pip install llama-cloud-services llama-index-core llama-index-readers-file llama-index-embeddings-openai llama-index-llms-openai llama_index.postprocessor.cohere_rerank llama-index-program-openai qdrant-client llama-index-vector-stores-qdrant python-fasthtml "fastapi[standard]" MonsterUI

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
import asyncio, threading

def run_in_thread(coro):
    """Run an awaitable to completion on a private event loop in a worker thread.

    The calling thread blocks until the worker finishes, so this is a synchronous
    bridge for environments where an event loop cannot be started directly.

    Args:
        coro: A coroutine object (or other awaitable accepted by
            ``loop.run_until_complete``), e.g. ``some_async_fn(args)``.

    Returns:
        Whatever the awaitable returns.

    Raises:
        Exception: Any exception raised by the awaitable is re-raised in the
            calling thread instead of being lost inside the worker.
    """
    outcome = {}

    def target():
        try:
            asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
            loop = asyncio.new_event_loop()
            try:
                outcome["value"] = loop.run_until_complete(coro)
            finally:
                # Always release the loop, even when the awaitable fails.
                loop.close()
        except Exception as exc:
            # Hand the failure back to the caller; non-Exception BaseExceptions
            # (SystemExit, KeyboardInterrupt, ...) keep the thread's default handling.
            outcome["error"] = exc

    t = threading.Thread(target=target)
    t.start(); t.join()
    if "error" in outcome:
        raise outcome["error"]
    return outcome.get("value")

# Usage: pass a coroutine object (the result of calling an async function), not the function itself.
# result = run_in_thread(some_async_function(args))

In [None]:
import os
import pickle
import sqlite3
from typing import List
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import ContextChatEngine
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Initialize LLM and embeddings
# The key is read with os.environ[...], so a missing OPENAI_API_KEY raises KeyError right here.
llm = OpenAI(model="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
# Register both on LlamaIndex's global Settings object.
Settings.llm = llm
Settings.embed_model = embed_model

# SQLite FTS Retriever
class SQLiteFTSRetriever:
    """Keyword retriever over the ``nodes_fts`` full-text table of a SQLite database.

    Each query joins ``nodes_fts`` to ``nodes`` on rowid and returns the best
    rows ordered by ``nodes_fts.rank``.
    """

    def __init__(self, db_path="static/laser_nodes.db", top_k=5):
        """Remember the database file and the maximum number of rows per query."""
        self.db_path = db_path
        self.top_k = top_k

    def retrieve(self, query_str: str) -> List[NodeWithScore]:
        """Return up to ``top_k`` nodes whose text matches ``query_str`` as a phrase.

        Args:
            query_str: Free text; it is matched as a single quoted phrase.

        Returns:
            ``NodeWithScore`` objects in rank order (best first). An empty or
            whitespace-only query yields an empty list.

        Raises:
            sqlite3.Error: If the database cannot be opened or queried.
        """
        import json
        from contextlib import closing

        if not query_str or not query_str.strip():
            return []

        # Quote the whole query as one FTS phrase; embedded double quotes must be
        # doubled or they would terminate the phrase and break the MATCH syntax.
        phrase = '"' + query_str.replace('"', '""') + '"'

        # closing() guarantees the connection is released even if the query raises.
        with closing(sqlite3.connect(self.db_path)) as conn:
            rows = conn.execute(
                """
                SELECT nodes.node_id, nodes.content, nodes.metadata, nodes_fts.rank
                FROM nodes_fts
                JOIN nodes ON nodes_fts.rowid = nodes.rowid
                WHERE nodes_fts MATCH ?
                ORDER BY nodes_fts.rank
                LIMIT ?
                """,
                (phrase, int(self.top_k)),
            ).fetchall()

        results = []
        for node_id, content, metadata_blob, rank in rows:
            # assumes metadata is stored as a JSON object string — TODO confirm against the writer
            metadata = json.loads(metadata_blob) if metadata_blob else {}
            node = TextNode(text=content, metadata=metadata, id_=node_id)
            # FTS5 rank is a negated BM25 value (more negative = better match), so
            # 1 / (1 + rank) could divide by zero or go negative. Map the positive
            # BM25 magnitude into [0, 1) instead, higher meaning more relevant.
            relevance = max(0.0, -float(rank))
            score = relevance / (1.0 + relevance)
            results.append(NodeWithScore(node=node, score=score))

        return results

# Hybrid Retriever with Reranking
class HybridRetrieverWithReranking(BaseRetriever):
    """Merge vector and keyword hits by weighted reciprocal position, then rerank.

    Every hit contributes ``weight * 1 / position`` (position counted from 1) to
    its node's fused score; the top ``initial_top_k`` fused nodes are passed to
    the reranker, whose output is returned.
    """

    def __init__(self, vector_retriever, keyword_retriever, reranker,
                 vector_weight=0.7, keyword_weight=0.3, initial_top_k=20):
        self.vector_retriever, self.keyword_retriever = vector_retriever, keyword_retriever
        self.reranker = reranker
        self.vector_weight, self.keyword_weight = vector_weight, keyword_weight
        self.initial_top_k = initial_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Run both retrievers on the query text, fuse their rankings and rerank."""
        text = query_bundle.query_str
        vector_hits = self.vector_retriever.retrieve(text)
        keyword_hits = self.keyword_retriever.retrieve(text)

        # Parallel maps keyed by node id; insertion order doubles as the tie-break order.
        node_by_id, score_by_id = {}, {}

        # Vector hits seed the table.
        for position, hit in enumerate(vector_hits, start=1):
            key = hit.node.node_id
            score_by_id[key] = self.vector_weight * (1.0 / position)
            node_by_id[key] = hit.node

        # Keyword hits add to an existing entry or open a new one.
        for position, hit in enumerate(keyword_hits, start=1):
            key = hit.node.node_id
            contribution = self.keyword_weight * (1.0 / position)
            if key in score_by_id:
                score_by_id[key] += contribution
            else:
                node_by_id[key] = hit.node
                score_by_id[key] = contribution

        # Highest fused score first (stable for ties), truncated to the shortlist size.
        ranked_ids = sorted(score_by_id, key=score_by_id.get, reverse=True)
        shortlist = [
            NodeWithScore(node=node_by_id[key], score=score_by_id[key])
            for key in ranked_ids[:self.initial_top_k]
        ]

        return self.reranker.postprocess_nodes(shortlist, query_bundle)

# Load function for vector stores and create retriever
def create_retriever(cohere_api_key, db_path="static/laser_nodes.db",
                     qdrant_pickle_path="static/laser_qdrant.pkl",
                     similarity_top_k=10, rerank_top_n=5,
                     vector_weight=0.7, keyword_weight=0.3, initial_top_k=20):
    """Build the hybrid retriever: SQLite keyword search, in-memory Qdrant vectors, Cohere rerank.

    Args:
        cohere_api_key: API key handed to ``CohereRerank``.
        db_path: SQLite database used by ``SQLiteFTSRetriever``.
        qdrant_pickle_path: Pickled snapshot with the keys ``collection_name``,
            ``vector_size`` and ``points``.
        similarity_top_k: Number of hits requested from the vector retriever.
        rerank_top_n: Number of nodes the reranker keeps.
        vector_weight: Fusion weight for vector hits.
        keyword_weight: Fusion weight for keyword hits.
        initial_top_k: Size of the fused shortlist passed to the reranker.

    Returns:
        A configured ``HybridRetrieverWithReranking``.
    """
    # Keyword side
    sqlite_retriever = SQLiteFTSRetriever(db_path=db_path)

    # NOTE(security): pickle.load can execute arbitrary code embedded in the file;
    # only load snapshots that you produced yourself.
    with open(qdrant_pickle_path, 'rb') as f:
        collection_data = pickle.load(f)

    # Rebuild the collection inside an in-memory Qdrant instance
    qdrant_client = QdrantClient(":memory:")
    collection_name = collection_data["collection_name"]

    if qdrant_client.collection_exists(collection_name):
        qdrant_client.delete_collection(collection_name)

    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config={"size": collection_data["vector_size"], "distance": "Cosine"}
    )

    # Skip the upsert when the snapshot holds no points
    if collection_data["points"]:
        qdrant_client.upsert(
            collection_name=collection_name,
            points=collection_data["points"]
        )

    # Vector side
    vector_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name
    )

    index = VectorStoreIndex.from_vector_store(vector_store)
    vector_retriever = index.as_retriever(similarity_top_k=similarity_top_k)

    # Cohere reranker applied to the fused shortlist
    reranker = CohereRerank(
        api_key=cohere_api_key,
        model="rerank-v3.5",
        top_n=rerank_top_n
    )

    return HybridRetrieverWithReranking(
        vector_retriever=vector_retriever,
        keyword_retriever=sqlite_retriever,
        reranker=reranker,
        vector_weight=vector_weight,
        keyword_weight=keyword_weight,
        initial_top_k=initial_top_k
    )

# Create chat engine
def create_chat_engine(retriever):
    """Wrap ``retriever`` in a ``ContextChatEngine`` with a 3900-token chat memory."""
    engine_options = dict(
        retriever=retriever,
        memory=ChatMemoryBuffer.from_defaults(token_limit=3900),
        # Same two-line prompt as before, including the indentation of its second line.
        system_prompt=(
            "You are a helpful assistant specializing in laser measurement technology.\n"
            "        Answer questions based on the context provided. If you don't know the answer, say so."
        ),
    )
    return ContextChatEngine.from_defaults(**engine_options)

# Main initialization function to use in FastAPI
def init_chat_engine():
    """Create the chat engine, taking the Cohere key from ``COHERE_API_KEY``.

    The key is looked up with ``os.environ.get``, so ``None`` is passed on when
    the variable is not set.
    """
    return create_chat_engine(create_retriever(os.environ.get("COHERE_API_KEY")))

# This is where you would start building your FastAPI app
# Example:
# chat_engine = init_chat_engine()

In [None]:
# Test the refactored code
if __name__ == "__main__":
    # Build the engine once. The API and UI handlers in later cells read this
    # module-level `chat_engine`, so this cell has to run before they are called.
    chat_engine = init_chat_engine()
    
    # Smoke test: one synchronous question against the full retrieval + chat pipeline.
    test_query = "What are the specifications of the PM10K+ laser power sensor?"
    response = chat_engine.chat(test_query)
    
    print(f"Query: {test_query}")
    print(f"Response: {response.response}")

Query: What are the specifications of the PM10K+ laser power sensor?
Response: The specifications of the PM10K+ laser power sensor are as follows:

- **Wavelength Range:** 0.19 to 11 µm
- **Power Range:** 100 to 10,000 W
- **Maximum Intermittent Power (<5 min.):** 12 kW (beam size dependent)
- **Noise Equivalent Power (at 6 lpm):**
  - USB and RS-232: <0.2 W
  - DB-25 with meter: <1.0 W
- **Maximum Power Density (kW/cm²):**
  - 6.0 at 1 kW
  - 2.6 at 5 kW
  - 2.7 at 10 kW
  - 2.5 at 12 kW
- **Recommended Minimum Beam Size (mm):**
  - 6.5 at 1 kW
  - 22 at 5 kW
  - 31 at 10 kW
  - 35 at 12 kW
- **Minimum Water Flow Rate:** 6 lpm (~10 PSI)
- **Water Temperature Range:** 10 to 25 °C
- **Response Time (to 0 to 99%) (at 6 lpm):**
  - Speed-up On: <3 seconds
  - Speed-up Off: 10 seconds
- **Maximum Energy Density (1064 nm, 10 ns):** 600 mJ/cm²
- **Detector Coating:** BB+
- **Detector Dimensions:** 65 x 65 mm
- **Calibration Uncertainty:** ±3%
- **Power Linearity:** ±2%
- **Spectral Compensat

**AI Prompt**

Does my original retriever and chatbot code need any updates to support streaming responses?

**AI Response**

Looking at your original code, you'll need a few modifications to properly support streaming responses. Here's what needs to be updated:

## Streaming Support Modifications

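Your current implementation uses `chat_engine.chat()` which returns a complete response. For streaming, you'll need to use `chat_engine.stream_chat()` instead, which returns a streaming response whose `response_gen` yields chunks synchronously. The asynchronous counterpart is `await chat_engine.astream_chat()`, whose result is iterated with `async for` over `async_response_gen()`.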

Let's look at the specific changes needed:

1. **Update the `ContextChatEngine` initialization**:

The default `ContextChatEngine` configuration should work with streaming, but you might want to adjust a few parameters:

```python
def create_chat_engine(retriever):
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    return ContextChatEngine.from_defaults(
        retriever=retriever,
        memory=memory,
        system_prompt="""You are a helpful assistant specializing in laser measurement technology.
        Answer questions based on the context provided. If you don't know the answer, say so.""",
        # These parameters can help with streaming performance
        streaming=True,
        verbose=True
    )
```

2. **Add a streaming method to test streaming functionality**:

```python
async def test_streaming():
    chat_engine = init_chat_engine()
    query = "What are the specifications of the PM10K+ laser power sensor?"
    
    # Get streaming response
    streaming_response = chat_engine.stream_chat(query)
    
    # Print chunks as they arrive
    print("Streaming response:")
    async for chunk in streaming_response.async_response_gen():
        print(chunk, end="", flush=True)
    print("\nStreaming complete!")
```

3. **Make sure your LLM supports streaming**:

Your current initialization of the OpenAI model looks good for streaming:

```python
llm = OpenAI(model="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
```

The OpenAI integration in LlamaIndex supports streaming by default, so this should work fine.

## Additional Considerations

1. **Error Handling for Streaming**:

When implementing streaming in a web application, you should add proper error handling:

```python
async def stream_response(query):
    try:
        response = chat_engine.stream_chat(query)
        async for chunk in response.async_response_gen():
            yield chunk
    except Exception as e:
        yield f"\nError during streaming: {str(e)}"
```

2. **Managing Context Window**:

With streaming responses, you need to be careful about context window management. The `ChatMemoryBuffer` with a token limit helps, but you might want to add a function to clear chat history when needed:

```python
def reset_chat_memory(chat_engine):
    chat_engine.memory.reset()
    return "Chat history has been cleared."
```

3. **Handling Large Retrieved Contexts**:

If your retriever is bringing back a lot of context, it might slow down the initial response time before streaming starts. Consider adjusting the number of retrieved documents or implementing a more aggressive filtering mechanism:

```python
def create_retriever(cohere_api_key):
    # Your existing code...
    
    return HybridRetrieverWithReranking(
        vector_retriever=vector_retriever,
        keyword_retriever=sqlite_retriever,
        reranker=reranker,
        vector_weight=0.7,
        keyword_weight=0.3,
        initial_top_k=10  # Reduced from 20 to improve streaming latency
    )
```

Your core retrieval architecture (the hybrid approach with vector search, keyword search, and reranking) doesn't need fundamental changes to support streaming. The main updates are in how you initialize the chat engine and how you consume its responses.

The streaming implementation looks good overall, and these adjustments should help you get the best streaming performance from your existing retriever and chatbot code.

**AI Prompt**

The development environment that I'm working in does not support async or streaming yet, so we will park the streaming for now. I will need to update the code to use streaming outputs later. For now we will proceed with the original code.

**AI Response**

Understood! It makes sense to focus on getting your core functionality working first before adding streaming capabilities.

For now, you can continue using your original implementation with `chat_engine.chat()` which works synchronously and returns complete responses. This approach is simpler to implement and debug in environments without async support.

When you're ready to add streaming later, you can revisit the modifications we discussed:
1. Updating to use `chat_engine.stream_chat()`
2. Adding async handling for the response chunks
3. Configuring the chat engine with streaming parameters

Your current retriever architecture (hybrid approach with vector search, keyword search, and reranking) is solid and doesn't need changes regardless of whether you're using streaming or not. The core search and ranking functionality will work the same way.

Let me know if you need any other adjustments to your current non-streaming implementation!

**AI Prompt**

We cannot run asyncio in this development environment. If we need async we can try wrapping it within the 'run_in_thread' function.

**AI Response**

I understand that you're working in an environment where directly running asyncio is causing issues. Let's modify our approach to use the `run_in_thread` function you've defined earlier for handling any async operations.

Here's an updated version of the FastAPI application that should work in your environment:

```python
from fasthtml.common import *
from fastapi import FastAPI, Depends, HTTPException
from monsterui.all import *
from pydantic import BaseModel
from typing import List
import os

# Import your chat engine initialization function
# chat_engine = init_chat_engine()

# Initialize FastAPI app with FastHTML and MonsterUI
app = FastAPI()
html_app, rt = fast_app(hdrs=Theme.blue.headers())

# Pydantic models for request/response
class ChatMessage(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: List[ChatMessage]
    query: str

class ChatResponse(BaseModel):
    response: str

# In-memory message storage
chat_history = []

# FastAPI endpoint for chat
@app.post("/api/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
    try:
        # Process the query using chat engine
        response = chat_engine.chat(request.query)
        return {"response": response.response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Mount FastHTML app to FastAPI
app.mount("/", html_app)

# Simple test route to verify the server is running
@rt("/")
def get():
    return Titled("Laser Measurement Assistant", 
                  P("API is running. Use /api/chat endpoint for queries."))
```

To run this server in your environment, we'll use the `run_in_thread` function to handle the async parts:

```python
# Function to start the server (to be used with run_in_thread)
async def start_server():
    import uvicorn
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()
    
# Start the server in a thread
server_thread = threading.Thread(target=lambda: run_in_thread(start_server()))
server_thread.daemon = True  # Make the thread terminate when the main program exits
server_thread.start()

print("Server started in background thread. Access at http://localhost:8000")
```

For testing the endpoint, let's create a simple function that doesn't use async:

```python
def test_endpoint():
    import requests
    import json
    import time
    
    # Give the server a moment to start
    time.sleep(2)
    
    url = "http://localhost:8000/api/chat"
    payload = {
        "messages": [
            {"role": "user", "content": "Hello, I have a question about laser measurement."}
        ],
        "query": "What are the specifications of the PM10K+ laser power sensor?"
    }
    
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(url, data=json.dumps(payload), headers=headers)
        
        print(f"Status Code: {response.status_code}")
        if response.status_code == 200:
            print("Response:")
            print(json.dumps(response.json(), indent=2))
        else:
            print(f"Error: {response.text}")
        return response
    except Exception as e:
        print(f"Error: {str(e)}")
        return None

# Run the test
response = test_endpoint()
```

This approach should work in your environment by:

1. Starting the FastAPI server in a separate thread using your `run_in_thread` function
2. Testing the endpoint with a simple synchronous request

If you still encounter issues, we can simplify further by:

1. Creating just the API endpoint without FastHTML integration first
2. Testing with a simple curl command or requests library
3. Adding the UI components once the API is confirmed working

Would you like me to provide a more simplified version focusing just on the API endpoint first?

In [None]:
from fasthtml.common import *
from fastapi import FastAPI, Depends, HTTPException
from monsterui.all import *
from pydantic import BaseModel
from typing import List
import os

# Import your chat engine initialization function
# chat_engine = init_chat_engine()

# Two ASGI apps: `app` (FastAPI) carries the JSON API, while `html_app` (FastHTML, using the
# MonsterUI blue theme headers) carries the pages registered through `rt`.
app = FastAPI()
html_app, rt = fast_app(hdrs=Theme.blue.headers())

# Pydantic models for request/response
# One chat turn as sent by the client, e.g. {"role": "user", "content": "..."}.
class ChatMessage(BaseModel):
    role: str
    content: str

# Request body for POST /api/chat. The handler in this cell reads only `query`;
# `messages` is validated but not otherwise used by it.
class ChatRequest(BaseModel):
    messages: List[ChatMessage]
    query: str

# Response body for POST /api/chat: the chat engine's reply text.
class ChatResponse(BaseModel):
    response: str

# In-memory message storage
# NOTE(review): none of the handlers visible in this notebook section read or write this list.
chat_history = []

# FastAPI endpoint for chat
@app.post("/api/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
    # Forward the query to the module-level chat engine; any failure along the way
    # is reported to the client as HTTP 500 carrying the exception text.
    try:
        answer = chat_engine.chat(request.query)
        reply = {"response": answer.response}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return reply

# Mount FastHTML app to FastAPI.
# The catch-all mount at "/" is added after the /api/chat route, so the API route is
# matched first and every other path falls through to the FastHTML app.
app.mount("/", html_app)

# Simple test route to verify the server is running
@rt("/")
def get():
    # Placeholder landing page confirming that the server is up.
    heading = "Laser Measurement Assistant"
    notice = P("API is running. Use /api/chat endpoint for queries.")
    return Titled(heading, notice)

In [None]:
# Function to start the server (to be used with run_in_thread)
async def start_server(host="0.0.0.0", port=8000, log_level="info"):
    """Serve the module-level ``app`` with uvicorn until the server stops.

    Args:
        host: Interface to bind. The default ``"0.0.0.0"`` listens on all
            interfaces; pass ``"127.0.0.1"`` to accept local connections only.
        port: TCP port to listen on.
        log_level: Uvicorn log level name.
    """
    import uvicorn
    server = uvicorn.Server(uvicorn.Config(app, host=host, port=port, log_level=log_level))
    await server.serve()
    
# Launch the server on a daemon thread: the notebook stays interactive and the
# thread does not keep the process alive once the main program exits.
server_thread = threading.Thread(
    target=lambda: run_in_thread(start_server()),
    daemon=True,
)
server_thread.start()

print("Server started in background thread. Access at http://localhost:8000")

Server started in background thread. Access at http://localhost:8000


In [None]:
def test_endpoint(url="http://localhost:8000/api/chat",
                  query="What are the specifications of the PM10K+ laser power sensor?",
                  startup_delay=2, timeout=120):
    """POST one chat request to the running API and print the outcome.

    Args:
        url: Chat endpoint to call.
        query: Question sent in the ``query`` field.
        startup_delay: Seconds to wait before the request so a freshly started
            server can come up.
        timeout: Seconds before the HTTP request is abandoned. Without a timeout
            ``requests`` would wait indefinitely on an unresponsive server.

    Returns:
        The ``requests.Response`` on any HTTP reply, or ``None`` when the request
        (or decoding of its JSON body) raised.
    """
    import json
    import time

    import requests

    # Give the server a moment to start
    time.sleep(startup_delay)

    payload = {
        "messages": [
            {"role": "user", "content": "Hello, I have a question about laser measurement."}
        ],
        "query": query
    }

    try:
        # json= serialises the payload and sets the JSON Content-Type header.
        response = requests.post(url, json=payload, timeout=timeout)

        print(f"Status Code: {response.status_code}")
        if response.status_code == 200:
            print("Response:")
            print(json.dumps(response.json(), indent=2))
        else:
            print(f"Error: {response.text}")
        return response
    except Exception as e:
        # Best-effort helper: report the failure instead of raising into the notebook.
        print(f"Error: {str(e)}")
        return None

# Run the test; `response` holds what test_endpoint returned (None if the request raised).
response = test_endpoint()

Status Code: 200
Response:
{
  "response": "The specifications of the PM10K+ laser power sensor are as follows:\n\n- **Wavelength Range (\u00b5m):** 0.19 to 11\n- **Power Range (W):** 100 to 10,000\n- **Maximum Intermittent Power (kW) (<5 min.):** 12 (beam size dependent)\n- **Noise Equivalent Power (W) (at 6 lpm):**\n  - USB and RS-232: <0.2\n  - DB-25 with meter: <1.0\n- **Maximum Power Density (kW/cm\u00b2):**\n  - 6.0 at 1 kW\n  - 2.6 at 5 kW\n  - 2.7 at 10 kW\n  - 2.5 at 12 kW\n- **Recommended Minimum Beam Size (mm):**\n  - 6.5 at 1 kW\n  - 22 at 5 kW\n  - 31 at 10 kW\n  - 35 at 12 kW\n- **Minimum Water Flow Rate (lpm):** 6 (~10 PSI)\n- **Water Temperature Range (\u00b0C):** 10 to 25\n- **Response Time (to 0 to 99%) (at 6 lpm):**\n  - Speed-up On (seconds): <3\n  - Speed-up Off (seconds): 10\n- **Maximum Energy Density (mJ/cm\u00b2) (1064 nm, 10 ns):** 600\n- **Detector Coating:** BB+\n- **Detector Dimensions (mm):** 65 x 65\n- **Calibration Uncertainty (%):** \u00b13\n- **Power L

**AI Prompt**

Ok! Looks like that's working now! Anything else we need to doublecheck before moving on?

**AI Response**

Great! The API endpoint is working correctly. Let's do a quick check of a few things before moving forward:

1. **API Response Format**: The response looks good - it's properly formatted JSON with the expected structure.

2. **Content Quality**: The response about the PM10K+ laser power sensor is detailed and well-formatted with bullet points, which means your retriever and chat engine are working correctly.

3. **Server Running**: The server started successfully in the background thread, which confirms our threading approach works.

Everything looks good from the API perspective! Here are a couple of things we might want to verify:

1. **Error Handling**: You might want to test with an invalid query to ensure error handling works correctly.

2. **Memory Usage**: If you plan to handle multiple users or long sessions, you might want to monitor memory usage during extended use.

Now that the API endpoint is verified working, we can move on to building the FastHTML UI components. The next steps would be:

1. Build the chat interface using MonsterUI components
2. Connect the UI to the API endpoint
3. Implement chat history management
4. Add any additional features like message timestamps or user session management

Would you like to start implementing the chat interface now?

**AI Prompt**

Yes, let's proceed with building the chat interface using FastHTML and MonsterUI components. I don't want it to have a dark background or a lot of color bubbles. I'd like it to look like ChatGPT or Perplexity or Anthropic user interfaces. Clean and sharp and light.

**AI Response**

Let's build a clean, light-colored chat interface similar to ChatGPT, Perplexity, or Anthropic using FastHTML and MonsterUI. Here's the implementation:

```python
from fasthtml.common import *
from monsterui.all import *

# Update the route for the main page
@rt("/")
def get():
    return Titled("Laser Measurement Assistant", 
                 chat_interface())

def chat_interface():
    """Create a clean, light chat interface"""
    return Div(
        # Header
        Div(
            H1("Laser Measurement Assistant", cls="text-xl font-medium"),
            P("Ask questions about laser measurement technology", cls=TextPresets.muted_sm),
            cls="border-b pb-4 mb-6"
        ),
        
        # Chat messages container
        Div(
            # Initial welcome message
            message("Welcome to the Laser Measurement Assistant. How can I help you today?", "assistant"),
            id="chat-messages",
            cls="space-y-6 mb-6 overflow-y-auto max-h-[60vh]"
        ),
        
        # Input form
        Form(
            Div(
                # Text input
                TextArea(
                    placeholder="Ask a question about laser measurement...",
                    id="user-input",
                    name="query",
                    rows=2,
                    cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none"
                ),
                # Submit button
                Button(
                    "Send",
                    type="submit",
                    cls="absolute right-3 bottom-3 px-4 py-2 bg-blue-500 text-white rounded-md hover:bg-blue-600 transition-colors"
                ),
                cls="relative"
            ),
            hx_post="/send-message",
            hx_target="#chat-messages",
            hx_swap="beforeend",
            hx_indicator="#spinner",
            cls="mt-4"
        ),
        
        # Loading indicator
        Loading(cls=LoadingT.spinner, htmx_indicator=True, id="spinner", 
                cls="fixed bottom-4 right-4 opacity-0"),
        
        cls="max-w-3xl mx-auto p-6 bg-white rounded-lg shadow-sm"
    )

def message(content, role):
    """Create a chat message component"""
    is_user = role == "user"
    
    return Div(
        # Avatar or icon
        Div(
            UkIcon("user" if is_user else "bot", height=24, width=24),
            cls="w-8 h-8 rounded-full bg-gray-100 flex items-center justify-center"
        ),
        
        # Message content
        Div(
            P(content, cls="whitespace-pre-wrap"),
            cls="flex-1"
        ),
        
        cls=f"flex gap-4 items-start {'' if is_user else 'bg-gray-50 p-4 rounded-lg'}"
    )

# Add route to handle message sending
@rt("/send-message", methods=["POST"])
def send_message(query: str):
    """Handle user message and get response"""
    if not query.strip():
        return ""
    
    # Create user message
    user_message = message(query, "user")
    
    try:
        # Get response from chat engine
        response = chat_engine.chat(query)
        
        # Create assistant message
        assistant_message = message(response.response, "assistant")
        
        # Clear input via OOB swap
        clear_input = TextArea(
            placeholder="Ask a question about laser measurement...",
            id="user-input",
            name="query",
            rows=2,
            cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none",
            hx_swap_oob="true"
        )
        
        return user_message, assistant_message, clear_input
    
    except Exception as e:
        # Return error message
        error_message = message(f"Sorry, an error occurred: {str(e)}", "assistant")
        return user_message, error_message
```

Let's also add some CSS to make the interface even cleaner:

```python
# Add custom CSS to the headers
custom_css = Style("""
    body {
        background-color: #f9fafb;
    }
    
    #chat-messages {
        min-height: 300px;
    }
    
    .message-assistant pre {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 0.5rem;
        overflow-x: auto;
    }
    
    #user-input:focus {
        box-shadow: none;
        border-color: #3b82f6;
    }
    
    /* Improve the typography */
    p {
        line-height: 1.6;
    }
    
    /* Markdown styling */
    .markdown h1, .markdown h2, .markdown h3 {
        margin-top: 1.5rem;
        margin-bottom: 1rem;
    }
    
    .markdown ul, .markdown ol {
        padding-left: 1.5rem;
        margin-bottom: 1rem;
    }
    
    .markdown ul li, .markdown ol li {
        margin-bottom: 0.5rem;
    }
    
    .markdown code {
        background-color: #f1f5f9;
        padding: 0.2rem 0.4rem;
        border-radius: 0.25rem;
        font-size: 0.875rem;
    }
""")

# Update the FastHTML app to include the custom CSS
html_app, rt = fast_app(hdrs=(Theme.blue.headers(), custom_css, MarkdownJS()))
```

This implementation features:

1. **Clean, Light Design**: White background with minimal styling, similar to ChatGPT and other modern chat interfaces.

2. **Message Styling**: 
   - User messages are simple with a small user icon
   - Assistant messages have a subtle gray background to differentiate them
   - Both have proper spacing and layout

3. **Responsive Input Area**: 
   - A compact two-row textarea (`rows=2`; manual resizing is disabled via `resize-none`)
   - Clear button styling with a blue accent color
   - Focus states for better accessibility

4. **Loading Indicator**: A spinner that appears when waiting for a response

5. **Markdown Support**: Added MarkdownJS to render formatted text, which is important for displaying technical information

6. **Proper Message Clearing**: The input field is cleared after sending a message

Let me know if you'd like any adjustments to this design, such as:
- Different color scheme
- Alternative layout for messages
- Additional features like a "clear conversation" button
- Any other specific UI elements you'd prefer

In [None]:
from fasthtml.common import *
from monsterui.all import *

# Update the route for the main page
@rt("/")
def get():
    # Main page: the chat UI rendered under the page title.
    page_body = chat_interface()
    return Titled("Laser Measurement Assistant", page_body)

def chat_interface():
    """Assemble the chat page: header, message log, input form and loading spinner."""
    header = Div(
        H1("Laser Measurement Assistant", cls="text-xl font-medium"),
        P("Ask questions about laser measurement technology", cls=TextPresets.muted_sm),
        cls="border-b pb-4 mb-6",
    )

    # The log opens with a greeting; the form below appends new messages to it.
    greeting = message("Welcome to the Laser Measurement Assistant. How can I help you today?", "assistant")
    message_log = Div(
        greeting,
        id="chat-messages",
        cls="space-y-6 mb-6 overflow-y-auto max-h-[60vh]",
    )

    query_box = TextArea(
        placeholder="Ask a question about laser measurement...",
        id="user-input",
        name="query",
        rows=2,
        cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none",
    )
    send_button = Button(
        "Send",
        type="submit",
        cls="absolute right-3 bottom-3 px-4 py-2 bg-blue-500 text-white rounded-md hover:bg-blue-600 transition-colors",
    )

    # HTMX posts the form and appends the returned fragments to the message log.
    input_form = Form(
        Div(query_box, send_button, cls="relative"),
        hx_post="/send-message",
        hx_target="#chat-messages",
        hx_swap="beforeend",
        hx_indicator="#spinner",
        cls="mt-4",
    )

    # All classes travel in a single cls tuple (a keyword may only be given once).
    spinner = Loading(
        cls=(LoadingT.spinner, "fixed bottom-4 right-4 opacity-0"),
        htmx_indicator=True,
        id="spinner",
    )

    return Div(
        header,
        message_log,
        input_form,
        spinner,
        cls="max-w-3xl mx-auto p-6 bg-white rounded-lg shadow-sm",
    )

def message(content, role):
    """Render one chat row: an avatar icon followed by the message text.

    A `role` of "user" selects the user icon and a plain row. Any other value
    gets the bot icon plus the extra background/padding/rounding classes.
    """
    from_user = role == "user"
    icon_name = "user" if from_user else "bot"
    # Only non-user rows receive the extra styling classes
    extra_cls = '' if from_user else 'bg-gray-50 p-4 rounded-lg'

    # Avatar or icon
    avatar = Div(
        UkIcon(icon_name, height=24, width=24),
        cls="w-8 h-8 rounded-full bg-gray-100 flex items-center justify-center",
    )

    # Message content
    body = Div(
        P(content, cls="whitespace-pre-wrap"),
        cls="flex-1",
    )

    return Div(avatar, body, cls=f"flex gap-4 items-start {extra_cls}")

# Add route to handle message sending
@rt("/send-message", methods=["POST"])
def send_message(query: str):
    """Answer a submitted question and return the new chat fragments.

    A blank submission returns an empty string. Otherwise the user's message
    is echoed, followed by either the chat engine's reply plus a replacement
    input box marked for an out-of-band swap, or an assistant-styled error
    message if anything in the reply path raises.
    """
    # Nothing to render for empty or whitespace-only input
    if not query.strip():
        return ""

    asked = message(query, "user")

    try:
        reply = chat_engine.chat(query)
        answered = message(reply.response, "assistant")

        # Same textarea as the page's input, re-sent with hx_swap_oob so the
        # existing #user-input element is replaced by this empty one
        blank_input = TextArea(
            placeholder="Ask a question about laser measurement...",
            id="user-input",
            name="query",
            rows=2,
            cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none",
            hx_swap_oob="true"
        )
    except Exception as err:
        # Report the failure inside the conversation; the input box is left untouched
        failure = message(f"Sorry, an error occurred: {str(err)}", "assistant")
        return asked, failure

    return asked, answered, blank_input

**AI Prompt**

Let's test it before doing custom css styling.

**AI Response**

Good idea! Let's test the interface as it is before adding custom CSS styling. This will help us see how the basic components work together and identify any issues that need to be fixed.

To test the interface:

1. Make sure the FastHTML app with the new routes is running (using the same approach we used earlier with the `run_in_thread` function)

2. Visit the main page at http://localhost:8000/ to see the chat interface

3. Try sending a test message to see if the chat functionality works correctly

Let's update our server code to include the new routes and restart it:

```python
# Restart the server with the updated routes
async def restart_server():
    # First, let's initialize the chat engine if not already done
    global chat_engine
    if 'chat_engine' not in globals():
        chat_engine = init_chat_engine()
    
    import uvicorn
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()

# Stop previous server thread if it exists
if 'server_thread' in globals():
    # No direct way to stop the thread, but we can create a new one
    print("Restarting server...")

# Start a new server thread
server_thread = threading.Thread(target=lambda: run_in_thread(restart_server()))
server_thread.daemon = True
server_thread.start()

print("Server restarted in background thread. Access at http://localhost:8000")
```

After testing the basic interface, we can then decide what CSS customizations would be most beneficial. This approach allows us to:

1. Verify that the core functionality works
2. See what the interface looks like with just the default styling
3. Make more informed decisions about what CSS changes are needed

Let me know what you observe when testing the interface, and we can make adjustments as needed!

In [None]:
# Restart the server with the updated routes
async def restart_server():
    """Run `app` under uvicorn on 0.0.0.0:8000 until the server is told to exit.

    Creates the global `chat_engine` on first use, and publishes the running
    `uvicorn.Server` as the global `uvicorn_server` so that a later run of
    this cell can request a shutdown before starting a replacement.
    """
    # First, let's initialize the chat engine if not already done
    global chat_engine, uvicorn_server
    if 'chat_engine' not in globals():
        chat_engine = init_chat_engine()

    import uvicorn
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    uvicorn_server = uvicorn.Server(config)
    await uvicorn_server.serve()

# Stop the previous server (if any) so port 8000 is free again. Starting a
# second server while the first is still bound fails with
# "[Errno 98] ... address already in use".
if 'server_thread' in globals():
    print("Restarting server...")
    if 'uvicorn_server' in globals():
        # Ask uvicorn to leave its serve loop, then wait for the thread to finish
        uvicorn_server.should_exit = True
        server_thread.join(timeout=10)
    if server_thread.is_alive():
        # Either the old server was started without a handle we can signal,
        # or it did not shut down within the timeout
        print("Warning: the previous server is still running, so port 8000 may still be in use.")

# Start a new server thread
server_thread = threading.Thread(target=lambda: run_in_thread(restart_server()))
server_thread.daemon = True
server_thread.start()

print("Server restarted in background thread. Access at http://localhost:8000")

Restarting server...
Server restarted in background thread. Access at http://localhost:8000


**AI Prompt**

The only problem with accessing localhost is that this is running in a cloud development server environment. I need to reach it from my home PC to test it.

**AI Response**

I understand - you're running this in a cloud development environment and need to access it from your home PC. Let's adjust our approach for this scenario.

## Options for Accessing from Your Home PC

1. **Port Forwarding / Tunneling**:
   You can use a service like ngrok, localtunnel, or cloudflared to create a temporary public URL that forwards to your local server.

2. **Public IP Configuration**:
   If your cloud environment has a public IP, we need to:
   - Make sure the server binds to `0.0.0.0` (all interfaces) instead of just localhost
   - Configure any necessary firewall rules to allow incoming connections

Let's set up a tunnel using a common service:

```python
# Install a tunneling service if needed
!pip install pyngrok

# Import and set up ngrok
from pyngrok import ngrok

# Restart the server binding to all interfaces
async def restart_server_public():
    # First, let's initialize the chat engine if not already done
    global chat_engine
    if 'chat_engine' not in globals():
        chat_engine = init_chat_engine()
    
    import uvicorn
    # Bind to 0.0.0.0 to make it accessible externally
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()

# Start a new server thread
server_thread = threading.Thread(target=lambda: run_in_thread(restart_server_public()))
server_thread.daemon = True
server_thread.start()

# Set up an ngrok tunnel to the server
# This creates a public URL that forwards to your local server
public_url = ngrok.connect(8000).public_url
print(f"Server accessible at: {public_url}")
```

Alternatively, if your cloud environment already provides a public URL or IP:

```python
# Get the public IP of the cloud environment
import requests

try:
    # This gets the public IP of the machine
    public_ip = requests.get('https://api.ipify.org').text
    print(f"Server should be accessible at: http://{public_ip}:8000")
    print("Note: Make sure any firewall rules allow incoming connections on port 8000")
except:
    print("Could not determine public IP. Check your cloud environment's dashboard for connection details.")
```

## Testing Without Direct Access

If setting up public access is challenging, we can also create a simple test function that simulates user interaction with the chat interface:

```python
def test_chat_interface():
    """Test the chat interface programmatically"""
    import requests
    
    # Base URL of the server
    base_url = "http://localhost:8000"
    
    # Test questions
    test_questions = [
        "What are the specifications of the PM10K+ laser power sensor?",
        "How does the water cooling system work?",
        "What is the maximum power the sensor can handle?"
    ]
    
    print("Testing chat interface with sample questions:")
    for i, question in enumerate(test_questions):
        print(f"\nQuestion {i+1}: {question}")
        
        # Send the question to the /send-message endpoint
        response = requests.post(
            f"{base_url}/send-message",
            data={"query": question}
        )
        
        # Check if request was successful
        if response.status_code == 200:
            print(f"Response received (length: {len(response.text)} characters)")
            # Print a preview of the response
            preview = response.text[:100] + "..." if len(response.text) > 100 else response.text
            print(f"Preview: {preview}")
        else:
            print(f"Error: {response.status_code}")
            print(response.text)
    
    print("\nTesting complete!")

# Run the test
test_chat_interface()
```

Let me know which approach works best for your environment, and we can proceed accordingly!

In [None]:
# Install a tunneling service if needed
!pip install pyngrok

# Import and set up ngrok
from pyngrok import ngrok

# Restart the server binding to all interfaces
async def restart_server_public():
    # First, let's initialize the chat engine if not already done
    global chat_engine
    if 'chat_engine' not in globals():
        chat_engine = init_chat_engine()
    
    import uvicorn
    # Bind to 0.0.0.0 to make it accessible externally
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    await server.serve()

# Start a new server thread
server_thread = threading.Thread(target=lambda: run_in_thread(restart_server_public()))
server_thread.daemon = True
server_thread.start()

# Set up an ngrok tunnel to the server
# This creates a public URL that forwards to your local server
public_url = ngrok.connect(8000).public_url
print(f"Server accessible at: {public_url}")

Defaulting to user installation because normal site-packages is not writeable
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: pip install --upgrade pip
Downloading ngrok ...Downloading ngrok: 0%Downloading ngrok: 1%Downloading ngrok: 2%Downloading ngrok: 3%Downloading ngrok: 4%Downloading ngrok: 5%Downloading ngrok: 6%Downloading ngrok: 7%Downloading ngrok: 8%Downloading ngrok: 9%Downloading ngrok: 10%Downloading ngrok: 11%Downloading ngrok: 12%Downloading ngrok: 13%Downloading ngrok: 14%Downloading ngrok: 15%Downloading ngrok: 16%Downloading ngrok: 17%Downloading ngrok: 18%Down

INFO:     Started server process [8]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
t=2025-03-31T00:53:20+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
t=2025-03-31T00:53:20+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
t=2025-03-31

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

**AI Prompt**

I need to set up an ngrok account for that to work. But I just remembered that I signed up for the Plash service, which Answer.AI has released for deploying FastHTML apps. I have inserted information about the Plash service into your context. Let's see if you can deploy this app to Plash. I have set PLASH_TOKEN and PLASH_EMAIL as environment variables.

**AI Response**

Great! Let's deploy your FastHTML application to Plash. Based on the documentation provided, Plash is designed specifically for FastHTML apps, which makes it a perfect fit for our project.

Here's how we'll deploy your application to Plash:

## Step 1: Organize Project Files

First, let's create the necessary project structure:

```python
# Create a directory structure for Plash deployment
!mkdir -p laser_assistant
```

Now, let's create the main application file:

```python
# Create the main.py file for our app
with open('laser_assistant/main.py', 'w') as f:
    f.write('''
from fasthtml.common import *
from monsterui.all import *
import os
import pickle
import sqlite3
from typing import List
from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.chat_engine import ContextChatEngine
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Initialize LLM and embeddings
llm = OpenAI(model="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.llm = llm
Settings.embed_model = embed_model

# SQLite FTS Retriever
class SQLiteFTSRetriever:
    def __init__(self, db_path="laser_nodes.db", top_k=5):
        self.db_path = db_path
        self.top_k = top_k
        
    def retrieve(self, query_str: str) -> List[NodeWithScore]:
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        escaped_query = f'"{query_str}"'
        c.execute(
            f"""
            SELECT nodes.node_id, nodes.content, nodes.metadata, nodes_fts.rank
            FROM nodes_fts 
            JOIN nodes ON nodes_fts.rowid = nodes.rowid
            WHERE nodes_fts MATCH ? 
            ORDER BY nodes_fts.rank
            LIMIT {self.top_k}
            """, 
            (escaped_query,)
        )
        
        results = []
        for node_id, content, metadata_blob, rank in c.fetchall():
            import json
            metadata = json.loads(metadata_blob)
            node = TextNode(text=content, metadata=metadata, id_=node_id)
            score = 1.0 / (1.0 + float(rank))
            results.append(NodeWithScore(node=node, score=score))
        
        conn.close()
        return results

# Hybrid Retriever with Reranking
class HybridRetrieverWithReranking(BaseRetriever):
    def __init__(self, vector_retriever, keyword_retriever, reranker, 
                vector_weight=0.7, keyword_weight=0.3, initial_top_k=20):
        self.vector_retriever = vector_retriever
        self.keyword_retriever = keyword_retriever
        self.reranker = reranker
        self.vector_weight = vector_weight
        self.keyword_weight = keyword_weight
        self.initial_top_k = initial_top_k
        super().__init__()
    
    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        query_str = query_bundle.query_str
        
        # Get results from both retrievers
        vector_results = self.vector_retriever.retrieve(query_str)
        keyword_results = self.keyword_retriever.retrieve(query_str)
        
        # Combine scores
        node_scores = {}
        for i, result in enumerate(vector_results):
            node_id = result.node.node_id
            score = self.vector_weight * (1.0 / (i + 1))
            node_scores[node_id] = {"node": result.node, "score": score}
        
        for i, result in enumerate(keyword_results):
            node_id = result.node.node_id
            keyword_score = self.keyword_weight * (1.0 / (i + 1))
            if node_id in node_scores:
                node_scores[node_id]["score"] += keyword_score
            else:
                node_scores[node_id] = {"node": result.node, "score": keyword_score}
        
        # Sort by score
        sorted_results = sorted(
            node_scores.values(), 
            key=lambda x: x["score"], 
            reverse=True
        )
        
        # Convert to NodeWithScore objects
        initial_results = [
            NodeWithScore(node=item["node"], score=item["score"]) 
            for item in sorted_results[:self.initial_top_k]
        ]
        
        # Apply reranking
        reranked_nodes = self.reranker.postprocess_nodes(
            initial_results, 
            query_bundle
        )
        
        return reranked_nodes

# Load function for vector stores and create retriever
def create_retriever(cohere_api_key):
    # Load SQLite retriever
    sqlite_retriever = SQLiteFTSRetriever()
    
    # Load Qdrant collection
    with open('laser_qdrant.pkl', 'rb') as f:
        collection_data = pickle.load(f)
    
    # Create Qdrant client and collection
    qdrant_client = QdrantClient(":memory:")
    collection_name = collection_data["collection_name"]
    
    if qdrant_client.collection_exists(collection_name):
        qdrant_client.delete_collection(collection_name)
    
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config={"size": collection_data["vector_size"], "distance": "Cosine"}
    )
    
    if collection_data["points"]:
        qdrant_client.upsert(
            collection_name=collection_name,
            points=collection_data["points"]
        )
    
    # Create vector store and retriever
    vector_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name
    )
    
    index = VectorStoreIndex.from_vector_store(vector_store)
    vector_retriever = index.as_retriever(similarity_top_k=10)
    
    # Create Cohere reranker
    reranker = CohereRerank(
        api_key=cohere_api_key,
        model="rerank-v3.5",
        top_n=5
    )
    
    # Create and return hybrid retriever with reranking
    return HybridRetrieverWithReranking(
        vector_retriever=vector_retriever,
        keyword_retriever=sqlite_retriever,
        reranker=reranker,
        vector_weight=0.7,
        keyword_weight=0.3,
        initial_top_k=20
    )

# Create chat engine
def create_chat_engine(retriever):
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    return ContextChatEngine.from_defaults(
        retriever=retriever,
        memory=memory,
        system_prompt="""You are a helpful assistant specializing in laser measurement technology.
        Answer questions based on the context provided. If you don't know the answer, say so."""
    )

# Main initialization function
def init_chat_engine():
    cohere_api_key = os.environ.get("COHERE_API_KEY")
    retriever = create_retriever(cohere_api_key)
    return create_chat_engine(retriever)

# Initialize the chat engine
chat_engine = init_chat_engine()

# Create the FastHTML app
app, rt = fast_app(hdrs=Theme.blue.headers())

# Define message component
def message(content, role):
    """Create a chat message component"""
    is_user = role == "user"
    
    return Div(
        # Avatar or icon
        Div(
            UkIcon("user" if is_user else "bot", height=24, width=24),
            cls="w-8 h-8 rounded-full bg-gray-100 flex items-center justify-center"
        ),
        
        # Message content
        Div(
            P(content, cls="whitespace-pre-wrap"),
            cls="flex-1"
        ),
        
        cls=f"flex gap-4 items-start {'' if is_user else 'bg-gray-50 p-4 rounded-lg'}"
    )

# Define chat interface
def chat_interface():
    """Create a clean, light chat interface"""
    return Div(
        # Header
        Div(
            H1("Laser Measurement Assistant", cls="text-xl font-medium"),
            P("Ask questions about laser measurement technology", cls=TextPresets.muted_sm),
            cls="border-b pb-4 mb-6"
        ),
        
        # Chat messages container
        Div(
            # Initial welcome message
            message("Welcome to the Laser Measurement Assistant. How can I help you today?", "assistant"),
            id="chat-messages",
            cls="space-y-6 mb-6 overflow-y-auto max-h-[60vh]"
        ),
        
        # Input form
        Form(
            Div(
                # Text input
                TextArea(
                    placeholder="Ask a question about laser measurement...",
                    id="user-input",
                    name="query",
                    rows=2,
                    cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none"
                ),
                # Submit button
                Button(
                    "Send",
                    type="submit",
                    cls="absolute right-3 bottom-3 px-4 py-2 bg-blue-500 text-white rounded-md hover:bg-blue-600 transition-colors"
                ),
                cls="relative"
            ),
            hx_post="/send-message",
            hx_target="#chat-messages",
            hx_swap="beforeend",
            hx_indicator="#spinner",
            cls="mt-4"
        ),
        
        # Loading indicator
        Loading(
            cls=(LoadingT.spinner, "fixed bottom-4 right-4 opacity-0"), 
            htmx_indicator=True, 
            id="spinner"
        ),
        
        cls="max-w-3xl mx-auto p-6 bg-white rounded-lg shadow-sm"
    )

# Define routes
@rt("/")
def get():
    return Titled("Laser Measurement Assistant", chat_interface())

@rt("/send-message", methods=["POST"])
def send_message(query: str):
    """Handle user message and get response"""
    if not query.strip():
        return ""
    
    # Create user message
    user_message = message(query, "user")
    
    try:
        # Get response from chat engine
        response = chat_engine.chat(query)
        
        # Create assistant message
        assistant_message = message(response.response, "assistant")
        
        # Clear input via OOB swap
        clear_input = TextArea(
            placeholder="Ask a question about laser measurement...",
            id="user-input",
            name="query",
            rows=2,
            cls="w-full p-3 border rounded-md focus:outline-none focus:ring-2 focus:ring-blue-200 resize-none",
            hx_swap_oob="true"
        )
        
        return user_message, assistant_message, clear_input
    
    except Exception as e:
        # Return error message
        error_message = message(f"Sorry, an error occurred: {str(e)}", "assistant")
        return user_message, error_message

# Start the server
serve()
''')
```

## Step 2: Create a Requirements File

Let's create a requirements.txt file with all the necessary dependencies:

```python
# Create requirements.txt
with open('laser_assistant/requirements.txt', 'w') as f:
    f.write('''
python-fasthtml
MonsterUI
llama-index-core
llama-index-readers-file
llama-index-embeddings-openai
llama-index-llms-openai
llama_index.postprocessor.cohere_rerank
llama-index-program-openai
qdrant-client
llama-index-vector-stores-qdrant
''')
```

## Step 3: Copy Necessary Data Files

We need to copy the data files from your static directory to the deployment directory:

```python
# Copy necessary data files
import shutil

# Copy database file
shutil.copy('static/laser_nodes.db', 'laser_assistant/laser_nodes.db')

# Copy Qdrant pickle file
shutil.copy('static/laser_qdrant.pkl', 'laser_assistant/laser_qdrant.pkl')
```

## Step 4: Create a plash.env File for Environment Variables

Let's create a plash.env file to store the required API keys:

```python
# Create plash.env file
with open('laser_assistant/plash.env', 'w') as f:
    f.write('''
export OPENAI_API_KEY=your_openai_api_key
export COHERE_API_KEY=your_cohere_api_key
''')
```

Note: You'll need to replace the placeholder values with your actual API keys before deploying.

## Step 5: Deploy to Plash

Now, let's use the plash_deploy command to deploy your application:

```python
# Change to the project directory
import os
os.chdir('laser_assistant')

# Deploy to Plash
!plash_deploy
```

The plash_deploy command will:
1. Upload your code and data files to Plash
2. Install the required dependencies
3. Start your application on Plash's infrastructure
4. Provide you with a URL where your application is accessible

## Troubleshooting

If you encounter any issues during deployment, here are some common troubleshooting steps:

1. Make sure your PLASH_TOKEN and PLASH_EMAIL environment variables are correctly set
2. Verify that all required files are included in your project directory
3. Check that your requirements.txt includes all necessary dependencies
4. Ensure that your plash.env file contains the correct API keys

The deployment process might take a few minutes to complete. Once it's done, you should receive a URL where your application is accessible.

Would you like me to proceed with these steps to deploy your application to Plash?

In [None]:
# Add custom CSS to the headers
# `custom_css` wraps the stylesheet below in a Style component. The rules cover
# the page background, the `#chat-messages` and `#user-input` elements,
# paragraph line height, and elements under `.message-assistant` / `.markdown`.
# NOTE(review): the `message` component defined earlier in this notebook sets
# no `message-assistant` or `markdown` class, so those rules presumably target
# a later revision of that component — confirm.
custom_css = Style("""
    body {
        background-color: #f9fafb;
    }
    
    #chat-messages {
        min-height: 300px;
    }
    
    .message-assistant pre {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 0.5rem;
        overflow-x: auto;
    }
    
    #user-input:focus {
        box-shadow: none;
        border-color: #3b82f6;
    }
    
    /* Improve the typography */
    p {
        line-height: 1.6;
    }
    
    /* Markdown styling */
    .markdown h1, .markdown h2, .markdown h3 {
        margin-top: 1.5rem;
        margin-bottom: 1rem;
    }
    
    .markdown ul, .markdown ol {
        padding-left: 1.5rem;
        margin-bottom: 1rem;
    }
    
    .markdown ul li, .markdown ol li {
        margin-bottom: 0.5rem;
    }
    
    .markdown code {
        background-color: #f1f5f9;
        padding: 0.2rem 0.4rem;
        border-radius: 0.25rem;
        font-size: 0.875rem;
    }
""")

# Update the FastHTML app to include the custom CSS
# The headers passed are the blue theme headers, the stylesheet above, and MarkdownJS().
# NOTE(review): this binds a new app to `html_app` and rebinds `rt`, while the
# server cells above pass `app` to uvicorn. Routes presumably need to be
# registered again on this `rt`, and `html_app` served, for the CSS to show — confirm.
html_app, rt = fast_app(hdrs=(Theme.blue.headers(), custom_css, MarkdownJS()))