
# Home Support Agent

Welcome to the Home Support Agent demo!  
This notebook builds a semantic retrieval engine using FAISS and SentenceTransformers, then connects it to a Gradio interface for smart home support queries.  
Designed for Alexa, Hue, and Nest troubleshooting.


_Designed by Team 2377 for a classroom demo and real-world use._

---

In [None]:
# Install core packages
!pip install --quiet streamlit pyngrok faiss-cpu sentence-transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m66.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m56.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m103.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Install Dependencies

The install cell above installs the packages required for retrieval, embedding, and the interface.


In [None]:
import os
import faiss
import pickle
import re
from sentence_transformers import SentenceTransformer

# Troubleshooting corpus: one dict per tip, holding the tip text and the device
# family it applies to. The order matters because list positions double as ids.
corpus = [
    {"text": tip, "device": family}
    for tip, family in (
        ("Try resetting your Hue Bridge and ensure it’s connected via Ethernet.", "Hue"),
        ("Check if Alexa is muted or disconnected from Wi-Fi.", "Alexa"),
        ("Reconnect your Nest thermostat to Wi-Fi and restart your router.", "Nest"),
        ("To reset your Echo device, press and hold the Action button for 20 seconds...", "Alexa"),
        ("Hue lights aren’t responding, check power and Hue app configuration.", "Hue"),
        ("To connect Alexa to a new Wi-Fi network, open the Alexa app...", "Alexa"),
        ("To enable remote access, sign into your Hue account in the Hue app...", "Hue"),
        ("To delete voice recordings, go to Alexa app → Settings → Privacy → Review Voice History.", "Alexa"),
        ("To reset a Hue bulb, toggle power 5 times quickly to restore factory settings.", "Hue"),
        ("Alexa routines can be triggered by voice, time, or device activity...", "Alexa"),
        ("To pair your Philips Hue Bridge, press the circular button in the center...", "Hue"),
        ("If your Hue lights don’t change color, verify you’re using color-capable bulbs...", "Hue"),
        ("If Alexa isn’t responding, make sure the microphone isn’t muted...", "Alexa"),
    )
]

# Only the text of each entry is embedded
texts_for_embedding = [entry["text"] for entry in corpus]

# Sentence-embedding model shared by embed_texts()
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_texts(texts):
    """Encode ``texts`` with the module-level ``model`` and return the result.

    ``normalize_embeddings=True`` is forwarded to ``model.encode`` so the
    encoder is asked for normalized vectors.
    """
    vectors = model.encode(texts, normalize_embeddings=True)
    return vectors

# Encode the corpus texts and store them on disk together with the corpus
embeddings = embed_texts(texts_for_embedding)
os.makedirs("assets", exist_ok=True)
with open("assets/embeddings.pkl", "wb") as f:
    pickle.dump((corpus, embeddings), f)

# Flat L2 index sized to the embedding width, filled with every corpus vector
index = faiss.IndexFlatL2(embeddings.shape[-1])
index.add(embeddings)

# Corpus position -> corpus entry, used to turn search hits back into entries
id_map = dict(enumerate(corpus))

print("✅ Semantic retrieval components rebuilt successfully.")
print(f"Corpus size: {len(corpus)}")
print(f"Embedding shape: {embeddings.shape}")

def retrieve_top_k_solutions(
    query: str, top_k: int = 5, max_distance: float = 1000.0
) -> list[tuple[dict, float]]:
    """Return the corpus entries closest to ``query``, best match first.

    Args:
        query: Free-text question to look up.
        top_k: Maximum number of neighbours requested from the index.
        max_distance: Hits whose distance is not strictly below this value are
            dropped. The default equals the previously hard-coded cut-off.

    Returns:
        ``(entry, distance)`` pairs sorted by ascending distance. ``entry`` is
        whatever ``id_map`` stores for the hit (the corpus dicts in this
        notebook) and ``distance`` is a plain ``float``. An empty list is
        returned for a blank query, a non-positive ``top_k``, or when the
        lookup raises.
    """
    # Nothing meaningful to search for: skip the encoder and the index entirely
    if not query or not query.strip() or top_k <= 0:
        return []
    try:
        query_embedding = embed_texts([query])
        scores, indices = index.search(query_embedding, top_k)
        results = [
            (id_map[int(idx)], float(score))
            for score, idx in zip(scores[0], indices[0])
            # idx == -1 is skipped: it is not a valid corpus position
            if idx != -1 and score < max_distance
        ]
        # Lower distance means a closer match
        return sorted(results, key=lambda pair: pair[1])
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to "no results"
        print("Error in top-k retrieval:", e)
        return []

def answer_query(q: str) -> str:
    """Format the retrieved results as markdown for display.

    Each hit becomes ``**N. <device> —** <text>`` followed by its score on the
    next line; hits are separated by a blank line. Dict entries contribute
    their ``"text"`` and ``"device"`` values (``"Unknown"`` when the device is
    missing); any other entry is shown via ``str()``. A fixed warning message
    is returned when nothing was retrieved.
    """
    results = retrieve_top_k_solutions(q)
    if not results:
        return "⚠️ No relevant solutions found. Try refining your query."

    output_lines = []
    for rank, (resp, score) in enumerate(results, 1):
        if isinstance(resp, dict):
            # Show the entry's fields rather than the dict repr
            raw_text = str(resp.get("text") or "")
            device = resp.get("device", "Unknown")
        else:
            raw_text, device = str(resp), "Unknown"
        # Drop a trailing "..." left on abbreviated tips
        cleaned_text = re.sub(r"\.{3,}$", "", raw_text).strip()
        # Two spaces before "\n" force a markdown line break before the score
        output_lines.append(f"**{rank}. {device} —** {cleaned_text}  \n_Score: {score:.2f}_")
    return "\n\n".join(output_lines)

def show_retrieval_results(query: str):
    """Print the ranked retrieval results for ``query`` to the console.

    One line is printed per hit with its rank, score and text. Dict entries
    are shown by their ``"text"`` value instead of the raw dict repr. A short
    notice is printed when nothing was retrieved.
    """
    print(f"\n🔍 Query: {query}\n")
    results = retrieve_top_k_solutions(query)
    if not results:
        print("No relevant results found.\n")
        return
    for i, (solution, score) in enumerate(results, 1):
        # Corpus entries are dicts; print their text rather than the dict itself
        if isinstance(solution, dict):
            solution = solution.get("text", "⚠️ Missing text")
        print(f"{i}. Score: {score:.4f} → {solution}")
    print()  # Spacing for next block


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Semantic retrieval components rebuilt successfully.
Corpus size: 13
Embedding shape: (13, 384)


In [None]:
import os
import pickle

def save_embeddings(corpus, embeddings, path="assets/embeddings.pkl"):
    """Pickle ``(corpus, embeddings)`` as one tuple to ``path``.

    The parent directory of ``path`` is created when it does not exist. A bare
    file name (no directory part) is written relative to the current working
    directory.
    """
    parent_dir = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually names one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump((corpus, embeddings), f)

## Define Troubleshooting Corpus

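The smart home responses are defined manually as a list of dictionaries (the `corpus`), each holding a `text` entry and a `device` tag. The cells around this heading save that corpus together with its embeddings and load them back.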


In [None]:
# Save once after generating embeddings
import pickle

def load_embeddings(path="assets/embeddings.pkl"):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): unpickling can execute code embedded in the file, so only
    load files that were written by a trusted source.
    """
    with open(path, "rb") as pickle_file:
        stored = pickle.load(pickle_file)
    return stored

# Embed the text of every corpus entry (not the entry dicts themselves), so the
# vectors are built the same way as texts_for_embedding, then persist them.
embeddings = embed_texts([item["text"] for item in corpus])
save_embeddings(corpus, embeddings)

# Load later for retrieval setup
corpus, embeddings = load_embeddings()


## Embed Text with SentenceTransformer

This block loads the embedding model and converts corpus entries into semantic vector representations.


In [None]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model, loaded by name; embed_texts() below reads this global.
model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_texts(texts):
    """Encode ``texts`` with the module-level ``model`` and return the result.

    The encoder is called with ``normalize_embeddings=True``, i.e. it is asked
    for normalized vectors.
    """
    encoded = model.encode(texts, normalize_embeddings=True)
    return encoded

In [None]:
import os
# Create the assets/ output directory; exist_ok=True keeps re-runs from raising
# when the directory is already there.
os.makedirs("assets", exist_ok=True)

In [None]:
import pickle
import numpy as np  # For slicing and formatting vectors
import os

# Ensure the directory exists
os.makedirs("assets", exist_ok=True)

# Generate and save embeddings. Only the text of each entry is embedded (not
# the entry dicts), matching how texts_for_embedding is built.
embeddings = embed_texts([item["text"] for item in corpus])
with open("assets/embeddings.pkl", "wb") as f:
    pickle.dump((corpus, embeddings), f)

# Print a short preview of the test embedding
test_vector = embed_texts(["Test your semantic engine"])[0]  # Get the first vector
print(f"Test vector preview (shape: {test_vector.shape}): {test_vector[:10]}")

Test vector preview (shape: (384,)): [ 0.03247961 -0.03990969 -0.02091562  0.02854027 -0.00842052 -0.05033278
 -0.01317613 -0.03228627 -0.09017497  0.00789993]


In [None]:
# Flat L2 index whose dimensionality matches the embedding width, filled with
# every embedding row
index = faiss.IndexFlatL2(embeddings.shape[-1])
index.add(embeddings)

# Corpus position -> corpus entry, used to turn search hits back into entries
id_map = dict(enumerate(corpus))

## Semantic Retrieval Functions

These functions power top-k search, format answers, and filter by relevance or device.


In [None]:
def retrieve_top_k_solutions(
    query: str, top_k: int = 5, max_distance: float = 1000.0
) -> list[tuple[dict, float]]:
    """Return the corpus entries closest to ``query``, best match first.

    Args:
        query: Free-text question to look up.
        top_k: Maximum number of neighbours requested from the index.
        max_distance: Hits whose distance is not strictly below this value are
            dropped. The default equals the previously hard-coded cut-off.

    Returns:
        ``(entry, distance)`` pairs sorted by ascending distance. ``entry`` is
        whatever ``id_map`` stores for the hit (the corpus dicts in this
        notebook) and ``distance`` is a plain ``float``. An empty list is
        returned for a blank query, a non-positive ``top_k``, or when the
        lookup raises.
    """
    # Nothing meaningful to search for: skip the encoder and the index entirely
    if not query or not query.strip() or top_k <= 0:
        return []

    try:
        query_embedding = embed_texts([query])
        scores, indices = index.search(query_embedding, top_k)

        results = [
            (id_map[int(idx)], float(score))
            for score, idx in zip(scores[0], indices[0])
            # idx == -1 is skipped: it is not a valid corpus position
            if idx != -1 and score < max_distance
        ]

        return sorted(results, key=lambda pair: pair[1])  # L2 distance → lower = better

    except Exception as e:
        # Best-effort lookup: report the problem and fall back to "no results"
        print("Error in top-k retrieval:", e)
        return []

In [None]:
def answer_query(q: str) -> str:
    """Render the retrieval hits for ``q`` as a markdown string.

    Every hit is shown as ``**N. <device> —** <text>`` with its score on the
    following line; a fixed warning is returned when there are no hits.
    """
    hits = retrieve_top_k_solutions(q)
    if not hits:
        return "⚠️ No relevant solutions found. Try refining your query."

    def render(rank, entry, distance):
        # Dict entries carry their own text/device; anything else is stringified
        if isinstance(entry, dict):
            body, label = entry.get("text"), entry.get("device", "Unknown")
        else:
            body, label = str(entry), "Unknown"
        # Strip a trailing run of three or more dots, then surrounding whitespace
        trimmed = re.sub(r"\.{3,}$", "", body).strip()
        return f"**{rank}. {label} —** {trimmed}  \n_Score: {distance:.2f}_"

    return "\n\n".join(
        render(rank, entry, distance)
        for rank, (entry, distance) in enumerate(hits, start=1)
    )

## Launch Gradio Interface

Provides a textbox to enter support queries and displays relevant manual entries.


In [None]:
import gradio as gr

# Assemble the Gradio app around answer_query, then start it
demo = gr.Interface(
    title="🔧 Home Support Agent",
    description="Type a question related to Alexa, Hue, or Nest devices and I’ll suggest manual-based troubleshooting tips.",
    fn=answer_query,
    inputs=gr.Textbox(
        label="Ask a smart home question",
        placeholder="e.g. Alexa isn’t responding",
    ),
    outputs="markdown",
    examples=[
        ["Alexa won’t respond to voice"],
        ["Hue lights aren’t pairing"],
        ["How do I reset my Echo?"],
    ],
)
demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3a85dfb4d63317420b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
def answer_query(q):
    """Format the retrieved results as markdown for display.

    Each hit becomes ``**N. <device> —** <text>`` followed by its score on the
    next line; hits are separated by a blank line. Dict entries contribute
    their ``"text"`` and ``"device"`` values (``"Unknown"`` when the device is
    missing); any other entry is shown via ``str()``. A fixed warning message
    is returned when nothing was retrieved.
    """
    results = retrieve_top_k_solutions(q)

    if not results:
        return "⚠️ No relevant solutions found. Try refining your query."

    output_lines = []
    for rank, (resp, score) in enumerate(results, 1):
        if isinstance(resp, dict):
            # Show the entry's fields rather than the dict repr
            raw_text = str(resp.get("text") or "")
            device = resp.get("device", "Unknown")
        else:
            raw_text, device = str(resp), "Unknown"
        # Drop a trailing "..." left on abbreviated tips
        cleaned_text = re.sub(r"\.{3,}$", "", raw_text).strip()
        # Two spaces before "\n" force a markdown line break before the score
        output_lines.append(f"**{rank}. {device} —** {cleaned_text}  \n_Score: {score:.2f}_")
    return "\n\n".join(output_lines)

In [None]:
def show_retrieval_results(query: str):
    """Print every ranked retrieval result for ``query`` to the console.

    One line is printed per hit with its rank, score and text. Dict entries
    are shown by their ``"text"`` value. A short notice is printed when
    nothing was retrieved.
    """
    print(f"\nQuery: {query}\n")
    results = retrieve_top_k_solutions(query)

    if not results:
        print("No relevant results found.")
        return

    for i, (solution, score) in enumerate(results, 1):
        if isinstance(solution, dict):
            solution = solution.get("text", "⚠️ Missing text")
        # Printed inside the loop so each hit is shown, not only the last one
        print(f"{i}. Score: {score:.4f} → {solution}")


In [None]:
# Quick sanity check: number of corpus entries and the first few id_map keys
print(f"Corpus length: {len(corpus)}")
print("ID Map keys:", list(id_map)[:5])  # Iterating a dict yields its keys


Corpus length: 13
ID Map keys: [0, 1, 2, 3, 4]


## Evaluate Sample Queries

Try example prompts to make sure retrieval is working as expected.


In [None]:
# Banner: title line followed by a 50-character rule
print("Semantic Retrieval Test on Sample Queries")
print("-" * 50)

# Representative questions, one per line
test_queries = [
    "Alexa won’t respond to voice",
    "Hue bulb keeps disconnecting",
    "How do I reset my Echo?",
]

# Show the ranked results for each sample question in turn
for q in test_queries:
    show_retrieval_results(q)

Semantic Retrieval Test on Sample Queries
--------------------------------------------------

Query: Alexa won’t respond to voice

5. Score: 1.0379 → To delete voice recordings, go to Alexa app → Settings → Privacy → Review Voice History.

Query: Hue bulb keeps disconnecting

5. Score: 1.0403 → To pair your Philips Hue Bridge, press the circular button in the center...

Query: How do I reset my Echo?

5. Score: 1.2767 → To reset a Hue bulb, toggle power 5 times quickly to restore factory settings.


In [None]:
# NOTE(review): this cell REPLACES the current corpus with the abbreviated
# entries below and rebuilds the embeddings, index and id_map from them, so
# retrieval afterwards only searches these entries. Extend the list before
# running it if the full corpus is needed.
corpus = [
    {"text": "To reset your Echo device...", "device": "Alexa"},
    # Tagged "Hue" (not "Philips Hue") to match the device labels used by the
    # other corpus entries, so exact-match device filtering finds this entry.
    {"text": "Hue lights not responding...", "device": "Hue"},
    # Add more items as needed
]
texts_for_embedding = [item["text"] for item in corpus]

embeddings = embed_texts(texts_for_embedding)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Corpus position -> corpus entry, used to turn search hits back into entries
id_map = {i: corpus[i] for i in range(len(corpus))}


In [None]:
def setup_corpus():
    """Return a placeholder corpus: a single dict holding one sample text."""
    sample_corpus = {"text": "This is a sample corpus."}
    return sample_corpus

In [None]:
import json
# NOTE(review): this rebinds the global ``corpus`` to whatever setup_corpus()
# returns; confirm that is intended before running cells that read ``corpus``.
corpus = setup_corpus()  # Or however you're building it
os.makedirs("assets", exist_ok=True)

# Serialize first, then write the text out in one go
with open("assets/corpus.json", "w") as f:
    f.write(json.dumps(corpus, indent=2))

def save_corpus_to_json(corpus, path="assets/corpus.json"):
    """Write ``corpus`` to ``path`` as JSON indented by two spaces.

    The parent directory of ``path`` is created when it does not exist. A bare
    file name (no directory part) is written relative to the current working
    directory.
    """
    parent_dir = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually names one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, "w") as f:
        json.dump(corpus, f, indent=2)

In [None]:
def filter_by_device(device_type, entries=None):
    """Return the entries whose ``"device"`` value equals ``device_type``.

    Args:
        device_type: Device label to match exactly (e.g. ``"Hue"``).
        entries: Entries to search. Defaults to the module-level ``corpus``.
            A single dict is treated as a one-entry collection.

    Returns:
        A list of the matching entry dicts, in their original order. Items
        that are not dicts or that have no ``"device"`` key never match.
    """
    source = corpus if entries is None else entries
    # Iterating a bare dict would yield its keys, so wrap it as one entry
    if isinstance(source, dict):
        source = [source]
    return [
        item
        for item in source
        if isinstance(item, dict) and item.get("device") == device_type
    ]

### Project Scaffold
```text
verba-support-agent/
├── retriever.py           # Embedding + retrieval logic (FAISS, scoring)
├── corpus_loader.py       # Corpus ingestion, cleaning, embedding
├── config.py              # Model paths, corpus location, hyperparameters
├── interface_stub.py      # Placeholder for Gradio, Streamlit, CLI etc.
├── utils/
│   ├── logger.py          # Debugging, usage logging, feedback hooks
│   ├── scoring.py         # Custom ranking logic, device filters, etc.
│   └── validation.py      # Input sanitation and corpus checks
├── assets/
│   ├── manuals_raw/       # Original manual files
│   └── manuals_cleaned/   # Preprocessed JSON/text
├── notebooks/
│   └── experiments.ipynb  # For playing with embeddings, pipeline tweaks
├── README.md              # Project overview and launch instructions
├── requirements.txt       # All necessary dependencies
└── run.py                 # Entry point for launching any interface
```