In [None]:
from fastembed import TextEmbedding
import numpy as np

# Build the embedder with its default model. The original note says the
# model (about 33MB) is downloaded automatically.
model = TextEmbedding()

# Knowledge base: the sentences that questions are matched against.
docs = [
    "My phone number is 555-0123",
    "I live at 123 Main Street",
    "My email is john@email.com",
]

# Embed every document once up front and keep the vectors in a list so they
# can be reused for each query instead of being recomputed.
doc_embs = [*model.embed(docs)]

def ask(question):
    """Print the stored document that is most similar to ``question``.

    The question is embedded with the module-level ``model`` and compared
    against every vector in ``doc_embs`` using cosine similarity. The entry
    of ``docs`` with the highest score is printed along with that score.

    Args:
        question: Text of the query to match against ``docs``.

    Returns:
        None. The result is written to stdout.
    """
    # Embed the single question and take the first (only) resulting vector.
    q_emb = list(model.embed([question]))[0]

    # The query norm does not change between documents, so compute it once.
    q_norm = np.linalg.norm(q_emb)

    # Start below every possible cosine value: similarity can legitimately be
    # -1.0, which a starting value of -1 with a strict ">" would never accept.
    best_score = float("-inf")
    best_doc = None

    for doc, doc_emb in zip(docs, doc_embs):
        score = np.dot(q_emb, doc_emb) / (q_norm * np.linalg.norm(doc_emb))
        # A NaN score (e.g. from a zero-norm vector) never compares greater,
        # so such a document is simply skipped.
        if score > best_score:
            best_score = score
            best_doc = doc

    print(f"Q: {question}")
    if best_doc is None:
        # No documents at all, or none produced a comparable score.
        print("A: no matching document found")
        return
    print(f"A: {best_doc} (confidence: {best_score:.3f})")

# Run a few sample questions through the matcher.
for sample_question in (
    "what's my phone number",
    "Where do u live ?",
    "How can i email you ?",
):
    ask(sample_question)

Q: what's my phone number
A: My phone number is 555-0123 (confidence: 0.806)
Q: Where do u live ?
A: I live at 123 Main Street (confidence: 0.610)
Q: How can i email you ?
A: My email is john@email.com (confidence: 0.769)
