# Semantic Search and Text Embeddings

In [None]:
import os
import openai
from IPython.display import display, HTML, Markdown
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment;
# the return value is assigned to `_` because it is not used.
_ = load_dotenv(find_dotenv()) # read local .env file
# Hand the key to the openai module. Indexing os.environ raises KeyError
# if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
from langchain.callbacks import OpenAICallbackHandler

# Create a callback handler object; it is printed here and again further down.
# NOTE(review): in the visible cells totals_cb is never passed to any
# LangChain call, so it is unclear what (if anything) updates it — confirm
# that the printed totals are meaningful.
totals_cb = OpenAICallbackHandler()

# Show the handler's state right after construction.
print(totals_cb)

In [None]:
# Sample corpus used by the similarity-search cells in this notebook.
documents = [
    # Short, standalone weather statement.
    "Weather in London is usually cloudy and rainy.",
    # Longer passage that embeds the previous sentence verbatim.
    "Seymour looked through the window. Streets were still crowded by people moving quickly. Weather in London is usually cloudy and rainy. Just like this afternoon.",
    # Another sentence about a European city, on a different topic (skiing).
    "Oslo is very close to great ski resorts.",
    # Wild horses and what they eat.
    "America’s wild horses have adapted to surviving on native grasses and shrubs available.",
    # Unrelated sentence with two named characters.
    "Tom is chasing Jerry around the house.",
    # English sentence about a movie ...
    "The movie shows true nature of life in South Pacific.",
    # ... and the same sentence in French.
    "Le film montre la vraie nature de la vie dans le Pacifique Sud.",
]

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embed every entry of `documents` with OpenAIEmbeddings and store the
# vectors in a Chroma vector store.
#
# Explicit, deterministic ids are supplied so that re-running this cell in
# the same kernel is intended to overwrite the existing entries instead of
# appending a second copy of each document (duplicates would otherwise show
# up in the top-k results of the searches below).
# NOTE(review): relies on the installed LangChain/Chroma versions treating
# repeated ids as an upsert — confirm against the pinned versions.
db = Chroma.from_texts(
    documents,
    OpenAIEmbeddings(),
    ids=[f"doc-{i}" for i in range(len(documents))],
)

In [None]:
# Fetch a single stored record (offset 0, limit 1), asking for both the
# document text and its embedding.
doc_with_embeddings = db.get(offset=0, limit=1, include=["documents", "embeddings"])

# Text of the fetched record.
print(doc_with_embeddings["documents"][0])

# Embedding that belongs to the same record.
emb = doc_with_embeddings["embeddings"][0]

# Report how many values the embedding has, then dump the raw values.
print(f"\nLength of embeddings: {len(emb)}")
pprint(emb)

In [None]:
# "Seymour" appears in only one entry of `documents`.
query = "What is Seymour doing?"

# Ask for the single closest match (k=1).
docs = db.similarity_search(query, k=1)

pprint(docs)

In [None]:
# Same question about Seymour, who is mentioned in only one entry of
# `documents`.
query = "What is Seymour doing?"

# Ask for three results (k=3), i.e. more results than there are entries
# that mention Seymour.
docs = db.similarity_search(query, k=3)

pprint(docs)

## Semantic similarity search returns the *most similar documents*, which are NOT NECESSARILY RELEVANT DOCUMENTS!

In [None]:
# Question about Seymour, who is mentioned in only one entry of `documents`.
query = "What is Seymour doing?"

# Same k=3 request, but through the *_with_relevance_scores variant so the
# printed output presumably carries a score for each hit.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# Generic weather question: it names no city, and none of the entries in
# `documents` contains the word "today".
query = "What is the weather today?"

# Top three matches together with their relevance scores.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# The query is a verbatim copy of the first entry of `documents`.
query = "Weather in London is usually cloudy and rainy."

# k=7 equals the number of entries in `documents`, so a score is requested
# for every sample rather than just the best few.
docs = db.similarity_search_with_relevance_scores(query, k=7)

pprint(docs)

In [None]:
# Question form of the London weather statement found in `documents`.
query = "What is the weather in London usually like?"

# Top three matches together with their relevance scores.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# No entry of `documents` contains the word "mustangs" or "diet"; the only
# related entry talks about wild horses and what they survive on.
query = "What is a diet of mustangs?"

# Top three matches together with their relevance scores.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# No entry of `documents` contains the words "European" or "capital";
# London and Oslo are only mentioned by name.
query = "Tell me about European capitals"

# Top three matches together with their relevance scores.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# The query is a verbatim copy of the English movie sentence in `documents`;
# the corpus also holds the same sentence in French.
query = "The movie shows true nature of life in South Pacific."

# Top three matches together with their relevance scores.
docs = db.similarity_search_with_relevance_scores(query, k=3)

pprint(docs)

In [None]:
# Print the callback handler once more at the end of the notebook.
# NOTE(review): totals_cb is not passed to any call in the visible cells,
# so this may show the same state as right after construction — confirm.
pprint(totals_cb)