In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [2]:
# Load the example transcript and cut it into chunks ready for embedding.
# The encoding is pinned so the read does not depend on the platform's default
# locale encoding (assumes the file is UTF-8 -- confirm if loading fails).
loader = TextLoader(
    "../../00-example_data/state_of_the_union.txt",
    encoding="utf-8",
)

documents = loader.load()

# Split with a target chunk size of 1000 characters and no overlap between
# neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [3]:
# Embed every chunk with an embedding model served by Ollama and index the
# resulting vectors in a FAISS vector store.
embeddings = OllamaEmbeddings(
    model="nomic-embed-text:latest",
    base_url="http://localhost:11434",  # Ensure Ollama is running locally
)

vectorstore = FAISS.from_documents(texts, embeddings)

In [4]:
# Wrap the vector store as a retriever. "similarity" is the default search
# type; it is spelled out here to contrast with the variants used below.
retriever = vectorstore.as_retriever(search_type="similarity")

In [5]:
# Run the example question through the current retriever.
docs = retriever.invoke(
    "what did the president say about ketanji brown jackson?"
)

In [None]:
# Show the documents returned for the query above.
docs

### Maximum marginal relevance retrieval

In [7]:
# Rebind `retriever` to one that uses the "mmr" (maximum marginal relevance)
# search type instead of plain similarity search.
retriever = vectorstore.as_retriever(
    search_type="mmr",
)

In [10]:
# Ask the same question again, this time through the current `retriever`,
# and keep the result under its own name.
mmr_docs = retriever.invoke(
    "what did the president say about ketanji brown jackson?"
)

In [None]:
# Show the documents stored in `mmr_docs`.
mmr_docs

### Passing search parameters
#### Similarity score threshold retrieval

In [14]:
# Rebind `retriever` to the "similarity_score_threshold" search type, passing
# a score_threshold of 0.3 through search_kwargs.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.3},
)

In [15]:
# Same question, answered by the current `retriever`; stored separately so it
# can be compared with the earlier results.
scr_threshold_docs = retriever.invoke(
    "what did the president say about ketanji brown jackson?"
)

In [None]:
# Show the documents stored in `scr_threshold_docs`.
scr_threshold_docs

#### Specifying top k

In [17]:
# Rebind `retriever` with k=1 passed through search_kwargs so that only the
# single top match is requested.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 1},
)

In [None]:
# Re-run the question with the current `retriever`. Note that this rebinds
# `docs`, replacing the result stored under that name earlier in the notebook.
docs = retriever.invoke(
    "what did the president say about ketanji brown jackson?"
)

In [None]:
# Show the current contents of `docs`.
docs