* see: https://python.langchain.com/docs/use_cases/question_answering/

In [1]:
import os
from getpass import getpass

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# removed OPenAI, using HF
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

from langchain import hub

# removed OpenAI, using Cohere
from langchain.chat_models import ChatCohere

from langchain.schema.runnable import RunnablePassthrough

#### Loading the document

In [2]:
# URL of the blog post used as the knowledge source for this notebook.
BLOG_POST = "https://luigi-saetta.medium.com/generative-ai-how-to-control-the-creativity-of-your-large-language-model-c7b0322b4c3d"

# WebBaseLoader is given the page URL; loading happens on .load() below.
loader = WebBaseLoader(BLOG_POST)

# `data` holds the loaded document(s) that the splitter consumes in the next step.
data = loader.load()

#### Splitting the document into chunks

In [3]:
# Upper bound (in characters) for each chunk produced by the splitter.
CHUNK_SIZE = 512

# chunk_overlap=0: consecutive chunks are not configured to share any text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=0,
)

# Break the loaded page into chunk-sized Document objects.
splits = text_splitter.split_documents(data)

In [4]:
# Sanity check: report how many chunks the splitter produced.
print(f"We have {len(splits)} splits...")

We have 15 splits...


In [8]:
# Peek at the second chunk (index 1): a Document with page_content and metadata.
splits[1]

Document(page_content='trained to generate text.In this article, I’ll discuss some general characteristics of LLM for text generation. Especially those parameters that can be used, in the inference phase, to control the variety of possible answers to prompts. Then, I’ll add some more details regarding Oracle OCI AI Generative Service, which is based on LLMs provided by Cohere.First of all, some basic concepts: how does an LLM generating text work, what is a prompt, and what does it mean that an LLM can be creative?An LLM for', metadata={'source': 'https://luigi-saetta.medium.com/generative-ai-how-to-control-the-creativity-of-your-large-language-model-c7b0322b4c3d', 'title': 'Generative AI: how to control the creativity of your Large Language Model | by Luigi Saetta | Oct, 2023 | Medium', 'description': 'At the heart of every Generative Service for Text, there is a Large Language Model (LLM), a model that has been trained, on a very large corpus of documents, to learn the structure and…

In [6]:
# Embeddings come from a Hugging Face sentence-transformers model
# (substituted for the OpenAI embeddings of the original tutorial).
EMBED_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Run the embedding model on CPU and leave the vectors un-normalized.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

hf = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Chroma is the vector store: embed every chunk with `hf` and index it.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=hf,
)

# Retriever view of the store, used as the "context" source of the chain.
retriever = vectorstore.as_retriever()

#### Define the prompt structure

In [None]:
# Fetch the prompt published as "rlm/rag-prompt" through the LangChain `hub` module.
# The chain defined later feeds it the keys "context" and "question".
# NOTE(review): presumably this needs network access to the hub — confirm.
rag_prompt = hub.pull("rlm/rag-prompt")

#### Define the LLM

In [None]:
# Never store API keys in the notebook: take the Cohere key from the
# COHERE_API_KEY environment variable and, if it is unset or empty, ask for it
# interactively (getpass does not echo the value into the cell output).
# NOTE(review): a key was previously committed in this cell; treat it as
# compromised and revoke/rotate it on the Cohere side.
API_KEY = os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")

# Cohere chat model (used in place of OpenAI) with a sampling temperature of 0.5.
llm = ChatCohere(cohere_api_key=API_KEY, temperature=0.5, verbose=True)

#### Define the (Lang)Chain

In [None]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt receives the repr of a list of Document
    objects (metadata included) instead of the plain chunk text.

    Args:
        docs: iterable of retrieved documents exposing ``page_content``.

    Returns:
        The page contents of all documents, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question is sent to the retriever (whose hits are flattened to plain
# text by format_docs) and is also passed through unchanged as "question";
# both values fill the prompt, which is then sent to the chat model.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
)

#### Process the question

In [None]:
# The question to answer from the blog post content.
QUESTION = "What is the architecture used for LLM?"

# Run the whole chain with the question as its single input.
response = rag_chain.invoke(QUESTION)

# The chain's last step is the chat model (`llm`); print the text of its reply.
print(response.content)

#### Explore the vector store

In [None]:
# Plain "top K" retrieval: query the vector store directly for the TOP_K chunks
# whose embeddings are most similar to the question.
TOP_K = 5

docs = vectorstore.similarity_search(QUESTION, k=TOP_K)

# Number of documents actually returned.
len(docs)

In [None]:
# Display the documents returned by the similarity search above.
docs