In [23]:
from dotenv import load_dotenv, find_dotenv
# Load the variables defined in the discovered .env file into the process
# environment; override=True is passed so they replace values already set.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [None]:
from langchain.globals import set_debug

# Ollama

Ollama runs LLMs locally. Download it from [ollama.com](https://ollama.com/download/linux), then fetch the model used below with `ollama pull llama2`.

In [43]:
from langchain_community.llms import Ollama
# LangChain wrapper around the `llama2` model served by Ollama (the local LLM).
# NOTE(review): presumably requires a running Ollama server with the model
# already pulled — confirm before running on a fresh machine.
llm_llama = Ollama(model="llama2")

In [12]:
# Send a single prompt and print the complete reply once it is available.
response = llm_llama.invoke("Tell me a joke")
print(response)


Why was the math book sad? Because it had too many problems! 😂


In [7]:
# Same prompt, but consumed as a stream: every piece is echoed immediately,
# without a trailing newline, and flushed so the text appears as it is generated.
for piece in llm_llama.stream("Tell me a joke"):
    print(piece, end="", flush=True)

Why did the scarecrow win an award? Because he was outstanding in his field! 😄

In [4]:
from langchain_core.prompts import ChatPromptTemplate

# Chat template with two messages: a system instruction that restricts the
# reply to the joke itself, and the user request. The {input} placeholder is
# the joke topic, supplied when the chain is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "When asked to tell a joke, only tell the joke and nothing else (no introduction, etc.)."),
        ("user", "Tell me a joke about {input}"),
    ]
)

In [3]:
from langchain_core.output_parsers import StrOutputParser

# The output of an LLM step may be a `message` object; this parser turns it
# into a plain string so that the chains below yield text.
output_parser = StrOutputParser()

In [35]:
# Pipeline: fill the joke prompt template -> call the local Llama model ->
# parse the model output into a string.
chain_llama = prompt | llm_llama | output_parser

In [39]:
# "programmers" fills the {input} placeholder of the prompt template.
# Note: in the saved output the model still adds an introduction
# ("Sure! Here's a joke for you:") despite the system message.
response = chain_llama.invoke({"input": "programmers"})
print(response)

Sure! Here's a joke for you:

Why do programmers prefer dark mode?

Because light attracts bugs.


In [38]:
# Streaming variant of the chain call: print each parsed piece as it arrives,
# with no newline in between, flushing so output is shown incrementally.
for piece in chain_llama.stream({"input": "programmers"}):
    print(piece, end="", flush=True)

Why did the programmer break up with his girlfriend?
She kept trying to fix her own problems, but he couldn't compile a solution.

# OpenAI

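Create an API key at <https://platform.openai.com/api-keys> and store it in `.env` as `OPENAI_API_KEY=<your key>`; the first cell of this notebook loads `.env` into the environment.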

In [9]:
from langchain_openai import ChatOpenAI
from langchain.callbacks import get_openai_callback

# OpenAI chat model; the chain reuses the same `prompt` and `output_parser`
# objects as the Llama chain, so only the model differs.
# NOTE(review): presumably the API key is read from the environment populated
# from .env — confirm the variable name expected by ChatOpenAI.
llm_openai = ChatOpenAI(model="gpt-3.5-turbo")
chain_openai = prompt | llm_openai | output_parser

In [22]:
# Run the OpenAI chain inside the callback context so that `token_usage`
# collects the token counts and cost of the request, then print both the
# joke and the usage summary.
with get_openai_callback() as token_usage:
    response = chain_openai.invoke({"input": "programmers"})
    print(response)

    # Blank lines to separate the joke from the usage report.
    print("\n")
    print(token_usage)

Why do programmers prefer dark mode? 

Because light attracts bugs!


Tokens Used: 50
	Prompt Tokens: 37
	Completion Tokens: 13
Successful Requests: 1
Total Cost (USD): $8.15e-05


In [11]:
# Stream the OpenAI chain inside the callback context and print the usage
# summary collected by `token_usage` afterwards.
with get_openai_callback() as token_usage:
    for piece in chain_openai.stream({"input": "programmers"}):
        print(piece, end="", flush=True)

    print("\n")
    # Caveat: when the response is streamed, the callback recorded nothing in
    # this run (every counter in the saved output is 0), so token counting
    # apparently does not work with streaming in this setup.
    print(token_usage)

Why do programmers prefer dark mode?

Because light attracts bugs.

Tokens Used: 0
	Prompt Tokens: 0
	Completion Tokens: 0
Successful Requests: 0
Total Cost (USD): $0.0


# Retrieval Augmented Generation

Use a Chroma vector store to answer questions with retrieved context.

In [35]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader

# Load the example document (a Markdown README) from disk; `load()` returns a
# list of documents whose text is available as `page_content`.
example_path = "example_data/ML-DEECo_README.md"
loader = UnstructuredMarkdownLoader(example_path)
docs = loader.load()

# Preview the first 100 characters of the loaded text.
docs[0].page_content[:100]

'ML-DEECo\n\nML-DEECo is a machine-learning-enabled component model for adaptive component architecture'

In [36]:
from langchain.text_splitter import CharacterTextSplitter

# Split the loaded document into chunks (target size 500 characters, no
# overlap between consecutive chunks).
# NOTE: this rebinds `docs` from the whole document to its chunks, so
# re-running only this cell would split the chunks again instead of the
# original document; re-run the loading cell first.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(docs)

# Show how many chunks were produced and inspect the first one.
print(len(docs))
docs[0]

14


Document(page_content='ML-DEECo\n\nML-DEECo is a machine-learning-enabled component model for adaptive component architectures. It is based on DEECo component model, which features autonomic components and dynamic component coalitions (called ensembles). ML-DEECo allows exploiting machine learning in decisions about adapting component coalitions at runtime.', metadata={'source': 'example_data/ML-DEECo_README.md'})

In [39]:
from langchain_community.vectorstores import Chroma
# from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from tqdm import tqdm

# Alternative embedding backend, kept for reference:
# embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
embedding_function = OllamaEmbeddings()

# Create the store from the first chunk, then add the remaining chunks one at
# a time so that tqdm can report progress (embedding is slow: roughly 10 s per
# chunk in the saved run). Iterating over tqdm directly lets it derive the
# total and advance the bar itself, instead of calling update() by hand.
vectorstore = Chroma.from_documents([docs[0]], embedding_function)
for doc in tqdm(docs[1:], desc="Creating vectorstore"):
    vectorstore.add_documents([doc])

vectorstore

Creating vectorstore:  93%|█████████▎| 13/14 [02:13<00:10, 10.28s/it]


<langchain_community.vectorstores.chroma.Chroma at 0x7f38eb8e5ab0>

In [41]:
# Try a query directly against the vector store (no LLM involved) to check
# that retrieval returns a relevant chunk.
query = "What is utility?"
result = vectorstore.similarity_search(query)

# Text of the first returned chunk.
result[0].page_content

'utility orders the components;\n\ncardinality sets the maximum (or both minimum and maximum) allowed number of components to be picked.'

In [44]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt for question answering: {context} receives the retrieved documents
# and {input} the user's question.
# NOTE: this rebinds `prompt`, which earlier held the joke template used to
# build `chain_llama` and `chain_openai`; re-running those chain cells after
# this one would build them with this template instead.
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# Chain that passes a list of documents plus the question to the local Llama
# model. NOTE(review): presumably the documents are concatenated ("stuffed")
# into {context} — confirm against the LangChain documentation.
document_chain = create_stuff_documents_chain(llm_llama, prompt)

In [45]:
from langchain.chains import create_retrieval_chain

# Expose the vector store as a retriever and combine it with the document
# chain: for a given "input", documents are retrieved first and then handed
# to `document_chain` to produce the answer.
retriever = vectorstore.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [54]:
# Enable LangChain's global debug output *before* invoking the chain so that
# the run itself is traced (enabling it after the call traces nothing), and
# always switch it off again, even if the invocation raises, so that later
# cells are not flooded with debug output.
set_debug(True)
try:
    response = retrieval_chain.invoke({"input": "What is utility?"})
finally:
    set_debug(False)

# Blank line to separate the debug trace from the final answer.
print()
print(response["answer"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:retrieval_chain] Entering Chain run with input:
[0m{
  "input": "What is utility?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:retrieval_chain > 2:chain:RunnableAssign<context>] Entering Chain run with input:
[0m{
  "input": "What is utility?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:retrieval_chain > 2:chain:RunnableAssign<context> > 3:chain:RunnableParallel<context>] Entering Chain run with input:
[0m{
  "input": "What is utility?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:retrieval_chain > 2:chain:RunnableAssign<context> > 3:chain:RunnableParallel<context> > 4:chain:retrieve_documents] Entering Chain run with input:
[0m{
  "input": "What is utility?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:retrieval_chain > 2:chain:RunnableAssign<context> > 3:chain:RunnableParallel<context> > 4:chain:retrieve_documents > 5:chain:RunnableLambda] Entering Chain run with input:
[0m{
  "input": "What is utility?"
}
[36;1m[1;3m[chain/end