## Install and Env setup

Install langchain : `pip install langchain`

Using OpenAI API as LLM provider : `pip install openai`

Set environment variables specific to the LLM provider. Note: I have created an Open API key and stored in the environment variable `OPENAI_API_KEY`. 

In [3]:
import os

assert os.environ["OPENAI_API_KEY"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

## Getting Started

In [2]:
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9)

Get predictions from a language model

In [3]:
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))



Bright Socks Co.


Prompt Templates

In [4]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(input_variables=["product"],
               template="What is a good name for a company that makes {product}?",
)

In [5]:
prompt.format(product="colorful socks")

'What is a good name for a company that makes colorful socks?'

Chains

In [6]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(temperature=0.9)
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

In [7]:
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)
chain.run("colorful socks")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')).


'\n\nRainbow Toes Socks'

In [8]:
# Using a Chat model in LLMChain

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
human_message_prompt = HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            template="What is a good name for a company that makes {product}?",
            input_variables=["product"],
        )
    )
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])
chat = ChatOpenAI(temperature=0.9)
chain = LLMChain(llm=chat, prompt=chat_prompt_template)
print(chain.run("colorful socks"))

Rainbow Socks Co.


In [12]:
# Using a SequntialChain to create a company name for a product 
# and then create a catchphrase for a product

second_prompt = PromptTemplate(
    input_variables=["company_name"],
    template="Write a catchphrase for the following company: {company_name}",
)
chain_two = LLMChain(llm=llm, prompt=second_prompt)

In [14]:
# combine two LLMChains
from langchain.chains import SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[chain, chain_two],
                                      verbose=True)

# Run the chain specifying only the input variable for the first chain.
catchphrase = overall_chain.run("colorful socks")
print(catchphrase)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mRainbow Threads[0m
[33;1m[1;3m

"Make a statement in style with Rainbow Threads!"[0m

[1m> Finished chain.[0m


"Make a statement in style with Rainbow Threads!"


In [15]:
# CustomChain with the Chain class
from langchain.chains import LLMChain
from langchain.chains.base import Chain

from typing import Dict, List


class ConcatenateChain(Chain):
    chain_1: LLMChain
    chain_2: LLMChain

    @property
    def input_keys(self) -> List[str]:
        # Union of the input keys of the two chains.
        all_input_vars = set(self.chain_1.input_keys).union(set(self.chain_2.input_keys))
        return list(all_input_vars)

    @property
    def output_keys(self) -> List[str]:
        return ['concat_output']

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        output_1 = self.chain_1.run(inputs)
        output_2 = self.chain_2.run(inputs)
        return {'concat_output': output_1 + output_2}

In [16]:
prompt_1 = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)
chain_1 = LLMChain(llm=llm, prompt=prompt_1)

prompt_2 = PromptTemplate(
    input_variables=["product"],
    template="What is a good slogan for a company that makes {product}?",
)
chain_2 = LLMChain(llm=llm, prompt=prompt_2)

concat_chain = ConcatenateChain(chain_1=chain_1, chain_2=chain_2)
concat_output = concat_chain.run("colorful socks")
print(f"Concatenated output:\n{concat_output}")

Concatenated output:


Colorful Toes Socks.

"Put a Colorful Spin on Your Step!"


## Retrieval Question Answering with Sources

Reference:
- https://python.langchain.com/en/latest/modules/chains/index_examples/vector_db_qa_with_sources.html

In [25]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate

In [2]:
# !wget -q https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt

In [3]:
# Get the text
with open('state_of_the_union.txt') as f:
    state_of_the_union = f.read()

In [4]:
# Split text into chunks using splitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_text(state_of_the_union)

Q: What is the difference between `split_text` & `split_documents`

In [5]:
# Initialize embeddings
embeddings = OpenAIEmbeddings()

In [6]:
# Prerequisite: chromadb module is required. install `pip install chromadb`
# Create a Chroma vectorstore from a raw documents.
# If a persist_directory is specified, the collection will be persisted there.
# Otherwise, the data will be ephemeral in-memory.

# also including the source as part of metadata 
docsearch = Chroma.from_texts(texts=chunks, embedding=embeddings,
                  metadatas=[{"source": f"{i}-pl"} for i in range(len(chunks))])

Using embedded DuckDB without persistence: data will be transient


In [7]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import OpenAI

In [13]:
# Question-answering with sources over an index
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=OpenAI(temperature=0),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [15]:
chain({"question": "What did the president say about Justice Breyer"}, return_only_outputs=True)

{'answer': ' The president honored Justice Breyer for his service and mentioned his legacy of excellence.\n',
 'sources': '31-pl'}

Here's the text about Justice Breyer to validate

> Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 


In [18]:
# specify a different chain type to load and use (tiktoekn package is a prerequsite)

chain = RetrievalQAWithSourcesChain.from_chain_type(
    OpenAI(temperature=0), 
    chain_type="map_reduce", 
    retriever=docsearch.as_retriever()
)

chain({"question": "What did the president say about Justice Breyer"}, 
      return_only_outputs=True)

{'answer': ' The president said "Justice Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service."\n',
 'sources': '31-pl'}

The answer is much better than the `stuff` chain type.

In [20]:
# `combine_documents_chain` param to control the parameters passed to RetrievalQAWithSourcesChain

from langchain.chains.qa_with_sources import load_qa_with_sources_chain

In [22]:
qa_chain = load_qa_with_sources_chain(
    llm=OpenAI(temperature=0),
    chain_type="stuff"
)
qa = RetrievalQAWithSourcesChain(
    combine_documents_chain=qa_chain,
    retriever=docsearch.as_retriever())

In [23]:
qa({"question": "What did the president say about Justice Breyer"}, return_only_outputs=True)

{'answer': ' The president honored Justice Breyer for his service and mentioned his legacy of excellence.\n',
 'sources': '31-pl'}

### Custom Prompts

Use our own prompts with the chain and get the response in Italian

In [31]:
template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). 
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
Respond in Italian.

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER IN ITALIAN:"""
PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])

qa_chain = load_qa_with_sources_chain(
    llm=OpenAI(temperature=0),
    chain_type="stuff",
    prompt=PROMPT
)
qa = RetrievalQAWithSourcesChain(
    combine_documents_chain=qa_chain,
    retriever=docsearch.as_retriever())

query = "What did the president say about Justice Breyer"
qa({"input_documents": docsearch.as_retriever().get_relevant_documents(query),
    "question": query }, 
   return_only_outputs=True)

{'answer': '\nIl Presidente non ha fatto alcun riferimento diretto a Justice Breyer nel discorso.\n',
 'sources': '31-pl, 32-pl, 21-pl, 34-pl'}

In [33]:
# return intermediate steps for `map_reduce` chain type
chain = load_qa_with_sources_chain(
    llm=OpenAI(batch_size=5, temperature=0),
    chain_type="map_reduce",
    reduce_intermediate_steps=True
)

ValidationError: 1 validation error for MapReduceDocumentsChain
reduce_intermediate_steps
  extra fields not permitted (type=value_error.extra)