In [1]:
!pip install -r ./requirements.txt -q

In [2]:
!pip show langchain

Name: langchain
Version: 0.0.240
Summary: Building applications with LLMs through composability
Home-page: https://www.github.com/hwchase17/langchain
Author: 
Author-email: 
License: MIT
Location: c:\users\think\anaconda3\lib\site-packages
Requires: async-timeout, dataclasses-json, SQLAlchemy, langsmith, numexpr, aiohttp, PyYAML, requests, numpy, openapi-schema-pydantic, tenacity, pydantic
Required-by: 


In [3]:
!pip install langchain --upgrade -q

### Python-dotenv

In [4]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(),override=True)

### LLM Model (Wrappers):GPT3

In [None]:
from langchain.llms import OpenAI
llm=OpenAI(model_name="text-davinci-003", temperature = 0.7, max_tokens=512)
print(llm)

In [None]:
output = llm("What are environment variables \
and why do we use environment variables?")

print(output)

In [None]:
output =llm.generate(["what is the capital of paksitan?","who \
is the bush?"])


In [None]:
print(output.generations[1][0].text)

### ChatModels: GPT-3.5-Turbo and GPT-4

In [None]:
from langchain.schema import (AIMessage,HumanMessage,SystemMessage)
from langchain.chat_models import ChatOpenAI

In [None]:
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature = 0,max_tokens = 512)
messages=[SystemMessage(content="You are computer scientist"),
          HumanMessage (content="What are environment varaibles")]

In [None]:
output = chat(messages)

In [None]:
print(output.content)

### Promt Template
* Allows us to create dynamic prompts

In [None]:
from langchain import PromptTemplate

In [None]:
template = """ You are experienced virologist. 
Write a few sentences about the virus {virus} in {language} language"""

prompt = PromptTemplate.from_template(template)

In [None]:
print(prompt)

In [None]:
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003", temperature=0)

In [None]:
output = llm(prompt.format(virus="covid-19",language="punjabi"))

In [None]:
print(output)

### Chains

### LLMChain

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain

In [None]:
llm=ChatOpenAI(temperature = 0)

template = """ You are experienced English to Urdu translator.
Translate this english text : {text} to {language} language."""

prompt= PromptTemplate.from_template(template)

chain= LLMChain(llm=llm, prompt=prompt)

In [None]:
output= chain.run({"text":"If I were you, I would accept that challenge",
                  "language":"urdu"})

In [None]:
print(output)

### SimpleChain

In [None]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain

In [None]:
llm = OpenAI( model_name="text-davinci-003",temperature =0)

template_1 = """ You are a translator who can translate from multiple language.
Translate the text {text} into urdu language"""

prompt_1=PromptTemplate.from_template(template_1)

chain_1=LLMChain(llm=llm,prompt=prompt_1)


chat= ChatOpenAI( model_name="gpt-3.5-turbo",temperature =0)

template_2 = """ You are a translator who can translate from multiple language.
Translate the text{text} into french language"""

prompt_2=PromptTemplate.from_template(template_2)

chain_2=LLMChain(llm=chat,prompt=prompt_2)

link = [chain_1,chain_2]

SimpleChain = SimpleSequentialChain(chains=link,verbose=True)




In [None]:
output= SimpleChain.run("We went a marriage sermony yesterday. When we reached, the dinner had started")

In [None]:
print(output)

### LangChain Agents
* When we combine an LLM with different tools e.g APIs etc, we get an agent.
* Agents enable LLM to use different tools.
* LLM do the reasoning for which tool to use.
* Using agents LLM can do complex calculations, run code, search the web or run SQL queries.

In [5]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.llms import OpenAI

In [6]:
llm=OpenAI(temperature=0)
agent_executor= create_python_agent(
    llm=llm,
    tool= PythonREPLTool(),
    verbose=True
)

In [9]:
agent_executor.run("calculate 5.1**7.3")



[1m> Entering new AgentExecutor chain...[0m


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-Zgvqg40aCEUeeAAUlnbvIafN on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-Zgvqg40aCEUeeAAUlnbvIafN on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

[32;1m[1;3m I need to use the power operator
Action: Python_REPL
Action Input: print(5.1**7.3)[0m
Observation: [36;1m[1;3m146306.05007233328
[0m
Thought:

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-Zgvqg40aCEUeeAAUlnbvIafN on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-Zgvqg40aCEUeeAAUlnbvIafN on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

[32;1m[1;3m I now know the final answer
Final Answer: 146306.05007233328[0m

[1m> Finished chain.[0m


'146306.05007233328'

### Diving into Pinecone

In [6]:
os.environ.get("PINECONE_API_KEY")

'19457111-fec1-4bf1-b4eb-36ad5d8bfcc0'

  from tqdm.autonotebook import tqdm


In [10]:
pinecone.info.version()

VersionResponse(server='2.0.11', client='2.2.2')

### Pinecone indexes

In [11]:
pinecone.list_indexes()

[]

In [12]:
index_name = 'langchain-pinecone'

# dimension = 1536 as it is the dimension of OpenAI text embeddings
if index_name not in pinecone.list_indexes():
    print(f'Creating an index with name: {index_name}....')
    pinecone.create_index(index_name, dimension = 1536, metric='cosine', pods=1,pod_type='p1.x2')
    print('Done')
else:
    print(f'Index {index_name} already exists..')

Creating an index with name: langchain-pinecone....
Done


In [13]:
# to see info about index
pinecone.describe_index(index_name)

IndexDescription(name='langchain-pinecone', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [None]:
# deleting the index

if index_name in pinecone.list_indexes():
    print(f'Deleting index {index_name}..')
    pinecone.delete_index(index_name)
    print('Done')
else:
    print(f'{index} index doesn\'t exists...')
    

In [14]:
# selecting index
index=pinecone.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [46]:
# generating some random vectors of dimension 1536 into pinecone index

import random

vectors = [[random.random() for i in range(1536)] for j in range(5)]

In [47]:
vectors

[[0.7601031882104886,
  0.33865264441819565,
  0.8982296276528039,
  0.07750005339376032,
  0.3358798288759194,
  0.1119122709685273,
  0.9220808312445685,
  0.3819900182910845,
  0.7925778407278553,
  0.5504379986359427,
  0.8530158551250645,
  0.8316215197308707,
  0.32020647810894587,
  0.5623945785797071,
  0.2301017047861874,
  0.9287725389211156,
  0.8459274828598432,
  0.2731746159926105,
  0.06934602801915835,
  0.8585460196160082,
  0.325353092065368,
  0.26817008637249484,
  0.7111752398524337,
  0.3181229196690819,
  0.4785466115314062,
  0.13173310381363845,
  0.29562374009594194,
  0.41726809424264844,
  0.04426739913258304,
  0.5771691836456534,
  0.9364219642031484,
  0.9742745414800997,
  0.37528916907574084,
  0.22459025429480417,
  0.9344551959246366,
  0.3217443478397809,
  0.6243545934102531,
  0.7331885646002709,
  0.4411328601426492,
  0.012021837423240589,
  0.18901205409455446,
  0.3389013626911852,
  0.022185752978707396,
  0.3855902745481148,
  0.4111131477371

In [48]:
ids = list('abcde')
ids

['a', 'b', 'c', 'd', 'e']

In [49]:
# we'll insert some random vectors in index
# for inserting the vectors, first we have to select index
# which we did in the above 

index.upsert(vectors=zip(ids,vectors))


{'upserted_count': 5}

In [50]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

In [32]:
# updating a vector
# to update a vector we have to provide the id of the vector to be updated
# and a value which would replace that vector
index.upsert(vectors=[('c',[random.random() for i in range(1536)])])

{'upserted_count': 1}

In [33]:
# fetching a vector
index.fetch(ids=['c','d'])

{'namespace': '',
 'vectors': {'c': {'id': 'c',
                   'values': [0.522251546,
                              0.785386086,
                              0.937577188,
                              0.795941353,
                              0.927936673,
                              0.109911948,
                              0.908389628,
                              0.709775507,
                              0.214737147,
                              0.284075081,
                              0.586535633,
                              0.0899972767,
                              0.860904157,
                              0.793042123,
                              0.263784,
                              0.0109153418,
                              0.239131272,
                              0.846329272,
                              0.0238831881,
                              0.0171076152,
                              0.071369,
                              0.924677432,
        

In [34]:
# delect vectors
index.delete(ids=['b','c'])

{}

In [35]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [36]:
# fetching a vector that doesn't exist will return an empty vector
index.fetch(ids=['a','b'])

{'namespace': '',
 'vectors': {'a': {'id': 'a',
                   'values': [0.944784284,
                              0.59917587,
                              0.104458705,
                              0.833875537,
                              0.646584034,
                              0.891388774,
                              0.398873478,
                              0.596449673,
                              0.453042358,
                              0.0656702071,
                              0.0742078274,
                              0.301659971,
                              0.665524423,
                              0.7496472,
                              0.725652754,
                              0.265594453,
                              0.735339224,
                              0.1655664,
                              0.157327965,
                              0.2221248,
                              0.450423896,
                              0.128252193,
           

In [37]:
# to delete all vectors in the index
index.delete(delete_all=True)

{}

In [38]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [58]:
queries=[[random.random() for i in range(1536)] for j in range(2)]

In [59]:
# top_k=3 returns top 3 query that are closer to our query
index.query(
    queries=queries,
    top_k=3,
    include_values=False
)

{'results': [{'matches': [{'id': 'c', 'score': 0.751559556, 'values': []},
                          {'id': 'e', 'score': 0.750520766, 'values': []},
                          {'id': 'd', 'score': 0.74642688, 'values': []}],
              'namespace': ''},
             {'matches': [{'id': 'd', 'score': 0.757943571, 'values': []},
                          {'id': 'b', 'score': 0.755634248, 'values': []},
                          {'id': 'e', 'score': 0.748056054, 'values': []}],
              'namespace': ''}]}

### Splitting and Embedding Test Using LangChain

In [64]:
# by default \n\n,\n,whitesspace
from langchain.text_splitter import RecursiveCharacterTextSplitter

with open('./Security refund froebel\'s.txt') as f:
    doc=f.read()

text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

In [66]:
chunks=text_splitter.create_documents([doc])

In [76]:
print(chunks[4])

page_content='no. is B01732. This is to inform that our session from (2018-19) has come to an' metadata={}


In [77]:
print(chunks[4].page_content)

no. is B01732. This is to inform that our session from (2018-19) has come to an


In [80]:
print(f'Total np. of chunks: {len(chunks)}')

Total np. of chunks: 9


### Embedding cost

In [83]:
def print_embedding_cost(texts):
    import tiktoken
    enc = tiktoken.encoding_for_model('text-embedding-ada-002')
    total_tokens=sum([len(enc.encode(page.page_content))for page in texts])
    print(f'Total tokens:{total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens/1000 *0.0004:.6f} ')
print_embedding_cost(chunks)         

Total tokens:198
Embedding Cost in USD: 0.000079 


In [96]:
from langchain.embeddings import OpenAIEmbeddings
embeddings= OpenAIEmbeddings()

In [86]:
vector = embeddings.embed_query('my name is raja')

In [87]:
vector

[-0.009570090100169182,
 -0.008733931928873062,
 -0.0038313022814691067,
 -0.020629586651921272,
 -0.01682768017053604,
 0.026966096833348274,
 -0.027279656380414963,
 -0.018238697201013565,
 -0.016945265233516693,
 -0.008080683648586273,
 -0.014737285673618317,
 -0.0069505637511610985,
 0.0050398120656609535,
 0.0060098860412836075,
 0.0010060026543214917,
 -0.007460097316652536,
 0.025110870599746704,
 -0.027122875675559044,
 0.03266242519021034,
 0.005258650053292513,
 0.016683965921401978,
 -0.0012068765936419368,
 0.0100077660754323,
 -0.010680612176656723,
 -0.008466100320219994,
 0.0020218039862811565,
 0.02754095569252968,
 0.0006879522698000073,
 0.01105949655175209,
 -0.029004231095314026,
 -0.011967511847615242,
 0.002167151775211096,
 -0.0013652893248945475,
 -0.023255646228790283,
 -0.021609459072351456,
 -0.011288133449852467,
 0.0018209300469607115,
 0.008341982960700989,
 0.012614227831363678,
 0.006075210869312286,
 0.02806355431675911,
 -0.0012803670251742005,
 -0.003

### Inserting the Embeddings into a Pinecone index

In [97]:
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings

In [91]:
index.delete(delete_all=True)

{}

In [92]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [95]:
pinecone.describe_index(index_name)

IndexDescription(name='langchain-pinecone', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [103]:
# Pinecone.from_documents() returns a vecterstore object
embeddings= OpenAIEmbeddings()
vector_store= Pinecone.from_documents(chunks,embeddings,index_name=index_name)

 ### Asking Questions (Similarity Search)

In [104]:
#query is converted into text vector by the embedding object that we
#passed to the vector store as an argument. Then similiar vectors are
# fetched adn returned from the vector store

query = "what was the admission no.?"
result = vector_store.similarity_search(query)

In [108]:
print(result)

before. I shall be grateful to you.
Yours obediently,
Raja Haider Ali
Admission no. B01732
A2E


In [109]:
for i in result:
    print(i.page_content)
    print('-'* 30)

was a student of A Levelâ€™s pre engineering (A2E) in your college. My admission
------------------------------
was a student of A Levelâ€™s pre engineering (A2E) in your college. My admission
------------------------------
before. I shall be grateful to you.
Yours obediently,
Raja Haider Ali
Admission no. B01732
A2E
------------------------------
before. I shall be grateful to you.
Yours obediently,
Raja Haider Ali
Admission no. B01732
A2E
------------------------------


#### In the above ocode, we got the similiar text from the vector store. Now to get answer in  natural language we will use an LLM

In [110]:

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature = 0)

retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k':3}
)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever = retriever 
)

In [121]:
query = "what is the name of the school"
answer = chain.run(query)

In [122]:
print(answer)

The name of the school is Froebel's International School.
