In [None]:
%pip install cohere tiktoken

In [None]:
# very imp and useful util
import os

from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
os.environ["GOOGLE_API_KEY"] = os.environ.get("GOOGLE_API_KEY")

from IPython.display import display
from IPython.display import Markdown
import textwrap


def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [None]:
# useful util
import sys
import pkg_resources
print("Python interpreter:", sys.executable)
print("Packages:")
for dist in pkg_resources.working_set:
    print(dist)


In [None]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [None]:
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        # this is helps to parse the html given by the url to text
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    )
)


In [None]:
docs = loader.load()
docs

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
# chunk_siz refers to words, chunk_overlap refers to last 100 words previous chunk and
# first 100 words of next chunk
# startindex adds indexes to the metadata i guess is helpfull when storing in db

In [None]:
splits = text_splitter.split_documents(docs)
for split in splits:
   print(split.metadata)

# you can see all indexes that need to be made embeddings to store in vector db maybe, but not sure

In [None]:

vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [None]:

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")
retrieved_docs[0].page_content

In [None]:
# do all your prompt prefrences here
# example:
from langchain_core.prompts import PromptTemplate

rag_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

# use custom_rag_prompt at place of prompt in rag_chain
custom_rag_prompt = PromptTemplate.from_template(rag_template) 

In [None]:
# prompt with chat history memory
template = """You are usually good at {ability}. 

Here is some context for the question tha can be asked:
{context}

Question: {question}

Relevant pieces of previous conversation:
{history}

Helpful Answer:"""

with_history_prompt = PromptTemplate.from_template(template)

In [None]:
# choose your llm and chatmodel
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
from operator import itemgetter

# rag_chain = (
#     {"context": itemgetter("question") | retriever | format_docs, "question": itemgetter("question")}
#     | with_history_prompt
#     | llm
#     | StrOutputParser()
# )

In [None]:
# from langchain_core.messages import AIMessage, HumanMessage
# def process_chat_history(chat_history):
#     history = []
    
#     for message in chat_history:
#         if isinstance(message, AIMessage):
#             history.append(f"AI: {message.content}")
#         elif isinstance(message, HumanMessage):
#             history.append(f"Human: {message.content}")
    
#     return history

In [1]:
%pip install langchain_google_genai

Collecting langchain_google_genai
  Downloading langchain_google_genai-0.0.5-py3-none-any.whl (14 kB)
Collecting google-generativeai<0.4.0,>=0.3.1
  Downloading google_generativeai-0.3.2-py3-none-any.whl (146 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.9/146.9 KB[0m [31m836.4 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting google-api-core
  Downloading google_api_core-2.15.0-py3-none-any.whl (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.0/122.0 KB[0m [31m888.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting google-ai-generativelanguage==0.4.0
  Downloading google_ai_generativelanguage-0.4.0-py3-none-any.whl (598 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m598.7/598.7 KB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting proto-plus<2.0.0dev,>=1.22.3
  Downloading proto_plus-1.23.0-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import faiss

from langchain.docstore import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain.memory import VectorStoreRetrieverMemory
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_size = 1536 # Dimensions of the OpenAIEmbeddings
index = faiss.IndexFlatL2(embedding_size)
embedding_fn = GoogleGenerativeAIEmbeddings().embed_query
convo_vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {})

convo_retriever = convo_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
convo_memory = VectorStoreRetrieverMemory(retriever=convo_retriever)
# convo_memory.save_context({"Human": "My name is Tom"}, {"Ai": "Hi Tom"})

  from .autonotebook import tqdm as notebook_tqdm


ValidationError: 2 validation errors for GoogleGenerativeAIEmbeddings
model
  field required (type=value_error.missing)
__root__
  Did not find google_api_key, please add an environment variable `GOOGLE_API_KEY` which contains it, or pass `google_api_key` as a named parameter. (type=value_error)

In [80]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import RedisChatMessageHistory
from langchain.chains import ConversationChain
REDIS_URL = "redis://localhost:6379/0"
def save_response(parsed_response):
    print('hey')
    convo_memory.save_context({"Ai":parsed_response},{"Human":""})
    return parsed_response

def save_question(params):
   print(params['question'])
   convo_memory.save_context({"Human":params['question']},{"Ai":"Processing..."})
   return params['question']

def retrieval_complete(params):
    print('retrieval complete')
    return params

history_chain = (
    {
        "history": itemgetter("question") | convo_retriever | format_docs,
        "context": itemgetter("question") | retriever | format_docs, 
        "question": save_question ,
        "ability": itemgetter("ability")
    }
    | with_history_prompt 
    | retrieval_complete
    | llm
    | StrOutputParser()
    | save_response
)



In [81]:

question = "Do you remember the secret word i told you?"
ai_msg = history_chain.invoke(
    {"ability":"ai","question": question}
)
# convo_memory.load_memory_variables({"Human":"What is my name"})

Do you remember the secret word i told you?


: 

In [None]:
ai_msg