In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=E2shqsYwxck"


In [2]:
from langchain_openai.chat_models import ChatOpenAI

model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")



In [3]:
model.invoke("Which is the best cricket team in the world?")

AIMessage(content='It is subjective to determine the best cricket team in the world as rankings and performance can vary over time. However, some of the top cricket teams currently are India, Australia, England, and New Zealand.')

In [4]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser

chain.invoke("What is the problem rag solves in ai ? ")

'The problem that rag (retrieval-augmented generation) solves in AI is the issue of generating more coherent and contextually relevant responses in conversational systems. Traditional language models often struggle with understanding and maintaining context over a longer conversation, leading to generic or off-topic responses. \n\nRAG addresses this problem by first retrieving relevant information from a large pre-existing knowledge base and then using this information to guide the generation of a more accurate and contextually appropriate response. This approach helps improve the overall quality and relevance of the generated responses in AI systems, making them more effective in natural language processing tasks.'

In [5]:
from langchain.prompts import ChatPromptTemplate

template = """
Answer the question based on the context below. If you cant 
answer the question, reply "I dont know"

Context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt.format(context="Mary's sister is Susana", question="Who is Mary's Daughter?")

'Human: \nAnswer the question based on the context below. If you cant \nanswer the question, reply "I dont know"\n\nContext: Mary\'s sister is Susana\n\nQuestion: Who is Mary\'s Daughter?\n'

In [6]:
import os

from langchain_core.output_parsers import StrOutputParser
from langchain_openai.chat_models import ChatOpenAI
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

parser = StrOutputParser()

chain = prompt | model | parser
chain.invoke({
    "context" : "Mary's sister is Susana",
    "question": "Who is Mary's sister?"
})

'Susana'

In [7]:
from operator import itemgetter

translation_prompt = ChatPromptTemplate.from_template(
    "Translate {answer} to {language}"
)

translation_chain = (
    {"answer" : chain, "language" : itemgetter("language")} | translation_prompt | model | parser
)

translation_chain.invoke (
    {
    "context" : "Mary's sister is Susana and her brother name is raul",
    "question": "How many siblings does mary have?",
    "language" : "Hindi",
}

)

'मेरी के दो भाई बहन हैं।'

In [9]:
import tempfile
import whisper
from pytube import YouTube


if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()

    whisper_model = whisper.load_model("base")

    with tempfile.TemporaryDirectory() as tmpdir:
        file = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(file, fp16=False)["text"].strip()
        
        with open("transcription.txt", "w") as file:
            file.write(transcription)


In [10]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("transcription.txt")
text_documents = loader.load()
text_documents

[Document(page_content="Hi, this is Lance from the chain team. I'm going to talk about building a self-reflective rag apps from scratch using only open source and local models that run strictly on my laptop. Now one of those interesting trends in the rag research and a lot of methods that become pretty popular in recent months and weeks is this idea of self-reflection. So when you do rag, you perform retrieval based upon a question from an index. And this idea of self-reflection is saying based upon, for example, the relevance of the tree documents to my question or based upon the generations relative to my question or the generations relative to the documents, I want to make, I want to perform some kind of reasoning and potentially feed back and retry various steps. So that's kind of the big idea. And there's a few really interesting papers that implement this. And what I want to kind of show is that implementing these ideas using something that we've developed recently called LandGra

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=250,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

text_splitter.split_documents(text_documents)[:50]


[Document(page_content="Hi, this is Lance from the chain team. I'm going to talk about building a self-reflective rag apps from scratch using only open source and local models that run strictly on my laptop. Now one of those interesting trends in the rag research and a lot", metadata={'source': 'transcription.txt'}),
 Document(page_content='research and a lot of methods that become pretty popular in recent months and weeks is this idea of self-reflection. So when you do rag, you perform retrieval based upon a question from an index. And this idea of self-reflection is saying based', metadata={'source': 'transcription.txt'}),
 Document(page_content='is saying based upon, for example, the relevance of the tree documents to my question or based upon the generations relative to my question or the generations relative to the documents, I want to make, I want to perform some kind of reasoning and', metadata={'source': 'transcription.txt'}),
 Document(page_content="of reasoning and potentiall

In [12]:
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
embedded_query = embeddings.embed_query("Who is Mary's Sister?")

print(f"Embedding length: {len(embedded_query)}")

print(embedded_query[:10])

Embedding length: 1536
[0.0003943086566131897, -0.03709936963004439, -0.010911204144300022, 0.0011529632847324287, -0.023190303249241876, 0.014407647137365998, -0.01196588972874877, -0.0005864692040685945, -0.01259870107941802, -0.03684368805046346]


In [13]:
from pinecone import Pinecone, ServerlessSpec, PodSpec
import time
import os

use_serverless = True
pinecone_api_key = os.environ.get('PINECONE_API_KEY')

# configure client
pc = Pinecone(api_key=pinecone_api_key)

if use_serverless:
    spec = ServerlessSpec(cloud='aws', region='us-west-2')
else:
    # if not using a starter index, you should specify a pod_type too
    spec = PodSpec()

# check for and delete index if already exists
index_name = 'langchain-retrieval-augmentation-fast'
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# create a new index
pc.create_index(
    index_name,
    dimension=1536,  # dimensionality of text-embedding-ada-002
    metric='dotproduct',
    spec=spec
)

# wait for index to be initialized
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

  from tqdm.autonotebook import tqdm


In [14]:
index = pc.Index(index_name)
index.describe_index_stats()
len(text_splitter.split_documents(text_documents))
for batch in text_splitter.split_documents(text_documents):
    print(batch)



page_content="Hi, this is Lance from the chain team. I'm going to talk about building a self-reflective rag apps from scratch using only open source and local models that run strictly on my laptop. Now one of those interesting trends in the rag research and a lot" metadata={'source': 'transcription.txt'}
page_content='research and a lot of methods that become pretty popular in recent months and weeks is this idea of self-reflection. So when you do rag, you perform retrieval based upon a question from an index. And this idea of self-reflection is saying based' metadata={'source': 'transcription.txt'}
page_content='is saying based upon, for example, the relevance of the tree documents to my question or based upon the generations relative to my question or the generations relative to the documents, I want to make, I want to perform some kind of reasoning and' metadata={'source': 'transcription.txt'}
page_content="of reasoning and potentially feed back and retry various steps. So that's ki

In [15]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_community.vectorstores import DocArrayInMemorySearch


embeddings = OpenAIEmbeddings()
documents = text_splitter.split_documents(text_documents)
vectorstore2 = DocArrayInMemorySearch.from_documents(documents, embeddings)


In [16]:
from langchain_pinecone import PineconeVectorStore

index_name = "langchain-retrieval-augmentation-fast"

pinecone = PineconeVectorStore.from_documents(
    documents, embeddings, index_name=index_name
)

In [17]:
pinecone.similarity_search("What is GPT-4")[:3]

[Document(page_content='assistant tool. So that kind of gives you the main idea. And the key point is this is all running locally. Again, I used GPT for all embeddings for indexing up at the top right here. And I used Olawa with mistral 7B instruct and JSON mode for that', metadata={'source': 'transcription.txt'}),
 Document(page_content="play with these as you want. The point is here I'm just building up a quick local index. So I load it. I split it into chunks. Now this is the interesting bit. I'm going to use GPT for all embeddings from NOMIC, which is, let's actually pull up the", metadata={'source': 'transcription.txt'}),
 Document(page_content="this is the initial work describing our paper, SBIRT basically. So the key point is this. This is a locally running CPU optimized embedding model that works quite well. I found runs on your system, no API, nothing. So it's pretty nice. Runs fast. So", metadata={'source': 'transcription.txt'})]

In [33]:
chain = (
    {"context": pinecone.as_retriever(), "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

chain.invoke("Details on the content?")

'The content includes a blog post on autonomous agents, a discussion on a paper called Corrective Rag or Sea Rag, and the use of a rag prompt for question answering tasks.'