In [None]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file into the process environment.
load_dotenv()

# API credentials are read from the environment rather than hardcoded.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # only needed for the OpenAI variant

# Video that is downloaded and transcribed further down.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"


In [41]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI

# Gemini chat model used by the chains in this notebook.
model = "gemini-1.5-pro"
llm = ChatGoogleGenerativeAI(google_api_key=GOOGLE_API_KEY, model=model)

# OpenAI alternative:
# llm = ChatOpenAI(model="gpt-3.5-turbo")


In [42]:
# Smoke test: send a plain string to the chat model and print only the reply text.
result = llm.invoke("what is the square root of 100")
print(result.content)

The square root of 100 is 10.


In [4]:
# Second call; the reply is stored in `result` without being displayed.
result = llm.invoke("what is 2+2?")
# print(result.content)  # uncomment to show the reply text

In [5]:
from langchain_core.output_parsers import StrOutputParser

# Pipe the model into a string parser so invoking the chain yields plain text.
parser = StrOutputParser()
chain = llm | parser

chain.invoke("What MLB team Won the world series during the COVID-19 pandamic?")

'The Los Angeles Dodgers won the 2020 World Series, which was played during the COVID-19 pandemic.'

In [6]:
from langchain_core.prompts import ChatPromptTemplate


# Prompt that tells the model to answer from {context} only and to reply
# "I don't know" when the context does not contain the answer.
template = """

Answer the question based on the context below. If you cannot find the answer in the context,
 just say "I don't know". Don't try to make up an answer.

 context: {context}

 question: {question}

"""

prompt = ChatPromptTemplate.from_template(template)

# Render the template once with sample values to inspect the final prompt text.
prompt.format(
    question="who is Mary's sister's ?",
    context="Mary's sister is sussana.",
)


'Human: \n\nAnswer the question based on the context below. If you cannot find the answer in the context,\n just say "I don\'t know". Don\'t try to make up an answer.\n\n context: Mary\'s sister is sussana.\n\n question: who is Mary\'s sister\'s ?\n\n'

In [8]:
# Full pipeline: fill the prompt, call the model, parse the reply to text.
chain = prompt | llm | parser

chain.invoke(
    {
        "context": "Mary's sister is sussana.",
        "question": "who is sussana's sister's ?",
    }
)

'Mary'

# Combining chains (a side note, separate from the main flow)

We can combine different chains to create more complex workflows. For example, let's create a second chain that translates the result from the first chain into a different language.

![Alt Text](Screenshot%202025-03-11%20at%2023.09.25.png)

In [9]:
# Prompt with two slots: the text to translate and the target language.
translate_prompt = ChatPromptTemplate.from_template("translate {answer} to {language} ")

In [12]:
from operator import itemgetter

# Inputs for the translation prompt: "answer" is produced by running the
# question-answering `chain` on the incoming dict, and "language" is copied
# from that dict's "Language" entry.
translation_inputs = {"answer": chain, "language": itemgetter("Language")}

translate_chain = translation_inputs | translate_prompt | llm | parser

translation_request = {
    "context": "Mary's sister is Susana. She doesn't have any more siblings.",
    "question": "How many sisters does mary have?",
    "Language": "Hindi",
}

translate_chain.invoke(translation_request)

'The Hindi word for "one" is **एक (ek)**.'

# Transcribing the YouTube video

The context we want to send to the model comes from a YouTube video. We download the video's audio and transcribe it using Whisper.

In [14]:
import tempfile
import whisper
from pytube import YouTube


# Download the video's audio and transcribe it, caching the text in
# transcription.txt. The work is skipped when that file already exists.
if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()
    if audio is None:
        # .first() gives None when no stream matches the filter; fail with a
        # clear message instead of an AttributeError on audio.download below.
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

    # Let's load the base model. This is not the most accurate
    # model but it's fast.
    whisper_model = whisper.load_model("base")

    # The audio only needs to exist while it is being transcribed, so it is
    # downloaded into a temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()

    # Separate names for the audio path and the output handle (the original
    # reused `file` for both).
    with open("transcription.txt", "w") as transcript_file:
        transcript_file.write(transcription)

In [15]:
# Read the cached transcript from disk and preview its first 100 characters.
with open("transcription.txt") as transcript_file:
    transcription = transcript_file.read()

transcription[:100]

"I think it's possible that physics has exploits and we should be trying to find them. arranging some"

In [43]:
# Send the entire transcript as context. Any failure is printed rather than
# raised so the notebook keeps running; the recorded run ended in a
# 429 "Resource has been exhausted" error.
whole_transcript_request = {
    "context": transcription,
    "question": "Is reading papers a good idea?",
}

try:
    chain.invoke(whole_transcript_request)
except Exception as error:
    print(error)

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


429 Resource has been exhausted (e.g. check quota).


In [59]:
from langchain_community.document_loaders import TextLoader

# Load the transcript file as LangChain documents and preview the first one.
loader = TextLoader("transcription.txt")
TextDocuments = loader.load()

first_document = TextDocuments[0]
print(first_document.page_content[:100])

I think it's possible that physics has exploits and we should be trying to find them. arranging some


In [65]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split into small chunks (size 100, overlap 20); the printed sample shows
# neighbouring chunks sharing some text.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=100)
docs = text_splitter.split_documents(TextDocuments)

print(docs[:5])

[Document(metadata={'source': 'transcription.txt'}, page_content="I think it's possible that physics has exploits and we should be trying to find them. arranging some"), Document(metadata={'source': 'transcription.txt'}, page_content='arranging some kind of a crazy quantum mechanical system that somehow gives you buffer overflow,'), Document(metadata={'source': 'transcription.txt'}, page_content='buffer overflow, somehow gives you a rounding error in the floating point. Synthetic intelligences'), Document(metadata={'source': 'transcription.txt'}, page_content="intelligences are kind of like the next stage of development. And I don't know where it leads to."), Document(metadata={'source': 'transcription.txt'}, page_content='where it leads to. Like at some point, I suspect the universe is some kind of a puzzle. These')]


In [67]:
# Re-split with larger chunks (size 1000, overlap 20). This rebinds both
# `text_splitter` and `docs`, so later cells work with these larger chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1000)
docs = text_splitter.split_documents(TextDocuments)

print(docs[:5])

[Document(metadata={'source': 'transcription.txt'}, page_content="I think it's possible that physics has exploits and we should be trying to find them. arranging some kind of a crazy quantum mechanical system that somehow gives you buffer overflow, somehow gives you a rounding error in the floating point. Synthetic intelligences are kind of like the next stage of development. And I don't know where it leads to. Like at some point, I suspect the universe is some kind of a puzzle. These synthetic AIs will uncover that puzzle and solve it. The following is a conversation with Andre Kappathi, previously the director of AI at Tesla. And before that, at OpenAI and Stanford, he is one of the greatest scientist engineers and educators in the history of artificial intelligence. This is the Lex Friedman podcast to support it. Please check out our sponsors and now to your friends. Here's Andre Kappathi. What is a neural network? And what does it seem to do such a surprisingly good job of learning

In [97]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model shared by the vector stores built later in the notebook.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed one sample query and inspect the vector's length and first values.
embedde_query = embeddings.embed_query("Who os mary's sister?")
print(f"length of embedding: {len(embedde_query)}")
print(embedde_query[:4])

# OpenAI substitute:
# from langchain_openai.embeddings import OpenAIEmbeddings
# embeddings = OpenAIEmbeddings()
# embedde_query = embeddings.embed_query("What is the square root of 100")
# print(f"length of embedding: {len(embedde_query)}")
# print(embedde_query)

length of embedding: 768
[0.05077454447746277, -0.058103397488594055, -0.01842026598751545, -0.025286737829446793]


In [98]:
# Reference sentence whose embedding the other two are compared against.
embedde_query2 = embeddings.embed_query("dog is a animal")

# One sentence that reuses the reference's words and one on an unrelated topic.
sentence1, sentence2 = (
    embeddings.embed_query(text)
    for text in (
        "animal is not a dog",
        "american and chinese are two different countries",
    )
)

In [100]:
# for checking the similarities 

from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity works on 2-D inputs and returns a matrix, so each vector
# is wrapped in a list and the single score is read back with [0][0].
query_sentence1_similarity, query_sentence2_similarity = (
    cosine_similarity([embedde_query2], [candidate])[0][0]
    for candidate in (sentence1, sentence2)
)

query_sentence1_similarity, query_sentence2_similarity


(np.float64(0.8425441147468032), np.float64(0.5983353623710814))

In [101]:
#setting up the vector store

from langchain_community.vectorstores import DocArrayInMemorySearch

# Small set of sample sentences for trying out similarity search.
sample_facts = [
    "Mary's sister is Susana",
    "John and Tommy are brothers",
    "Patricia likes white cars",
    "Pedro's mother is a teacher",
    "Lucia drives an Audi",
    "Mary has two siblings",
    "Mercedes are amazing automobiles",
]

# Embed the sentences and keep them in an in-memory vector store.
vectorStore1 = DocArrayInMemorySearch.from_texts(sample_facts, embedding=embeddings)


In [102]:
# Three closest sample sentences to the question, each paired with its score.
vectorStore1.similarity_search_with_score(
    query="Who is Mary's sister?",
    k=3,
)

[(Document(metadata={}, page_content="Mary's sister is Susana"),
  np.float64(0.7797462256563394)),
 (Document(metadata={}, page_content='Mary has two siblings'),
  np.float64(0.7638965117390767)),
 (Document(metadata={}, page_content='John and Tommy are brothers'),
  np.float64(0.6143092531303924))]

In [103]:
# Wrap the sample store as a retriever so it can be invoked with a query
# string and later used as a step inside a chain.
retriever1 = vectorStore1.as_retriever()
# Returns the matching documents only (no scores), as the recorded output shows.
retriever1.invoke("Who is Mary's sister?")

[Document(metadata={}, page_content="Mary's sister is Susana"),
 Document(metadata={}, page_content='Mary has two siblings'),
 Document(metadata={}, page_content='John and Tommy are brothers'),
 Document(metadata={}, page_content='Patricia likes white cars')]

In [104]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Build both prompt inputs from a single question string: the retriever
# receives it to produce "context", and the passthrough forwards it
# unchanged as "question".
setup = RunnableParallel(
    context=retriever1,
    question=RunnablePassthrough(),
)

setup.invoke("What color is Patricia's car?")

{'context': [Document(metadata={}, page_content='Patricia likes white cars'),
  Document(metadata={}, page_content='Lucia drives an Audi'),
  Document(metadata={}, page_content="Pedro's mother is a teacher"),
  Document(metadata={}, page_content='Mercedes are amazing automobiles')],
 'question': "What color is Patricia's car?"}

In [105]:
# Retrieval-augmented chain: gather context, fill the prompt, ask the model,
# and parse the reply to text.
chain = setup | prompt | llm | parser

chain.invoke("What color is Patricia's car?")

'White'

In [106]:
# Ask the same chain a different question; the answer is drawn from whatever
# context the retriever supplies for it.
chain.invoke("What'a a great car?")

'Mercedes'

In [None]:
# Number of transcript chunks currently held in `docs`.
len(docs)

221

In [108]:
# Now index the actual transcript chunks in an in-memory store, using the
# same embedding model as for the sample sentences.
vectorStore2 = DocArrayInMemorySearch.from_documents(
    docs,
    embedding=embeddings,
)



In [110]:
# Same pipeline shape as the sample-sentence chain, but the context now comes
# from the transcript store.
transcript_retriever = vectorStore2.as_retriever()
setup2 = RunnableParallel(
    context=transcript_retriever,
    question=RunnablePassthrough(),
)

chain = setup2 | prompt | llm | parser

chain.invoke("What is AGI?")

"Based on the context, AGI is mentioned in relation to intelligence, generative models, and world models, and it's something that might be achievable through digital interaction alone or possibly through interaction with the physical world via robotics like Optimus.  However, a precise definition of AGI is not provided."

In [112]:
# Same wiring written with a plain dict as the first step instead of an
# explicit RunnableParallel.
retrieval_inputs = {
    "context": vectorStore2.as_retriever(),
    "question": RunnablePassthrough(),
}

chain = retrieval_inputs | prompt | llm | parser

chain.invoke("What is systhetic intelligence?")

"I don't know."

In [114]:
#setting up the pinecone vector store 

from langchain_pinecone import PineconeVectorStore

import hashlib

# Name of the Pinecone index that receives the transcript chunks.
index_name = "firstrag-underfitted"

# Derive each vector ID from the chunk text. Without explicit IDs every run of
# this cell inserts the chunks under fresh IDs, so re-running it piles up
# duplicates in the persistent index. With content-based IDs a re-run
# overwrites the same records instead. Chunks with identical text share one ID.
chunk_ids = [
    hashlib.sha256(doc.page_content.encode("utf-8")).hexdigest() for doc in docs
]

# Embed the chunks and upsert them into the index.
pinecone = PineconeVectorStore.from_documents(
    docs, embeddings, index_name=index_name, ids=chunk_ids
)


  from tqdm.autonotebook import tqdm


In [117]:
# Three closest transcript chunks in the Pinecone index, each with its score.
pinecone.similarity_search_with_score(
    query="What is Hollywood going to start doing?",
    k=3,
)

[(Document(id='3453df23-0ad6-4371-bf9d-9769397755ce', metadata={'source': 'transcription.txt'}, page_content="It's like high quality audio and you're speaking usually pretty clearly. I don't know what open AI's plans are either. Yeah, there's always fun projects basically. And stable diffusion also is opening up a huge amount of experimentation. I would say in the visual realm and generating images and videos and movies. I'll think like videos now. And so that's going to be pretty crazy. That's going to almost certainly work and it's going to be really interesting when the cost of content creation is going to fall to zero. You used to need a painter for a few months to paint a thing and now it's going to be speak to your phone to get your video. So Hollywood will start using it to generate scenes, which completely opens up. Yeah, so you can make a movie like Avatar eventually for under a million dollars. Much less. Maybe just by talking to your phone. I mean, I know it sounds kind of c

In [118]:
# Final chain: identical wiring, with Pinecone as the source of context.
pinecone_inputs = {
    "context": pinecone.as_retriever(),
    "question": RunnablePassthrough(),
}

chain = pinecone_inputs | prompt | llm | parser

chain.invoke("What is Hollywood going to start doing??")

'Hollywood will start using AI to generate scenes.'