In [76]:
from langchain_openai import ChatOpenAI
from constants import OPENAI_KEY
import os
os.environ["OPENAI_API_KEY"] = OPENAI_KEY

llm = ChatOpenAI(openai_api_key=OPENAI_KEY, model_name="gpt-3.5-turbo-1106", temperature=0.7)
#llm = OpenAI(model_name="text-davinci-003")

In [71]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain 

In [5]:
embeddings = OpenAIEmbeddings()

In [6]:
def create_db_from_youtube_video_url(video_url: str) -> FAISS:
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(transcript)

    db = FAISS.from_documents(docs, embeddings)
    return db


In [7]:
data = create_db_from_youtube_video_url("https://www.youtube.com/watch?v=dlJQ-YiXgCs")

In [12]:
query = "What did the document say?"
docs = data.similarity_search(query)

In [13]:
docs

[Document(page_content="the first method that we have we added this really early on in Lang chain and it's been around ever since it's it's it was inspired by offer press's self- askk paper and it just prints out um the steps that happen um at at a highest level so we're not getting all the details but we're getting the high level of what's happening so if we set verose equals true here um and then recreate the agent and then we call it again we can see here that we have some nice printing of what exactly is going on so we're calling this tool with this search query we're getting back this response and then we're getting back this final answer and so again we're not seen all the information but we're seeing high level what steps are occurring so this is a good first pass we've added another method since then called debug equals true and this logs way more information and so it might be Overkill sometimes but other times it can be really helpful to see exactly what's going on so here we

In [61]:
query="Ask me about the video?"
docs = data.similarity_search(query, 4)
docs_page_content = " ".join([d.page_content for d in docs]) 

In [62]:
docs_page_content

"environment like this uh uh Jupiter notebook but when you're maybe not in this or when you want to persist these logs more or if you just want to explore these logs in a more intuitive way right this is this is a little bit of a dump of everything um then you're probably going to want a more uh full-fledged tool and and that's why we built Lang Smith um Lang Smith isn't the focus of uh this video or the uh uh launch of Lang chain 0.1 in general um but it's absolutely worth highlighting because Lang chain is built to be the most observable framework but we've also built tools along the side to make that a reality so with Lang Smith what we can do is we can go into lsmith we can go to our project we can just set these three environment variables um and so we can do that here and I'm going to pause the video and put in my API key so you don't see it I've done that now we can now set debug equals false just to get rid of this noise and then we can run it and what's going to happen is in t

In [77]:
query="Ask me about the video?"
docs = data.similarity_search(query, k=4)
docs_page_content = " ".join([d.page_content for d in docs])
prompt = PromptTemplate(
    input_variables=["question", "docs"],
    template="""
    You are a helpful assistant that that can answer questions about youtube videos 
    based on the video's transcript.
    
    Answer the following question: {question}
    By searching the following video transcript: {docs}
    
    Only use the factual information from the transcript to answer the question.
    
    If you feel like you don't have enough information to answer the question, say "I don't know".
    
    Your answers should be verbose and detailed.
    """,
)

chain = LLMChain(llm=llm, prompt=prompt)

response = chain.run(question=query, docs=docs_page_content)
response = response.replace("\n", "")

In [79]:
response

"I'm sorry, but I don't have enough information to answer the question based on the provided transcript."

In [78]:
def get_response_from_query(llm, db, query, k=4):
   
   docs = db.similarity_search(query, k=4)
   docs_page_content = " ".join([d.page_content for d in docs])
   prompt = PromptTemplate(
      input_variables=["question", "docs"],
      template="""
      You are a helpful assistant that that can answer questions about youtube videos 
      based on the video's transcript.
      
      Answer the following question: {question}
      By searching the following video transcript: {docs}
      
      Only use the factual information from the transcript to answer the question.
      
      If you feel like you don't have enough information to answer the question, say "I don't know".
      
      Your answers should be verbose and detailed.
      """,
   )

   chain = LLMChain(llm=llm, prompt=prompt)

   response = chain.run(question=query, docs=docs_page_content)
   response = response.replace("\n", "")
   return response, docs

In [80]:
query="Ask me about the video?"
response, docs  = get_response_from_query(llm=llm, db=data, query=query)

In [81]:
response

"I'm sorry, but based on the provided transcript, I cannot answer the question as it is not clear what the video is about. The transcript seems to discuss a tool called Lang Smith and its features, but it does not provide specific details about the topic of the video. Therefore, I don't have enough information to accurately answer the question."

In [56]:
from langchain_core.prompts import PromptTemplate

# Instantiation using from_template (recommended)
#prompt = PromptTemplate.from_template("Say {foo}")
#prompt.format(foo="bar")

# Instantiation using initializer
prompt = PromptTemplate(input_variables=["foo"], template="Say hi {foo}")

chain = prompt | llm

In [57]:
#chain = prompt | llm
#chain = LLMChain(llm=llm, prompt=prompt)
print(chain.invoke({"foo":"what is your name"}))

content='Hi, my name is Assistant. How can I help you today?'


In [52]:
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'foo': {'title': 'Foo', 'type': 'string'}}}

In [46]:
from langchain_core.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

In [47]:
chain = prompt | llm

In [48]:
question = "Who was the president in the year Justin Beiber was born?"
print(chain.invoke({"question": question}))

content='Justin Bieber was born on March 1, 1994. The president of the United States at that time was Bill Clinton, who was in office from January 20, 1993, to January 20, 2001.'


In [49]:
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'question': {'title': 'Question', 'type': 'string'}}}

### Complete Youtube Vidio Analysis 

In [82]:
db = create_db_from_youtube_video_url("https://www.youtube.com/watch?v=U6SddR1NnqY")

In [83]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x18bfeab3a90>

In [84]:
query="Ask me about the video?"
response, docs  = get_response_from_query(llm=llm, db=db, query=query)

In [85]:
response

'The video transcript discusses a variety of topics, including the demolition of temples and the construction of new ones, the worship of iconic figures, the concept of God, and the idea of belief systems. The speaker also addresses the historical and cultural significance of certain places and the impact of past wars on communities. Additionally, the speaker talks about the divisive nature of certain issues and the need for people to coexist peacefully despite their differing beliefs.The transcript does not provide a specific question to answer, but it does touch on the topic of the construction of temples and the significance of certain places. The speaker discusses the demolition of temples and the desire to build new ones, as well as the cultural and religious importance of these sites. The speaker also addresses the impact of past wars and the division between communities, emphasizing the need for peaceful coexistence.Overall, the transcript provides a deep insight into the cultur