# Import Libraries and APIs

In [8]:
import os
import textwrap
from dotenv import find_dotenv, load_dotenv

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
import openai


from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.agents import load_tools
from langchain.agents import get_all_tool_names

from langchain_community.embeddings import OpenAIEmbeddings
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA

# Load API Keys from .env File

In [10]:
# Read variables from a .env file into the process environment. load_dotenv()
# locates the file on its own, so no explicit find_dotenv() call is required.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast with an actionable message instead of an opaque client error in a later cell.
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Model

## Initializing Embedding Model

In [11]:
# Shared embedding model, constructed with the library defaults.
# create_db_from_youtube_video_url reads this module-level name when it builds
# the FAISS index, so this cell must run before that function is called.
embeddings = OpenAIEmbeddings()

## Declaring useful functions

In [21]:
# Example YouTube URLs used as inputs in this notebook.
# video_url: "Sam Altman: OpenAI, GPT-5, Sora, Board Saga, Elon Musk, Ilya, Power & AGI | Lex Fridman Podcast"
video_url = "https://www.youtube.com/watch?v=jvqFAi7vkBc" 
# business_tax_url: presumably a video about small-business tax deductions, judging
# by the transcript excerpt retrieved later in the notebook -- TODO confirm the title.
business_tax_url = "https://www.youtube.com/watch?v=tQoVfGUH--o"

In [12]:
def create_db_from_youtube_video_url(video_url, chunk_size=1000, chunk_overlap=100):
    """Build a FAISS vector store from the transcript of a YouTube video.

    Args:
        video_url: URL of the YouTube video whose transcript should be indexed.
        chunk_size: Maximum size of each transcript chunk handed to the splitter.
        chunk_overlap: Amount of text shared between consecutive chunks.

    Returns:
        A FAISS vector store built from the transcript chunks.

    Raises:
        ValueError: If no transcript could be loaded for ``video_url``.

    Note:
        Embeddings are computed with the module-level ``embeddings`` object.
    """
    # Load the video's transcript as documents.
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Without any documents the index cannot be built; report that clearly here
    # rather than letting it surface as an obscure error from the vector store.
    if not transcript:
        raise ValueError(f"No transcript could be loaded for {video_url!r}")

    # Split the transcript into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(transcript)

    # Embed the chunks and index them in a vector store.
    db = FAISS.from_documents(docs, embeddings)

    return db

In [15]:
# Build the vector store for the business-tax example video. The call loads the
# transcript and embeds it, so this cell performs network requests.
db_test = create_db_from_youtube_video_url(business_tax_url)

In [26]:
# Sample question used to probe the vector store in the next cell.
query = "why should i incorporate?"

In [27]:
# Retrieve the 5 chunks closest to the query together with their scores, so the
# "Score" label below prints an actual score instead of the document metadata.
# NOTE(review): for FAISS the score is documented as a distance (lower = closer).
scored_results = db_test.similarity_search_with_score(query, k=5)

# Keep `results` as a plain list of documents, as before.
results = [doc for doc, _score in scored_results]

for doc, score in scored_results:
    print(f'* {doc.page_content} [Score: {score}] *')

* I want to get clear because I feel like a lot of people get really excited about this and get a little confused about how it works I cannot claim 100 of these uh expenses as tax deductions only a percentage so I can only claim a certain percentage of my home that I use for business purposes only another thing I also want to share too if you do not own your home you can claim your rent so even if you're renting you can claim that as a tax deduction as well and next we've got professional fees now if you watch my recent video about um a sole proprietorship versus Corporation and I shared my experience of being a sole proprietor for about five and a half years and a year ago incorporating my business the one thing I shared is I'm spending a lot more money on professional fees now that I'm Incorporated the good thing is those fee and I'm talking about you know legal fees accounting fees and Consulting fees um you can write those off so that is one kind of nice Silver Lining so if you you

In [29]:
# Chat model shared by the retrieval chain and the agent.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    api_key=openai_api_key,
    temperature=0.2,
)

# Conversation memory for the agent. ConversationBufferMemory has no window size
# (a `k` argument has no effect there); the windowed variant is the one that
# keeps only the last k exchanges.
conversational_memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    k=5,
    return_messages=True,
)

# Retrieval QA chain over the transcript vector store.
# - Source documents are not returned: with a second output key `qa.run` (used as
#   the agent tool function) refuses to run, because it requires exactly one output.
# - No memory is attached here: the agent owns the conversation memory, and sharing
#   it with this chain would also record every tool call into the chat history.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db_test.as_retriever(),
)

In [31]:
# qa.run(query)

In [32]:
from langchain.agents import Tool

# The agent's only tool: forwards a question to the retrieval QA chain.
tools = [
    Tool(
        func=qa.run,
        name="knowledge base",
        description="use this tool for every query to get more information and stories on the topic",
    ),
]

In [33]:
# Conversational ReAct-style chat agent wired to the tool list, the shared chat
# model and the conversation memory. `initialize_agent` is already imported in
# the notebook's import cell.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent='chat-conversational-react-description',
    memory=conversational_memory,
    verbose=True,
    max_iterations=3,
    early_stopping_method="generate",
)

  warn_deprecated(


In [34]:
# Inspect the agent's default prompt before it is replaced with a custom system message.
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and 

In [39]:
# Custom system message for the agent. It must not contain template placeholders:
# the agent prompt is only given `input`, `chat_history` and `agent_scratchpad`,
# so any other variable (such as a `docs` placeholder) would never be filled in.
# Transcript content reaches the model through the "knowledge base" tool instead.
sys_msg = """You are a helpful assistant that can answer questions about the youtube videos based on the video's transcript:
                
                Only use the factual information from the transcript to answer the question.
                
                If you feel like you don't have enough information to answer the question, say 'I don't know'
                
                Your answers should be verbosed and detailed.
"""


In [40]:
# Rebuild the agent prompt around the custom system message, then install it on
# the agent's LLM chain in place of the default prompt.
prompt = agent.agent.create_prompt(tools=tools, system_message=sys_msg)
agent.agent.llm_chain.prompt = prompt

In [41]:
# Display the prompt again to confirm the custom system message is now in place.
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template="You are a helpful assistant that can answer questions about the youtube videos based on the video's transcript:\n                \n                Only use the factual information from the transcript 

In [42]:

# First turn of the conversation. In the recorded run the agent replied directly
# with a "Final Answer" action, without calling the knowledge-base tool.
agent("Tell me about yourself")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I am an assistant here to help answer questions based on the transcript of YouTube videos."
}
```[0m

[1m> Finished chain.[0m


{'input': 'Tell me about yourself',
 'chat_history': [HumanMessage(content='Tell me about yourself'),
  AIMessage(content='I am an assistant here to help answer questions based on the transcript of YouTube videos.')],
 'output': 'I am an assistant here to help answer questions based on the transcript of YouTube videos.'}

In [43]:
# Second turn. The recorded output shows `chat_history` now containing the
# previous exchange, i.e. the conversation memory is being applied.
agent("what can you help me with?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I am here to help answer questions based on the transcript of YouTube videos."
}
```[0m

[1m> Finished chain.[0m


{'input': 'what can you help me with?',
 'chat_history': [HumanMessage(content='Tell me about yourself'),
  AIMessage(content='I am an assistant here to help answer questions based on the transcript of YouTube videos.'),
  HumanMessage(content='what can you help me with?'),
  AIMessage(content='I am here to help answer questions based on the transcript of YouTube videos.')],
 'output': 'I am here to help answer questions based on the transcript of YouTube videos.'}

In [44]:
# Third turn: an off-topic question, to see how the custom system message shapes the reply.
agent("are you alive?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I am an AI assistant programmed to assist with answering questions based on the transcript of YouTube videos."
}
```[0m

[1m> Finished chain.[0m


{'input': 'are you alive?',
 'chat_history': [HumanMessage(content='Tell me about yourself'),
  AIMessage(content='I am an assistant here to help answer questions based on the transcript of YouTube videos.'),
  HumanMessage(content='what can you help me with?'),
  AIMessage(content='I am here to help answer questions based on the transcript of YouTube videos.'),
  HumanMessage(content='are you alive?'),
  AIMessage(content='I am an AI assistant programmed to assist with answering questions based on the transcript of YouTube videos.')],
 'output': 'I am an AI assistant programmed to assist with answering questions based on the transcript of YouTube videos.'}

In [None]:
def get_response_from_query(db, query, k=4):
    """Answer ``query`` from the ``k`` transcript chunks most similar to it.

    Args:
        db: Vector store to search; it must provide ``similarity_search``.
        query: The question to answer.
        k: Number of closest chunks to retrieve and hand to the model.

    Returns:
        A ``(response, docs)`` tuple: the model's answer as a single line of
        text, and the retrieved documents (returned for transparency).

    Note:
        Every call is independent; no conversation history is kept between calls.
    """
    # Find the k chunks closest to the query and merge them into one context string.
    docs = db.similarity_search(query, k)
    docs_page_content = " ".join(d.page_content for d in docs)

    # Chat model used to generate the answer.
    chat = ChatOpenAI(
        model="gpt-3.5-turbo",
        api_key=openai_api_key,
        temperature=0.2,
    )

    # System prompt: carries the retrieved transcript text and the answering rules.
    template = """
                You are a helpful assistant that can answer questions about the youtube videos based on the video's transcript: {docs}
                
                Only use the factual information from the transcript to answer the question.
                
                If you feel like you don't have enough information to answer the question, say 'I don't know'
                
                Your answers should be verbosed and detailed.
                """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human prompt: carries the question itself.
    human_template = "Answer the following question {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    # Combine the system and human prompts into a single chat prompt.
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)
    response = chain.run(question=query, docs=docs_page_content)

    # Flatten the answer to one line. Whitespace runs (including newlines) become a
    # single space, so words on adjacent lines are not glued together.
    response = " ".join(response.split())

    return response, docs
    

In [None]:
def ask_up(video_url):
    """Build and return the transcript vector store for ``video_url``.

    Args:
        video_url: URL of the YouTube video to index.

    Returns:
        The vector store produced by ``create_db_from_youtube_video_url``.
    """
    # Call the builder with the given URL (a bare reference to the function would
    # build nothing) and hand the resulting store back to the caller.
    return create_db_from_youtube_video_url(video_url)

## Inference

In [None]:
# Index the business-tax video, ask for a summary, and print the answer wrapped
# to 85 columns.
db = create_db_from_youtube_video_url(business_tax_url)
query = "Give me a short summary of the video"
response, docs = get_response_from_query(db, query)
print(textwrap.fill(response, 85))

In [None]:
# Ask a second question against the same vector store.
# NOTE: get_response_from_query does not carry chat history from one call to the
# next, so this question is answered independently of the previous one.
follow_up_query = "What are the main points discussed?"
follow_up_response, _ = get_response_from_query(db, follow_up_query)
print(textwrap.fill(follow_up_response, width=85))

In [None]:
### End