# Import Libraries and APIs

In [1]:
import os
import textwrap
from dotenv import find_dotenv, load_dotenv

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
import openai

from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.agents import load_tools
from langchain.agents import get_all_tool_names

from langchain_community.embeddings import OpenAIEmbeddings
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

# Load API Keys from .env File

In [2]:
!pwd

/Users/karankaran/Desktop/Github_Karan/Youtube_Questioning_ChatBot/Jupyter Notebooks


In [3]:
# load_dotenv(find_dotenv())
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Model

## Initializing Embedding Model

In [4]:
embeddings = OpenAIEmbeddings()

  warn_deprecated(


## Declaring useful functions

In [5]:
### Input: The Youtube Video URL
### Output: A vector Store based on the transcripts of the input youtube video

def create_db_from_youtube_video_url(video_url):
    ##Loading Youtube Transcripts as documents
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()
    
    ##Splitting the documents into chunks, with a chunk_size and overlap strategy
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap =100)
    docs = text_splitter.split_documents(transcript)
    
    ## Creating a Vector store client based on th documents and chuks
    db = FAISS.from_documents(docs, embeddings)
    
    return db

In [6]:
### Input: The vector store created in the last step, Query(Input) and k (number of closest embeddings to Query)
### Output: Response to the Query, k closest docs joined together(for model transparency and Understanding)

def get_response_from_query(db, query, k=4):
    
    ## Finding the k closest vector embeddings to the query in the VS
    docs = db.similarity_search(query, k)
    docs_page_content = " ".join([d.page_content for d in docs])
    
    
    ## Initializing a ChatModel 
    chat = ChatOpenAI(
        model = "gpt-3.5-turbo"
        ,api_key=openai_api_key
        ,temperature=0.2
    )
    chunks = []
    
    ##Initializing a template
    template = """
                You are a helpful assistant that can answer questions about the youtube videos based on the video's transcript: {docs}
                
                Only use the factual information from the transcript to answer the question.
                
                If you feel like you don't have enough information to answer the question, say 'I don't know'
                
                Your answers should be verbosed and detailed.
                """
    ## Initializing system prompt
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    
    
    ## Initializing Human prompt
    human_template = "Answer the following question {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    
    ## Creating a chatpromot based on Human and System prompt
    chat_prompt = ChatPromptTemplate.from_messages(
                [system_message_prompt, human_message_prompt])
    
    ## Chaining everything
    chain = LLMChain(llm=chat, prompt=chat_prompt)

    
    ## Getting the response to the query
    response = chain.run(question= query, docs= docs_page_content)
    response = response.replace("\n","")
       
    
    return response, docs
    

In [7]:
def ask_up(video_url):
    db = create_db_from_youtube_video_url
    return

## Inference

In [12]:
## Sam Altman: OpenAI, GPT-5, Sora, Board Saga, Elon Musk, Ilya, Power & AGI | Lex Fridman Podcast 
video_url = "https://www.youtube.com/watch?v=jvqFAi7vkBc" 
business_tax_url = "https://www.youtube.com/watch?v=tQoVfGUH--o"

In [13]:
db = create_db_from_youtube_video_url(business_tax_url)

query = "Give me a short summary of the video"
    
response, docs = get_response_from_query(db, query)

print(textwrap.fill(response, width=85))

The video is a sponsored content piece discussing the top 10 tax write-offs for
Canadian small businesses. The host, Jessica Morehouse, highlights the importance of
tax deductions for small businesses and introduces a cloud accounting software called
reinvest wealth that helps Canadian small businesses streamline their finances. The
software offers smart accounting tools, including a recommendations engine that
suggests tax write-offs based on the business's expenses. Jessica mentions various
write-offs like payroll, research and development expenses, repair and maintenance
expenses, advertising and marketing write-offs, and travel expenses. She encourages
viewers to utilize the software to track their expenses and maximize their tax
deductions. Additionally, she invites viewers to ask questions in the comments
section for future videos and promotes subscribing to her channel for more financial
education content.


In [14]:
# Continue the conversation with memory
follow_up_query = "What are the main points discussed?"
follow_up_response, _ = get_response_from_query(db, follow_up_query)
print(textwrap.fill(follow_up_response, width=85))

The main points discussed in the video are:1. The speaker, Jessica Morehouse, talks
about the benefits of running a small business, such as being your own boss and
having tax write-offs.2. She mentions that for new small businesses or those looking
for more tax deductions towards the end of the year, there are specific tax write-
offs available for Canadian small businesses.3. Jessica introduces the top 10 tax
write-offs for Canadian small businesses that viewers should be aware of.4. She
thanks Reinvest Wealth for sponsoring the video and promoting their accounting
software, which is free to use.5. Jessica encourages viewers to ask questions about
taxes and tax write-offs in the comments section for future video ideas.6. She
explains a feature of the accounting software called the recommendations engine,
which helps users identify potential write-offs like marketing agency expenses or
travel costs.7. The software also helps track monthly cash flow, business expenses,
net income, and t

In [11]:
### End