In [None]:
from langchain_community.vectorstores import Chroma
from langchain.schema.output_parser import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_cohere import CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
import assemblyai as aai
from langchain_core.runnables import RunnablePassthrough
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment; the
# API keys used later in this notebook are looked up with os.getenv().
load_dotenv()

In [None]:
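# One-time transcription step, left disabled: the notebook reads the saved
# transcript.txt further down instead of re-running AssemblyAI each time.
# NOTE(review): `url` is not used by the code below, which transcribes a local audio file.
# url = "https://www.youtube.com/watch?v=I2ZK3ngNvvI&ab_channel=LexClips"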

# config = aai.TranscriptionConfig(
#     speaker_labels=True)

# audio_file_path = "audio.mp3"
# transcriber = aai.Transcriber()

# transcript = transcriber.transcribe(audio_file_path, config).utterances 
# transcript =  "".join([f"Speaker {utterance.speaker}: {utterance.text}" for utterance in transcript])

In [None]:
# with open("transcript.txt",'w') as file:
#     file.write(transcript)

In [None]:
# Read the whole saved transcript into memory as one string.
with open("transcript.txt", "r") as transcript_file:
    transcript = transcript_file.read()

In [None]:
# Splitter settings: chunks of at most 400 units with a 100-unit overlap
# between neighbouring chunks, so text cut at a boundary still appears
# intact in one of the two chunks.
splitter_options = {"chunk_size": 400, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [None]:
# create_documents takes a list of texts; the transcript is the only entry.
transcript_texts = [transcript]
splits = text_splitter.create_documents(transcript_texts)

# Report how many chunks the transcript was divided into.
num_splits = len(splits)
print(num_splits)

In [None]:
# Spot-check the text of the first chunk.
first_chunk = splits[0]
print(first_chunk.page_content)

In [None]:
# Embedding model used to vectorise the transcript chunks; the key is read
# from the "cohere_api_key" environment variable.
cohere_key = os.getenv("cohere_api_key")
embeddings = CohereEmbeddings(
    model="embed-english-light-v3.0",
    cohere_api_key=cohere_key,
)

# Embed every chunk and index it in a Chroma vector store.
store_inputs = {"documents": splits, "embedding": embeddings}
vectorstore = Chroma.from_documents(**store_inputs)

In [None]:
# Retriever over the vector store, configured with k=5 results per query.
retriever_search_options = dict(k=5)
retriever = vectorstore.as_retriever(search_kwargs=retriever_search_options)

In [None]:
# Prompt template with two slots: {context} receives the retrieved transcript
# text and {question} receives the user's query.
rag_prompt = """You are a helpful assistant who answers the users' queries from the given YouTube video transcript parts. Answer the question based only on the following YouTube video transcript part:

{context}

Question: {question}"""

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",google_api_key=os.getenv("google_api_key"))

prompt = ChatPromptTemplate.from_template(rag_prompt)


def format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string.

    Without this step the list of Document objects is interpolated into the
    prompt via its Python repr (``[Document(page_content=...), ...]``), which
    adds wrapper noise around the transcript text.

    Args:
        docs: iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines; an
        empty string when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question fans out to two branches: one retrieves and formats the
# matching chunks for {context}, the other passes the question through
# unchanged. The filled prompt goes to the LLM and the reply is reduced to a
# plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
res = rag_chain.invoke("How do I become an expert at some arbitrary skill?")
print(res)

In [None]:
# Inspect the raw chunks the retriever returns for this question.
# Uses the Runnable `invoke` API (the same one rag_chain is called with)
# instead of the deprecated `get_relevant_documents`.
retriever.invoke("How do I become an expert at some arbitrary skill?")

In [None]:
# Follow-up question about backpropagation, answered through the same chain.
backprop_query = "what they guest speaker said about backpropagation?"
res = rag_chain.invoke(backprop_query)
print(res)

In [None]:
# Follow-up question about advice for beginners, answered through the same chain.
beginner_query = "On which thing beginners should focued on?"
res = rag_chain.invoke(beginner_query)
print(res)

In [None]:
# Hard-coded text in the form of a printed list of Document objects (five
# `Document(page_content=...)` entries), used below to render a prompt by hand.
# NOTE(review): presumably pasted from the retriever output for the question
# below; the leading "[" of the list is missing from the paste.
context = """Document(page_content="10,000 hours, you can literally pick an arbitrary thing. And I think if you spend 10,000 hours of deliberate effort and work, you actually will become an expert at it. And so I think it's kind of like a nice thought. And so basically I would focus more on like are you spending 10,000 hours? That's what I focus on.Speaker A: So, and then thinking about what kind of mechanisms maximize your"),
 Document(page_content="Speaker A: You're one of the greatest teachers of machine learning AI ever, from CS 231 N to today. What advice would you give to beginners interested in getting into machine learning?Speaker B: Beginners are often focused on what to do and I think the focus should be more like how much you do. So I am kind of like believer on a high level in this 10,000 hours kind of concept where you just kind"),
 Document(page_content="where you put it. And you'll iterate and you'll improve and you'll waste some time. I don't know if there's a better way you need to put in 10,000 hours, but I think it's actually really nice because I feel like there's some sense of determinism about being an expert at a thing. If you spend 10,000 hours, you can literally pick an arbitrary thing. And I think if you spend 10,000 hours of"),
 Document(page_content="1 hour of content.Speaker A: To get 1 hour. It's interesting. I mean, is it difficult to go back to the basics? Do you draw a lot of wisdom from going back to the basics?Speaker B: Yeah, going back to backpropagation, loss functions, where they come from. And one thing I like about teaching a lot, honestly, is it definitely strengthens your understanding. So it's not a purely altruistic activity."),
 Document(page_content="on a bunch of lectures just now. I was reminded back to my days of 331 and just how much work it is to create some of these materials and make them good. The amount of iteration and thought, and you go down blind alleys and just how much you change it. So creating something good in terms of educational value is really hard, and it's not fun.Speaker A: It's difficult. So people should definitely")]"""

# Render the prompt by hand to see the text produced when the hard-coded
# context and this question are substituted into the prompt wording.
question = "How do I become an expert at some arbitrary skill?"

instructions = (
    "You are a helpful assistant who answers the users' queries from the given "
    "YouTube video transcript parts. Answer the question based only on the "
    "following YouTube video transcript part:"
)

# Instructions, context and question are separated by blank lines.
rag_prompt = "\n\n".join([instructions, context, "Question: " + question])

print(rag_prompt)