In [58]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

In [59]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS

In [60]:
# Video ID of the Joe Rogan podcast episode 2275 with Magnus Carlsen.
video_id = "ybuJ_nIXwGE"
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
except TranscriptsDisabled:
    print("No caption found for this video.")
    # Re-raise so the notebook stops here: without a transcript every later
    # cell would either hit a NameError or silently reuse a stale `transcript`
    # left in the kernel by a previous run.
    raise
else:
    # Each transcript entry carries its caption under the 'text' key; join
    # them all into one space-separated string.
    transcript = " ".join(item['text'] for item in transcript_list)

In [61]:
# Report how many characters the joined transcript contains.
print(f"Transcript length: {len(transcript)}")

Transcript length: 115648


In [62]:
# Split the transcript into chunks of up to 1000 characters, with a
# 200-character overlap configured between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
# create_documents takes a list of texts; here there is only the one transcript.
chunks = splitter.create_documents([transcript])

In [63]:
# Number of chunks the splitter produced from the transcript.
len(chunks)

145

In [64]:
# Inspect the text of the first chunk.
chunks[0].page_content

"Joe Rogan podcast check it out The Joe Rogan Experience Train by day Joe Rogan podcast by night all day all right we're up and roll Magnus Carlon ladies and gentlemen you want some coffee no oh this is water uh tell Jeff to bring in the coffee forgot to bring in the coffee no no I'm good with water well I need coffee I'm going to keep up with you buddy and of course Tony hinchliff is here who's a gigantic chess fan and just his pants yesterday when I told him you were coming in and then immediately I said you got to come with me and so Tony's here as well it's an honor to meet you man um I I I'm always fascinated by people that are at the top of something that's insanely difficult like chess and I'm always wondering like how much time is involved how much how often do you play and when did you start how old were you when you first started playing I think my dad my dad is an avid chess player so I think he uh t thought that I might have some Talent so he thought he taught me pretty"

In [65]:
# Import from langchain_community: the `langchain.embeddings` re-export of
# HuggingFaceEmbeddings is deprecated.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise the transcript chunks.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [66]:
# Build a FAISS vector store from the transcript chunks using the embedding model.
vectorestore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

# Retrieval

In [67]:
# Expose the vector store as a retriever: similarity search with k=3.
retriver = vectorestore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=3),
)

In [68]:
# Ollama LLM wrapper configured with the phi3 model and temperature 0.1.
llm = OllamaLLM(
    temperature=0.1,
    model="phi3:latest",
)

In [69]:
# Prompt that restricts the model to the supplied context and asks it to say
# 'I don't know' otherwise. It has two placeholders: {context} and {question}.
template = PromptTemplate(
    input_variables=["context", "question"],
    template="You are a helpful assistant. Answer the question only from the context provided don't generate on your own. If you don't know the answer, say 'I don't know'. Context: {context} Question: {question}",
)

In [70]:
# Example question, passed to the retriever to fetch matching transcript chunks.
question = "Who is magnus clarsen?"
retrived_docs = retriver.invoke(question)

In [71]:
# Concatenate the text of the retrieved chunks, one per line, to form the context.
context_text = "\n".join(doc.page_content for doc in retrived_docs)

In [72]:
# Fill the prompt template's two placeholders with the retrieved context and the question.
final_prompt = template.invoke(dict(context=context_text, question=question))

# Generation 

In [74]:
# Send the filled-in prompt to the LLM and keep its response.
answer = llm.invoke(final_prompt)

In [75]:
from langchain_core.runnables import RunnablePassthrough ,RunnableParallel, RunnableLambda

In [76]:
def format_text(text):
    """Join the ``page_content`` of each item in ``text`` with newlines.

    Args:
        text: An iterable of objects exposing a ``page_content`` attribute
            (used here on the documents returned by the retriever).

    Returns:
        A single string with one ``page_content`` per line; an empty string
        when ``text`` is empty.
    """
    return "\n".join(document.page_content for document in text)

In [77]:
# Build the prompt inputs from a single question string:
#   context  -> retrieve matching chunks and join their text
#   question -> the input passed through unchanged
parallel_chain = RunnableParallel(
    context=retriver | RunnableLambda(format_text),
    question=RunnablePassthrough(),
)

In [78]:
from langchain_core.output_parsers import StrOutputParser

In [79]:
# Output parser used as the final step of the chain.
parser = StrOutputParser()

In [80]:
# End-to-end pipeline: build {context, question} -> fill the prompt -> call the LLM -> parse the output.
main_chain = parallel_chain | template | llm | parser

In [81]:
# Run the full chain on a single question string and display the answer.
main_chain.invoke("What does magnus clarsen do in his freetime?")

'Magnus Carlsen, the current world champion of chess from Norway, has shared that he plays casual online blitz or rapid games for money on a site called Chess.com during his free time to earn Cloud engine times which allows him more access to powerful chess engines like Stockfish and Komodo when playing longer classical games.'