In [None]:
# Installs the notebook's third-party dependencies with the %pip magic.
# Run this cell only once per environment; note that no versions are pinned on this line.
%pip install langchain langchain-community langchain-ollama pytubefix faiss-cpu protobuf IPython sentence-transformers faster-whisper

In [None]:
#Versions are available in requirements.txt
from faster_whisper import WhisperModel
from langchain_ollama import OllamaEmbeddings
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import OllamaLLM
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import FAISS
from IPython.display import display, Markdown
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import warnings
from pytubefix import YouTube
from pytubefix.cli import on_progress
from langchain.docstore.document import Document
from langchain.chains.retrieval_qa.base import RetrievalQA
from IPython.display import clear_output

# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Process-wide switches: select the pure-Python protobuf backend and let the
# OpenMP runtime continue when it finds itself loaded more than once.
os.environ.update({
    "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python",
    'KMP_DUPLICATE_LIB_OK': 'TRUE',
})


In [None]:
# Ask for a YouTube URL, download the video's audio track and transcribe it
# with Whisper. The transcript ends up in `result` as one long string.
url = input("Enter the URL of your YouTube video: ").strip()
if not url:
    raise ValueError("No URL was entered; re-run this cell and paste a YouTube link.")

yt = YouTube(url, on_progress_callback=on_progress)
print(yt.title)

# .first() gives None when no stream matches the filter, so check before
# calling .download() to fail with a readable message instead of AttributeError.
ys = yt.streams.filter(only_audio=True).first()
if ys is None:
    raise RuntimeError(f"No audio-only stream is available for {url!r}.")
output = ys.download()  # value handed straight to the transcriber below

model = WhisperModel("base")
# transcribe() returns a pair; the first item is the iterable of segments.
segments, _transcription_info = model.transcribe(output)
# Each segment's text is followed by a single space, as a plain join of pieces.
result = "".join(f"{segment.text} " for segment in segments)


In [None]:
# Split the transcript into overlapping chunks ready for embedding.
# `result` must still be the transcript string produced by the transcription cell.
if not isinstance(result, str):
    raise TypeError(
        "`result` no longer holds the transcript text; "
        "re-run the transcription cell before splitting."
    )

# The overlap is key to avoid cutting a passage off from its context at a chunk boundary.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
document = Document(page_content=result)
chunks = text_splitter.split_documents([document])

# Quick sanity check that the split produced something usable.
print(f"Split into {len(chunks)} chunks")
if not chunks:
    # Without this guard the preview below fails with a bare IndexError.
    raise ValueError(
        "The transcript produced no chunks; the transcription is probably empty."
    )
print("First chunk preview:\n", chunks[0].page_content[:300])

In [None]:
# Embed every chunk and index the vectors in an in-memory FAISS store.
# Ollama has to be running before this cell is executed.
print("Creating embeddings and vector store (this may take a few minutes)...")

embeddings = OllamaEmbeddings(model="nomic-embed-text")
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

print("Vector store created successfully")

In [None]:
# Wire the vector store, a local Ollama model and a RetrievalQA chain together.
print("Setting up retriever...")
# Number of chunks fetched per query; may be increased if the machine has RAM to spare.
retriever_search_kwargs = {"k": 10}
retriever = vector_store.as_retriever(search_kwargs=retriever_search_kwargs)

# deepseek-r1 is available in sizes up to 671b, but the larger variants need
# a lot of storage and RAM. Use a near-zero temperature for strictly factual quizzes.
llm_settings = dict(
    model="deepseek-r1:1.5b",
    temperature=0.9,
    verbose=False,
    top_k=3,
    top_p=1.0,
)
llm = OllamaLLM(**llm_settings)

# RAG chain: the retrieved chunks are passed to the model via the "stuff" chain type,
# and the source documents are returned alongside the answer.
print("Creating QA chain...")
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
)

In [None]:
 # You can change the base prompt if you wish:
# The "act as a teacher" persona framing is there to discourage unexpected output formats.
question = (
    "From now on act as a teacher and prepare me a quiz which has factual answers "
    "and will improve my knowledge on this context. "
    "Have the answers at the end of the full quiz. "
    "Generate a 4 question quiz which is about the basics of RAG system."
)

# .invoke() replaces the deprecated direct call on the chain. The response gets its
# own name so that `result` keeps holding the transcript and the chunking cell can
# be re-run after this one.
quiz_response = qa_chain.invoke({"query": question})

# Clear the progress output printed while the chain was running, then show the quiz.
clear_output(wait=True)
print("\nAnswer:", quiz_response["result"])