In [2]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFaceEmbeddings
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
load_dotenv()

True

In [3]:
video_id =  "MdeQMVBuGgY"    #podcast video 
try:
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
    text_list=[]
    for item in transcript:
        text_list.append(item['text'])# Extract text from each segment of the transcript
    full_caption= " ".join(text_list)# Combine all text segments into a single string
    # print(combined_text)

except TranscriptsDisabled:
    print("Transcript is disabled for this video.")

In [None]:
#Splitting transcript into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap= 300
)
full_caption_chunks = text_splitter.create_documents([full_caption])
print(len(full_caption_chunks))

265


In [5]:
embedding = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(
    full_caption_chunks,           # first argument: list of Document objects ALWAYS
    embedding         # second artument: embedding model ALWAYS
)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
retriever= vectorstore.as_retriever(
    search_type= 'similarity',
    search_kwargs= {'k': 4}
)

In [51]:
query = "is vijay malya scam is true or not?"

In [52]:
retrievered_answers = retriever.invoke(query)
print(retrievered_answers)

[Document(id='a6722e15-628d-4cac-9b7f-dc335e30e8e2', metadata={}, page_content="If I am assured, absolutely. I will think about it seriously. But [Applause] this episode is not about glorifying a fugitive nor it is about justifying any wrongs. It's about asking the hard questions to the man at the center of one of the India's most controversial financial stories. After 9 years of complete media silence, Vijay Malia chose our podcast figuring out to speak publicly for the first time. I didn't do this podcast to celebrate him. I did it because I was curious. I read his tweets. I heard the headlines, but I never heard the full side of the story because the tweets that he makes are different than what the headlines talk about him. There's a mismatch. As a podcaster, my job is to ask every uncomfortable question, and I did. Vijay Malia claims he never committed fraud. He says he wanted to pay back the loans, that his properties were seized and sold, and that he's being framed. He also says 

In [60]:
def format_docs(retrievered_answers):
    joined_retrieved_answer = ' '.join([doc.page_content for doc in retrievered_answers])
    return joined_retrieved_answer


In [61]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [62]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.
      Context: {context},
      Question: {question}
    """,
    input_variables = {"context", "question"}
)

In [63]:
parallel_chain = RunnableParallel({
    "question": RunnablePassthrough(),
    "context": retriever | RunnableLambda(format_docs)
}
)

In [64]:
llm = HuggingFaceEndpoint(
    repo_id= "google/gemma-2-2b-it",
    task= "text-generation"
)
model  =  ChatHuggingFace(llm =llm)

In [65]:
parser = StrOutputParser()

In [67]:
final_chain = parallel_chain | prompt | model | parser
final_result = final_chain.invoke(query)

In [68]:
print(final_result)

Whether Vijay Malia's actions constitute a scam is a complex matter that the podcast aims to explore. 

The podcast presents an exploration of the controversies surrounding Malia,  pursued through:

* Malia's claims: He denies committing fraud and alleges he was framed and wants to face the consequences of its. 
* The evidence:  The podcast presents contradictory evidence regarding the amounts borrowed by Malia, the alleged recovery by the Indian government, and a gap between initial claims and presented evidence.  

The podcast underscores the importance of evaluating all aspects of the case and avoiding knee-jerk judgments. 


Let me know if you'd like me to summarize any other points from the transcript. 

