In [22]:
from langchain_community.document_loaders import YoutubeLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# Shared Groq-hosted chat model; later cells call `llm.invoke(...)` on it.
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.0, max_retries=2)


In [23]:
# Smoke test: send one question and print the raw response object
# (message content together with its metadata).
a = llm.invoke(input="who is the prime minister of india")
print(a)

content='As of my cut-off knowledge in December 2023, the Prime Minister of India is Narendra Modi. He has been serving as the Prime Minister of India since May 26, 2014. However, please note that my information may not be up-to-date, and I recommend verifying the current Prime Minister of India through a reliable news source for the most recent information.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 76, 'prompt_tokens': 42, 'total_tokens': 118, 'completion_time': 0.101333333, 'prompt_time': 0.003760913, 'queue_time': 0.017284113, 'total_time': 0.105094246}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_9cb648b966', 'finish_reason': 'stop', 'logprobs': None} id='run-c21dfe3e-14ce-426e-9465-7acc3927bc89-0' usage_metadata={'input_tokens': 42, 'output_tokens': 76, 'total_tokens': 118}


In [24]:
from langchain_community.document_loaders import YoutubeLoader

# Load the video's transcript as documents; extra video info is not requested.
youtube_url = "https://youtu.be/zIwLWfaAg-8?si=qsN6eA17rsIqJl8o"
loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
docs = loader.load()

In [25]:
# Split the loaded documents into overlapping chunks (size 1500, overlap 150).
# NOTE: this rebinds `docs`, so every later cell sees the chunks rather than
# the documents returned by the loader.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
docs = text_splitter.split_documents(docs)

In [26]:
# Preview only the first few chunks: displaying the whole list floods the
# cell output, and the next cell already prints every chunk's text.
docs[:3]

[Document(metadata={'source': 'zIwLWfaAg-8'}, page_content="Chris Anderson:\nElon, hey, welcome back to TED. It's great to have you here. Elon Musk: Thanks for having me. CA: So, in the next half hour or so, we're going to spend some time exploring your vision for what\nan exciting future might look like, which I guess makes\nthe first question a little ironic: Why are you boring? EM: Yeah. I ask myself that frequently. We're trying to dig a hole under LA, and this is to create the beginning of what will hopefully\nbe a 3D network of tunnels to alleviate congestion. So right now, one of the most\nsoul-destroying things is traffic. It affects people\nin every part of the world. It takes away so much of your life. It's horrible. It's particularly horrible in LA. (Laughter) CA: I think you've brought with you the first visualization\nthat's been shown of this. Can I show this? EM: Yeah, absolutely.\nSo this is the first time -- Just to show what we're talking about. So a couple of key thi

In [27]:
# Print the text of every chunk, one after another.
for chunk in docs:
    print(chunk.page_content)


Chris Anderson:
Elon, hey, welcome back to TED. It's great to have you here. Elon Musk: Thanks for having me. CA: So, in the next half hour or so, we're going to spend some time exploring your vision for what
an exciting future might look like, which I guess makes
the first question a little ironic: Why are you boring? EM: Yeah. I ask myself that frequently. We're trying to dig a hole under LA, and this is to create the beginning of what will hopefully
be a 3D network of tunnels to alleviate congestion. So right now, one of the most
soul-destroying things is traffic. It affects people
in every part of the world. It takes away so much of your life. It's horrible. It's particularly horrible in LA. (Laughter) CA: I think you've brought with you the first visualization
that's been shown of this. Can I show this? EM: Yeah, absolutely.
So this is the first time -- Just to show what we're talking about. So a couple of key things
that are important in having a 3D tunnel network. First of all, 

In [None]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema handed to JsonOutputParser inside extract_character_info(); the
# parser's format instructions are injected into that function's prompt.
# NOTE(review): the docstring and Field descriptions below presumably end up
# in the generated schema text sent to the model -- confirm before rewording.
class CharacterInfo(BaseModel):
    """
    Structured model for character information in a story.
    """
    name: str = Field(description="Full name of the character")
    storyTitle: str = Field(description="Title of the story")
    summary: str = Field(description="Brief summary of the character's story")
    # Untyped list; the prompt in extract_character_info() asks for
    # name/relation pairs, but nothing here enforces that item shape.
    relations: list = Field(description="List of character's relationships with other characters in story")
    characterType: str = Field(description="Character's role in the story")

def extract_character_info(story_text: str, character_name: str) -> dict:
    """
    Extract structured information about one character from a story.

    Builds a prompt containing the story, the character name and the JSON
    format instructions derived from ``CharacterInfo``, sends it to a Groq
    chat model and parses the reply with ``JsonOutputParser``.

    Args:
        story_text: Text of the story to analyze.
        character_name: Name of the character to look up in the story.

    Returns:
        The parsed result of the chain. An empty dict is returned when either
        argument is blank, or when the model call or the parsing raises (the
        error is printed, not re-raised).
    """
    # Nothing to analyze: skip the API call and return the same empty object
    # the prompt asks the model for when the character cannot be found.
    if not (story_text and story_text.strip()):
        return {}
    if not (character_name and character_name.strip()):
        return {}

    output_parser = JsonOutputParser(pydantic_object=CharacterInfo)
    prompt = PromptTemplate(
        # Literal braces in the JSON example are doubled so the template
        # formatter does not treat them as input variables.
        template="""
        Analyze the following story and extract comprehensive information about the character '{character_name}'.

        Story:
        {story_text}

        {format_instructions}

        Provide a detailed JSON response with:
        - Character's full name
        - Story title
        - Character's role and journey summary
        - The character's relationships with other characters in the story, as a list of objects, for example:
          [{{"name": "Arya Stark", "relation": "Sister"}},
           {{"name": "Eddard Stark", "relation": "Father"}}]
        - Character type (protagonist, antagonist, side character, etc.)

        Do not add any extra fields to the JSON. If the character is not found, return an empty JSON object.
        """,
        input_variables=["story_text", "character_name"],
        partial_variables={"format_instructions": output_parser.get_format_instructions()}
    )

    # Local model, independent of the notebook-level `llm`.
    # NOTE(review): the notebook-level `llm` uses "llama-3.1-8b-instant";
    # confirm that this older model id is still served by Groq.
    llm = ChatGroq(
        temperature=0.7,  # Allow some creativity in summarization
        model_name="llama3-8b-8192",
        api_key=os.getenv('GROQ_API_KEY')
    )

    chain = prompt | llm | output_parser

    try:
        return chain.invoke({"story_text": story_text, "character_name": character_name})
    except Exception as e:
        # Best-effort by design: report the failure and hand back an empty result.
        print(f"Error in character information extraction: {e}")
        return {}

In [21]:
# Single source of truth for the requested number of questions, so the two
# places where the prompt mentions a count cannot contradict each other.
NUM_QUESTIONS = 10

# Send the model the transcript text only, not the repr of the Document list.
transcript_text = "\n".join(doc.page_content for doc in docs)

template = f"""
You are a helpful assistant programmed to generate questions based on any text provided. For every chunk of text you receive, you're tasked with designing {NUM_QUESTIONS} distinct questions. Each of these questions will be accompanied by 3 possible answers: one correct answer and two incorrect ones.
The provided text is as follows:
{transcript_text}

For clarity and ease of processing, structure your response in a way that emulates a Python list of lists.

Your output should be shaped as follows:

1. An outer list that contains {NUM_QUESTIONS} inner lists.
2. Each inner list represents a set of question and answers, and contains exactly 4 strings in this order:
- The generated question.
- The correct answer.
- The first incorrect answer.
- The second incorrect answer.

Your output should mirror this structure:
[
    ["Generated Question 1", "Correct Answer 1", "Incorrect Answer 1.1", "Incorrect Answer 1.2"],
    ["Generated Question 2", "Correct Answer 2", "Incorrect Answer 2.1", "Incorrect Answer 2.2"],
    ...
]

It is crucial that you adhere to this format as it's optimized for further Python processing.

"""

# Keep only the text of the reply; the response metadata is not needed here.
a = llm.invoke(template).content

print(a)

Here are the 10 distinct questions with 3 possible answers each, based on the provided text:

[
    ["What is the main goal of Elon Musk's tunneling project?", "To alleviate traffic congestion in cities", "To create a network of tunnels for underground living", "To build a new transportation system for space travel"],
    ["Why is digging tunnels expensive?", "Because of the high cost of machinery and labor", "Because of the difficulty of digging through hard rock", "Because of the need to build multiple entrances and exits"],
    ["What is the key to achieving a tenfold improvement in the cost per mile of tunneling?", "Cutting the tunnel diameter by a factor of two or more", "Using more powerful tunneling machines", "Building tunnels at a shallower depth"],
    ["What is the main advantage of using electric skates in the tunnel network?", "They are faster than traditional cars", "They are more environmentally friendly", "They can operate at high speeds without the need for ventilation

In [None]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Tuple
from langchain_google_genai import ChatGoogleGenerativeAI

# One multiple-choice item of a QuizSet: the question text, four options and
# the correct answer.
# NOTE(review): the docstring and Field descriptions presumably become part of
# the schema text sent to the model -- confirm before rewording them.
class QuizQuestion(BaseModel):
    """
    Structured model for quiz questions and answers.
    """
    question: str = Field(description="The quiz question")
    option1: str = Field(description="First option")
    option2: str = Field(description="Second option")
    option3: str = Field(description="third option")
    option4: str = Field(description="Fourth option")
    # NOTE(review): nothing here says whether this holds the text of the
    # correct option or a label such as "option2" -- confirm with consumers.
    correct_answer: str = Field(description="The correct answer to the question")

# Top-level schema passed to JsonOutputParser in generate_quiz_questions();
# it wraps the list of QuizQuestion items under a single `questions` key.
class QuizSet(BaseModel):
    """
    Collection of quiz questions.
    """
    questions: List[QuizQuestion] = Field(description="List of quiz questions with their answers")

def _quiz_source_text(docs) -> str:
    """Flatten `docs` (a string, one document, or an iterable of them) into a single string."""
    if docs is None:
        return ""
    if isinstance(docs, str):
        return docs
    # A single document-like object exposing `page_content`.
    if hasattr(docs, "page_content"):
        return str(docs.page_content)
    # An iterable of strings and/or document-like objects.
    return "\n".join(str(getattr(item, "page_content", item)) for item in docs)


def generate_quiz_questions(docs, num_questions: int = 5) -> dict:
    """
    Generate multiple-choice quiz questions from the provided text.

    Args:
        docs: Source material. Either a plain string, a single object with a
            ``page_content`` attribute, or an iterable of such objects and/or
            strings (for example the chunk list produced by a text splitter).
            Only the text is sent to the model, not the objects' repr.
        num_questions: How many questions to ask the model for (default 5).

    Returns:
        The parsed output of the chain, requested in the ``QuizSet`` layout
        (a ``questions`` list whose items carry ``question``, ``option1`` to
        ``option4`` and ``correct_answer``). An empty dict is returned when
        there is no input text, or when the model call or the parsing raises
        (the error is printed, not re-raised).
    """
    input_text = _quiz_source_text(docs).strip()
    if not input_text:
        # No material to build questions from: skip the API call entirely.
        print("Error in quiz generation: no input text provided")
        return {}

    output_parser = JsonOutputParser(pydantic_object=QuizSet)

    prompt = PromptTemplate(
        # The wording mirrors the QuizQuestion schema: four options, one correct.
        template="""
        You are a quiz generator tasked with creating questions based on the following text.
        Generate {num_questions} distinct multiple-choice questions, each with four options of which exactly one is correct.

        Text to analyze:
        {input_text}

        {format_instructions}

        Generate questions that:
        - Test understanding of key concepts
        - Have clear, unambiguous correct answers
        - Include plausible but clearly incorrect alternative options
        - Cover different aspects of the provided text

        Return the questions in the structured format described above. For each question,
        correct_answer must repeat the exact text of the option that is correct.
        """,
        input_variables=["input_text", "num_questions"],
        partial_variables={"format_instructions": output_parser.get_format_instructions()}
    )

    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

    chain = prompt | llm | output_parser

    try:
        return chain.invoke({"input_text": input_text, "num_questions": num_questions})
    except Exception as e:
        # Best-effort by design: report the failure and hand back an empty result.
        print(f"Error in quiz generation: {e}")
        return {}


In [39]:
# Build the quiz from the chunked transcript held in `docs`.
a = generate_quiz_questions(docs=docs)