In [None]:
from youtube_transcript_api import YouTubeTranscriptApi

In [None]:
# ID of the YouTube video whose transcript is requested below.
video_id="dDP36_ZBs6A"
# Fetch the transcript for that video; a later cell iterates `t` as a sequence
# of segments and reads each segment's 'text' entry.
t=YouTubeTranscriptApi.get_transcript(video_id)

In [None]:
# Stitch the text of every transcript segment into one space-separated string
# and show it.
transcript = " ".join(
    segment["text"]
    for segment in t
)
print(transcript)

In [None]:
from dotenv import load_dotenv
import google.generativeai as genai
from google.generativeai import GenerationConfig
import json
import os
import time
import random

# Schema handed to the LLM so that it returns its data as JSON in this format.
import typing_extensions as typing
class QA_data(typing.TypedDict):
    # Text of the generated question.
    question: str
    # The answer options offered for the question.
    answers: list[str]
    # The correct option — presumably one of `answers`; the schema itself does
    # not enforce that (TODO confirm against the model's replies).
    correct_answer: str


# Bring in environment variables (GEMINI_KEY is the one needed here).
load_dotenv()

# Point the Gemini client at the API key taken from the environment.
genai.configure(api_key=os.environ.get("GEMINI_KEY"))

# Generation settings: JSON replies shaped by the QA_data schema.
config = GenerationConfig(
    temperature=0.9,
    response_mime_type="application/json",
    response_schema=QA_data,
)




In [None]:
def split_text_into_chunks(text, num_questions=None):
    """Split ``text`` into contiguous word chunks, one per question to generate.

    Args:
        text: The transcript, as a whitespace-separated string.
        num_questions: How many chunks to produce. When omitted, a random
            count between 8 and 10 (inclusive) is drawn.

    Returns:
        A list of chunk strings in their original order. It holds exactly
        ``num_questions`` chunks when the text has at least that many words,
        one single-word chunk per word when it has fewer, and is empty when
        the text contains no words.

    Raises:
        ValueError: If ``num_questions`` is given but is smaller than 1.
    """
    if num_questions is None:
        num_questions = random.randint(8, 10)
    elif num_questions < 1:
        raise ValueError("num_questions must be at least 1")

    words = text.split()
    if not words:
        # Nothing to split; a zero chunk size would make range() raise.
        return []

    # Never ask for more chunks than there are words to fill them.
    num_chunks = min(num_questions, len(words))
    print(f"Generating {num_chunks} questions from the transcript...")

    # Spread the words as evenly as possible: the first `extra` chunks carry
    # one additional word, so the chunk count always matches `num_chunks`
    # instead of spilling a short leftover chunk.
    base_size, extra = divmod(len(words), num_chunks)
    chunks = []
    start = 0
    for chunk_index in range(num_chunks):
        end = start + base_size + (1 if chunk_index < extra else 0)
        chunks.append(' '.join(words[start:end]))
        start = end
    return chunks



def generate_questions_and_options(chunk):
    """Ask Gemini for one multiple-choice question about ``chunk``.

    Args:
        chunk: Piece of transcript text to base the question on.

    Returns:
        A dict parsed from the model's JSON reply; the shared ``config``
        requests the ``QA_data`` shape (``question``, ``answers``,
        ``correct_answer``). If anything goes wrong, the error is printed and
        a placeholder dict with those same three keys is returned, so callers
        always receive a dict they can add keys to.
    """
    try:
        prompt = f'''Generate a question from the following text chunk:\n\n{chunk}\n\nProvide 4 options, with only 1 correct option.
                    Format the output in a dictionary like such'''

        # Build the model handle with the quiz-maker system instruction.
        model = genai.GenerativeModel("gemini-1.5-flash",
                                      system_instruction="You are an expert question maker and quizzer and need to parse some transcript chunks to generate the best questions possible",
                                      generation_config=config)
        result = model.generate_content(prompt)

        # Parse the JSON text carried by the first part of the reply.
        parsed = json.loads(result.parts[0].text)
        if not isinstance(parsed, dict):
            # Callers index the result by key, so any other JSON value is a failure.
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
        return parsed

    except Exception as e:
        # Best effort: report the problem and fall back to a placeholder that
        # has the same shape as a successful result.
        print(f"Error during API request: {e}")
        return {
            "question": "Error generating question and options.",
            "answers": [],
            "correct_answer": "",
        }


In [None]:

# Sample text transcript
text = transcript

# Split text into chunks
chunks = split_text_into_chunks(text)
questions_data = []

# Process each chunk
for i, chunk in enumerate(chunks):
    print(f"Chunk {i + 1}:\n{chunk}\n")
    question_data = generate_questions_and_options(chunk)

    # A failed generation may not come back as a dict; skip it rather than
    # crash on the item assignment below and lose the questions collected so far.
    if not isinstance(question_data, dict):
        print(f"Skipping chunk {i + 1}: no question was generated.")
        continue

    # Record which chunk the question came from before storing it.
    question_data["index"] = i
    questions_data.append(question_data)

# Save to JSON file
with open("questions_data.json", "w", encoding="utf-8") as json_file:
    json.dump(questions_data, json_file, indent=4)

print("Collected data has been saved to questions_data.json")