In [100]:
import ollama
import re
import json

In [101]:
def parse_llama_json(text):
    # Extract JSON part from the generated text
    start_idx = text.find('[')
    end_idx = text.rfind(']') + 1

    if start_idx == -1 or end_idx == -1:
        raise ValueError("No valid JSON found in the text")

    json_part = text[start_idx:end_idx]

    # Parse the extracted JSON
    try:
        parsed_data = json.loads(json_part)
        return parsed_data
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON: {e}")

In [102]:
def generate_questions_from_transcript(transcript, model='llama3.2'):
    task_description = """
        You are an AI tasked with generating multiple-choice questions (MCQs) from a given transcript. 
        Your goal is to:
        1. Identify important concepts, events, or details in the transcript.
        2. Frame questions in a simple and clear manner based on these concepts.
        3. Provide 4 answer options for each question, ensuring one is correct and the others are plausible but incorrect.
        4. Specify the index (0-based) of the correct answer for each question.
        5. Format your response as a JSON list where each entry follows the structure:
        { "question": "<question_text>", "options": ["<option1>", "<option2>", "<option3>", "<option4>"], "correct_answer": <index_of_correct_option> }

        Example output:
        [
            {
                "question": "What is the capital of France?",
                "options": ["Berlin", "Madrid", "Paris", "Rome"],
                "correct_answer": 2
            },
            {
                "question": "Which planet is known as the Red Planet?",
                "options": ["Earth", "Mars", "Jupiter", "Venus"],
                "correct_answer": 1
            },
            {
                "question": "What is the chemical symbol for water?",
                "options": ["H2O", "O2", "CO2", "NaCl"],
                "correct_answer": 0
            }
        ]
        Your input will be a transcript, and you will generate 3 questions based on its content in this exact format.
    """

    prompt = task_description + '\n Here is the transcript content: \n' + str(transcript) + 'Generate 3 questions as a JSON list, each question following the specified json format { "question": "<question_text>", "options": ["<option1>", "<option2>", "<option3>", "<option4>"], "correct_answer": <index_of_correct_option> }.'

    response = ollama.generate(model=model, prompt=prompt)

    llama_output = generate_questions_from_transcript(response["response"])

    return eval(parse_llama_json(llama_output['response']))

In [103]:
from pytubefix import YouTube
from pytubefix.cli import on_progress
from pydub import AudioSegment
import whisper
import os
import uuid

# Utility function to generate a transcript
def generate_transcript_from_url(url, timestamps=None):
    unique_id = str(uuid.uuid4())[:8]  # Shorten UUID for brevity
    m4a_file = f"{unique_id}"
    wav_file = f"{unique_id}.wav"

    try:
        # Step 1: Download audio from YouTube
        yt = YouTube(url, on_progress_callback=on_progress)
        print(f"Downloading audio for video: {yt.title}")
        ys = yt.streams.get_audio_only()
        ys.download(filename=m4a_file)

        # Step 2: Convert .m4a to .wav
        audio = AudioSegment.from_file(f"{m4a_file}.m4a", format="m4a")
        audio.export(wav_file, format="wav")
        print(f"Conversion complete: {wav_file}")

        # Step 3: Handle timestamps if not provided
        if timestamps is None:
            duration = len(audio) / 1000  # Convert milliseconds to seconds
            timestamps = [i * (duration / 2) * 1000 for i in range(1, 3)]  # Divide into 10 equal parts, convert to ms

        # Step 4: Transcribe audio using Whisper
        transcripts = []
        questions = []
        for i in range(len(timestamps)):
            start_time = timestamps[i]
            end_time = timestamps[i + 1] if i + 1 < len(timestamps) else len(audio)
            segment = audio[start_time:end_time]

            # Save the segment to a temporary file
            segment_file = f"{unique_id}_segment_{i}.wav"
            segment.export(segment_file, format="wav")
            print(f"Segment {i + 1} saved: {segment_file}")

            # Step 5: Transcribe the segment using Whisper model
            model = whisper.load_model("base")
            result = model.transcribe(segment_file)
            transcripts.append(f"{result['text']}")

            for ques in generate_questions_from_transcript(result['text']):
                ques['segment'] = i
                questions.append(ques)

            # Delete the segment file
            os.remove(segment_file)

        # Step 6: Clean up temporary files
        os.remove(f"{m4a_file}.m4a")
        os.remove(wav_file)
        print(f"Temporary files deleted: {m4a_file}.m4a, {wav_file}")

        return transcripts, questions
    except Exception as e:
        print(f"Error during transcription: {e}")
        return None

In [104]:
generate_transcript_from_url("https://www.youtube.com/watch?v=lV2QjJrAu8w&list=WL&index=27")

Downloading audio for video: Dr. APJ Abdul Kalam taking oath as the President of India
Conversion complete: 69c74b33.wav███████████████| 100.0%
Segment 1 saved: 69c74b33_segment_0.wav


  checkpoint = torch.load(fp, map_location=device)


KeyboardInterrupt: 