In [6]:
import os

# Never hard-code API keys in a notebook: they leak through version control and
# shared copies. The key that used to be embedded on this line must be treated
# as compromised and revoked.
# Use the key already present in the environment; otherwise prompt for it so
# the secret is never written into the notebook.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

In [7]:
import re
import tempfile

from groq import Groq
from pytubefix import YouTube
from pytubefix.cli import on_progress


In [8]:
def _timestamp_to_seconds(timestamp: str) -> float:
    """Convert SRT timestamp to seconds."""
    if ',' in timestamp:
        timestamp = timestamp.replace(',', '.')

    parts = timestamp.split(':')
    if len(parts) == 3:
        hours, minutes, seconds = parts
        return float(hours) * 3600 + float(minutes) * 60 + float(seconds)
    return 0

def _parse_srt_captions(srt_captions: str):
    """Parse SRT format captions into segments.

    A cue starts at its timestamp line (the one containing ``-->``). A
    digit-only line counts as a cue index only when the very next line is a
    timestamp line, so caption text that happens to be a bare number
    (e.g. "2024") is kept as text instead of being mistaken for an index.

    Args:
        srt_captions: Full SRT document as a single string.

    Returns:
        A list of dicts, one per cue, each with the keys ``'start'`` and
        ``'end'`` (seconds, via ``_timestamp_to_seconds``) and ``'text'``
        (the cue's lines joined with single spaces; ``''`` for a cue without
        text). Text appearing before the first timestamp line is ignored.
    """
    lines = [raw.strip() for raw in srt_captions.split('\n')]
    segments = []
    current_segment = None

    for position, line in enumerate(lines):
        if not line:
            continue

        if '-->' in line:  # Timestamp line: begins a new cue
            if current_segment is not None:
                segments.append(current_segment)
            start, _, end = line.partition('-->')
            # Anything after the end time (e.g. positioning data) is ignored.
            end_fields = end.split()
            current_segment = {
                'start': _timestamp_to_seconds(start.strip()),
                'end': _timestamp_to_seconds(end_fields[0] if end_fields else ''),
                'text': '',
            }
            continue

        following = lines[position + 1] if position + 1 < len(lines) else ''
        if line.isdigit() and '-->' in following:
            continue  # Cue index directly above a timestamp line

        if current_segment is None:
            continue  # Stray text before the first cue has no timestamps

        if current_segment['text']:
            current_segment['text'] += ' ' + line
        else:
            current_segment['text'] = line

    if current_segment is not None:
        segments.append(current_segment)

    return segments


In [10]:
# ... existing code ...

class EducationalAssistant:
    """Turn video transcripts into LLM-selected key segments and quiz questions.

    The assistant pulls English captions from a YouTube video, asks a Groq
    chat model which parts matter, maps the model's answer back onto caption
    timestamps and can then generate quiz questions for each key segment.
    """

    # Settings shared by every chat-completion request this class sends.
    MODEL = "llama-3.3-70b-versatile"
    TEMPERATURE = 0.7
    MAX_COMPLETION_TOKENS = 1024

    # Caption tracks tried in order: auto-generated English, then manual English.
    CAPTION_LANGUAGES = ('a.en', 'en')

    # Words too common to tell one caption from another when matching titles.
    _STOP_WORDS = frozenset({
        'the', 'and', 'for', 'with', 'from', 'into', 'onto', 'about', 'this',
        'that', 'these', 'those', 'are', 'was', 'were', 'has', 'have', 'had',
        'its', 'our', 'your', 'their', 'what', 'why', 'how', 'when', 'where',
        'who', 'which', 'not', 'but', 'can', 'will',
    })

    def __init__(self, groq_client):
        """Store the client used for all chat-completion requests.

        Args:
            groq_client: Object exposing ``chat.completions.create(...)``.
        """
        self.groq_client = groq_client

    def _chat(self, prompt):
        """Send ``prompt`` as a single user message and return the reply text.

        Returns ``""`` when the reply carries no content.
        """
        completion = self.groq_client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=self.TEMPERATURE,
            max_completion_tokens=self.MAX_COMPLETION_TOKENS,
            top_p=1,
            stream=False,
        )
        return completion.choices[0].message.content or ""

    @staticmethod
    def _tokenize(text):
        """Return the lower-cased word tokens found in ``text``."""
        return re.findall(r"\w+", text.lower())

    @classmethod
    def _title_keywords(cls, title):
        """Return the set of title words used for matching against captions.

        Stop words and words shorter than three characters are dropped; if
        nothing survives the filter, every word of the title is used instead.
        """
        tokens = cls._tokenize(title)
        significant = {
            token for token in tokens
            if len(token) > 2 and token not in cls._STOP_WORDS
        }
        return significant or set(tokens)

    def process_youtube_video(self, video_url):
        """Process a YouTube video and extract captions.

        Args:
            video_url: URL of the YouTube video.

        Returns:
            The parsed caption segments from the first available track in
            ``CAPTION_LANGUAGES``, or ``None`` when the video has none of them.
        """
        yt = YouTube(url=video_url)
        captions = yt.captions
        if not captions:
            return None

        for language_code in self.CAPTION_LANGUAGES:
            if language_code in captions:
                raw_captions = captions[language_code].generate_srt_captions()
                return _parse_srt_captions(raw_captions)

        return None

    def identify_key_segments_with_llm(self, segments, max_chunk_size=1000):
        """Use LLM to identify important segments in the video.

        Args:
            segments: Caption segments (dicts with ``'start'``, ``'end'`` and
                ``'text'``).
            max_chunk_size: Currently unused; kept so existing callers keep
                working. The whole transcript is sent in a single request.

        Returns:
            A list of key-segment dicts as built by
            ``_match_segments_to_timestamps``. Empty when there is no
            transcript text, in which case no request is sent.
        """
        if not segments:
            return []

        # Combine all text first
        full_text = " ".join(segment.get('text', '') for segment in segments)
        if not full_text.strip():
            return []

        prompt = """Analyze this video transcript and identify the most important segments or topics. 
        For each important segment, provide:
        1. A brief title for the segment
        2. The key points or main ideas discussed
        3. Why this segment is important for learning
        
        Format your response as:
        SEGMENT: [title]
        KEY POINTS: [bullet points of main ideas]
        IMPORTANCE: [why this matters]
        
        Transcript:
        {text}
        """

        analysis = self._chat(prompt.format(text=full_text))

        # Now match the analyzed segments back to timestamps
        return self._match_segments_to_timestamps(analysis, segments)

    def _match_segments_to_timestamps(self, analysis, original_segments):
        """Match the LLM-identified segments with video timestamps.

        Each ``SEGMENT:`` section of ``analysis`` is paired with the caption
        that shares the most whole words with the section title (the earliest
        caption wins a tie). Sections whose title shares no keyword with any
        caption are skipped.

        Args:
            analysis: LLM reply formatted with ``SEGMENT:`` headers.
            original_segments: Caption segments to match against.

        Returns:
            A list of dicts with ``'start'``, ``'end'``, ``'title'``,
            ``'text'`` and ``'analysis'``.
        """
        key_segments = []
        if not analysis:
            return key_segments

        # Tokenise every caption once instead of once per LLM section.
        caption_words = [
            set(self._tokenize(segment.get('text', '')))
            for segment in original_segments
        ]

        # The text before the first "SEGMENT:" marker is not a section.
        for llm_segment in analysis.split('SEGMENT:')[1:]:
            section = llm_segment.strip()
            title = section.split('\n')[0].strip()
            keywords = self._title_keywords(title)
            if not keywords:
                continue

            best_index, best_hits = None, 0
            for index, words in enumerate(caption_words):
                hits = len(keywords & words)
                if hits > best_hits:
                    best_index, best_hits = index, hits

            if best_index is None:
                continue

            segment = original_segments[best_index]
            key_segments.append({
                'start': segment['start'],
                'end': segment['end'],
                'title': title,
                'text': segment['text'],
                'analysis': section
            })

        return key_segments

    def generate_questions(self, segment, grade, num_questions=3):
        """Generate questions for a given segment using Groq.

        Args:
            segment: Key-segment dict with ``'title'``, ``'analysis'`` and
                ``'text'``.
            grade: School grade the questions are aimed at.
            num_questions: How many questions to request.

        Returns:
            The model's reply text (``""`` if the reply has no content).
        """
        prompt = f"""Based on this video segment titled "{segment['title']}", generate {num_questions} quiz questions.
        Consider the following analysis of the segment:
        {segment['analysis']}
        
        Generate questions that test understanding of the key concepts for a grade {grade} student.
        Format each question as:
        Q: [question]
        A: [answer]
        
        Content: {segment['text']}
        Generate questions only based on the content of the segment, do not make up any questions.
        """

        return self._chat(prompt)

# Example usage
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
assistant = EducationalAssistant(client)

# Process video
video_url = "https://www.youtube.com/watch?v=D1Ymc311XS8"
segments = assistant.process_youtube_video(video_url)

# Quiz settings
grade = 2
num_questions = 3

if segments:
    # Get LLM-identified key segments
    key_segments = assistant.identify_key_segments_with_llm(segments)

    # Generate questions for each key segment
    for i, segment in enumerate(key_segments, 1):
        print(f"\nSegment {i}: {segment['title']}")
        print(f"Timestamp: {segment['start']:.1f}s - {segment['end']:.1f}s")
        print("Analysis:")
        print(segment['analysis'])
        print("\nQuestions:")
        questions = assistant.generate_questions(segment, grade, num_questions)
        print(questions)
else:
    # process_youtube_video returned nothing usable; say so instead of
    # finishing the cell without any output.
    print(f"No English captions found for {video_url}")


Segment 1: Introduction to Photosynthesis
Timestamp: 11.2s - 16.9s
Analysis:
Introduction to Photosynthesis
KEY POINTS:
* The speaker introduces the topic of photosynthesis in a casual setting
* The importance of sunlight and vitamin D is mentioned
* The speaker asks the audience if they know about photosynthesis and invites them to learn about it
IMPORTANCE: This segment is important for learning because it sets the tone for the rest of the video and grabs the audience's attention. It also establishes the speaker's informal and conversational style, making the topic more approachable and engaging.

Questions:
Q: What is special about the day mentioned in the video segment?
A: It's a lovely sunny day.

Q: Why is sunlight important, according to the introduction?
A: Sunlight is important, and it also helps our bodies make vitamin D.

Q: What is the speaker inviting the audience to do in the video segment?
A: The speaker is inviting the audience to learn about photosynthesis.

Segment 2

In [None]:
# Add these imports at the top
!pip install moviepy
from moviepy.editor import VideoFileClip
import speech_recognition as sr

# Add this method to the EducationalAssistant class
def process_video_file(self, video_path):
    """Process a non-YouTube video file.

    Extracts the audio track to a temporary WAV file, transcribes it with
    ``recognize_google`` and returns the transcript as one segment covering
    the whole video.

    Args:
        self: Unused; present so the function can be attached to
            ``EducationalAssistant`` as a method.
        video_path: Path of the video file to transcribe.

    Returns:
        A one-element list holding a dict with ``'start'`` (0), ``'end'``
        (the clip duration) and ``'text'`` (the transcript).

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    audio = None

    # A unique temporary file avoids clobbering a file of the same name in
    # the working directory or colliding with a concurrent run.
    handle, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(handle)

    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")

        # Read the duration while the clip is still open.
        duration = video.duration

        # Save audio temporarily
        audio.write_audiofile(audio_path)

        # Convert audio to text
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
    finally:
        # Clean up even when extraction or transcription fails.
        if audio is not None:
            audio.close()
        video.close()
        if os.path.exists(audio_path):
            os.remove(audio_path)

    # Create a single segment with the transcribed text
    segment = {
        'start': 0,
        'end': duration,
        'text': text
    }

    return [segment]

In [None]:
# Build the assistant from the API key stored in the environment
assistant = EducationalAssistant(Groq(api_key=os.getenv("GROQ_API_KEY")))

# For YouTube videos
segments = assistant.process_youtube_video("https://youtube.com/watch?v=...")

# For other videos: process_video_file is defined at module level and takes
# the assistant as its first argument
segments = process_video_file(assistant, "path/to/video.mp4")

# Generate questions: generate_questions needs the whole key-segment dict
# (title, analysis and text) plus the target grade
key_segments = assistant.identify_key_segments_with_llm(segments)
for segment in key_segments:
    questions = assistant.generate_questions(segment, grade=2)
    print(questions)

In [None]:
from groq import Groq
import os

# Transcribe a local audio file through the Groq audio transcription endpoint.
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)

audio_path = "/content/test_audio.mp3"

with open(audio_path, "rb") as file:
    # The upload is passed as a (filename, raw bytes) pair.
    audio_bytes = file.read()
    transcription = client.audio.transcriptions.create(
        model="whisper-large-v3",
        response_format="verbose_json",
        file=(audio_path, audio_bytes),
    )

In [6]:
# Video whose caption tracks are inspected in the following cell.
video_url = "https://www.youtube.com/watch?v=ZcI2B92JlJU"
yt = YouTube(video_url)

In [7]:
# Try auto-generated captions first
if yt.captions and 'a.en' in yt.captions:
    caption = yt.captions['a.en']
    raw_captions = caption.generate_srt_captions()
    segments = _parse_srt_captions(raw_captions)
# Fall back to manual English captions only when no auto-generated track exists
elif yt.captions and 'en' in yt.captions:
    caption = yt.captions['en']
    raw_captions = caption.generate_srt_captions()
    segments = _parse_srt_captions(raw_captions)
else:
    # No English track: avoid silently reusing segments from an earlier cell
    segments = None

In [None]:
from groq import Groq

# Request a streamed chat completion and echo the reply as it arrives.
client = Groq()
completion = client.chat.completions.create(
    stream=True,
    model="llama-3.3-70b-versatile",
    messages=[{"role": "user", "content": ""}],
    temperature=1,
    top_p=1,
    max_completion_tokens=1024,
    stop=None,
)

for chunk in completion:
    # A chunk's delta may carry no content; print nothing for it.
    piece = chunk.choices[0].delta.content
    print(piece if piece else "", end="")
