# LLM Testing

In [3]:
import os
import tempfile

import speech_recognition as sr
from moviepy.editor import VideoFileClip
from pydub import AudioSegment

def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of a video file to ``audio_path``.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio; the output
            format is chosen by MoviePy from this path.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        # MoviePy sets ``audio`` to None for clips without a soundtrack; fail
        # with a clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the underlying reader even when writing fails.
        video.close()

def transcribe_audio_chunk(recognizer, audio_chunk):
    """Return the lower-cased transcript of one audio file.

    Args:
        recognizer: Object providing ``record`` and ``recognize_google``
            (the caller in this file passes an ``sr.Recognizer``).
        audio_chunk: Audio file to open with ``sr.AudioFile``.

    Returns:
        The recognized text in lower case, or ``""`` when the recognizer
        raises ``sr.UnknownValueError``.
    """
    with sr.AudioFile(audio_chunk) as wav_source:
        recorded = recognizer.record(wav_source)
        try:
            transcript = recognizer.recognize_google(recorded)
        except sr.UnknownValueError:
            # Nothing intelligible was recognized in this chunk.
            return ""
        return transcript.lower()

def find_sentence_times(audio_path, target_sentence, initial_chunk_duration=2000, max_chunk_duration=10000):
    """Estimate when ``target_sentence`` is spoken in an audio file.

    The audio is scanned with overlapping windows. Each window is exported to
    a temporary WAV file and transcribed; if the sentence is not found with
    the current window size, the window grows by one second and the scan
    restarts, until ``max_chunk_duration`` is exceeded.

    The returned times are estimates: the position of the sentence inside a
    window is interpolated from its character offset within the window's
    transcript, not taken from word-level timestamps.

    Args:
        audio_path: Path of the audio file to search.
        target_sentence: Sentence to look for. Matching is case-insensitive
            and ignores differences in whitespace.
        initial_chunk_duration: First window size in milliseconds (int).
        max_chunk_duration: Largest window size to try, in milliseconds.

    Returns:
        ``(start_time, end_time)`` in seconds, or ``(None, None)`` when the
        sentence is empty or was not found.

    Raises:
        ValueError: If ``initial_chunk_duration`` is not positive.
    """
    if initial_chunk_duration <= 0:
        raise ValueError("initial_chunk_duration must be a positive number of milliseconds")

    # Normalize case and whitespace so the target is compared against the
    # transcript on equal terms.
    target_sentence = " ".join(target_sentence.lower().split())
    if not target_sentence:
        # An empty target would "match" every chunk (and divide by zero on a
        # silent one); report it as not found instead.
        return None, None

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_file(audio_path)
    audio_length = len(audio)

    # Keep the scratch WAV out of the working directory and guarantee cleanup.
    with tempfile.TemporaryDirectory() as scratch_dir:
        chunk_path = os.path.join(scratch_dir, "chunk.wav")

        chunk_duration = initial_chunk_duration
        while chunk_duration <= max_chunk_duration:
            # Recompute the stride for every window size so consecutive
            # windows keep overlapping by half as the window grows.
            step = chunk_duration - chunk_duration // 2

            for i in range(0, audio_length, step):
                chunk = audio[i:i + chunk_duration]
                # export() hands back the open output file; close it so the
                # handle is not leaked on every iteration.
                chunk.export(chunk_path, format="wav").close()

                text = transcribe_audio_chunk(recognizer, chunk_path)

                sentence_start_index = text.find(target_sentence)
                if sentence_start_index == -1:
                    continue
                sentence_end_index = sentence_start_index + len(target_sentence)

                # Scale by the real chunk length: the final chunk can be
                # shorter than chunk_duration, and using the nominal size
                # would place the sentence past the end of the audio.
                chunk_seconds = len(chunk) / 1000.0
                offset_seconds = i / 1000.0
                start_time = offset_seconds + sentence_start_index / len(text) * chunk_seconds
                end_time = offset_seconds + sentence_end_index / len(text) * chunk_seconds
                return start_time, end_time

            # Not found at this window size; retry with a window 1 s longer.
            chunk_duration += 1000

    return None, None

def save_video_clip(video_path, start_time, end_time, output_path):
    """Cut ``[start_time, end_time]`` out of a video and save it as a new file.

    Args:
        video_path: Path of the source video.
        start_time: Start of the excerpt, passed to ``subclip``.
        end_time: End of the excerpt, passed to ``subclip``.
        output_path: Destination file; encoded with the ``libx264`` codec.
    """
    video = VideoFileClip(video_path)
    try:
        clip = video.subclip(start_time, end_time)
        clip.write_videofile(output_path, codec="libx264")
    finally:
        # Release the source clip's reader even if encoding fails.
        video.close()

# Input video, intermediate audio file, and the sentences to search for
video_path = "V1.mp4"
audio_path = "audio.wav"
captions = ["I love you", "Happy Friday"]

# Pull the audio out of the video once; every caption search reuses it
extract_audio_from_video(video_path, audio_path)

# Maps "caption_<n>" to the clip file written for that caption
clips_paths = {}

# Captions are numbered from 1 in both the dictionary keys and the file names
for i, caption in enumerate(captions, 1):
    start_time, end_time = find_sentence_times(audio_path, caption)

    if start_time is None:
        print(f"The sentence '{caption}' was not found in the audio.")
        continue

    output_path = f"output_clip_{i}.mp4"
    save_video_clip(video_path, start_time, end_time, output_path)
    clips_paths[f"caption_{i}"] = output_path
    print(f"The sentence '{caption}' starts at {start_time} seconds and ends at {end_time} seconds. Saved to {output_path}")

# Show which clip file was produced for each caption that was found
print(clips_paths)


MoviePy - Writing audio in audio.wav


                                                                     

MoviePy - Done.
Moviepy - Building video output_clip_1.mp4.
MoviePy - Writing audio in output_clip_1TEMP_MPY_wvf_snd.mp3


                                                                 

MoviePy - Done.
Moviepy - Writing video output_clip_1.mp4



                                                            

Moviepy - Done !
Moviepy - video ready output_clip_1.mp4
The sentence 'I love you' starts at 0.0 seconds and ends at 2.0 seconds. Saved to output_clip_1.mp4
Moviepy - Building video output_clip_2.mp4.
MoviePy - Writing audio in output_clip_2TEMP_MPY_wvf_snd.mp3


                                                               

MoviePy - Done.
Moviepy - Writing video output_clip_2.mp4



                                                            

Moviepy - Done !
Moviepy - video ready output_clip_2.mp4
The sentence 'Happy Friday' starts at 38.0 seconds and ends at 40.0 seconds. Saved to output_clip_2.mp4
{'caption_1': 'output_clip_1.mp4', 'caption_2': 'output_clip_2.mp4'}
