<a href="https://colab.research.google.com/github/sakaars/Data-Analytics/blob/main/Join%20Video%20using%20Transcript_By_Sakaar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Step 1: Install Necessary Libraries
# The leading "!" runs each line as a shell command from the notebook cell.
# pip pulls in the Python packages imported below; apt-get provides the
# ffmpeg command-line binary that extract_video_segment() invokes.
!pip install yt-dlp openai-whisper moviepy fuzzywuzzy
!apt-get install -y ffmpeg

# Step 2: Mount Google Drive (Optional: Save Outputs Persistently)
# main() copies the final video under /content/drive when the user opts in,
# so the mount point used here must match that path.
from google.colab import drive
drive.mount('/content/drive')

# Step 3: Import Required Libraries
import os
import subprocess
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips
import whisper
from fuzzywuzzy import fuzz
from google.colab import files

# Step 4: Define Helper Functions
def download_video(youtube_url, output_path="video.mp4"):
    """
    Fetch a video with the yt-dlp command-line tool and store it locally.

    Args:
        youtube_url: URL handed to yt-dlp as the download target.
        output_path: Destination passed to yt-dlp's ``--output`` option.

    Raises:
        subprocess.CalledProcessError: Re-raised (after printing a message)
            when yt-dlp exits with a non-zero status.
    """
    # Arguments are passed as a list, so no shell is involved.
    yt_dlp_args = ["yt-dlp", "--format", "mp4", "--output", output_path, youtube_url]
    try:
        subprocess.run(yt_dlp_args, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error downloading video: {err}")
        raise
    else:
        print(f"Video downloaded successfully: {output_path}")

def transcribe_audio(audio_path):
    """
    Transcribe a media file with Whisper's "base" model.

    Args:
        audio_path: Path of the file handed to ``model.transcribe``.

    Returns:
        A ``(text, segments)`` tuple taken from the "text" and "segments"
        entries of the Whisper result.

    Raises:
        Exception: Any error from loading the model or transcribing is
            printed and then re-raised unchanged.
    """
    try:
        transcription = whisper.load_model("base").transcribe(audio_path)
        print("Transcription completed.")
        full_text = transcription["text"]
        timed_segments = transcription["segments"]
    except Exception as err:
        print(f"Error transcribing audio: {err}")
        raise
    return full_text, timed_segments

def extract_video_segment(input_video, start_time, end_time, output_video):
    """
    Extract a specific segment from a video using the ffmpeg command line.

    Args:
        input_video: Path of the source video.
        start_time: Segment start, passed to ffmpeg's ``-ss`` option.
        end_time: Segment end, passed to ffmpeg's ``-to`` option.
        output_video: Path the extracted segment is written to. An existing
            file at this path is overwritten.

    Raises:
        subprocess.CalledProcessError: Re-raised (after printing a message)
            when ffmpeg exits with a non-zero status.
    """
    command = [
        "ffmpeg",
        # Overwrite without asking: otherwise ffmpeg requests confirmation
        # when output_video already exists (e.g. when the notebook cell is
        # re-run), which a non-interactive subprocess cannot answer.
        "-y",
        "-i", input_video,
        "-ss", str(start_time),
        "-to", str(end_time),
        # Copy the streams as-is instead of re-encoding them.
        "-c", "copy",
        output_video,
    ]
    try:
        subprocess.run(command, check=True)
        print(f"Extracted segment saved to {output_video}")
    except subprocess.CalledProcessError as e:
        print(f"Error extracting video segment: {e}")
        raise

def fuzzy_find_text_segment(segments, search_text):
    """
    Locate the transcript segment whose text is most similar to search_text.

    Each segment's "text" is compared case-insensitively with ``fuzz.ratio``.
    The first segment with the highest score wins, but only if that score is
    strictly above 70.

    Args:
        segments: Iterable of dicts with "text", "start" and "end" keys.
        search_text: The text to look for.

    Returns:
        ``(start, end, text)`` of the winning segment, or
        ``(None, None, None)`` when nothing scores above the threshold.
    """
    scored = [
        (fuzz.ratio(search_text.lower(), candidate["text"].lower()), candidate)
        for candidate in segments
    ]
    if not scored:
        return None, None, None

    # max() keeps the earliest entry among equal scores, so ties resolve to
    # the first such segment in transcript order.
    top_score, winner = max(scored, key=lambda pair: pair[0])
    if top_score <= 70:  # 70% match threshold
        return None, None, None
    return winner["start"], winner["end"], winner["text"]

def concatenate_clips(segment_files, output_file="final_video.mp4"):
    """
    Concatenate selected video segments into a final video.

    Args:
        segment_files: Paths of the segment videos, in playback order.
        output_file: Path the joined video is written to (libx264 codec).

    Raises:
        Exception: Any error while opening, joining or writing the clips is
            printed and then re-raised unchanged.
    """
    clips = []
    final_clip = None
    try:
        # Open clips one at a time so that, if a later file fails to open,
        # the ones already opened are still tracked and closed below.
        for segment in segment_files:
            clips.append(VideoFileClip(segment))
        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip.write_videofile(output_file, codec="libx264")
        print(f"Final video saved: {output_file}")
    except Exception as e:
        print(f"Error during concatenation: {e}")
        raise
    finally:
        # Release the clips' underlying readers whether or not writing
        # succeeded; previously they were never closed.
        if final_clip is not None:
            final_clip.close()
        for clip in clips:
            clip.close()

# Step 5: Main Workflow
def _ask_video_count():
    """Prompt until the user enters a whole number of videos to process."""
    while True:
        answer = input("How many videos do you want to process? ")
        try:
            return int(answer)
        except ValueError:
            print("Invalid input. Please try again.")


def _collect_videos(num_videos):
    """Prompt for each video (upload or YouTube URL) and return the local paths."""
    video_files = []
    # A while loop keyed on the number of collected videos lets an invalid
    # answer re-prompt for the same video. Decrementing the loop variable of
    # ``for i in range(...)`` has no effect on the next iteration.
    while len(video_files) < num_videos:
        number = len(video_files) + 1
        print(f"\nVideo {number}: Do you want to upload a file or use a YouTube URL?")
        choice = input("Type 'upload' to upload a file or 'url' for a YouTube link: ").strip().lower()

        if choice == 'upload':
            print(f"Upload video file {number}")
            uploaded = files.upload()
            if not uploaded:
                # Nothing came back (e.g. the upload was cancelled).
                print("No file was uploaded. Please try again.")
                continue
            uploaded_file = next(iter(uploaded))  # Get the first uploaded file
            video_path = f"/content/videos/{uploaded_file}"
            os.rename(uploaded_file, video_path)  # Move file to videos folder
            print(f"File uploaded and saved as {video_path}")
        elif choice == 'url':
            youtube_url = input(f"Enter the YouTube URL for video {number}: ").strip()
            video_path = f"/content/videos/video_{number}.mp4"
            print(f"\nDownloading video {number}...")
            download_video(youtube_url, video_path)
        else:
            print("Invalid input. Please try again.")
            continue

        video_files.append(video_path)
    return video_files


def _extract_selected_segments(number, video_file, segments_data):
    """Repeatedly ask for text to find in one video; return the extracted segment paths."""
    extracted = []
    while True:
        search_text = input(f"Enter the text you want to extract from video {number} (leave empty to stop): ")
        if not search_text.strip():
            break

        # Find segment using fuzzy matching
        start_time, end_time, matched_text = fuzzy_find_text_segment(segments_data, search_text)
        if start_time is None or end_time is None:
            print(f"Could not find a good match for the specified text in video {number}. Skipping this portion.")
            continue

        print(f"Found match: {matched_text}")
        print(f"Start Time: {start_time}s, End Time: {end_time}s")

        # Confirm extraction
        confirm = input("Do you want to extract this segment? (y/n): ")
        if confirm.lower() != 'y':
            continue

        # Build a filesystem-safe, unique name: the raw search text may
        # contain "/" or spaces, and two searches can share their first
        # five characters, so sanitize it and add a running counter.
        label = "".join(ch if ch.isalnum() else "_" for ch in search_text[:5])
        segment_file = f"/content/segments/segment_{number}_{len(extracted) + 1}_{label}.mp4"
        # Remove a leftover file from an earlier run so the extraction does
        # not stumble over an existing output path.
        if os.path.exists(segment_file):
            os.remove(segment_file)
        extract_video_segment(video_file, start_time, end_time, segment_file)
        extracted.append(segment_file)
    return extracted


# Step 5: Main Workflow
def main():
    """
    Run the interactive workflow: gather videos, pick segments, join them.

    1. Ask how many videos to process and obtain each one, either as an
       upload or as a YouTube download.
    2. Transcribe each video, print the transcript, and let the user pick
       segments by typing text that is fuzzy-matched against the transcript.
    3. Concatenate every extracted segment into /content/final_video.mp4 and
       optionally copy the result to the mounted Google Drive.

    Errors raised by the download, transcription, extraction and
    concatenation helpers propagate to the caller.
    """
    # Create folders for intermediate files
    os.makedirs("/content/videos", exist_ok=True)
    os.makedirs("/content/segments", exist_ok=True)

    # Part 1: Video Input Selection
    video_files = _collect_videos(_ask_video_count())

    # Part 2: Process Each Video
    segments = []
    for i, video_file in enumerate(video_files):
        print(f"\nProcessing video {i + 1}...")
        # Generate transcript
        print(f"\nGenerating transcript for video {i + 1}...")
        transcript, segments_data = transcribe_audio(video_file)
        print(f"Transcript for video {i + 1}:\n{transcript}\n")

        # Extract segments based on user input
        segments.extend(_extract_selected_segments(i + 1, video_file, segments_data))

    # Part 3: Concatenate All Selected Segments
    print("\nConcatenating selected video segments...")
    if not segments:
        print("No segments selected for concatenation.")
        return

    final_video_path = "/content/final_video.mp4"
    concatenate_clips(segments, final_video_path)

    # Optional: Save to Google Drive
    save_to_drive = input("Do you want to save the final video to Google Drive? (y/n): ")
    if save_to_drive.lower() == 'y':
        from shutil import copy2
        copy2(final_video_path, "/content/drive/My Drive/final_video.mp4")
        print("Final video saved to Google Drive.")

# Run the main workflow
# Executing this cell starts the interactive session immediately: main()
# prompts for its inputs via input() and Colab's upload widget.
main()


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
How many videos do you want to process? 1

Video 1: Do you want to upload a file or use a YouTube URL?
Type 'upload' to upload a file or 'url' for a YouTube link: url
Enter the YouTube URL for video 1: https://youtu.be/WLQ6HyFbfKU?si=RRgPG9a4P5MOg3IM

Downloading video 1...
Video downloaded successfully: /content/videos/video_1.mp4

Processing video 1...

Generating transcript for video 1...


  checkpoint = torch.load(fp, map_location=device)



Transcription completed.
Transcript for video 1:
 Hey, this is Mr. B from Lamborghini for me and holy s**t, I'm at Montreal. Beautiful, beautiful cars all over the place. This is heaven for all Lamborghini owners. Look at that. Look at how beautiful this is. And there's a Lamborghini Veneno. Yeah, someone's out there calling their... ...if your kuntages... My goodness, this is just a toy store here. Special thanks to Lamborghini Montreal. Good invite to check out the place being in Canada for the first time. Oh my goodness. That is crazy beautiful. Look at that. I think this is the Ascenza. Yeah, I think this is the Ascenza. So rare. But not as rare as this. De Veneno. Here we have here the Lamborghini Veneno. This is just crazy. One of three and I'm seeing one in person for the first time. Oh god, it's so gorgeous. I can't believe it. One of three and it's right here in front of me. It's just an honor seeing this in person. God. Beautiful design. Timeless. Timeless, timeless, timeless



MoviePy - Done.
Moviepy - Writing video /content/final_video.mp4





Moviepy - Done !
Moviepy - video ready /content/final_video.mp4
Final video saved: /content/final_video.mp4
Do you want to save the final video to Google Drive? (y/n): y
Final video saved to Google Drive.
