<a href="https://colab.research.google.com/github/nAkshat2103/ML-Assignment/blob/main/mashupp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install yt-dlp pydub

Collecting yt-dlp
  Downloading yt_dlp-2024.12.13-py3-none-any.whl.metadata (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.1/172.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading yt_dlp-2024.12.13-py3-none-any.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, yt-dlp
Successfully installed pydub-0.25.1 yt-dlp-2024.12.13


In [3]:
import os
import random
import yt_dlp
from pydub import AudioSegment
from google.colab import files

In [4]:
# Where downloaded/converted audio is kept, and where the finished mashup goes.
AUDIO_DIR = '/content/audios'
OUTPUT_DIR = '/content/output'

# Make sure both directories exist; exist_ok keeps re-running this cell harmless.
for _workdir in (AUDIO_DIR, OUTPUT_DIR):
    os.makedirs(_workdir, exist_ok=True)


In [5]:
def download_audio(video_url, index):
    """Download the audio stream of a single YouTube video with yt-dlp.

    The file is written to ``{AUDIO_DIR}/audio_{index}.webm``.

    Args:
        video_url: URL of the video to download.
        index: Number used in the output file name and in log messages.

    Returns:
        The path of the downloaded file, or None if anything went wrong
        (the error is printed instead of being raised).
    """
    try:
        print(f"Downloading audio for video {index}: {video_url}")
        output_path = f'{AUDIO_DIR}/audio_{index}.webm'
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': output_path,
            'noplaylist': True,
            # Output names (audio_1, audio_2, ...) are reused on every run.
            # By default yt-dlp keeps an existing file and skips the download,
            # so a later run for a different search would silently reuse the
            # previous run's audio. Force a fresh download instead.
            'overwrites': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
        return output_path
    except Exception as e:
        print(f"Error downloading video {index}: {e}")
        return None

In [6]:
def convert_to_wav(input_path):
    """Convert an audio file to WAV using pydub.

    The WAV file is written next to the input, using the input's path with
    its final extension replaced by ``.wav``.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The path of the WAV file, or None if loading or exporting failed
        (the error is printed instead of being raised).
    """
    try:
        audio = AudioSegment.from_file(input_path)
        # Swap only the trailing extension. str.replace('.webm', '.wav') would
        # also rewrite '.webm' inside directory names, and for an input with
        # any other extension it would leave the path unchanged, so the export
        # below would overwrite the source file.
        wav_path = os.path.splitext(input_path)[0] + '.wav'
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception as e:
        print(f"Error converting {input_path} to WAV: {e}")
        return None

In [7]:
def extract_random_clip(input_path, duration):
    """Cut a randomly positioned clip out of an audio file.

    Args:
        input_path: Path of the audio file to load with pydub.
        duration: Clip length in seconds; fractional values are accepted and
            truncated to whole milliseconds.

    Returns:
        The sliced audio segment, or None if loading or slicing failed (the
        error is printed instead of being raised). When the audio is shorter
        than ``duration`` the clip starts at 0 and the slice simply runs to
        the end of the audio.
    """
    try:
        audio = AudioSegment.from_file(input_path)
        # Work in integer milliseconds. random.randint rejects float bounds on
        # Python 3.12+, which previously made every fractional duration fail
        # inside this try block and return None.
        clip_ms = int(duration * 1000)
        latest_start = max(0, len(audio) - clip_ms)
        start_time = random.randint(0, latest_start)
        return audio[start_time:start_time + clip_ms]
    except Exception as e:
        print(f"Error extracting clip from {input_path}: {e}")
        return None

In [8]:
def create_mashup(audio_paths, output_filename, clip_duration=5):
    """Build one MP3 by joining a random clip taken from each audio file.

    Args:
        audio_paths: Paths of the audio files to sample from, in output order.
        output_filename: File name of the mashup inside OUTPUT_DIR.
        clip_duration: Length in seconds of the clip taken from each file.

    Returns:
        The path of the exported mashup, or None when no clip could be
        extracted or an error occurred (errors are printed, not raised).
    """
    try:
        combined = AudioSegment.silent(duration=0)
        for source_path in audio_paths:
            segment = extract_random_clip(source_path, clip_duration)
            if not segment:
                # Extraction failed or produced nothing; skip this file.
                continue
            combined += segment

        if len(combined) == 0:
            print("No clips to create a mashup.")
            return None

        destination = os.path.join(OUTPUT_DIR, output_filename)
        combined.export(destination, format="mp3")
        print(f"Mashup created and saved to {destination}")
        return destination
    except Exception as error:
        print(f"Error creating mashup: {error}")
        return None


In [9]:
def search_videos(singer_name, num_videos):
    """Look up YouTube videos for a singer and return their page URLs.

    Args:
        singer_name: Name of the singer; it is prefixed to the search phrase.
        num_videos: How many search results to request and return at most.

    Returns:
        A list of 'webpage_url' values from the search results, or an empty
        list if the search failed (the error is printed, not raised).
    """
    try:
        options = {
            'quiet': True,
            'default_search': 'ytsearch',
            'format': 'bestaudio',
        }
        query = f"{singer_name} official audio OR official music OR official video"
        with yt_dlp.YoutubeDL(options) as searcher:
            # download=False: only metadata is fetched here, no media.
            info = searcher.extract_info(f"ytsearch{num_videos}:{query}", download=False)
            urls = []
            for entry in info['entries'][:num_videos]:
                urls.append(entry['webpage_url'])
            return urls
    except Exception as error:
        print(f"Error searching videos: {error}")
        return []

In [10]:
def main(singer_name, num_videos, audio_duration, output_filename):
    """Search, download, clip and join audio into a mashup, then offer it for download.

    Steps: search YouTube for the singer, download and convert each result to
    WAV, join one random clip per file into an MP3, and hand the file to
    Colab's ``files.download``. Videos that fail to download or convert are
    skipped.

    Args:
        singer_name: Singer to search for.
        num_videos: Number of videos to use; must be at least 10.
        audio_duration: Clip length per video in seconds; must be at least 5.
        output_filename: File name of the resulting mashup.

    Returns:
        None. Progress and errors are reported with print().
    """
    min_videos, min_duration = 10, 5
    if num_videos < min_videos or audio_duration < min_duration:
        # The check accepts exactly 10 videos, so the message says "at least"
        # (it previously claimed "greater than 10", contradicting the check).
        print(
            f"Number of videos must be at least {min_videos} and "
            f"audio duration must be at least {min_duration} seconds."
        )
        return

    try:
        video_urls = search_videos(singer_name, num_videos)
        if not video_urls:
            print("No videos found. Exiting.")
            return

        audio_paths = []
        for i, video_url in enumerate(video_urls):
            # File indices are 1-based to match the progress messages.
            downloaded_file = download_audio(video_url, i + 1)
            if downloaded_file:
                wav_path = convert_to_wav(downloaded_file)
                if wav_path:
                    audio_paths.append(wav_path)

        mashup_path = create_mashup(audio_paths, output_filename, clip_duration=audio_duration)
        if mashup_path:
            print("Mashup created successfully. Downloading...")
            files.download(mashup_path)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


In [11]:
# Run parameters: which singer to search for, how many videos to sample,
# how many seconds to take from each, and the name of the resulting file.
singer_name = "Sharry Maan"
num_videos = 15
audio_duration = 5
output_filename = "mashup.mp3"

main(
    singer_name=singer_name,
    num_videos=num_videos,
    audio_duration=audio_duration,
    output_filename=output_filename,
)

Downloading audio for video 1: https://www.youtube.com/watch?v=BJ-6hjVnwa0
[youtube] Extracting URL: https://www.youtube.com/watch?v=BJ-6hjVnwa0
[youtube] BJ-6hjVnwa0: Downloading webpage
[youtube] BJ-6hjVnwa0: Downloading ios player API JSON
[youtube] BJ-6hjVnwa0: Downloading mweb player API JSON
[youtube] BJ-6hjVnwa0: Downloading m3u8 information
[info] BJ-6hjVnwa0: Downloading 1 format(s): 251
[download] Destination: /content/audios/audio_1.webm
[download] 100% of   21.85MiB in 00:00:01 at 13.21MiB/s  
Downloading audio for video 2: https://www.youtube.com/watch?v=tR6XkXy-gjY
[youtube] Extracting URL: https://www.youtube.com/watch?v=tR6XkXy-gjY
[youtube] tR6XkXy-gjY: Downloading webpage
[youtube] tR6XkXy-gjY: Downloading ios player API JSON
[youtube] tR6XkXy-gjY: Downloading mweb player API JSON
[youtube] tR6XkXy-gjY: Downloading m3u8 information
[info] tR6XkXy-gjY: Downloading 1 format(s): 251
[download] Destination: /content/audios/audio_2.webm
[download] 100% of    3.50MiB in 00:

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>