In [1]:
# !pip install yt-dlp

In [2]:
# Download Video

In [1]:
from yt_dlp import YoutubeDL
import os

def download_youtube_audio(url, output_path=".", is_playlist=False, cookiefile="cookies/yt_cookies.txt"):
    """
    Download the audio of a YouTube video or playlist and convert it to MP3 (320kbps).

    Args:
        url (str): YouTube video or playlist URL.
        output_path (str): For a playlist, the directory that receives one
            ``<playlist_index>.mp3`` per entry. For a single video, the target
            file path, with or without a trailing ``.mp3``.
        is_playlist (bool): Set True if ``url`` is a playlist.
        cookiefile (str | None): Cookie file passed to yt-dlp. Pass None (or an
            empty string) to run without a cookie file.

    Returns:
        str: For a single video, the path of the resulting ``.mp3`` file.
             For a playlist, the absolute path of the output directory.

    Raises:
        RuntimeError: If preparing the output location, downloading, or
            converting fails. The original exception is attached as the cause.
    """
    try:
        if is_playlist:
            os.makedirs(output_path, exist_ok=True)
            outtmpl = os.path.join(output_path, '%(playlist_index)s.%(ext)s')
            result_path = os.path.abspath(output_path)
        else:
            # A single video needs a file path; a directory (e.g. the default ".")
            # cannot be turned into a meaningful file name.
            if os.path.isdir(output_path):
                raise ValueError(
                    f"output_path must be a file path for a single video, got directory: {output_path!r}"
                )

            # os.makedirs('') raises, so only create a parent when there is one.
            parent_dir = os.path.dirname(output_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            if output_path.endswith('.mp3'):
                outtmpl = output_path
                result_path = output_path
            else:
                # yt-dlp fills in %(ext)s; after the MP3 post-processor the
                # file on disk ends in ".mp3", which is what we report back.
                outtmpl = output_path + '.%(ext)s'
                result_path = output_path + '.mp3'

        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '320',
            }],
            'outtmpl': outtmpl,
            'noplaylist': not is_playlist,
        }
        if cookiefile:
            ydl_opts['cookiefile'] = cookiefile

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        return result_path

    except Exception as e:
        # Keep the original exception chained so the real traceback is not lost.
        raise RuntimeError(f"Error downloading/converting: {e}") from e

In [62]:
# from yt_dlp import YoutubeDL

# def get_all_video_links(channel_url):
#     """
#     Fetch all video URLs from a YouTube channel using yt-dlp.
    
#     Args:
#         channel_url (str): URL of the YouTube channel (e.g., https://www.youtube.com/@ChannelName or /c/ or /channel/ID).
    
#     Returns:
#         list[str]: List of video URLs.
#     """
#     # try:
#     ydl_opts = {
#         'cookiefile': 'cookies/yt_cookies.txt',
#         'quiet': True,
#         'extract_flat': True,  # Do not download, just get metadata
#         'force_generic_extractor': False,
#         'skip_download': True,
#     }

#     with YoutubeDL(ydl_opts) as ydl:
#         result = ydl.extract_info(channel_url, download=False)
#         if 'entries' in result:
#             return [entry['url'] if entry['url'].startswith("http") else f"https://www.youtube.com/watch?v={entry['id']}" for entry in result['entries']]
#         else:
#             return []

#     # except Exception as e:
#     #     print(f"Error fetching video links: {e}")
#     #     return []


In [2]:
!ls cookies

cookies.txt  yt_cookies.txt


In [3]:
import random
import string
import time

def generate_random_filename(extension=".wav", length=8):
    """
    Build a pseudo-random file stem of the form ``<random>_<unix_timestamp>``.

    Args:
        extension (str): Currently unused; nothing is appended to the result.
        length (int): Number of random characters (lowercase letters and digits).

    Returns:
        str: The stem without any extension, e.g. ``"bs65mdqf_1743316334"``.
    """
    alphabet = string.ascii_lowercase + string.digits
    random_part = "".join(random.choices(alphabet, k=length))
    created_at = int(time.time())
    return "_".join((random_part, str(created_at)))

In [4]:
# Download one video's audio into downloads/ under a freshly generated random stem
mp3_file = download_youtube_audio(
    "https://www.youtube.com/watch?v=e7hwJrMpz_U",
    output_path=f"downloads/{generate_random_filename()}",
    is_playlist=False,
)

[youtube] Extracting URL: https://www.youtube.com/watch?v=e7hwJrMpz_U
[youtube] e7hwJrMpz_U: Downloading webpage
[youtube] e7hwJrMpz_U: Downloading tv client config
[youtube] e7hwJrMpz_U: Downloading player 20830619
[youtube] e7hwJrMpz_U: Downloading tv player API JSON
[info] e7hwJrMpz_U: Downloading 1 format(s): 251
[download] Destination: downloads/bs65mdqf_1743316334.webm
[download] 100% of   27.42MiB in 00:00:00 at 48.06MiB/s    
[ExtractAudio] Destination: downloads/bs65mdqf_1743316334.mp3
Deleting original file downloads/bs65mdqf_1743316334.webm (pass -k to keep)


In [5]:
# !pip install demucs

In [5]:
!ls downloads

bs65mdqf_1743316334.mp3  hhr01zvj_1743315022.mp3


In [4]:
# !demucs --model=htdemucs --two-stems=vocals 

In [10]:
# !demucs --list

In [11]:
!pip install pydub

Collecting pydub
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
[0m

In [12]:
from pydub import AudioSegment

# Load the downloaded MP3 (file name comes from an earlier download run)
audio = AudioSegment.from_file("downloads/bs65mdqf_1743316334.mp3")

# The slice bounds are expressed in milliseconds
start_time, end_time = 60 * 1000, 120 * 1000   # 1 minute .. 2 minutes

# Cut the window out of the track and write it to disk as MP3
segment = audio[start_time:end_time]
segment_path = "segment_1min_to_2min.mp3"
segment.export(segment_path, format="mp3")

print(f"Segment saved as {segment_path}")

Segment saved as segment_1min_to_2min.mp3


In [16]:
!demucs -n mdx_extra_q -d cuda \
  --two-stems=vocals \
  -o ./clean_vocals \
  --mp3 \
  segment_1min_to_2min.mp3

Downloading: "https://dl.fbaipublicfiles.com/demucs/mdx_final/464b36d7-e5a9386e.th" to /root/.cache/torch/hub/checkpoints/464b36d7-e5a9386e.th
100%|███████████████████████████████████████| 37.1M/37.1M [00:00<00:00, 107MB/s]
Downloading: "https://dl.fbaipublicfiles.com/demucs/mdx_final/14fc6a69-a89dd0ee.th" to /root/.cache/torch/hub/checkpoints/14fc6a69-a89dd0ee.th
100%|███████████████████████████████████████| 36.7M/36.7M [00:00<00:00, 108MB/s]
Downloading: "https://dl.fbaipublicfiles.com/demucs/mdx_final/7fd6ef75-a905dd85.th" to /root/.cache/torch/hub/checkpoints/7fd6ef75-a905dd85.th
100%|███████████████████████████████████████| 37.6M/37.6M [00:00<00:00, 107MB/s]
Selected model is a bag of 4 models. You will see that many progress bars per track.
Separated tracks will be stored in /root/tts_datamaker/clean_vocals/mdx_extra_q
Separating track segment_1min_to_2min.mp3
100%|██████████████████████████████████████████████████████████████████████████| 66.0/66.0 [00:00<00:00, 71.44seconds/s]


In [15]:
# !pip install diffq

In [19]:
# !pip install deepfilternet


In [23]:
!ls clean_vocals/mdx_extra_q/segment_1min_to_2min/vocals.mp3

clean_vocals/mdx_extra_q/segment_1min_to_2min/vocals.mp3


In [24]:
import os
import librosa
import soundfile as sf

def convert_mp3_to_16k_wav(input_mp3_path: str, output_dir: str, output_filename: str = None):
    """
    Load an MP3, resample it to 16 kHz and write it out as ``<name>_16k.wav``.

    Args:
        input_mp3_path (str): Path of the MP3 file to convert.
        output_dir (str): Destination directory; created if it does not exist.
        output_filename (str, optional): Stem for the output file (no extension).
                                         Defaults to the input file's stem.
    Returns:
        str: Path of the WAV file that was written.
    """
    # Make sure the destination exists before anything else
    os.makedirs(output_dir, exist_ok=True)

    # Fall back to the input's own stem when no custom name was given
    stem = output_filename
    if stem is None:
        source_name = os.path.basename(input_mp3_path)
        stem, _ = os.path.splitext(source_name)

    output_path = os.path.join(output_dir, f"{stem}_16k.wav")

    # librosa resamples while loading; the returned rate is not needed afterwards
    samples, _ = librosa.load(input_mp3_path, sr=16000)
    sf.write(output_path, samples, samplerate=16000)

    print(f"✅ Converted and saved: {output_path}")
    return output_path


In [25]:
# Convert the separated vocal stem to a 16 kHz WAV under clean_waves/
convert_mp3_to_16k_wav(
    'clean_vocals/mdx_extra_q/segment_1min_to_2min/vocals.mp3',
    'clean_waves',
)

✅ Converted and saved: clean_waves/vocals_16k.wav


'clean_waves/vocals_16k.wav'

In [6]:
# !deepFilter -i clean_waves/vocals_16k.wav -o clean_final.wav


In [5]:
# !deepFilter clean_waves/vocals_16k.wav --output-dir filter_waves

In [32]:
import os
import subprocess

def run_deepfilter_with_output_dir(input_wav_path: str, output_dir: str):
    """
    Run the DeepFilterNet CLI (``deepFilter``) on a WAV file and return the enhanced file's path.

    The output file name is chosen by the CLI, not by this function, so the
    result is located by comparing the files in ``output_dir`` whose names start
    with the input's stem before and after the run. If no such file can be
    found, ``<stem>_enhanced.wav`` is returned as a fallback and a warning is
    printed.

    Args:
        input_wav_path (str): Path to the input WAV file.
        output_dir (str): Directory to store the enhanced WAV output (created if missing).

    Returns:
        str: Path to the enhanced WAV file.

    Raises:
        subprocess.CalledProcessError: If the ``deepFilter`` command exits with a non-zero status.
    """
    os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(input_wav_path))[0]
    fallback_path = os.path.join(output_dir, f"{base_name}_enhanced.wav")
    input_real = os.path.realpath(input_wav_path)

    def _matching_outputs():
        """Map each candidate output path to a (mtime_ns, size) fingerprint."""
        found = {}
        for entry in os.listdir(output_dir):
            path = os.path.join(output_dir, entry)
            if not entry.startswith(base_name) or not os.path.isfile(path):
                continue
            # Never report the input itself when it lives in output_dir.
            if os.path.realpath(path) == input_real:
                continue
            info = os.stat(path)
            found[path] = (info.st_mtime_ns, info.st_size)
        return found

    before = _matching_outputs()

    # Run deepFilter command with custom output directory
    command = [
        "deepFilter",
        input_wav_path,
        "--output-dir", output_dir
    ]

    print(f"🚀 Running DeepFilterNet on: {input_wav_path}")
    subprocess.run(command, check=True)

    after = _matching_outputs()

    # Prefer files that are new or were rewritten by this run; otherwise
    # consider whatever matching files already exist.
    produced = [path for path, fingerprint in after.items() if before.get(path) != fingerprint]
    candidates = produced or list(after)

    if fallback_path in candidates:
        output_path = fallback_path
    elif candidates:
        # Most recently modified match; path breaks ties deterministically.
        output_path = max(candidates, key=lambda path: (after[path][0], path))
    else:
        print(f"⚠️ No output starting with '{base_name}' found in: {output_dir}")
        output_path = fallback_path

    print(f"✅ Saved enhanced file to: {output_path}")
    return output_path



In [45]:
from yt_dlp import YoutubeDL

def get_all_video_links(channel_url):
    """
    Fetch the flat (metadata-only) yt-dlp listing of a YouTube channel.

    Despite its name, this does not return a list of links: it returns the
    info dict produced by ``YoutubeDL.extract_info``. Pass that dict to
    ``extract_channel_video_link`` to obtain the video URLs.

    Args:
        channel_url (str): YouTube channel URL (e.g., https://www.youtube.com/@username).

    Returns:
        dict: The yt-dlp info dict for the channel, or an empty dict if the
            lookup failed or produced nothing. An empty dict (rather than a
            list) keeps the return type uniform, so callers can always use
            ``.get('entries')`` on the result.
    """
    ydl_opts = {
        'quiet': True,
        'extract_flat': True,       # Only extract metadata, no download
        'skip_download': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(channel_url, download=False)
    except Exception as e:
        # Best-effort lookup: report the problem and hand back an empty result.
        print(f"Error fetching video links: {e}")
        return {}

    return result or {}


In [51]:
# Channel to list, addressed by its channel ID
channel = "https://www.youtube.com/channel/UCyzvb3N0ChAnWeQhoD0McRg"
# NOTE: despite the name, this holds whatever get_all_video_links returns —
# the raw result of yt-dlp's extract_info (an empty container on failure),
# not a list of URL strings.
video_links = get_all_video_links(channel)



In [54]:
# Second name for the same object as `video_links` (plain assignment, no copy)
data = video_links

In [67]:
def extract_channel_video_link(channel_info):
    """
    Pull the video URLs out of a flat yt-dlp channel listing.

    Two layouts are understood:

    * nested: the first item of ``channel_info['entries']`` is itself a
      playlist with its own ``'entries'``; only that first playlist is read;
    * flat: the items of ``channel_info['entries']`` are the videos themselves.

    Only items whose ``'_type'`` is ``'url'`` and that carry a non-empty
    ``'url'`` are kept; anything else (including ``None`` placeholders) is
    skipped.

    Args:
        channel_info (dict): Info dict as returned by ``YoutubeDL.extract_info``.
            Any non-dict value (e.g. ``None`` or a list from a failed fetch)
            yields an empty list.

    Returns:
        list[str]: Video URLs in listing order; empty if nothing usable was found.
    """
    # A failed fetch may hand us a list/None instead of an info dict.
    if not isinstance(channel_info, dict):
        return []

    try:
        main_entries = list(channel_info.get('entries') or [])
        if not main_entries:
            return []

        first_entry = main_entries[0]
        if isinstance(first_entry, dict) and 'entries' in first_entry:
            # Nested layout: the first item is a playlist holding the videos.
            video_entries = first_entry.get('entries') or []
        else:
            # Flat layout: the top-level items are already the videos.
            video_entries = main_entries

        return [
            video['url']
            for video in video_entries
            if isinstance(video, dict) and video.get('_type') == 'url' and video.get('url')
        ]

    except (IndexError, KeyError, TypeError) as e:
        print(f"Error accessing data structure: {e}")
        return []


In [59]:
# len(vid_links)

In [66]:
# vid_links

In [61]:
import json


# Specify the name for the JSON file you want to create
output_filename = 'faisal.json'

# Build the URL list in this cell: no other cell in the notebook assigns a
# global `vid_links`, so on a fresh kernel the name would not exist.
vid_links = extract_channel_video_link(video_links)

try:
    # 'with open(...)' ensures the file is properly closed afterward
    with open(output_filename, 'w', encoding='utf-8') as f:
        # indent=4 keeps the file human-readable; ensure_ascii=False writes
        # non-ASCII characters as-is instead of \u escapes.
        json.dump(vid_links, f, indent=4, ensure_ascii=False)

except (OSError, TypeError, ValueError) as e:
    # OSError: the file could not be written.
    # TypeError / ValueError: the data could not be serialized as JSON.
    print(f"An error occurred: {e}")

else:
    print(f"Successfully saved the list to '{output_filename}'")


Successfully saved the list to 'faisal.json'
