In [None]:
#### Packages
import os
import sys
import warnings

import numpy as np

#### Spotify packages and variables
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

### YouTube download packages
import yt_dlp
from pytube import Playlist

# Spotify API credentials.
# Secrets must never live in the notebook source: they are read from the
# environment instead. Any credentials that were previously committed in this
# cell should be treated as leaked and rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        "Spotify credentials are missing: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables before running this notebook."
    )

# Authenticate with Spotify (client-credentials flow, no user login involved)
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Audio settings passed to download_music() by the download cells below.
# NOTE(review): these end up as yt-dlp's 'preferredcodec' / 'preferredquality';
# confirm that 'webm' and '128k' are values the FFmpegExtractAudio post-processor accepts.
AUDIO_FORMAT = 'webm'
AUDIO_QUALITY = '128k'

In [None]:
### Spotify Functions
def download_titles_and_artists_from_spotify(playlist_url, output_filename, filter=None):
    """Export a Spotify playlist as ``"{title} by {artists}"`` lines in a text file.

    Parameters
    ----------
    playlist_url : str
        Playlist URL; the playlist ID is the last path segment. A query string
        and a trailing slash are ignored.
    output_filename : str
        Path of the UTF-8 text file to write (one track per line, no trailing newline).
    filter : index array, boolean mask or None, optional
        Selection applied to the collected lines with NumPy indexing semantics
        (0-based). ``None`` keeps every track. The name shadows the builtin but
        is kept because callers pass it by keyword.

    Side effects
    ------------
    Writes ``output_filename``, deletes a ``.cache`` file in the working
    directory if one exists, and prints a summary; when ``filter`` is given,
    the tracks that were left out are listed with their 0-based index.
    """
    # Strip the query string first so a '/' inside it cannot be mistaken for a
    # path separator, then drop a trailing slash before taking the last segment.
    playlist_id = playlist_url.split("?")[0].rstrip("/").split("/")[-1]

    page_size = 100
    offset = 0
    lines = []

    # Page through the playlist until the API returns an empty page.
    while True:
        results = sp.playlist_items(playlist_id, limit=page_size, offset=offset)
        items = results['items']
        if not items:
            break
        for item in items:
            track = item['track']
            # Entries without track data are skipped, so indices in `filter`
            # refer to positions in the collected list, not in the raw playlist.
            if track:
                title = track.get('name') or 'Unknown Title'
                artists = ", ".join(
                    artist.get('name') or 'Unknown Artist'
                    for artist in (track.get('artists') or [])
                )
                lines.append(f"{title} by {artists}")
        offset += page_size

    positions = np.arange(len(lines))
    if filter is not None:
        # Resolve the selection to concrete non-negative indices so that
        # negative indices and boolean masks are reported correctly below.
        kept_positions = np.atleast_1d(positions[filter])
    else:
        kept_positions = positions
    filtered_lines = [lines[index] for index in kept_positions]

    # Write to a text file
    with open(output_filename, "w", encoding="utf-8") as out_file:
        out_file.write("\n".join(filtered_lines))

    # Remove Spotify token cache
    if os.path.exists('.cache'):
        os.remove('.cache')

    print(f"Data for {len(filtered_lines)} tracks exported successfully to {output_filename}")
    if filter is not None:
        print('Filtered songs:')
        for index in np.setdiff1d(positions, kept_positions):
            print(f'-- No.{index} - {lines[index]}')



In [None]:
### YouTube Functions
def parse_music_file(filepath):
    """Parse a text file of ``{title} by {artist}`` lines into song records.

    Each non-empty line is split on its LAST ``' by '`` separator: song titles
    contain " by " far more often than artist lists do (e.g.
    ``"Stand by Me by Ben E. King"``), so splitting from the right keeps such
    titles intact.

    Parameters
    ----------
    filepath : str
        Path to a UTF-8 text file with one song per line.

    Returns
    -------
    list of dict
        One ``{'title': str, 'artist': str, 'line': int}`` entry per valid
        line, where ``line`` is the 1-based line number in the file.

    Notes
    -----
    Malformed lines are reported with a printed warning and skipped. If the
    file is missing or cannot be read, an error is printed and ``sys.exit(1)``
    is called.
    """
    songs = []
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    continue  # Skip empty lines

                # rpartition returns an empty separator when ' by ' is absent.
                title, separator, artist = line.rpartition(' by ')
                if not separator:
                    print(f"Warning: Skipped line {line_number} due to unexpected format: '{line}'")
                    continue

                title = title.strip()
                artist = artist.strip()
                if title and artist:  # Ensure title and artist are not empty
                    songs.append({'title': title, 'artist': artist, 'line': line_number})
                else:
                    print(f"Warning: Skipped line {line_number} due to empty title or artist after parsing: '{line}'")
    except FileNotFoundError:
        print(f"Error: File not found at '{filepath}'")
        sys.exit(1)  # Exit if the file doesn't exist
    except Exception as e:
        print(f"Error reading file '{filepath}': {e}")
        sys.exit(1)  # Exit on other file reading errors

    return songs

def _safe_filename_part(text):
    """Replace path separators and characters that are illegal in file names with '_'."""
    unsafe_chars = '<>:"/\\|?*'
    cleaned = ''.join('_' if (ch in unsafe_chars or ord(ch) < 32) else ch for ch in text)
    return cleaned.strip()


def download_music(songs, output_dir='downloaded_music', audio_format='mp3', audio_quality='192', verbose=False):
    """Download audio for each song via a yt-dlp search and print a summary.

    Parameters
    ----------
    songs : list of dict
        Records with the keys ``'title'``, ``'artist'`` and ``'line'``
        (the shape produced by ``parse_music_file``).
    output_dir : str
        Directory that receives the audio files; created if missing.
    audio_format : str
        Passed to yt-dlp's FFmpegExtractAudio post-processor as
        ``preferredcodec`` (requires ffmpeg) and used as the expected file
        extension when checking for success.
    audio_quality : str
        Passed to the post-processor as ``preferredquality``.
    verbose : bool
        When True, progress and error messages are printed and yt-dlp's own
        output is enabled; when False, only the final summary is printed.

    Returns
    -------
    None

    Notes
    -----
    Files are named ``"{artist} - {title}.{ext}"``. Characters that cannot
    safely appear in a file name (path separators, ``?``, ``:`` ...) are
    replaced by ``_`` so every download lands directly in ``output_dir``, and
    ``%`` is escaped in the yt-dlp output template so it is not interpreted as
    a template field. A song counts as downloaded when the expected file
    exists after the attempt (or, for ``audio_format == 'webm'``, the same
    base name with an ``.opus`` extension).
    """
    warnings.filterwarnings("ignore", message="Support for Python version 3.8 has been deprecated")
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    if verbose:
        print(f"Downloads will be saved in: {os.path.abspath(output_dir)}")

    # Base yt-dlp options shared by every song; 'outtmpl' is set per song below.
    # See yt-dlp documentation for more options: https://github.com/yt-dlp/yt-dlp#embedding-yt-dlp
    ydl_opts = {
        'format': 'bestaudio/best',  # Choose best audio-only format
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',  # Requires ffmpeg
            'preferredcodec': audio_format,  # e.g., 'mp3', 'm4a', 'wav'
            'preferredquality': audio_quality,  # e.g., '192', '320' for mp3
        }],
        'noplaylist': True,  # Don't download playlists if a search result is a playlist
        # NOTE(review): disables TLS certificate verification; sometimes needed for HTTPS issues
        'nocheckcertificate': True,
        'ignoreerrors': True,  # Continue processing even if one download fails
        # 'ffmpeg_location': None,  # Set path like '/usr/local/bin/ffmpeg' if not in PATH
        'quiet': not verbose,        # Suppress yt-dlp standard messages if not verbose
        'no_warnings': not verbose,  # Suppress yt-dlp warnings if not verbose (use with caution)
        'verbose': verbose,          # Pass verbose flag to yt-dlp for its own debugging if needed
        'default_search': 'ytsearch1',  # Treat the query as a YouTube search, first hit only
        'http_headers': {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/115.0.0.0 Safari/537.36'
            )
        },
    }

    downloaded_count = 0
    failed_count = 0

    for song in songs:
        title = song['title']
        artist = song['artist']
        line_num = song['line']

        # Construct search query for yt-dlp (resolved through 'default_search')
        search_query = f"{artist} {title} audio lyrics"

        if verbose:
            print(f"\n[Line {line_num}] Searching for: '{title}' by '{artist}' ({search_query})")

        # Predictable, file-system-safe base name for this song.
        file_stem = f"{_safe_filename_part(artist)} - {_safe_filename_part(title)}"
        base_path = os.path.join(output_dir, file_stem)
        expected_file = f"{base_path}.{audio_format}"
        # Common alternative extension when webm audio is extracted
        alt_expected_file = f"{base_path}.opus"

        current_ydl_opts = dict(ydl_opts)
        # A literal '%' must be written as '%%' inside a yt-dlp output template.
        current_ydl_opts['outtmpl'] = base_path.replace('%', '%%') + ".%(ext)s"

        download_successful = False  # Flag to track success
        try:
            with yt_dlp.YoutubeDL(current_ydl_opts) as ydl:
                # The return code is ignored: with ignoreerrors=True it is unreliable,
                # so success is decided by checking for the output file instead.
                ydl.download([search_query])

                if os.path.exists(expected_file):
                    if verbose:
                        print(f"Success: File found at '{expected_file}'")
                    download_successful = True
                elif audio_format == 'webm' and os.path.exists(alt_expected_file):
                    if verbose:
                        print(f"Success: File found with .opus extension at '{alt_expected_file}'")
                    download_successful = True
                elif verbose:
                    print(f"Warning: Download command finished, but expected file '{expected_file}' not found.")

        except yt_dlp.utils.DownloadError as e:
            if verbose:
                print(f"Error (DownloadError) for '{title}' by '{artist}': {e}")
            download_successful = False
        except Exception as e:
            # Best effort: one failing song must not abort the whole batch.
            if verbose:
                print(f"An unexpected error occurred for '{title}' by '{artist}': {e}")
            download_successful = False

        # Update counts based on the file existence check or caught exceptions
        if download_successful:
            downloaded_count += 1
        else:
            failed_count += 1

    print(f"\n--- Download Summary ---")
    print(f"Successfully downloaded (or attempted): {downloaded_count}")
    print(f"Failed/Skipped due to errors: {failed_count}")
    print(f"Total processed lines: {len(songs)}")
    print(f"Check the '{output_dir}' folder.")
    

---
# Create a text file with titles and artists

In [None]:
# Salsa Con Rumba: export the playlist's "<title> by <artists>" lines to a text file
download_titles_and_artists_from_spotify(
    playlist_url="https://open.spotify.com/playlist/5Yn2jfdT5zx69wjbiduiUu?si=N0p4kyNuRyOHktil4nemMg&nd=1&dlsi=a74bfb1dec1c4890",
    output_filename='salsa_con_rumba.txt',
)

In [None]:
# Salsa Con Rumba 2: same export for the second playlist.
# The long share link is assembled from adjacent string literals.
download_titles_and_artists_from_spotify(
    playlist_url=(
        "https://open.spotify.com/playlist/0hxTgZ4X53jQtlnYzGam0V?si=sf-9iLuXQPeJC2dfUd0j"
        "CQ&pi=VkSMx9VGTH2vu&fbclid=IwY2xjawJk25FleHRuA2FlbQIxMAABHnlbqbgL4ImghzR1PhBqnFsu"
        "lnGLMt906Huo-FUZmkCsEl3wbfGKOfwH3ybz_aem_FFhL-xjZwtd06NB_pdfUYw&nd=1&dlsi=cc188d4899eb4532"
    ),
    output_filename='salsa_con_rumba_2.txt',
)

In [None]:
# Linear Salsa
### From this playlist, tracks 135 to 215 can go into the third category,
### except for these, which must be removed: 188, 181, 166, 145
# Track numbers above are 1-based; subtracting one turns them into 0-based indices.
# The selection gets its own name so the builtin `filter` is not shadowed
# for the rest of the kernel session.
linear_salsa_indices = np.setdiff1d(np.arange(135, 216), [145, 166, 181, 188]) - 1
download_titles_and_artists_from_spotify(
    "https://open.spotify.com/playlist/7Hn7dPB752mTiXDf22nD2r?si=lY5H2jO5RsKoSALnRExR_Q&nd=1&dlsi=9aeeb75ee6d741bf",
    output_filename='linear_salsa.txt',
    filter=linear_salsa_indices,
)

In [None]:
# Salsa Son: export the playlist's "<title> by <artists>" lines to a text file
download_titles_and_artists_from_spotify(
    playlist_url="https://open.spotify.com/playlist/00D8QYONxylwe4KKeNEo1N?si=sVlSA_mpTXuwac1ZwQnj0g&nd=1&dlsi=c6e7b6e39a654f5d",
    output_filename='son.txt',
)

---
### Download via YouTube playlist link

In [None]:
# Download every entry of a YouTube playlist straight into the "son" genre folder.
# No audio post-processing is configured here, so files keep whatever container
# yt-dlp selects for 'bestaudio/best'.
output_dir = "../genres/son"
os.makedirs(output_dir, exist_ok=True)

ydl_opts = dict(
    format='bestaudio/best',
    outtmpl=os.path.join(output_dir, '%(title)s.%(ext)s'),  # name files after the video title
    ignoreerrors=True,  # keep going when a single entry fails
    no_warnings=True,
    quiet=False,
    verbose=False,
    nocheckcertificate=True,  # NOTE(review): turns off TLS certificate verification
    geo_bypass=True,
    geo_bypass_country='US',
    http_headers={
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/114.0.0.0 Safari/537.36'
        ),
        'Referer': 'https://www.youtube.com/',
    },
    allow_unplayable_formats=True,
    compat_opts=['no-youtube-unavailable-videos'],
    # NOTE(review): 'update_to' looks like a command-line level setting;
    # confirm that YoutubeDL actually honours it when passed as an option.
    update_to='nightly',
)

playlist_url = "https://www.youtube.com/playlist?list=PLt1M8CP9cZ_xDuyCtLfBZFmCRCbyJwDqy"

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([playlist_url])

----
# Download songs from YouTube

In [None]:
# Salsa Con Rumba (first list): parse the exported titles, then fetch the audio
output_directory = '../genres/salsa_con_rumba'
music_list_file = 'salsa_con_rumba.txt'

songs_to_download = parse_music_file(music_list_file)
download_music(
    songs_to_download,
    output_dir=output_directory,
    audio_format=AUDIO_FORMAT,
    audio_quality=AUDIO_QUALITY,
)

# Recorded summary (presumably from an earlier run of this cell):
#   Successfully downloaded (or attempted): 62
#   Failed/Skipped due to errors: 4

In [None]:
# Salsa Con Rumba (second list): saved into the same genre folder as the first list
output_directory = '../genres/salsa_con_rumba'
music_list_file = 'salsa_con_rumba_2.txt'

songs_to_download = parse_music_file(music_list_file)
download_music(
    songs_to_download,
    output_dir=output_directory,
    audio_format=AUDIO_FORMAT,
    audio_quality=AUDIO_QUALITY,
)

# Recorded summary (presumably from an earlier run of this cell):
#   Successfully downloaded (or attempted): 17
#   Failed/Skipped due to errors: 2

In [None]:
# Linear Salsa: parse the exported titles, then fetch the audio
output_directory = '../genres/linear_salsa'
music_list_file = 'linear_salsa.txt'

songs_to_download = parse_music_file(music_list_file)
download_music(
    songs_to_download,
    output_dir=output_directory,
    audio_format=AUDIO_FORMAT,
    audio_quality=AUDIO_QUALITY,
)

# Recorded summary (presumably from an earlier run of this cell):
#   Successfully downloaded (or attempted): 74
#   Failed/Skipped due to errors: 3

In [None]:
# Son: parse the exported titles, then fetch the audio
output_directory = '../genres/son'
music_list_file = 'son.txt'

songs_to_download = parse_music_file(music_list_file)
download_music(
    songs_to_download,
    output_dir=output_directory,
    audio_format=AUDIO_FORMAT,
    audio_quality=AUDIO_QUALITY,
)

# Recorded summary (presumably from an earlier run of this cell):
#   Successfully downloaded (or attempted): 43
#   Failed/Skipped due to errors: 0