In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
import time

MAX_EMPTY_SCROLLS = 10    # stop after 10 scrolls without discovering new songs
SCROLL_PAUSE = 0.5        # wait after each scroll

def scrape_spotify_playlist(url):
    opts = Options()
    # opts.add_argument("--headless")  # uncomment if you don’t need to see the browser
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=opts)
    driver.get(url)
    time.sleep(5)  # allow initial load

    try:
        container = driver.find_element(By.CSS_SELECTOR, 'div[data-testid="playlist-tracklist"]')
    except NoSuchElementException:
        driver.quit()
        raise RuntimeError("Playlist container not found. Check login/URL.")

    # 1) Give focus to the container so PAGE_DOWN scrolls it
    actions = ActionChains(driver)
    actions.move_to_element(container).click().perform()
    time.sleep(0.5)

    collected = []            # list of "Song – Artist" in encounter order
    seen = set()              # to avoid duplicates
    empty_scrolls = 0

    # 2) Loop until we’ve scrolled enough times without finding new songs
    while empty_scrolls < MAX_EMPTY_SCROLLS:
        # a) Capture all currently visible rows *inside* the playlist container
        rows = container.find_elements(By.CSS_SELECTOR, '[data-testid="tracklist-row"]')
        new_found = False

        for row in rows:
            try:
                texts = row.find_elements(By.CSS_SELECTOR, 'div[data-encore-id="text"]')
                if len(texts) >= 2:
                    title = texts[0].text.strip()
                    artist = texts[1].text.strip()
                    key = f"{title} – {artist}"
                    if key not in seen:
                        seen.add(key)
                        collected.append((title, artist))
                        new_found = True
            except NoSuchElementException:
                continue

        # b) Decide if this scroll yielded new items
        if new_found:
            empty_scrolls = 0
        else:
            empty_scrolls += 1

        # c) Scroll down one PAGE_DOWN
        actions.send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(SCROLL_PAUSE)

    driver.quit()

    # Split into two lists before returning
    song_titles = [title for title, _ in collected]
    songs_with_artists = [f"{title} – {artist}" for title, artist in collected]
    return song_titles, songs_with_artists


In [3]:
url="https://open.spotify.com/playlist/4hOKQuZbraPDIfaGbM3lKI"
songs, songs_artist=scrape_spotify_playlist(url)
print(len(songs))

102


In [4]:
for i, t in enumerate(songs):
    print(f"{i+1}. {t}")

1. Despacito - Remix
2. I'm the One
3. Shape of You
4. HUMBLE.
5. Mask Off
6. Something Just Like This
7. Stay (with Alessia Cara)
8. It Ain't Me (with Selena Gomez)
9. Swalla (feat. Nicki Minaj & Ty Dolla $ign)
10. Malibu
11. There's Nothing Holdin' Me Back
12. That's What I Like
13. Symphony (feat. Zara Larsson)
14. Despacito (Featuring Daddy Yankee)
15. Attention
16. Passionfruit
17. Sign of the Times
18. XO TOUR Llif3
19. First Time
20. Felices los 4
21. Unforgettable
22. DNA.
23. Issues
24. Scared to Be Lonely
25. Galway Girl
26. Slide (feat. Frank Ocean & Migos)
27. Congratulations
28. No Promises (feat. Demi Lovato)
29. 1-800-273-8255
30. Thunder
31. SUBEME LA RADIO
32. Pretty Girl - Cheat Codes X CADE Remix
33. Me Rehúso
34. The Cure
35. I'm the One
36. iSpy (feat. Lil Yachty)
37. Good Life (with G-Eazy & Kehlani)
38. Now Or Never
39. Believer
40. Location
41. Rockabye (feat. Sean Paul & Anne-Marie)
42. Paris
43. Call On Me - Ryan Riback Extended Remix
44. Rollin (feat. Future 

In [5]:
import yt_dlp

def get_best_youtube_music_result(full_query, song_title, max_results=5):
    """
    full_query: The full search query with song and artist.
    song_title: The clean song title only (used to match against video title).
    """

    ydl_opts = {
        'quiet': True,
        'extract_flat': True,
        'skip_download': True,
        'force_generic_extractor': True,
        'noplaylist': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(f"ytsearch{max_results}:{full_query}", download=False)
            entries = info.get("entries", [])

            if not entries:
                return None

            song_title_lower = song_title.lower()

            # Filter out shorts and ensure song title is in video title
            valid = []
            for e in entries:
                url = e.get("url", "")
                title = e.get("title", "").lower()
                if "shorts/" in url:
                    continue
                if song_title_lower not in title:
                    continue
#                 if not any(kw in title for kw in ["official", "audio", "lyrics", "video"]):
#                     continue
                valid.append(e)

            if not valid:
                valid = [e for e in entries if song_title_lower in e.get("title", "").lower()]

            if not valid:
                valid = entries  # fallback to anything if nothing matches

            # Pick the video with highest view count
            valid_with_views = [v for v in valid if v.get("view_count")]
            if valid_with_views:
                best = max(valid_with_views, key=lambda x: x["view_count"])
            else:
                best = valid[0]

            return f"https://www.youtube.com/watch?v={best['id']}"
        except Exception as e:
            print(f"Error during search: {e}")
            return None


In [124]:
links=[]
for i in range(len(songs)):
    print(songs_artist[i])
    url=get_best_youtube_music_result(songs_artist[i], songs[i])
    print(i+1, url)
    if(i==10):
        break
    links.append(url)

Despacito - Remix – Luis Fonsi, Daddy Yankee, Justin Bieber
1 https://www.youtube.com/watch?v=kJQP7kiw5Fk
I'm the One – DJ Khaled, Justin Bieber, Quavo, Chance the Rapper, Lil Wayne
2 https://www.youtube.com/watch?v=weeI1G46q0o
Shape of You – Ed Sheeran
3 https://www.youtube.com/watch?v=JGwWNGJdvx8
HUMBLE. – Kendrick Lamar
4 https://www.youtube.com/watch?v=tvTRZJ-4EyI
Mask Off – Future
5 https://www.youtube.com/watch?v=xvZqHgFz51I
Something Just Like This – The Chainsmokers, Coldplay
6 https://www.youtube.com/watch?v=FM7MFYoylVs
Stay (with Alessia Cara) – Zedd, Alessia Cara
7 https://www.youtube.com/watch?v=h--P8HzYZ74
It Ain't Me (with Selena Gomez) – Kygo, Selena Gomez
8 https://www.youtube.com/watch?v=D5drYkLiLI8
Swalla (feat. Nicki Minaj & Ty Dolla $ign) – Jason Derulo, Nicki Minaj, Ty Dolla $ign
9 https://www.youtube.com/watch?v=NGLxoKOvzu4
Malibu – Miley Cyrus
10 https://www.youtube.com/watch?v=8j9zMok6two
There's Nothing Holdin' Me Back – Shawn Mendes
11 https://www.youtube.com/

In [15]:
url="https://open.spotify.com/playlist/72yaYysCp8xkoxwc639Sj0"
songs, songs_artist=scrape_spotify_playlist(url)
print(len(songs))


7


In [83]:
import os
import time

def wait_for_download(target_filename, crdownload_path, download_dir, timeout):
    final_path = os.path.join(download_dir, target_filename)
    crdownload_path = os.path.join(download_dir, crdownload_path)
    start = time.time()
    while time.time() - start < timeout:
        if os.path.isfile(final_path):
            return True
        elif os.path.isfile(crdownload_path):
            time.sleep(1)  # still downloading
        else:
            print("Using timeout, file not found")
            time.sleep(timeout)
    return False  # timeout


In [87]:
import os
import threading

sem = threading.Semaphore(1)
# 1) Configuration
download_dir = "C:\\Users\\HP\\Downloads"  # or any folder you want
print(download_dir)
timeout=50
TARGET_PAGE = "https://y2mate.nu/en-ynKX/"      # replace with your page URL
# BUTTON_SELECTOR = (By.ID, "submit")  # replace with a locator for your button
URL_TO_ENTER = "https://www.youtube.com/watch?v=_8lzaerNG2U"

def download_worker(url_to_enter):
    opts = Options()
    # opts.add_argument("--headless")  # Optional
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=opts)

    try:
        driver.get(TARGET_PAGE)
        time.sleep(5)

        input_box = driver.find_element(By.ID, "v")
        input_box.clear()
        input_box.send_keys(url_to_enter)

        convert_btn = driver.find_element(By.XPATH, "//button[normalize-space(text())='Convert']")
        convert_btn.click()
        time.sleep(2)

        MAX_RETRIES = 15
        SLEEP_SECONDS = 2
        download_btn = None

        for _ in range(MAX_RETRIES):
            try:
                download_btn = driver.find_element(By.XPATH, "//button[normalize-space(text())='Download']")
                break
            except NoSuchElementException:
                time.sleep(SLEEP_SECONDS)

        if download_btn:
            with sem:  # Lock access to download folder
                before = set(os.listdir(download_dir))
                download_btn.click()
                time.sleep(1)
                after = set(os.listdir(download_dir))
            
            new_files = list(after - before)
            if not new_files:
                print("No new file detected after clicking Download, increase after click wait time")
                return

            newd = new_files[0]
            if newd.endswith(".crdownload"):
                newf = newd[:newd.rfind(".mp3")+len(".mp3")]
            else:
                newf = newd

            wait_for_download(newf, newd, download_dir, timeout=60)
        else:
            print("Download button not found.")
    except Exception as e:
        print(f"Error in worker: {e}")
    finally:
        driver.quit()

C:\Users\HP\Downloads


In [128]:
# Allow max 5 workers at a time
worker_semaphore = threading.Semaphore(1)
start=0
end=start+1
def work_assigner():
    global start, end
    while True:
        with worker_semaphore:
            if start>=end:
                break
            task=links[start]
            name=songs[start]
            start+=1
        download_worker(task)
        print(f"Done with {name}")
        
threads = []
for i in range(5):  # Example: 20 tasks
    t = threading.Thread(target=work_assigner)
    t.start()
    threads.append(t)

# Wait for all to finish
for t in threads:
    t.join()


Luis Fonsi - Despacito ft. Daddy Yankee.mp3
Done with Despacito - Remix


In [None]:
###rough work:

In [31]:
import threading
import time

# Create a semaphore with initial value 1
semaphore = threading.Semaphore(1)

def task(name):
    print(f"{name} is waiting to acquire the semaphore...")
    with semaphore:
        print(f"{name} has acquired the semaphore.")
        time.sleep(2)
        print(f"{name} is releasing the semaphore.")

# Start multiple threads
t1 = threading.Thread(target=task, args=("Thread 1",))
t2 = threading.Thread(target=task, args=("Thread 2",))

t1.start()
t2.start()

t1.join()
t2.join()


Thread 1 is waiting to acquire the semaphore...
Thread 1 has acquired the semaphore.
Thread 2 is waiting to acquire the semaphore...
Thread 1 is releasing the semaphore.
Thread 2 has acquired the semaphore.
Thread 2 is releasing the semaphore.


In [29]:
s={'asfashfa'}
print(list(s)[0])

asfashfa


In [52]:
for song in songs:
    print(song)
    url=get_best_youtube_link(song)
    print(url)

Scream at the Sky - Luna Rossa
https://www.youtube.com/watch?v=weQRRL2q70o&pp=ygUeU2NyZWFtIGF0IHRoZSBTa3kgLSBMdW5hIFJvc3Nh
Storm Corrosion - Storm Corrosion
https://www.youtube.com/watch?v=manW5v-AR7U&pp=ygUhU3Rvcm0gQ29ycm9zaW9uIC0gU3Rvcm0gQ29ycm9zaW9u
Colours - Airbag
https://www.youtube.com/watch?v=U1F6zAWAsMo&pp=ygUQQ29sb3VycyAtIEFpcmJhZw%3D%3D
Surreal - Ambeon
https://www.youtube.com/watch?v=qRZvOcC7pGc&pp=ygUQU3VycmVhbCAtIEFtYmVvbg%3D%3D
Gover Si Vena - Elvya
Videos filter not found or already applied.
https://www.youtube.com/watch?v=iaxdx8OIAlo&pp=ygUVR292ZXIgU2kgVmVuYSAtIEVsdnlh
Shut It Down - Killit


KeyboardInterrupt: 

In [50]:

def initialize_driver():
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920,1080")
    driver = webdriver.Chrome(options=options)
    return driver

def click_videos_tab(driver):
    try:
        # This will find all tabs and click the one with text "Videos"
        tabs = driver.find_elements(By.XPATH, "//button[@role='tab']")
        for tab in tabs:
            try:
                if "Videos" in tab.text:
                    tab.click()
                    time.sleep(2)
                    return
            except:
                continue
        print("Videos tab not found.")
    except Exception as e:
        print("Could not click the Videos tab:", e)

def parse_view_count(text):
    match = re.search(r'([\d,.]+)', text)
    if not match:
        return 0
    count = match.group(1).replace(',', '')
    try:
        return int(count)
    except:
        return 0

def get_top_video_url(query):
    driver = initialize_driver()
    try:
        search_url = f"https://www.youtube.com/results?search_query={query.replace(' ', '+')}"
        driver.get(search_url)
        time.sleep(2)

        click_videos_tab(driver)  # Now using the new tab structure

        videos = driver.find_elements(By.TAG_NAME, "ytd-video-renderer")[:5]
        best_video = None
        highest_views = -1

        for video in videos:
            try:
                title_element = video.find_element(By.ID, "video-title")
                views_text = video.find_element(By.XPATH, ".//span[contains(text(),'views')]").text
                views = parse_view_count(views_text)
                url = title_element.get_attribute("href")

                if views > highest_views:
                    highest_views = views
                    best_video = url
            except Exception as e:
                print("Skipping one video due to error:", e)
                continue

        return best_video

    finally:
        driver.quit()


for song in songs:
    print(song)
    url=get_top_video_url(song)
    print(url)

Scream at the Sky - Luna Rossa
https://www.youtube.com/watch?v=ZhLNlTDGo-A&pp=ygUeU2NyZWFtIGF0IHRoZSBTa3kgLSBMdW5hIFJvc3Nh
Storm Corrosion - Storm Corrosion
https://www.youtube.com/watch?v=GnSxa96Z4kY&pp=ygUhU3Rvcm0gQ29ycm9zaW9uIC0gU3Rvcm0gQ29ycm9zaW9u
Colours - Airbag
Videos tab not found.
https://www.youtube.com/watch?v=U1F6zAWAsMo&pp=ygUQQ29sb3VycyAtIEFpcmJhZw%3D%3D
Surreal - Ambeon
Videos tab not found.
https://www.youtube.com/watch?v=euJMRNJ2JVM&pp=ygUQU3VycmVhbCAtIEFtYmVvbg%3D%3D
Gover Si Vena - Elvya
Videos tab not found.
https://www.youtube.com/watch?v=h53MF5hddVc&pp=ygUVR292ZXIgU2kgVmVuYSAtIEVsdnlh
Shut It Down - Killit
https://www.youtube.com/watch?v=1bIbUlv8UdU&pp=ygUVU2h1dCBJdCBEb3duIC0gS2lsbGl0
The Garden - Unitopia
https://www.youtube.com/watch?v=eYu8832UVpA&pp=ygUVVGhlIEdhcmRlbiAtIFVuaXRvcGlh
