In [1]:
import requests
import json
from typing import Any
import logging
import sys
import pathlib

from ytmusicapi.parsers import songs

In [2]:
def set_logger(name: str) -> logging.Logger:
    """
    Return the logger called ``name``, set to INFO level and writing to stdout.

    ``logging.getLogger`` hands back the same object for the same name, so
    re-running the notebook cell would otherwise stack one more stdout handler
    per run and print every message several times. The handler is therefore
    attached only when the logger does not already have one on ``sys.stdout``.

    :param name: name of the logger to configure
    :return: the configured logger
    """
    logger: logging.Logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    already_on_stdout: bool = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
        for handler in logger.handlers
    )
    if not already_on_stdout:
        logger.addHandler(logging.StreamHandler(sys.stdout))
    return logger

def get_dir_absolute_path(dir_name: str) -> pathlib.Path:
    """
    Locate a directory called ``dir_name`` by searching around the working directory.

    Every ancestor of the current working directory is visited, nearest first,
    and searched recursively; the first match that is a directory is returned.
    The original note says the notebook is assumed to run from the "src" folder.

    :param dir_name: name (glob pattern) of the directory to look for
    :return: the first matching directory, or an empty ``pathlib.Path()`` when
        no ancestor contains one
    """
    for ancestor in pathlib.Path.cwd().parents:
        # rglob walks the whole subtree of the ancestor, which includes the
        # working directory's own subtree for the first (closest) ancestor.
        first_dir = next(
            (candidate for candidate in ancestor.rglob(dir_name) if candidate.is_dir()),
            None,
        )
        if first_dir is not None:
            return first_dir

    return pathlib.Path()

def get_access_token() -> str:
    """
    Retrieve a Spotify access token from the Spotify accounts API
    (client-credentials grant).

    The client id and secret are read from the ``SPOTIFY_CLIENT_ID`` and
    ``SPOTIFY_CLIENT_SECRET`` environment variables so that no credential is
    stored in the notebook itself.

    :return: the access token, or an empty string when the credentials are not
        configured or the endpoint does not answer with HTTP 200
    """
    import os  # local import so the cell does not depend on an edit to the import cell

    client_id: str = os.environ.get("SPOTIFY_CLIENT_ID", "")
    client_secret: str = os.environ.get("SPOTIFY_CLIENT_SECRET", "")
    if not client_id or not client_secret:
        logging.getLogger(__name__).error(
            "SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET are not set; cannot request a Spotify token."
        )
        return ''

    response: requests.Response = requests.post(
        url="https://accounts.spotify.com/api/token",
        # A dict is form-encoded (and escaped) by requests.
        data={
            "grant_type": "client_credentials",
            "client_id": client_id,
            "client_secret": client_secret,
        },
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        # Without a timeout a stalled connection would block the notebook indefinitely.
        timeout=30,
    )
    if response.status_code == 200:
        return response.json()["access_token"]

    return ''



In [3]:
import yt_dlp
from youtube_search import YoutubeSearch
import pathlib
from datetime import datetime
import shutil

# Base options handed to yt_dlp.YoutubeDL; the per-song 'outtmpl' is merged in
# by download_song_from_youtube.
# NOTE(review): "embedthumbnail" looks like the CLI flag name rather than a
# YoutubeDL API option — confirm that it has any effect here.
youtube_dl_options: dict[str, Any] = {
    "quiet": True,
    "format": "bestaudio/best",
    "embedthumbnail": True,
    "postprocessors": [
        # Convert the downloaded audio to a 192 kbps mp3 ...
        {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "192"},
        # ... and write the metadata into the file.
        {"key": "FFmpegMetadata"},
    ],
}

def get_song_realease_date_folder(release_date: str, precision: str) -> pathlib.Path:
    """
    Turn a release date into a relative folder path, one level per date component.

    For example ``("2018-09-14", "day")`` gives ``2018/09/14`` and
    ``("2018", "year")`` gives ``2018``.

    :param release_date: the date string, formatted according to ``precision``
    :param precision: one of ``"year"``, ``"month"`` or ``"day"``
    :return: the relative folder path for that date
    :raises KeyError: if ``precision`` is not one of the supported values
    :raises ValueError: if ``release_date`` does not match the format implied
        by ``precision``
    """
    formats_by_precision: dict[str, str] = {
        "year": "%Y",
        "month": "%Y-%m",
        "day": "%Y-%m-%d",
    }
    date_format: str = formats_by_precision[precision]
    parsed_date: datetime = datetime.strptime(release_date, date_format)

    # Same components as the input format, but separated by '/' so that each
    # component becomes one directory level.
    folder_format: str = date_format.replace('-', '/')
    return pathlib.Path(parsed_date.strftime(folder_format))

def download_song_from_youtube(song_to_search: str, track: dict[str, Any]) -> bool:
    """
    Search YouTube for ``song_to_search`` and download the first result as audio.

    The file is written under the "songs" directory, inside a sub-folder
    derived from the album release date, and is named after the track id.

    :param song_to_search: the text to search for on YouTube
    :param track: a playlist item; reads ``track["track"]["id"]`` and the
        ``release_date`` / ``release_date_precision`` of ``track["track"]["album"]``
    :return: True if the song is already on disk or was downloaded, False if
        no search result was found or the download failed
    """
    track_data: dict[str, Any] = track["track"]
    release_path: pathlib.Path = get_song_realease_date_folder(
        release_date=track_data["album"]["release_date"],
        precision=track_data["album"]["release_date_precision"],
    )
    output_path: pathlib.Path = get_dir_absolute_path("songs") / release_path / track_data["id"]

    # create dirs if they do not exist
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if output_path.with_suffix(".mp3").exists():
        logger.debug(f"Skipping {track_data['id']}. Already downloaded. output_path {output_path}")
        return True
    logger.info(f"To download: {track_data['id']}")

    # Up to three search attempts; an empty result list or a result without a
    # "url_suffix" both count as a failed attempt.
    best_url: str = ""
    for attempts_left in range(2, -1, -1):
        try:
            url_suffix = YoutubeSearch(song_to_search, max_results=1).to_dict()[0].get("url_suffix")
        except IndexError:
            url_suffix = None
        if url_suffix:
            best_url = f"https://www.youtube.com{url_suffix}"
            break
        logger.debug(f"No valid URLs found for {song_to_search}, trying again ({attempts_left} attempts left).")

    if not best_url:
        # Do not hand an empty URL to yt_dlp: report the failure directly.
        logger.debug(f"No valid URLs found for {song_to_search}, skipping track.")
        return False

    # Run yt_dlp to fetch the link and extract its audio
    try:
        with yt_dlp.YoutubeDL(youtube_dl_options | {'outtmpl': str(output_path)}) as ydl:
            ydl.extract_info(best_url, download=True)
        return True
    except Exception as e:
        # Best effort: one failed download must not stop the surrounding loop.
        logger.error(e)
        return False

def get_string_to_search_in_youtube(track: dict[str, Any]) -> str:
    """
    Build the YouTube search text for a playlist item.

    :param track: a playlist item whose ``"track"`` entry holds the ``"artists"``
        list and the song ``"name"``
    :return: the artist names joined by spaces, then `` - `` and the song name,
        e.g. ``"Noname Smino Saba - Ace"``
    :raises KeyError: if the track entry has no ``"artists"`` key
    """
    details = track.get("track", {})
    artist_names: str = " ".join(artist.get("name") for artist in details["artists"])
    title = details.get('name')
    return f"{artist_names} - {title}"

def move_songs_to_release_date_partition(song: dict[str, Any]) -> None:
    """
    Move ``<songs>/<track id>.mp3`` into its release-date sub-folder.

    Nothing happens when the source file does not exist, when the release date
    folder cannot be worked out, or when the destination file already exists.

    :param song: a playlist item; reads ``song["track"]["id"]`` and the
        ``release_date`` / ``release_date_precision`` of ``song["track"]["album"]``
    """
    track_id: str = song["track"]["id"]
    songs_path: pathlib.Path = get_dir_absolute_path("songs")
    original_song_path: pathlib.Path = (songs_path / track_id).with_suffix(".mp3")

    if not original_song_path.exists():
        return

    try:
        release_path: pathlib.Path = get_song_realease_date_folder(
            release_date=song["track"]["album"]["release_date"],
            precision=song["track"]["album"]["release_date_precision"],
        )
        output_path: pathlib.Path = (songs_path / release_path / track_id).with_suffix(".mp3")
    except Exception as e:
        # Still best effort (the song simply stays where it is), but say why
        # instead of skipping silently.
        logger.warning(f"Cannot work out the release date folder for {track_id}: {e!r}")
        return

    if output_path.exists():
        return

    logger.debug(f"moving {track_id} to {output_path}")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(original_song_path, output_path)

In [4]:
# Module-level logger; the download/move helpers defined above refer to this
# global name, so this cell has to run before they are called.
logger: logging.Logger = set_logger("spotify api get data")
# Spotify playlist ids; only the first one is processed below.
PLAYLISTS: list[str] = ["31LTVcI9mBggtLVlYRStnJ", "7CqTvaywsSnZHdwujtkfqp"]
PLAYLIST_ID: str = PLAYLISTS[0]
access_token: str = get_access_token()
# NOTE(review): get_playlist_tracks_data and save_playlist_tracks_data are not
# defined in the visible part of this notebook — confirm that the cell defining
# them exists, otherwise a fresh "Restart & Run All" fails here with NameError.
playlist_info: dict[str, str | list[dict[str, Any]]] = get_playlist_tracks_data(PLAYLIST_ID, access_token)
save_playlist_tracks_data(playlist_info, PLAYLIST_ID)
# Displays the whole playlist dict as the cell output.
playlist_info

Fetching playlist tracks for 31LTVcI9mBggtLVlYRStnJ, current offset: 0, total songs: 0, fetching total True, fetched 0 songs
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 0th song
Fetched 100, total 1155, offset 100, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 100th song
Fetched 200, total 1155, offset 200, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 200th song
Fetched 300, total 1155, offset 300, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 300th song
Fetched 400, total 1155, offset 400, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 400th song
Fetched 500, total 1155, offset 500, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from the 500th song
Fetched 600, total 1155, offset 600, length 100
Fetching tracks from playlist id: 31LTVcI9mBggtLVlYRStnJ starting from th

{'playlist_id': '31LTVcI9mBggtLVlYRStnJ',
 'num_songs': 1155,
 'tracks': [{'track': {'album': {'name': 'Room 25',
     'release_date': '2018-09-14',
     'release_date_precision': 'day'},
    'artists': [{'id': '1EpyA68dKpjf7jXmQL88Hy', 'name': 'Noname'},
     {'id': '1ybINI1qPiFbwDXamRtwxD', 'name': 'Smino'},
     {'id': '7Hjbimq43OgxaBRpFXic4x', 'name': 'Saba'}],
    'id': '1Ya8UT49Oc3oASVcsDqz60',
    'name': 'Ace',
    'uri': 'spotify:track:1Ya8UT49Oc3oASVcsDqz60'}},
  {'track': {'album': {'name': 'Yes Lawd!',
     'release_date': '2016-10-21',
     'release_date_precision': 'day'},
    'artists': [{'id': '6PEMFpe3PTOksdV4ZXUpbE', 'name': 'NxWorries'},
     {'id': '3jK9MiCrA42lLAdMGUZpwa', 'name': 'Anderson .Paak'},
     {'id': '17Zu03OgBVxgLxWmRUyNOJ', 'name': 'Knxwledge'}],
    'id': '5mtfHX4VyY5D81WMaOHgHV',
    'name': 'What More Can I Say',
    'uri': 'spotify:track:5mtfHX4VyY5D81WMaOHgHV'}},
  {'track': {'album': {'name': 'Lost & Found',
     'release_date': '2018-06-08',
   

In [None]:
# Show the top-level keys of the fetched playlist payload.
playlist_info.keys()

## Producer

In [6]:
import pika

# Publish the whole playlist payload as one JSON message on the 'playlist'
# queue of the RabbitMQ broker running on localhost.
connection = pika.BlockingConnection(
    pika.ConnectionParameters(host='localhost'))
try:
    channel = connection.channel()

    channel.queue_declare(queue='playlist')

    channel.basic_publish(exchange='', routing_key='playlist', body=json.dumps(playlist_info))
    print(f" [x] Sent playlist {playlist_info.get('playlist_id')} to the 'playlist' queue")
finally:
    # Close the connection even when declaring the queue or publishing fails.
    connection.close()

 [x] Sent 'Hello World!'


## Consumer

In [None]:
import pika, sys
from pymongo import MongoClient

# MongoDB client shared by the helpers and scratch cells below; the URI points
# at 127.0.0.1:27017 with a 2000 ms server selection timeout.
client: MongoClient = MongoClient("mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+2.1.1")

def save_counter_to_db(playlist_id: str, num_songs: int) -> None:
    """
    Insert one document with the song count of a playlist into the
    ``playlist_counter`` collection of the ``playlist`` database.

    :param playlist_id: the id of the playlist
    :param num_songs: the number of songs in the playlist
    """
    counter_document = {"playlist_id": playlist_id, "num_songs": num_songs}
    client.playlist.playlist_counter.insert_one(counter_document)

def save_json_to_db(playlist_info: dict[str, str | list[dict[str, Any]]]) -> None:
    """
    Insert the playlist into the ``playlist_info`` collection of the
    ``playlist`` database, storing only the track ids in place of the full
    track entries.

    Each entry of ``playlist_info["tracks"]`` is expected to look like
    ``{"track": {"id": ..., ...}}``; the id is read from the nested ``"track"``
    dict, falling back to a top-level ``"id"`` key. The caller's dict is left
    untouched: a new document is built for the insert.

    :param playlist_info: the playlist payload (``playlist_id``, ``num_songs``,
        ``tracks``)
    """
    track_ids: list[Any] = []
    for entry in playlist_info.get("tracks") or []:
        nested_track = entry.get("track") or {}
        track_ids.append(nested_track.get("id", entry.get("id")))

    # Copy instead of assigning into playlist_info, so code that uses the
    # payload afterwards still sees the full track entries.
    document: dict[str, Any] = {**playlist_info, "tracks": track_ids}
    client.playlist.playlist_info.insert_one(document)

def send_messages_for_each_song(playlist_info: dict[str, str | list[dict[str, Any]]]) -> None:
    """
    Publish one JSON message per track on the 'tracks' queue of the RabbitMQ
    broker running on localhost.

    :param playlist_info: the playlist payload; a missing or empty ``"tracks"``
        entry results in no message being published
    """
    tracks = playlist_info.get("tracks") or []

    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
    try:
        channel = connection.channel()

        channel.queue_declare(queue='tracks')

        for track in tracks:
            channel.basic_publish(exchange='', routing_key='tracks', body=json.dumps(track))
        # One summary line instead of one line per track.
        print(f" [x] Sent {len(tracks)} track message(s) to the 'tracks' queue")
    finally:
        # Close the connection even when publishing fails part-way through.
        connection.close()

def playlist_consumer_producer():
    """
    Consume playlist messages from the 'playlist' queue and fan them out.

    For every message: store the song counter, store the playlist document and
    publish one message per track. The call blocks until it is interrupted
    (CTRL+C / kernel interrupt); the connection is closed on the way out.
    """
    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()

    channel.queue_declare(queue='playlist')

    def playlist_unpack(ch, method, properties, body):
        """Handle one playlist message (``body`` is the JSON payload)."""
        payload = json.loads(body)
        save_counter_to_db(payload.get("playlist_id"), payload.get("num_songs"))
        # Hand over a shallow copy: the track list of `payload` must still hold
        # the full track entries when the per-track messages are built below.
        save_json_to_db(dict(payload))
        send_messages_for_each_song(payload)

        print(f" [x] playlist id: {payload.get('playlist_id')} - Total number of songs in the playlist: {payload.get('num_songs')}")

    # NOTE(review): with auto_ack=True a message is acknowledged on delivery,
    # so a failure inside the callback does not get it redelivered — confirm
    # this is acceptable.
    channel.basic_consume(queue='playlist', on_message_callback=playlist_unpack, auto_ack=True)

    print(' [*] Waiting for messages. To exit press CTRL+C')
    try:
        channel.start_consuming()
    except KeyboardInterrupt:
        # Documented way to leave the loop: stop consuming instead of
        # surfacing a traceback.
        channel.stop_consuming()
    finally:
        if connection.is_open:
            connection.close()


# Start the consumer; start_consuming() inside keeps this cell running, so the
# cells below only run once it has been stopped.
playlist_consumer_producer()

 [*] Waiting for messages. To exit press CTRL+C
 [x] playlist id: 31LTVcI9mBggtLVlYRStnJ - Total number of songs in the playlist: 1155


In [33]:
# Scratch check: insert a sample document into the `test` collection of the
# `playlist` database. Goes through `client` because `db` is only assigned in
# a later cell.
client.playlist.test.insert_one({"nome": "porcodio", "a": "b", "c": 2})

InsertOneResult(ObjectId('65969e449a878c6e2a16d8c2'), acknowledged=True)

In [35]:
# Scratch check: show the Python type of what find_one returns for a match.
type(client.playlist.test.find_one({"a": "b"}))

dict

In [38]:


# Handles on the `playlist` database and its `test` collection, reused by the
# scratch cells.
db = client.playlist
collection = db.test

# Show the first document yielded by the cursor; next() raises StopIteration
# when the collection is empty.
next(db.test.find())

{'_id': ObjectId('65969c4c9a878c6e2a16d8c1'),
 'nome': 'porcodio',
 'a': 'b',
 'c': 2}

In [39]:
# Scratch check: set field "a" to "bb" on the first document whose "nome"
# matches the filter.
collection.update_one({"nome": "porcodio"}, {"$set": {"a": "bb"}})

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)