In [1]:
import os
import json
import uuid
from collections import defaultdict
from datetime import datetime
import xml.etree.ElementTree as ET

# Load the exported library (a plist-style XML document) and locate the
# <dict> that holds every track. A plist <dict> stores its entries as
# alternating <key>/<value> children, so the value belonging to the
# "Tracks" key is simply the element that follows it.
xml_file = "Biblioteca.xml"
tree = ET.parse(xml_file)
root = tree.getroot()

# Find the first <dict> inside <plist>
library_dict = root.find("dict")
if library_dict is None:
    # Without this guard list(None) below would fail with an opaque TypeError.
    raise ValueError(f"No top-level <dict> found in {xml_file}")

# Locate the <dict> associated with "Tracks"
tracks_dict = None
keys = list(library_dict)

# Pair every child with its successor; a trailing "Tracks" key that has no
# following element produces no pair, so it falls through to the ValueError
# below instead of raising IndexError.
for entry, following in zip(keys, keys[1:]):
    if entry.tag == "key" and entry.text == "Tracks":
        tracks_dict = following  # The next element is the <dict> of Tracks
        break

# Verify that the correct dictionary was found
if tracks_dict is None or tracks_dict.tag != "dict":
    raise ValueError("Tracks dictionary not found in XML")

In [2]:
# Build a mapping of track id -> {field name -> value} from the Tracks <dict>.
# Children come in (<key>, value) pairs at both levels, so both loops step
# through the child lists two elements at a time.
tracks = {}

track_elements = list(tracks_dict)
for pos in range(0, len(track_elements), 2):
    id_node = track_elements[pos]
    info_node = track_elements[pos + 1]  # The <dict> containing track info

    fields = list(info_node)
    track_info = {}

    for offset in range(0, len(fields), 2):
        name_node = fields[offset]
        value_node = fields[offset + 1]
        tag = value_node.tag

        # <integer> becomes int, <true/>/<false/> become booleans,
        # every other element type keeps its raw text.
        if tag == "integer":
            parsed = int(value_node.text)
        elif tag == "true":
            parsed = True
        elif tag == "false":
            parsed = False
        else:
            parsed = value_node.text

        track_info[name_node.text] = parsed

    tracks[id_node.text] = track_info

# Date stamp (YYYY-MM-DD) used in the output folder name and in every
# exported file name
current_date = f"{datetime.now():%Y-%m-%d}"

# All exports of this run go into a folder named after that stamp;
# an already existing folder is reused
output_folder = "./" + current_date
os.makedirs(name=output_folder, exist_ok=True)


def file_path(path):
    """Return `path` placed inside the dated output folder.

    Args:
        path: File name (or relative path) to join onto `output_folder`.

    Returns:
        The string produced by `os.path.join(output_folder, path)`.
    """
    destination = os.path.join(output_folder, path)
    return destination


# Write the parsed track table, unmodified, as pretty-printed UTF-8 JSON
raw_export_path = file_path(f'Biblioteca_{current_date}.json')
with open(raw_export_path, "w", encoding="utf-8") as raw_out:
    json.dump(tracks, raw_out, ensure_ascii=False, indent=4)

In [3]:
def _format_track(track_id, track_info):
    """Flatten one raw track record into the export format.

    Tags that are missing or empty fall back to a neutral value instead of
    raising KeyError, so a single incompletely tagged track no longer aborts
    the whole export. Fallbacks keep each field's type stable (0 for numbers,
    "" for text) because later cells sort on "name" and "trackNumber".

    Args:
        track_id: Key of the track in `tracks`; used as the id when the
            record has no "Track ID" field of its own.
        track_info: Mapping of field name -> value for one track.

    Returns:
        dict with the keys id, name, duration, trackNumber, artist, album,
        albumArtist, year, genre, playCount and timePlayed.
    """
    play_count = track_info.get("Play Count") or 0
    duration = track_info.get("Total Time") or 0
    artist = track_info.get("Artist") or ""

    return {
        "id": str(track_info.get("Track ID", track_id)),
        "name": track_info.get("Name") or "",
        "duration": duration,
        "trackNumber": track_info.get("Track Number") or 0,
        "artist": artist,
        "album": track_info.get("Album") or "",
        # Use Artist if Album Artist doesn't exist
        "albumArtist": track_info.get("Album Artist") or artist,
        "year": track_info.get("Year") or 0,
        "genre": track_info.get("Genre") or "",
        "playCount": play_count,
        # timePlayed = playCount * duration
        "timePlayed": play_count * duration
    }


# Convert every parsed track to the new format, keeping the order of `tracks`
formatted_json = {
    "data": [
        _format_track(track_id, track_info)
        for track_id, track_info in tracks.items()
    ]
}

with open(file_path(f'Formatted_Biblioteca_{current_date}.json'), 'w', encoding='utf-8') as f:
    json.dump(formatted_json, f, ensure_ascii=False, indent=4)

In [4]:
# Two ranked exports, produced one after the other: first by "timePlayed",
# then by "playCount". Both are descending, with "name" ascending as the
# tie-breaker. Each pass re-sorts formatted_json["data"] itself, so after
# this cell the list is left in playCount order.
ranked_exports = (
    ("timePlayed", f'Formatted_Biblioteca_byTimePlayed_{current_date}.json'),
    ("playCount", f'Formatted_Biblioteca_byPlayCount_{current_date}.json'),
)

for rank_field, export_name in ranked_exports:
    # Negating the numeric field gives descending order while the name
    # part of the key stays ascending.
    formatted_json["data"].sort(
        key=lambda item: (-item[rank_field], item["name"]))

    with open(file_path(export_name), 'w', encoding='utf-8') as f:
        json.dump(formatted_json, f, ensure_ascii=False, indent=4)

In [5]:
def generate_uuid():
    """Return a newly generated random (version 4) UUID in string form."""
    fresh = uuid.uuid4()
    return str(fresh)


def _new_album():
    """Return a blank album record with a fresh id.

    playCount starts at +infinity so the first min() against a real track
    count always replaces it.
    """
    return {
        "id": generate_uuid(),
        "name": "",
        "artist": "",
        "year": 0,
        "genre": "",
        "playCount": float('inf'),
        "timePlayed": 0,
        "tracks": []
    }


# Albums are keyed by title only, so tracks from different albums that share
# a title end up in the same record.
albums = defaultdict(_new_album)

# Walk the songs in their current order and fold each one into its album
for song in formatted_json["data"]:
    record = albums[song["album"]]

    # The first song seen for an album supplies its name, artist, year, genre
    if record["name"] == "":
        record["name"] = song["album"]
        record["artist"] = song["albumArtist"]
        record["year"] = song["year"]
        record["genre"] = song["genre"]

    # Album playCount is the lowest playCount among its tracks;
    # album timePlayed is the sum over its tracks
    record["playCount"] = min(record["playCount"], song["playCount"])
    record["timePlayed"] += song["timePlayed"]

    record["tracks"].append({
        "id": str(song["id"]),
        "name": song["name"],
        "duration": song["duration"],
        "trackNumber": song["trackNumber"],
        "artist": song["artist"],
        "album": song["album"],
        # Use Artist if Album Artist doesn't exist
        "albumArtist": song.get("albumArtist", song["artist"]),
        "year": song["year"],
        "genre": song["genre"],
        "playCount": song["playCount"],
        "timePlayed": song["timePlayed"]
    })

# Order the tracks inside every album by trackNumber
for record in albums.values():
    record["tracks"].sort(key=lambda entry: entry["trackNumber"])

# Album records in first-seen order; the genre cell reads this structure
formatted_album_json = {
    "data": list(albums.values())
}

# Separate copy of the list, ranked by 'timePlayed' (highest first)
ranked_albums = list(formatted_album_json["data"])
ranked_albums.sort(key=lambda record: record["timePlayed"], reverse=True)
formatted_album_json_by_timeplayed = {
    "data": ranked_albums
}

with open(file_path(f'Formatted_Biblioteca_byAlbum_{current_date}.json'), 'w', encoding='utf-8') as f:
    json.dump(formatted_album_json_by_timeplayed,
              f, ensure_ascii=False, indent=4)

In [6]:
# Create a dictionary to hold genre information.
# "artists" is a list kept free of duplicates by a membership check rather
# than a set: iterating a set of strings can yield a different order on
# every kernel restart, which made the exported file differ between
# otherwise identical runs. The list keeps first-seen order instead.
genres = defaultdict(lambda: {
    "id": generate_uuid(),  # Generate a unique UUID for each genre
    "name": "",
    "albums": [],
    "artists": [],
    "timePlayed": 0
})

# Grouping albums by genre
for album in formatted_album_json["data"]:
    genre_name = album["genre"]
    genre = genres[genre_name]

    # Set the genre name once (it's the same for all albums in the genre)
    if genre["name"] == "":
        genre["name"] = genre_name

    genre["albums"].append(album["name"])

    # Record each album artist only once per genre
    if album["artist"] not in genre["artists"]:
        genre["artists"].append(album["artist"])

    genre["timePlayed"] += album["timePlayed"]

# Create the final JSON structure with genres
formatted_genre_json = {
    "data": list(genres.values())  # Convert defaultdict to a list of genres
}

# Sort genres by timePlayed in descending order (highest first)
formatted_genre_json["data"] = sorted(
    formatted_genre_json["data"], key=lambda x: x["timePlayed"], reverse=True)

with open(file_path(f'Formatted_Biblioteca_byGenre_{current_date}.json'), 'w', encoding='utf-8') as f:
    json.dump(formatted_genre_json, f, ensure_ascii=False, indent=4)

In [7]:
def _new_artist():
    """Return a blank artist record with a fresh id."""
    return {
        "id": generate_uuid(),
        "name": "",
        "albums": [],
        "songsCount": 0,
        "timePlayed": 0
    }


# One record per track artist, created on first access
artists = defaultdict(_new_artist)

# Fold every song into the record of its artist
for song in formatted_json["data"]:
    entry = artists[song["artist"]]

    if entry["name"] == "":
        entry["name"] = song["artist"]

    # List each album title only once per artist
    if song["album"] not in entry["albums"]:
        entry["albums"].append(song["album"])

    entry["songsCount"] += 1
    entry["timePlayed"] += song["timePlayed"]

# Rank the artists by timePlayed, highest first, and wrap them for export
ranked_artists = list(artists.values())
ranked_artists.sort(key=lambda entry: entry["timePlayed"], reverse=True)
formatted_artist_json = {
    "data": ranked_artists
}

with open(file_path(f'Formatted_Biblioteca_byArtist_{current_date}.json'), 'w', encoding='utf-8') as f:
    json.dump(formatted_artist_json, f, ensure_ascii=False, indent=4)