In [1]:
import os
import json
import pyperclip
import pandas as pd

In [2]:
DATA_PATH = os.path.join('../Apple Music Activity')
FILE_NAME = 'Apple Music Library Albums.json'
DOCS_PATH = os.path.join('../docs/Library')
DOC_NAME = 'LIBRARY_ALBUMS.md'

In [4]:
with open(os.path.join(DATA_PATH, FILE_NAME), 'r') as f:
    library = json.load(f)

In [5]:
len(library), type(library)

(1981, list)

In [6]:
library[0].keys()

dict_keys(['Album ID', 'Title', 'Date Created', 'Date Created In Library', 'Visible', 'Catalog Identifiers - Album'])

In [7]:
keys_set = set()
for item in library:
    keys_set.add(len(item.keys()))
keys_set

{5, 6, 8}

In [8]:
types_set = set()
for item in library:
    for key in item.keys():
        types_set.add(type(item[key]))
        
types_set

{bool, str}

In [9]:
df = pd.DataFrame(library)

In [10]:
df.shape

(1981, 8)

In [11]:
df.columns

Index(['Album ID', 'Title', 'Date Created', 'Date Created In Library',
       'Visible', 'Catalog Identifiers - Album', 'Favorite Album - Status',
       'Favorite Album - Date'],
      dtype='object')

In [12]:
df['Album ID'].nunique()

1981

In [13]:
df['Album ID'].value_counts()

Album ID
l.00DcEq9    1
l.hpHUdF8    1
l.hnwFT7b    1
l.hmk0vSb    1
l.hgtachk    1
            ..
l.LH6QMqx    1
l.LFDOFdn    1
l.LExy3K5    1
l.LDEszPi    1
l.ztDqLco    1
Name: count, Length: 1981, dtype: int64

In [14]:
df['Album ID'].isna().sum()

np.int64(0)

In [15]:
df['Title'].nunique()

1855

In [16]:
df['Title'].value_counts()

Title
                                                                95
Shang-Chi and The Legend of The Ten Rings: The Album             3
Bas Kya Ba (feat. DIVINE) - Single                               2
Die for You (Zedd Remix) - Single                                2
mainstream sellout                                               2
                                                                ..
Game of Thrones: Season 6 (Music from the HBO Series)            1
Tha Carter III                                                   1
ULTRAMAN (From The Netflix Film "Ultraman: Rising") - Single     1
Rattle - Single                                                  1
Until Now                                                        1
Name: count, Length: 1855, dtype: int64

In [17]:
df['Title'].isna().sum()

np.int64(0)

In [18]:
df[df['Title'] == '']['Title']

86       
127      
186      
199      
252      
       ..
1951     
1952     
1953     
1954     
1955     
Name: Title, Length: 95, dtype: object

In [19]:
df['Date Created'].nunique()

1620

In [20]:
df['Date Created'].value_counts()

Date Created
2023-02-15T08:07:07Z    57
2024-07-22T02:18:17Z    44
2024-07-22T02:18:20Z    31
2025-03-18T19:11:50Z    25
2022-04-28T00:59:21Z    25
                        ..
2023-11-14T21:21:39Z     1
2024-01-11T20:40:25Z     1
2021-03-15T01:10:43Z     1
2024-11-22T02:04:48Z     1
2023-11-01T06:36:09Z     1
Name: count, Length: 1620, dtype: int64

In [21]:
df['Date Created'].isna().sum()

np.int64(0)

In [22]:
df['Date Created In Library'].nunique()

1576

In [23]:
df['Date Created In Library'].value_counts()

Date Created In Library
2023-02-15T08:07:07Z    57
2024-07-22T02:18:17Z    44
2024-07-22T02:18:20Z    31
2021-06-05T17:48:05Z    30
2022-04-28T01:04:28Z    25
                        ..
2022-10-26T10:09:31Z     1
2022-09-22T07:25:22Z     1
2023-03-23T18:57:33Z     1
2024-06-09T18:36:09Z     1
2023-11-01T06:36:09Z     1
Name: count, Length: 1576, dtype: int64

In [24]:
df['Date Created In Library'].isna().sum()

np.int64(0)

In [25]:
df['Favorite Album - Date'].nunique()

2

In [26]:
df['Favorite Album - Date'].value_counts()

Favorite Album - Date
2021-03-26T01:48:22Z    1
2023-02-17T21:24:35Z    1
Name: count, dtype: int64

In [27]:
df['Favorite Album - Date'].isna().sum()

np.int64(1979)

In [28]:
df[~df['Favorite Album - Date'].isna()]['Title']

1018            Vaaqif
1323    Quest For Fire
Name: Title, dtype: object

In [29]:
df['Favorite Album - Status'].nunique()

1

In [30]:
df['Favorite Album - Status'].value_counts()

Favorite Album - Status
True    2
Name: count, dtype: int64

In [31]:
df[~df['Favorite Album - Status'].isna()]['Title']

1018            Vaaqif
1323    Quest For Fire
Name: Title, dtype: object

In [32]:
df['Catalog Identifiers - Album'].nunique()

1661

In [33]:
df['Catalog Identifiers - Album'].value_counts()

Catalog Identifiers - Album
1             77
1744026852     1
1714664204     1
840381885      1
1445010871     1
              ..
1503415446     1
1681797698     1
1777807936     1
978976413      1
1021728914     1
Name: count, Length: 1661, dtype: int64

In [34]:
df[df['Catalog Identifiers - Album'] == 1]['Title']

Series([], Name: Title, dtype: object)

In [35]:
df_non_nans = df.isna().sum(axis=1)
min_len = df_non_nans.min()
min_len_idx = df_non_nans.idxmin()

In [51]:
mdown_text = f"""
# Library Albums Data Definition

This data dictionary describes the fields in the {FILE_NAME} data. There are {df.shape[0]} rows and {df.shape[1]} columns. 

## Library Track Data

"""

'\n# Library Albums Data Definition\n\nThis data dictionary describes the fields in the Apple Music Library Albums.json data. There are 1702 rows and 8 columns. \n\n## Library Track Data\n\n'

In [56]:
def generate_markdown_table(df, min_index):
    # Helper function to map data types to more readable formats
    def map_data_type(dtype, sample_value):
        # Check for datetime
        if pd.api.types.is_datetime64_any_dtype(dtype) or (
            isinstance(sample_value, str) and pd.to_datetime(sample_value, errors='coerce') is not pd.NaT
        ):
            return "Datetime"
        # Check for Boolean (Python Boolean or string representation)
        elif isinstance(sample_value, bool) or (isinstance(sample_value, str) and sample_value.lower() in ["true", "false"]):
            return "Boolean"
        elif pd.api.types.is_string_dtype(dtype):
            return "String"
        elif pd.api.types.is_numeric_dtype(dtype):
            if pd.api.types.is_integer_dtype(dtype):
                return "Integer"
            elif pd.api.types.is_float_dtype(dtype):
                return "Float"
        elif isinstance(sample_value, list):
            return "List"
        else:
            return str(dtype)  # Fallback to original dtype if no match

    # Generate the table header
    markdown = "| Column Name | Data Type | Description | Example Value |\n"
    markdown += "|-------------|-----------|-------------|---------------|\n"

    # Iterate over DataFrame columns to generate each row
    for column in df.columns:
        sample_value = df[column].iloc[min_len_idx] if not df[column].empty else "N/A"
        data_type = map_data_type(df[column].dtype, sample_value)
        # Convert example_value to string to avoid formatting issues
        example_value = str(sample_value).replace("\n", " ").replace("|", "\\|")
        # Create a new row for each column
        markdown += f"| `{column}` | {data_type} |  | {example_value} |\n"

    return markdown

mdown_text += generate_markdown_table(df, min_len_idx)



In [57]:
def write_markdown_file(mdown_text, file_path):
    with open(file_path, 'w') as f:
        f.write(mdown_text)
    print(f"Markdown file saved to {file_path}")

In [58]:
write_markdown_file(mdown_text, os.path.join(DOCS_PATH, DOC_NAME))

Markdown file saved to ../docs/Library/LIBRARY_ALBUMS.md


In [37]:
singles = df[df['Title'].str.contains(" - Single", na=False)]

In [38]:
singles.to_csv('Singles.csv')