In [2]:
import os
import json
import pyperclip
import pandas as pd

In [3]:
DATA_PATH = os.path.join('../Apple Music Activity')
FILE_NAME = 'Apple Music Library Playlists.json'
DOCS_PATH = os.path.join('../docs/Library')
DOC_NAME = 'LIBRARY_PLAYLISTS.md'

In [4]:
with open(os.path.join(DATA_PATH, FILE_NAME), 'r') as f:
    library = json.load(f)

In [5]:
len(library), type(library)

(106, list)

In [6]:
library[0].keys()

dict_keys(['Container Type', 'Container Identifier', 'Title', 'Playlist Item Identifiers', 'Favorite Status - Playlist', 'Favorite Date - Playlist', 'Added Date', 'Name or Description Modified Date', 'Playlist Items Modified Date'])

In [7]:
keys_set = set()
for item in library:
    keys_set.add(len(item.keys()))
keys_set

{6, 7, 8, 9, 10, 11, 12}

In [8]:
types_set = set()
list_type_keyset = set()
for item in library:
    for key in item.keys():
        types_set.add(type(item[key]))
        if type(item[key]) == list:
            list_type_keyset.add(key)
        
types_set, list_type_keyset

({bool, int, list, str}, {'Playlist Item Identifiers'})

In [9]:
df = pd.DataFrame(library)

In [10]:
df.shape

(106, 14)

In [11]:
df.columns

Index(['Container Type', 'Container Identifier', 'Title',
       'Playlist Item Identifiers', 'Favorite Status - Playlist',
       'Favorite Date - Playlist', 'Added Date',
       'Name or Description Modified Date', 'Playlist Items Modified Date',
       'Public Playlist Identifier', 'Playlist Is Shared',
       'Playlist Previously Shared', 'Description',
       'Available On Apple Music Profile'],
      dtype='object')

In [12]:
df['Container Type'].value_counts()

Container Type
Playlist               53
Subscribed Playlist    52
Favorites Playlist      1
Name: count, dtype: int64

In [13]:
df['Container Type'].isna().sum()

0

In [14]:
df['Container Identifier'].isna().sum()

0

In [15]:
df['Container Identifier'].value_counts()

Container Identifier
2            1
256004113    1
256004105    1
256004101    1
256004097    1
            ..
256001801    1
256001797    1
256001793    1
256001553    1
256005633    1
Name: count, Length: 106, dtype: int64

In [16]:
df['Title'].isna().sum()

52

In [17]:
df[df['Title'].isna()]

Unnamed: 0,Container Type,Container Identifier,Title,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile
1,Subscribed Playlist,256000261,,,,,2021-03-18T08:48:19Z,2021-03-18T08:48:19Z,2021-03-18T08:48:19Z,pl.u-9N9L8abt7ArvWB,True,True,,
2,Subscribed Playlist,256000265,,,,,2021-04-16T18:37:21Z,2021-04-16T18:37:21Z,2021-04-16T18:37:21Z,pl.10fc76a3edc14e759deb60535854e339,True,True,,
3,Subscribed Playlist,256000269,,,,,2021-04-18T06:30:46Z,2021-04-18T06:30:46Z,2021-04-18T06:30:46Z,pl.0b593f1142b84a50a2c1e7088b3fb683,True,True,,
4,Subscribed Playlist,256000273,,,,,2021-04-21T17:39:04Z,2021-04-21T17:39:04Z,2021-04-21T17:39:04Z,pl.299001f6c528460797ea61bac6522a81,True,True,,
5,Subscribed Playlist,256000513,,,,,2021-05-08T22:28:39Z,2021-05-08T22:28:39Z,2021-05-08T22:28:39Z,pl.5ee8333dbe944d9f9151e97d92d1ead9,True,True,,
10,Subscribed Playlist,256000769,,,,,2021-06-18T17:13:00Z,2021-06-18T17:13:00Z,2021-06-18T17:13:00Z,pl.6f8f8462368a46d984d0e2e61ac27c3c,True,True,,
11,Subscribed Playlist,256000773,,,,,2021-06-18T17:21:49Z,2021-06-18T17:21:49Z,2021-06-18T17:21:49Z,pl.cc74a5aec23942da9cf083c6c4344aee,True,True,,
12,Subscribed Playlist,256000777,,,,,2021-06-23T07:08:33Z,2021-06-23T07:08:33Z,2021-06-23T07:08:33Z,pl.u-KVXBkl1ILAGrb4P,True,True,,
13,Subscribed Playlist,256000781,,,,,2021-06-30T16:43:08Z,2021-06-30T16:43:08Z,2021-06-30T16:43:08Z,pl.a0c765aa555e457c9666b2a201de5506,True,True,,
14,Subscribed Playlist,256000785,,,,,2021-06-30T16:45:04Z,2021-06-30T16:45:04Z,2021-06-30T16:45:04Z,pl.c03f886929854b5c99d44266b5c31b61,True,True,,


In [18]:
df[df['Title'].isna()]['Container Type'].value_counts()

Container Type
Subscribed Playlist    52
Name: count, dtype: int64

In [19]:
df[df['Container Type'] == 'Favorites Playlist']['Title']

0    Favorite Songs
Name: Title, dtype: object

In [20]:
df[df['Favorite Status - Playlist'] == True]['Title']

0     Favorite Songs
18               NaN
Name: Title, dtype: object

In [21]:
df_personal = df[df['Container Type'] == 'Playlist']

In [22]:
df_personal.shape

(53, 14)

In [23]:
df_personal['Description'].isna().sum()

10

In [24]:
df_personal['Title'].isna().sum()

0

In [25]:
df_personal[df_personal['Playlist Is Shared'] == False]['Title']

8              My Shazam Tracks
95     TRANSITION. No crossfade
97                     desi DnB
102                    DnB 🥁 SJ
103                    DnB 🥁 SJ
104                    DnB 🥁 SJ
Name: Title, dtype: object

In [26]:
df_personal[df_personal['Playlist Previously Shared'] == True]['Title']

6                              Bathroom 
7                                   Club
19               13 reasons why personal
22                     Untitled Playlist
23                     Untitled Playlist
25                           Seedhe Maut
26                           Linkin Park
29                          Non Hip-Hop 
30                                   DHH
31                        Hip-Hop / Trap
32                         Hip-Hop Outro
34                                   Alt
35                                   Emo
36                  Beat Switch Symphony
37     The Weeknd - After Hours Til Dawn
38                                     H
39        Atlantic Beats: Trap meets EDM
44                 Best Intros (Hip-Hop)
48                Best remixes (Hip-Hop)
57                          Indian Drill
58                     Relax on Hip-Hop 
59                          Jersey Club 
73                     Gujarati Hip-Hop 
75             Dark&Scary Beats by Metro
83              

In [27]:
df_personal[df_personal['Available On Apple Music Profile'] == True]['Title']

30                                   DHH
31                        Hip-Hop / Trap
32                         Hip-Hop Outro
34                                   Alt
35                                   Emo
37     The Weeknd - After Hours Til Dawn
44                 Best Intros (Hip-Hop)
48                Best remixes (Hip-Hop)
57                          Indian Drill
58                     Relax on Hip-Hop 
59                          Jersey Club 
71                   Piano Hip-Hop Tunes
72                              Flute 🪈🪤
73                     Gujarati Hip-Hop 
75             Dark&Scary Beats by Metro
90                       Guitar Hip-Hop 
92                       Wake UP at 5 AM
93                               Elevate
94                           Rap Revival
98                       MikeDeanMagic 🌵
100                           3AreLegend
104                             DnB 🥁 SJ
Name: Title, dtype: object

In [28]:
df['Favorite Date - Playlist'].value_counts()

Favorite Date - Playlist
2023-10-31T01:45:27Z    1
Name: count, dtype: int64

In [29]:
df[df['Favorite Status - Playlist'] == True]['Title']   

0     Favorite Songs
18               NaN
Name: Title, dtype: object

In [30]:
df_non_nans = df.isna().sum(axis=1)
min_len = df_non_nans.min()
min_len_idx = df_non_nans.idxmin()

In [41]:
mdown_text = f"""
# Library Playlists Data Definition

This data dictionary describes the fields in the {FILE_NAME} data. There are {df.shape[0]} rows and {df.shape[1]} columns. 

## Library Playlists Data

"""

In [42]:
def generate_markdown_table(df, min_index):
    # Helper function to map data types to more readable formats
    def map_data_type(dtype, sample_value):
        # Check for datetime
        if pd.api.types.is_datetime64_any_dtype(dtype) or (
            isinstance(sample_value, str) and pd.to_datetime(sample_value, errors='coerce') is not pd.NaT
        ):
            return "Datetime"
        # Check for Boolean (Python Boolean or string representation)
        elif isinstance(sample_value, bool) or (isinstance(sample_value, str) and sample_value.lower() in ["true", "false"]):
            return "Boolean"
        elif pd.api.types.is_string_dtype(dtype):
            return "String"
        elif pd.api.types.is_numeric_dtype(dtype):
            if pd.api.types.is_integer_dtype(dtype):
                return "Integer"
            elif pd.api.types.is_float_dtype(dtype):
                return "Float"
        elif isinstance(sample_value, list):
            return "List"
        else:
            return str(dtype)  # Fallback to original dtype if no match

    # Generate the table header
    markdown = "| Column Name | Data Type | Description | Example Value |\n"
    markdown += "|-------------|-----------|-------------|---------------|\n"

    # Iterate over DataFrame columns to generate each row
    for column in df.columns:
        sample_value = df[column].iloc[min_len_idx] if not df[column].empty else "N/A"
        data_type = map_data_type(df[column].dtype, sample_value)
        # Convert example_value to string to avoid formatting issues
        example_value = str(sample_value).replace("\n", " ").replace("|", "\\|")
        # Create a new row for each column
        markdown += f"| `{column}` | {data_type} |  | {example_value} |\n"

    return markdown

mdown_text += generate_markdown_table(df, min_len_idx)



In [46]:
def write_markdown_file(mdown_text, file_path):
    with open(file_path, 'w') as f:
        f.write(mdown_text)
    print(f"Markdown file saved to {file_path}")

In [45]:
write_markdown_file(mdown_text, os.path.join(DOCS_PATH, DOC_NAME))

Markdown file saved to ../docs/Library/LIBRARY_PLAYLISTS.md


In [31]:
df['Public Playlist Identifier'].isna().sum()

17

In [32]:
df[df['Public Playlist Identifier'].isna()]['Title']

0                            Favorite Songs
8                          My Shazam Tracks
9                                      Temp
17                                      you
40                                 Gujarati
53                                  Mixtape
56     Lil Baby & Young Thug - It's Only Me
68                                 Mask Off
69                         R&B and Hip-Hop 
70                         i hate palo alto
79                         Hip-Hop top 2023
80                          Hip-Hop bangers
81                                        L
95                 TRANSITION. No crossfade
97                                 desi DnB
102                                DnB 🥁 SJ
103                                DnB 🥁 SJ
Name: Title, dtype: object

### Merge table with Library Tracks to get title of songs in the playlist

In [33]:
def load_data(FILE_PATH):
    with open(FILE_PATH, 'r') as f:
        data = json.load(f)
    return pd.DataFrame(data)

In [34]:
LIBRARY_TRACK_FILE = 'Apple Music Library Tracks.json'
df_tracks = load_data(os.path.join(DATA_PATH, LIBRARY_TRACK_FILE))

In [35]:
df_tracks.shape

(2761, 52)

In [36]:
df_exploded = df.explode('Playlist Item Identifiers')

In [37]:
df_exploded.shape

(2467, 14)

In [38]:
df_exploded.head()

Unnamed: 0,Container Type,Container Identifier,Title,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile
0,Favorites Playlist,2,Favorite Songs,182887450,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,
0,Favorites Playlist,2,Favorite Songs,182907166,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,
0,Favorites Playlist,2,Favorite Songs,182896434,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,
0,Favorites Playlist,2,Favorite Songs,182906674,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,
0,Favorites Playlist,2,Favorite Songs,182905398,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,


In [39]:
df_tracks['Track Identifier'].nunique()

2761

In [40]:
df_with_tracks = pd.merge(df_exploded, df_tracks[['Track Identifier', 'Title']], how='left', left_on='Playlist Item Identifiers', right_on='Track Identifier')

In [41]:
df_with_tracks.head()

Unnamed: 0,Container Type,Container Identifier,Title_x,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile,Track Identifier,Title_y
0,Favorites Playlist,2,Favorite Songs,182887450,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,,182887450.0,Forget It
1,Favorites Playlist,2,Favorite Songs,182907166,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,,182907166.0,KILL DEM
2,Favorites Playlist,2,Favorite Songs,182896434,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,,182896434.0,PROBLEMATIC
3,Favorites Playlist,2,Favorite Songs,182906674,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,,182906674.0,F*CK WHAT THEY SAY
4,Favorites Playlist,2,Favorite Songs,182905398,True,2023-10-31T01:45:27Z,2023-04-26T04:35:27Z,2023-04-26T04:35:27Z,2024-09-27T04:56:46Z,,,,,,182905398.0,where will i be


In [42]:
df['Title'].unique()

array(['Favorite Songs', nan, 'Bathroom ', 'Club', 'My Shazam Tracks',
       'Temp', 'you', '13 reasons why personal', 'Untitled Playlist',
       'Seedhe Maut', 'Linkin Park', 'Non Hip-Hop ', 'DHH',
       'Hip-Hop / Trap', 'Hip-Hop Outro', 'Alt', 'Emo',
       'Beat Switch Symphony', 'The Weeknd - After Hours Til Dawn', 'H',
       'Atlantic Beats: Trap meets EDM', 'Gujarati',
       'Best Intros (Hip-Hop)', 'Best remixes (Hip-Hop)', 'Mixtape',
       "Lil Baby & Young Thug - It's Only Me", 'Indian Drill',
       'Relax on Hip-Hop ', 'Jersey Club ', 'Mask Off',
       'R&B and Hip-Hop ', 'i hate palo alto', 'Piano Hip-Hop Tunes',
       'Flute \U0001fa88🪤', 'Gujarati Hip-Hop ',
       'Dark&Scary Beats by Metro', 'Hip-Hop top 2023', 'Hip-Hop bangers',
       'L', 'Library: Upcoming', 'Video Songs',
       '2016 XXL freshman cypher ', 'Guitar Hip-Hop ', 'Wake UP at 5 AM',
       'Elevate', 'Rap Revival', 'TRANSITION. No crossfade', 'desi DnB',
       'MikeDeanMagic 🌵', '3AreLegend', 

In [43]:
list(df_with_tracks[df_with_tracks['Title_x'] == 'MikeDeanMagic 🌵']['Title_y'])

['HIGHEST IN THE ROOM',
 'For The Night (feat. Lil Baby & DaBaby)',
 '90210 (feat. Kacy Hill)',
 'Ni**as in Paris',
 'goosebumps',
 '5% TINT',
 'Stronger',
 'pick up the phone',
 'NO BYSTANDERS',
 'STOP TRYING TO BE GOD',
 'Father Stretch My Hands, Pt. 1',
 'SICKO MODE (Skrillex Remix)',
 'SICKO MODE',
 'Panda',
 'Power',
 'BUTTERFLY EFFECT',
 'Antidote',
 'HYAENA',
 'Watch (feat. Lil Uzi Vert & Kanye West)',
 'STARGAZING',
 'Hurricane',
 "CAN'T SAY",
 'FE!N (feat. Playboi Carti)',
 'YOSEMITE',
 'THANK GOD',
 'MY EYES',
 'FRANCHISE (feat. Young Thug & M.I.A.)',
 'Power is Power',
 'The Plan (From the Motion Picture "TENET")',
 'All of the Lights',
 'Am I Dreaming',
 'Young Metro',
 'Popular (feat. Playboi Carti) [Music from the HBO Original Series The Idol]',
 'Double Fantasy (feat. Future)',
 'TOP FLOOR (feat. Travis Scott)',
 'Annihilate (Spider-Man: Across the Spider-Verse)',
 'Flocky Flocky (feat. Travis Scott)',
 'Oh Me Oh My (feat. Travis Scott, Migos & G4shi)',
 'Never Sleep (fe

In [44]:
df_exploded.shape, df_with_tracks.shape

((2467, 14), (2467, 16))

In [45]:
df_with_tracks['Title_y'].isna().sum()

55

In [46]:
df_with_tracks[df_with_tracks['Title_y'] == 'Mixtape']['Title_x'].unique()

array([], dtype=object)

In [47]:
df[df['Title'] == 'Mixtape']

Unnamed: 0,Container Type,Container Identifier,Title,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile
53,Playlist,256002829,Mixtape,"[182880802, 182870558, 182880806, 182880798]",,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,


In [48]:
df_exploded[df_exploded['Title'] == 'Mixtape']

Unnamed: 0,Container Type,Container Identifier,Title,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile
53,Playlist,256002829,Mixtape,182880802,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,
53,Playlist,256002829,Mixtape,182870558,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,
53,Playlist,256002829,Mixtape,182880806,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,
53,Playlist,256002829,Mixtape,182880798,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,


In [49]:
df_with_tracks[df_with_tracks['Title_x'] == 'Mixtape']

Unnamed: 0,Container Type,Container Identifier,Title_x,Playlist Item Identifiers,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile,Track Identifier,Title_y
1841,Playlist,256002829,Mixtape,182880802,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,,182880802.0,Meri Pant Bhi Sexy
1842,Playlist,256002829,Mixtape,182870558,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,,182870558.0,Way 2 Sexy (feat. Future & Young Thug)
1843,Playlist,256002829,Mixtape,182880806,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,,182880806.0,Sanam Re
1844,Playlist,256002829,Mixtape,182880798,,,2022-12-20T18:50:39Z,2022-12-20T18:50:40Z,2022-12-20T18:50:40Z,,,,,,182880798.0,Whoopty


In [50]:
df_tracks['Playlist Only Track'].value_counts()

Playlist Only Track
True     300
False    173
Name: count, dtype: int64

In [51]:
df_tracks.shape

(2761, 52)

In [52]:
list(df_tracks[df_tracks['Playlist Only Track'] == True]['Title'])

['2 Phút Hơn (KAIZ Remix)',
 'UCLA',
 "I Don't Like (feat. Lil Reese)",
 'Everytime We Touch (Hardwell & Maurice West Remix)',
 'Everytime We Touch (Radio Edit)',
 'Opus (Four Tet Remix)',
 'Riot Gear',
 'Hall of Fame (feat. will.i.am)',
 'Friendships',
 "It's Time",
 'Feelings',
 'Eat the Beat',
 'Then Leave (feat. Queendome Come)',
 'Boom, Boom, Boom, Boom!!',
 'Body (feat. brando) [Mixed]',
 "Day N' Night (feat. ZHIKO)",
 'In My Moog (The Art of Seq)',
 'Trump It',
 'Yonaguni',
 'Body (Dzeko Extended Remix)',
 'Body (Dzeko Remix)',
 'Laser House',
 'FL.EX (Flow Experience)',
 'Roses',
 'Body (feat. brando) [Chus & Ceballos Remix]',
 'Body (feat. brando) [Live]',
 'Swing',
 'Poker Face (Space Cowboy Remix)',
 "What's Next",
 'Moment 4 Life (feat. Drake)',
 'seaside_demo',
 'Hearts on Fire',
 'How Do I Make You Love Me?',
 "Can't Feel My Face",
 'Or Nah (feat. The Weeknd, Wiz Khalifa and DJ Mustard) [Remix]',
 'Party Monster',
 'Faith',
 'I Feel It Coming (feat. Daft Punk)',
 'Is Ther

In [53]:
list(df_tracks[df_tracks['Playlist Only Track'] == False]['Title'])

['Pray For Me',
 'Lucid Dreams',
 'ROCKSTAR (feat. Roddy Ricch)',
 'Lemonade (feat. NAV)',
 'Mask Off (Marshmello Remix)',
 'Save Your Tears',
 'Like I Do',
 'Blessings (feat. Drake)',
 'Mo Bamba',
 'Centuries',
 'More Than You Know',
 'The Box',
 'Forbidden Voices',
 'Stay A While',
 'Travis Scott  INTERSTELLAR ft Frank Ocean Hans Zimmer',
 'INTERSTELLAR',
 'goosebumps (Time Remix)',
 'WhatsApp Audio 2021-11-25 at 7.05.22 PM',
 'Never Say Goodbye',
 "We'll Be Coming Yee (Hardwell",
 'The Island (Tiesto Remix)',
 'Stereo Love (Extend Version)',
 'Lose Myself vs. Fifteen (Hardw',
 'Dare You (Extended Mix)',
 'If I Lose Myself (Alesso vs OneRepublic)',
 'How We Do vs. Summertime Sadne',
 'Welcome To The Jungle vs. Lepr',
 'Live For The Spaceman (Hardwel',
 'Pallaroid vs. Arise vs. I Coul',
 'Apollo (Hardwell Private Conce',
 'The Code vs. Alive vs. Harder',
 'Years vs. Cannonball vs. Anima',
 'Jumper',
 'Alive Thunder (Hardwell Mashup',
 'Avicii - Silhouettes',
 'Calvin Harris feat. Elli

In [54]:
df['Container Identifier'].isna().sum()

0

In [55]:
df['Container Identifier'].nunique()

106

In [56]:
df.shape

(106, 14)

In [57]:
# Rename Title_x to Title and Title_y to Tracks Title
df_with_tracks.rename(columns={'Title_x': 'Title', 'Title_y': 'Tracks Title'}, inplace=True)

In [58]:
df_with_tracks.columns

Index(['Container Type', 'Container Identifier', 'Title',
       'Playlist Item Identifiers', 'Favorite Status - Playlist',
       'Favorite Date - Playlist', 'Added Date',
       'Name or Description Modified Date', 'Playlist Items Modified Date',
       'Public Playlist Identifier', 'Playlist Is Shared',
       'Playlist Previously Shared', 'Description',
       'Available On Apple Music Profile', 'Track Identifier', 'Tracks Title'],
      dtype='object')

In [59]:
df_with_tracks['Title']

0       Favorite Songs
1       Favorite Songs
2       Favorite Songs
3       Favorite Songs
4       Favorite Songs
             ...      
2462          DnB 🥁 SJ
2463          DnB 🥁 SJ
2464          DnB 🥁 SJ
2465          DnB 🥁 SJ
2466               NaN
Name: Title, Length: 2467, dtype: object

In [60]:
df_with_tracks['Tracks Title']

0                            Forget It
1                             KILL DEM
2                          PROBLEMATIC
3                   F*CK WHAT THEY SAY
4                      where will i be
                     ...              
2462                        Eyes on Me
2463                          Mr Happy
2464                       Click Clack
2465    Laserbeam (Blanke's ÆON:REMIX)
2466                               NaN
Name: Tracks Title, Length: 2467, dtype: object

In [65]:
# Automatically create the aggregation dictionary
aggregation_dict = {col: 'first' for col in df_with_tracks.columns if col not in ['Title', 'Tracks Title', 'Playlist Item Identifiers']}
aggregation_dict['Tracks Title'] = list
aggregation_dict['Playlist Item Identifiers'] = list

# Group by 'playlist_id' and apply the aggregation
df_merged = df_with_tracks.groupby('Title').agg(aggregation_dict).reset_index()


In [66]:
df_merged.head()

Unnamed: 0,Title,Container Type,Container Identifier,Favorite Status - Playlist,Favorite Date - Playlist,Added Date,Name or Description Modified Date,Playlist Items Modified Date,Public Playlist Identifier,Playlist Is Shared,Playlist Previously Shared,Description,Available On Apple Music Profile,Track Identifier,Tracks Title,Playlist Item Identifiers
0,13 reasons why personal,Playlist,256001041,,,2021-08-06T06:45:18Z,2021-08-06T06:45:56Z,2021-08-06T06:46:18Z,pl.u-aZb0N64tPBe640B,True,True,13 reasons why personal,,182863410.0,"[f**k, i'm lonely (feat. Anne-Marie), The Nigh...","[182863410, 182863878, 182863158, 182863654, 1..."
1,2016 XXL freshman cypher,Playlist,256004625,,,2024-01-04T06:53:44Z,2024-01-04T06:54:58Z,2024-01-04T06:54:58Z,pl.u-mJy88aPtzX3P17X,True,True,"Denzel Curry, Lil Uzi Vert, Lil Yachty, 21 Sav...",,182890250.0,[Yessirskiii],[182890250]
2,3AreLegend,Playlist,256005377,,,2024-05-03T05:42:39Z,2024-05-03T05:42:43Z,2024-05-03T05:50:42Z,pl.u-76oNP26CW5l6z05,True,True,,True,182874902.0,"[Bubbly (with Drake & Travis Scott), Nanchaku,...","[182874902, 182862890, 182890290, 182879514, 1..."
3,Alt,Playlist,256001809,,,2022-04-29T09:44:09Z,2022-04-29T09:44:10Z,2022-07-21T00:14:13Z,pl.u-e98lkMLizxJLqDx,True,True,,True,182872850.0,"[When I R.I.P., All for Us (From the HBO Origi...","[182872850, 182872342, 182872326, 182872322, 1..."
4,Atlantic Beats: Trap meets EDM,Playlist,256002065,,,2022-09-10T22:32:38Z,2023-05-25T20:01:24Z,2023-10-17T18:27:54Z,pl.u-9N9LX83Cx9vPYq9,True,True,Collaboration of the both sides of Atlantic.,,182871350.0,"[SICKO MODE (Skrillex Remix), HUMBLE. (SKRILLE...","[182871350, 182878214, 182861870, 182885150, 1..."
