In [1]:
import xml.etree.ElementTree as ET
# Parse the exported iTunes library (XML) and keep a handle on its root element.
tree = ET.parse('data/iTunesLibrary.xml')
root = tree.getroot()
# Cell output: the tag name of the root element.
root.tag

'plist'

In [2]:
# Cell output: the attribute dict of the root element.
root.attrib

{'version': '1.0'}

In [3]:
# Print the tag and attributes of every direct child of the root element.
for child in root:
    print(child.tag, child.attrib)

dict {}


In [4]:
# Collect the <key> elements that sit directly inside the root's <dict> child.
items = root.findall('./dict/key')
# Cell output: the text of each of those keys, in document order.
[item.text for item in items]

['Major Version',
 'Minor Version',
 'Date',
 'Application Version',
 'Features',
 'Show Content Ratings',
 'Music Folder',
 'Library Persistent ID',
 'Tracks',
 'Playlists']

In [5]:
def _plist_value(elem):
    """Convert a scalar plist element into a Python value.

    ``<true/>`` and ``<false/>`` become ``True`` / ``False``; any other
    element is represented by its text content (``None`` when it has none).
    """
    if elem.tag == 'true':
        return True
    if elem.tag == 'false':
        return False
    return elem.text


def _parse_track(track_elem):
    """Build a dict from one track element.

    The children of ``track_elem`` alternate between a ``<key>`` element and
    the element holding that key's value.
    """
    track = {}
    item_key = None
    for track_item in track_elem:
        if track_item.tag == 'key':
            item_key = track_item.text
        elif item_key is not None:
            # a value that appears before any <key> has no name and is ignored
            track[item_key] = _plist_value(track_item)
    return track


# Walk the top-level <dict>: its children alternate between <key> and value.
lib = {}
key = ''
for item in root[0]:
    if item.tag == 'key':
        key = item.text
    elif key == 'Tracks':
        # Children alternate <key>track id</key> / track details; the id is
        # only used as a key in the file and is not needed, so skip it.
        lib[key] = [_parse_track(t) for t in item if t.tag != 'key']
    elif key == 'Playlists':
        # playlists are not parsed
        lib[key] = None
    else:
        # Same scalar conversion as for track fields, so top-level booleans
        # come out as True/False instead of None.
        lib[key] = _plist_value(item)

In [6]:
# Cell output: number of track dicts parsed from the library.
len(lib['Tracks'])

20783

In [7]:
# extract interesting data
# output field name -> key used in the parsed library's track dicts
field_map = {
    'title': 'Name',
    'artist': 'Artist',
    'album': 'Album',
    'genre': 'Genre',
}
tracks = []
for track in lib['Tracks']:
    # .get() yields None for a track that lacks the given key
    tracks.append({field: track.get(source_key) for field, source_key in field_map.items()})

In [8]:
# get all genres
# One pass over the extracted tracks; the set removes duplicates.
# (The former stray `set([track['genre']])` line was dropped: its result was
# discarded and it depended on whatever `track` an earlier cell left behind.)
genres = {track['genre'] for track in tracks}
genres

{'Acid Jazz',
 'Acoustic Blues',
 'Africa',
 'African',
 'Afro-Pop',
 'Alternative',
 'Americana',
 'Avant-Garde Jazz',
 'Big Band',
 'Bluegrass',
 'Blues',
 'Bop',
 'Chicago Blues',
 'Christian & Gospel',
 'Christmas',
 'Classic Blues',
 'Classical',
 'Contemporary Blues',
 'Contemporary Jazz',
 'Contemporary R&B',
 'Country',
 'Crossover Jazz',
 'Dance',
 'Delta Blues',
 'Devotional & Spiritual',
 'Disco',
 'Easy Listening',
 'Electric Blues',
 'Electronic',
 'Folk',
 'Funk',
 'Fusion',
 'Gospel',
 'Hard Bop',
 'Hard Rock',
 'Indian Classical',
 'Indie Rock',
 'Jazz',
 'Latin',
 'Latin Jazz',
 'Lounge',
 'Metal',
 'New Age',
 'Outlaw Country',
 'Pop',
 'Pop/Rock',
 'Punk',
 'R&B',
 'R&B/Soul',
 'Raíces',
 'Reggae',
 'Regional Indian',
 'Rock',
 'Singer/Songwriter',
 'Smooth Jazz',
 'Soul',
 'Soundtrack',
 'South Africa',
 'South America',
 'Traditional Gospel',
 'Traditional Pop',
 'Trip Hop',
 'Vocal',
 'Vocal Jazz',
 'World'}

In [9]:
# Genres assigned to the "classical" bucket.
genres_classical = [
    'Classical',
    'Indian Classical',
    'Regional Indian',
]

In [10]:
# Genres assigned to the "jazz & blues" bucket.
genres_jazz_blues = [
    'Acoustic Blues', 'Avant-Garde Jazz', 'Big Band', 'Blues', 'Bop',
    'Chicago Blues', 'Christian & Gospel', 'Christmas', 'Classic Blues',
    'Contemporary Blues', 'Contemporary Jazz', 'Crossover Jazz',
    'Delta Blues', 'Devotional & Spiritual', 'Electric Blues', 'Gospel',
    'Hard Bop', 'Jazz', 'Latin', 'Latin Jazz', 'New Age', 'Smooth Jazz',
    'Vocal Jazz',
]

In [11]:
# Genres assigned to the "pop" bucket.
genres_pop = [
    'Acid Jazz', 'Africa', 'African', 'Afro-Pop', 'Contemporary R&B',
    'Dance', 'Disco', 'Easy Listening', 'Electronic', 'Funk', 'Fusion',
    'Lounge', 'Pop', 'R&B', 'R&B/Soul', 'Raíces', 'Reggae', 'Soul',
    'Soundtrack', 'South Africa', 'South America', 'Traditional Gospel',
    'Traditional Pop', 'Trip Hop', 'Vocal', 'World',
]

In [12]:
# Genres assigned to the "rock" bucket.
genres_rock = [
    'Alternative', 'Americana', 'Bluegrass', 'Country', 'Folk',
    'Hard Rock', 'Indie Rock', 'Metal', 'Outlaw Country', 'Pop/Rock',
    'Punk', 'Rock', 'Singer/Songwriter',
]

In [13]:
# Compare the number of distinct genres with the combined size of the four
# hand-made buckets.
genre_buckets = (genres_classical, genres_jazz_blues, genres_pop, genres_rock)
bucket_total = sum(len(bucket) for bucket in genre_buckets)
print(f"total genres: {len(genres)}")
print("partial genres:", bucket_total)

total genres: 65
partial genres: 65


In [22]:
# Keep the tracks whose genre is listed in the jazz & blues bucket.
tracks_sel = list(filter(lambda candidate: candidate['genre'] in genres_jazz_blues, tracks))
len(tracks_sel)

8888

In [23]:
# configure logging
import logging

# Everything from DEBUG upwards goes to iTunes.log; filemode 'w' starts the
# file afresh each time this configuration is applied.
log_settings = dict(
    filename='iTunes.log',
    filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
)
logging.basicConfig(**log_settings)

In [24]:
# configure spotify
# NOTE(review): `spotify` is imported by bare name; presumably a project-local
# helper module rather than a published package — confirm.
import spotify
# Log in, requesting the 'playlist-modify-private' scope.
sp = spotify.login(scope='playlist-modify-private')
user = sp.current_user()
user['display_name']   # cell output; a quick check that the login succeeded

'Mehmet Birgi'

In [25]:
# search for the tracks on spotify
import ipypb
tracks_sp_ids = set()
for track in ipypb.track(tracks_sel):
    # Title and artist may be None (they were read with dict.get()); leave a
    # missing part out instead of searching for the literal word "None".
    q = " ".join(part for part in (track['title'], track['artist']) if part)
    if not q:
        logging.info("--- skipped: track has neither title nor artist")
        continue
    logging.info(f"searching for: {q}")
    # Only the first hit is used, so request a single result.
    results = sp.search(q, limit=1, type='track', market='CH')['tracks']['items']
    if results:
        spotify_id = results[0]['id']
        logging.info(f"found: {spotify_id}")
        tracks_sp_ids.add(spotify_id)
    else:
        logging.info("--- not found")

In [26]:
# Cell output: number of distinct Spotify track ids collected (it is a set,
# so tracks that resolved to the same id are counted once).
len(tracks_sp_ids)

6447

In [27]:
# get a playlist on spotify
# Look the playlist up by name and have the helper create it when missing;
# the helper hands back the id together with a "was just created" flag.
playlist_name = "iTunes Tracks: Jazz & Blues"
playlist_id, is_new = spotify.get_playlist_by_name(sp, playlist_name, create_if_none=True)

if not playlist_id:
    print("Something went wrong :-(")
elif is_new:
    print(f"Created new playlist with ID {playlist_id}")
else:
    print(f"Found existing playlist with ID {playlist_id}")

Created new playlist with ID 1KCwuY3tYlt8pdiYhCboOn


In [28]:
# Mark the playlist as non-public and set its description.
# NOTE(review): the description text ends in "part " and the f-string has no
# placeholder — it looks unfinished; confirm the intended wording.
sp.user_playlist_change_details(user['id'], playlist_id, public=False,
                                description=f"Tracks from my iTunes library as of 2019, part ")

In [29]:
# add tracks to playlist
batch_size = 100  # max 100 tracks per batch
# Turn the set into a list once, so every batch is sliced from the same
# ordering and the conversion is not repeated on each iteration.
track_ids = list(tracks_sp_ids)
# Ceiling division: an exact multiple of batch_size must not produce an extra,
# empty batch (which would be sent to the API as an empty request).
num_batches = (len(track_ids) + batch_size - 1) // batch_size
num_tracks_added = 0
for i in ipypb.track(range(num_batches)):
    logging.info(f"adding batch #{i+1}")
    batch_start = i * batch_size
    batch = track_ids[batch_start:batch_start + batch_size]
    batch_end = batch_start + len(batch)  # the last batch may be shorter
    logging.info(f"tracks from {batch_start} to {batch_end}")
    sp.user_playlist_add_tracks(user['id'], playlist_id, batch)
    num_tracks_added += len(batch)
logging.info(f"added {num_tracks_added} tracks in total")
print("finished")

finished


### Clean the library

In [166]:
# Cell output: the distinct genres currently present in `tracks`.
{entry['genre'] for entry in tracks}

{'Acid Jazz',
 'Alternative',
 'Bluegrass',
 'Blues',
 'Classical',
 'Country',
 'Disco',
 'Electronic',
 'Funk',
 'Jazz',
 'Latin',
 'Lounge',
 'Pop',
 'Punk',
 'R&B',
 'Reggae',
 'Rock',
 'Singer/Songwriter',
 'Soul',
 'Trip Hop',
 'World'}

In [165]:
# Relabel, in place, every track whose genre contains `genre_match`, and
# print each track that was changed. Edit the two values below to apply a
# different mapping.
genre_match = 'Pop/R'
genre_target = 'Rock'
for track in tracks:
    # genre is None when the track carried no genre; such tracks cannot match
    # and must not reach the substring test, which would raise on None
    if track['genre'] and genre_match in track['genre']:
        track['genre'] = genre_target
        print(track)

{'title': 'The Crusher', 'artist': 'The Novas', 'album': 'Soldier Boy', 'genre': 'Rock'}


In [159]:
# Drop every track labelled 'New Age'; `tracks` is rebound to the filtered list.
tracks = list(filter(lambda entry: entry['genre'] != 'New Age', tracks))

In [170]:
# Cell output: number of entries currently in `tracks`.
len(tracks)

20780

In [169]:
# save tracks
import json
import os
import utils
folder = 'data'
filename = f"iTunes_tracks_{utils.timestamp()}.json"
# exist_ok=True makes this a no-op when the folder is already there, without
# a separate existence check that could go stale before the directory is made.
os.makedirs(folder, exist_ok=True)
# ensure_ascii=False keeps non-ASCII characters readable in the file, which is
# why the file is opened explicitly as UTF-8.
with open(os.path.join(folder, filename), 'w', encoding='utf-8') as f:
    json.dump(tracks, f, ensure_ascii=False, indent=4)