In [79]:
from lxml import etree
from datetime import datetime
import pandas as pd
pd.set_option('display.max_columns', None)

In [112]:
def _key_value_pairs(plist_dict):
    """Yield ``(key_element, value_element)`` for every ``<key>`` child and the sibling right after it."""
    children = list(plist_dict)
    # zip() stops one short of the end, so a trailing <key> with no value is
    # skipped instead of failing on a missing sibling.
    for child, following in zip(children, children[1:]):
        if child.tag == 'key':
            yield child, following


def _find_tracks_section(root):
    """Return the ``<dict>`` that directly follows the first ``<key>Tracks</key>``, or None."""
    for candidate in root.iter('dict'):
        for key, value in _key_value_pairs(candidate):
            if key.text == 'Tracks' and value.tag == 'dict':
                return value
    return None


def parse_xml(xml_path):
    """Extract one ``{key text: value text}`` dict per track from a library XML file.

    The file is expected to be property-list style XML in which a
    ``<key>Tracks</key>`` element is followed by a ``<dict>`` whose ``<dict>``
    children each describe one track as alternating ``<key>`` / value elements
    (presumably an Apple Music / iTunes ``Library.xml`` export -- confirm).

    Only the first ``<key>Tracks</key>`` that is followed by a ``<dict>`` is
    used, so a track whose own name, album, etc. is literally "Tracks" can no
    longer be mistaken for the tracks section.

    Parameters
    ----------
    xml_path : str or path-like
        Location of the XML file to read.

    Returns
    -------
    list of dict
        One dict per track, in document order. Values are the raw element text
        (strings); elements without text, such as ``<true/>``, map to ``None``.

    Raises
    ------
    ValueError
        If the document contains no ``<key>Tracks</key>`` followed by a ``<dict>``.
    """
    # Parse the whole document up front: sibling lookups are only reliable once
    # the tree is complete, which is not guaranteed in the middle of iterparse.
    root = etree.parse(xml_path).getroot()

    tracks_section = _find_tracks_section(root)
    if tracks_section is None:
        raise ValueError(f"No 'Tracks' section found in {xml_path!r}")

    track_dicts = []
    for entry in tracks_section:
        # The section alternates <key>track id</key> and <dict>track</dict>;
        # only the dicts carry track data.
        if entry.tag != 'dict':
            continue
        track_dicts.append(
            {key.text: value.text for key, value in _key_value_pairs(entry)}
        )
    return track_dicts

def df_from_xml(xml_path):
    """Build a DataFrame of tracks from a library XML file, without the unused columns.

    Parameters
    ----------
    xml_path : str or path-like
        Location of the XML file, passed straight to ``parse_xml``.

    Returns
    -------
    pandas.DataFrame
        One row per track dict returned by ``parse_xml``; every column named in
        ``cols_to_drop`` that is present has been removed.
    """
    cols_to_drop = [
        'Kind', 'Size', 'Disc Number', 'Disc Count',
        'Track Number', 'Date Modified', 'Bit Rate',
        'Sample Rate', 'Artwork Count', 'Track Type', 'Location',
        'File Folder Count', 'Library Folder Count', 'Normalization',
        'Volume Adjustment', 'Play Date', 'Apple Music', 'Movement Name',
        'Grouping', 'Movement Number', 'Movement Count',
        'Part Of Gapless Album', 'Comments', 'Album Loved', 'Matched', 'BPM',
        'Playlist Only', 'Explicit', 'Sort Composer', 'Clean', 'Protected',
        'File Type', 'Track Count',
    ]
    new_data_df = pd.DataFrame(parse_xml(xml_path))
    # A column only exists if at least one track carries that key, so a library
    # may lack some of these; errors='ignore' skips the absent ones instead of
    # raising KeyError.
    return new_data_df.drop(columns=cols_to_drop, errors='ignore')

def get_master_data(master_data_path):
    """Read the master data CSV into a DataFrame.

    Parameters
    ----------
    master_data_path : str or path-like
        Location of the CSV file, handed to ``pandas.read_csv`` with default options.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    return pd.read_csv(master_data_path)

def add_year_to_master(master_data_df, new_data_df, year=None,
                       baseline_year=2020, key='Persistent ID'):
    """Add a ``'<year> Play Count'`` column holding the plays gained since the master snapshot.

    The new column is the 'Play Count' in ``new_data_df`` minus the
    'Play Count' in ``master_data_df``, with missing counts treated as 0.

    Parameters
    ----------
    master_data_df : pandas.DataFrame
        Baseline table with a 'Play Count' column. It is modified in place
        (missing play counts become 0, the new column is added) and returned.
    new_data_df : pandas.DataFrame
        Current table with a 'Play Count' column. It is not modified.
    year : int, optional
        Year used in the new column name. Defaults to the current calendar year.
    baseline_year : int, optional
        Year for which no column is added and the master is returned untouched.
        Defaults to 2020.
    key : str, optional
        Column used to match rows of the two tables. When it is present in
        both, counts are matched on it, so added, removed or reordered tracks
        cannot be paired with the wrong row. Master rows with no match in
        ``new_data_df`` get NaN. When the column is missing from either table,
        rows are matched on the DataFrame index.

    Returns
    -------
    pandas.DataFrame
        ``master_data_df`` itself.
    """
    if year is None:
        year = datetime.now().year
    if year == baseline_year:
        return master_data_df

    def _int_counts(play_counts):
        # to_numeric copes with the text values that come out of the XML export
        # as well as with numeric CSV columns; missing counts become 0.
        return pd.to_numeric(play_counts).fillna(0).astype('int32')

    # Assign the filled column back rather than calling fillna(inplace=True) on
    # a column selection: that form acts on a temporary under pandas
    # Copy-on-Write and would leave the NaNs in place.
    master_data_df['Play Count'] = master_data_df['Play Count'].fillna(0)
    baseline_counts = _int_counts(master_data_df['Play Count'])
    current_counts = _int_counts(new_data_df['Play Count'])

    if key in master_data_df.columns and key in new_data_df.columns:
        counts_by_key = pd.Series(
            current_counts.to_numpy(), index=new_data_df[key].to_numpy()
        )
        # Series.map needs a unique lookup index; keep the last row per key.
        counts_by_key = counts_by_key[~counts_by_key.index.duplicated(keep='last')]
        current_counts = master_data_df[key].map(counts_by_key)

    master_data_df[f'{year} Play Count'] = current_counts - baseline_counts
    return master_data_df



In [108]:
# Input files for the analysis.
# NOTE(review): both are absolute paths into one user's home directory, so this
# cell only runs on a machine where those files exist.
master_data_path = '/Users/kevinmarlis/Developer/Music Analytics/AM_analytics.csv'
xml_path = '/Users/kevinmarlis/Developer/Music Analytics/Library.xml'

# Load the saved master table from CSV and build the current table from the library XML.
master_data_df = get_master_data(master_data_path)
new_data_df = df_from_xml(xml_path)

In [113]:
# Add this year's play-count difference to the master table; the call returns
# the master unchanged when the current year is 2020.
master_data_df = add_year_to_master(
    master_data_df=master_data_df,
    new_data_df=new_data_df,
)

In [None]:
# End of year analytics

In [68]:
# Total Time is in milliseconds. Total Play Time is in minutes
# NOTE(review): `df` is not defined in any visible cell, so this cell fails on a
# fresh kernel; it presumably holds the master play-count table (e.g.
# `master_data_df`) -- confirm and define it explicitly.
# Keep the rows whose 'Play Date UTC' compares greater than '2020', ordered by
# that date. The comparison is only a lower bound, so any later date passes too.
played_in_2020 = df[df['Play Date UTC']> '2020'].sort_values(by=['Play Date UTC'])
# Play Count x Total Time (ms), divided by 60000 to convert milliseconds to minutes.
played_in_2020['Total Play Time'] = played_in_2020['Play Count'].astype('int32') * played_in_2020['Total Time'].astype('int32') / 60000

In [69]:
# Show the selected tracks from the largest to the smallest 'Total Play Time'.
played_in_2020.sort_values(by='Total Play Time', ascending=False)

Unnamed: 0,Track ID,Name,Artist,Album Artist,Album,Work,Genre,Total Time,Year,Date Added,Play Count,Play Date UTC,Sort Album,Sort Album Artist,Sort Artist,Persistent ID,Skip Count,Skip Date,Sort Name,Composer,Release Date,Compilation,2020 Play Count,Total Play Time
13426,29918,Mothership Connection (Star Child),Parliament,Parliament,Mothership Connection,,Funk,373267,1975,2020-02-26T19:04:45Z,18,2020-11-29T18:58:30Z,Mothership Connection,,Parliament,044A087DE8630DCB,1,2020-06-13T18:04:18Z,Mothership Connection (Star Child),"George Clinton, Jr., William Earl Collins & Be...",1975-12-15T12:00:00Z,,18,111.980100
15226,33535,Get Down,Curtis Mayfield,Curtis Mayfield,Roots,,R&B/Soul,348333,1971,2020-08-19T21:24:54Z,12,2020-11-29T19:17:21Z,Roots,,Curtis Mayfield,BAE668CBAB063CBF,,,Get Down,Curtis Mayfield,1971-10-01T07:00:00Z,,12,69.666600
15225,33533,Beautiful Brother of Mine,Curtis Mayfield,Curtis Mayfield,Roots,,R&B/Soul,447200,1971,2020-08-19T21:24:54Z,9,2020-11-29T19:11:32Z,Roots,,Curtis Mayfield,85C39B286D15E618,,,Beautiful Brother of Mine,Curtis Mayfield,1971-10-01T07:00:00Z,,9,67.080000
1784,6618,who sees you,My Bloody Valentine,,M B V,,Shoegaze,372271,2013,2016-04-20T06:11:29Z,9,2020-12-29T01:47:14Z,,,,7EF3735E606D682B,,2018-12-08T05:40:25Z,,,,,9,55.840650
13537,30140,Genius of Love,Tom Tom Club,Tom Tom Club,Tom Tom Club,,Pop,334353,1981,2020-03-11T03:48:17Z,10,2020-11-29T19:04:05Z,Tom Tom Club,,Tom Tom Club,0F40FE20931C38A5,,,Genius of Love,"Tom Tom Club, Chris Frantz & Tina Weymouth",1981-10-01T12:00:00Z,,10,55.725500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13081,29228,creation day the travel flute way,Broadcast,,Mother is the MIlky Way,,Space,33959,2009,2018-03-26T03:45:21Z,1,2020-07-29T01:12:36Z,,,,8610882D67213708,,,,,,,1,0.565983
10874,24798,Ostrich & Chirping,Elliott Smith,Elliott Smith,From A Basement On The Hill,,Alternative,33926,2004,2017-12-23T19:49:58Z,1,2020-06-30T22:06:09Z,From A Basement On The Hill,,Elliott Smith,967B7F6A583A29B6,,2019-01-01T19:18:43Z,Ostrich & Chirping,,2004-10-18T12:00:00Z,,1,0.565433
3698,10446,You're Not An Airplane,Guided By Voices,,Bee Thousand,,Lo-Fi,33358,1994,2009-12-11T09:37:32Z,1,2020-09-19T23:07:46Z,,,,C43AEADF0B493487,,2017-12-25T07:36:29Z,,Tobin Sprout,,,1,0.555967
13080,29226,growing backwards,Broadcast,,Mother is the MIlky Way,,Space,19670,2009,2018-03-26T03:45:21Z,1,2020-06-21T00:07:32Z,,,,C50BC7A40955E48E,,2018-03-30T21:47:30Z,,,,,1,0.327833
