In [1]:
import os
import utils
import pandas as pd
import csv

# Create Streams Frame

In [2]:
def update_tracks(path = 'output/tracks.csv'):
    '''
    Updates the main data bank of tracks to prevent redundant API calls for information that has already
    been queried for.

    The streaming history is compiled through ``utils`` and saved to ``output/streams.json``. Tracks
    already stored in the CSV at ``path`` are recovered, every streamed track that is not in the bank
    yet is looked up through the helpers in ``utils``, and all tracks that ended up with details are
    written back to ``path``. Tracks that could not be identified are not persisted, so they are
    retried on the next call.

    Parameters
    ----------
    path : str
        CSV file holding the track bank. It is read when it exists and (re)written at the end.

    Returns
    -------
    None
        Results are reported with ``print`` and persisted to ``path``.
    '''

    # Updates tracks based off what was streamed and given as listening history from spotify data dump
    streams = utils.compile_streaming_history()
    utils.write_json(streams, 'output/streams.json')

    if os.path.exists(path):

        print('Recovering tracks...')

        # One CSV row per track, indexed by its key; a later duplicate key replaces an earlier one.
        with open(path) as csvFile:
            tracks = {row['track_key']: row for row in csv.DictReader(csvFile)}

        number_existing_track_keys = len(tracks)

        # Membership is tested against the dict itself instead of a list of its keys,
        # which avoids a linear scan per stream.
        new_track_keys = set()
        for stream in streams:
            track_key = stream['trackName'] + '___' + stream['artistName']
            if track_key not in tracks:
                new_track_keys.add(track_key)

        number_new_track_keys = len(new_track_keys)

        # Recovered keys that have no track object get another lookup attempt.
        new_track_keys.update(key for key, track in tracks.items() if track is None)

        number_missing_track_objects = len(new_track_keys)

        print(f'Recovered {number_existing_track_keys} tracks')

    else:
        # First run: nothing to recover, so every streamed track is new AND missing its details.
        tracks = {stream['trackName'] + '___' + stream['artistName'] : None for stream in streams}
        new_track_keys = list(tracks.keys())
        number_new_track_keys = len(new_track_keys)
        # Previously left unbound on this path, which raised UnboundLocalError below.
        number_missing_track_objects = number_new_track_keys

    if number_new_track_keys > 0 or number_missing_track_objects > 0:
        print(f'You have streamed {number_new_track_keys} new tracks since the last update!')
        print(f'Overall, you have {number_missing_track_objects} tracks without track details')

        print('Connecting to Spotify to make attempt at aquiring track details now...')
        print('\n')

        token = utils.get_token()

        for counter, track_key in enumerate(new_track_keys, start = 1):
            # Request a fresh token every 100 lookups.
            if counter % 100 == 0:
                token = utils.get_token()

            try:
                # A key containing an extra '___' fails to unpack here and is treated as unidentified.
                trackName, artistName = track_key.split('___')
                api_track_object = utils.get_api_track_object(trackName, artistName, token)

                track_dictionary = utils.create_track_dictionary(api_track_object, token)
                preview_url = track_dictionary['preview_url']
                if preview_url is not None:
                    rich_feats = utils.collect_rich_features(preview_url)
                    track_dictionary.update(rich_feats)
                    tracks[track_key] = track_dictionary
                else:
                    # Without a preview the rich features cannot be collected; keep the track unidentified.
                    tracks[track_key] = None
            except Exception:
                # Best effort: a failed lookup marks the track as unidentified. Catching Exception
                # (not a bare except) lets KeyboardInterrupt / SystemExit stop a long run.
                tracks[track_key] = None

        number_new_tracks_not_found = sum(1 for key in new_track_keys if tracks.get(key) is None)
        print(f'{number_missing_track_objects - number_new_tracks_not_found} new tracks were identified')
        print(f'{number_new_tracks_not_found} of {number_missing_track_objects} tracks missing track details were unable to be identified')

    # Totals over the whole bank (recovered + newly streamed tracks).
    number_existing_track_keys = len(tracks)
    number_missing_track_objects = sum(1 for track in tracks.values() if track is None)

    print(f'Of all {number_existing_track_keys} tracks, there are a total of {number_missing_track_objects} tracks still missing track details')
    print('\n')
    if number_existing_track_keys > 0:
        percent_identified = (number_existing_track_keys - number_missing_track_objects) / number_existing_track_keys
        percent_identified = round(percent_identified * 100, 2)
    else:
        # No tracks at all: avoid dividing by zero.
        percent_identified = 0.0
    print(f'{percent_identified}% of tracks have been identified')
    print('\n')
    print('Run the script again to make a second attempt at identifying track details!')

    # Only identified tracks are persisted.
    identified_tracks = {key: track for key, track in tracks.items() if track is not None}

    # NOTE(review): the frame index (the track key) is not written, so re-reading the file relies on
    # each track dictionary carrying its own 'track_key' field -- confirm in utils.create_track_dictionary.
    tracks_frame = pd.DataFrame.from_dict(identified_tracks, orient = 'index')
    # Write back to the same file that was read (previously hard-coded to 'output/tracks.csv').
    tracks_frame.to_csv(path, index = False)

In [3]:
def process_streams():
    '''
    Joins the saved streaming history with the bank of identified tracks and exports one row per
    identified stream.

    The track bank is refreshed first through update_tracks(). Streams whose track key is not present
    in output/tracks.csv are skipped; each remaining stream gets a copy of its track row extended with
    the stream's track key, end time and milliseconds played. The result is written to
    output/processed_streams.csv.
    '''

    print('Updating identified tracks...')
    print('\n')
    update_tracks()
    print('\n')

    print('Processing identified streamed tracks...')

    # Index the bank (output/tracks.csv) by track key; a later duplicate key replaces an earlier one.
    with open('output/tracks.csv') as bank_file:
        bank = {record['track_key']: record for record in csv.DictReader(bank_file)}

    history = utils.add_track_key_to_streams(utils.read_json('output/streams.json'))

    enriched = []
    for play in history:
        key = play['track_key']
        finished_at = play['endTime']
        played_ms = play['msPlayed']

        known = bank.get(key)
        if known is None:
            # Track was never identified, so there is nothing to attach to this stream.
            continue

        # Copy so that repeated streams of one track do not share (and overwrite) a single row.
        entry = known.copy()
        # These may be out of order if we find issues later
        entry['track_key'] = key
        entry['end_time'] = finished_at
        entry['ms_played'] = played_ms
        enriched.append(entry)

    csv_path = 'output/processed_streams.csv'
    # Keyed by running position, exactly like a 0..n-1 row index.
    frame = pd.DataFrame.from_dict(dict(enumerate(enriched)), orient = 'index')
    frame.to_csv(csv_path, index = False)
    print(f'Processed tracks outputted to {csv_path}')
    print("Run the script again when new listening history becomes availbale to gain more information from Spotify.")

In [4]:
# Run the whole pipeline: refresh the track bank via update_tracks(), then write
# output/processed_streams.csv. Progress is reported on stdout.
process_streams()

Updating identified tracks...


Recovering tracks...
Recovered 9821 tracks
You have streamed 77 new tracks since the last update!
Overall, you have 77 tracks without track details
Connecting to Spotify to make attempt at aquiring track details now...


3 new tracks were identified
74 of 77 tracks missing track details were unable to be identified
Of all 9898 tracks, there are a total of 74 tracks still missing track details


99.25% of tracks have been identified


Run the script again to make a second attempt at identifying track details!


Processing identified streamed tracks...
Processed tracks outputted to output/processed_streams.csv
Run the script again when new listening history becomes availbale to gain more information from Spotify.
