In [1]:
import os 
import sys 
import pandas as pd 
import re
import pickle
import json
import sqlite3
from pprint import pprint
import datetime
import pdb

In [2]:
# Display the version string of the Python interpreter running this notebook.
sys.version

'3.6.9 (default, Oct  8 2020, 12:12:24) \n[GCC 8.4.0]'

In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

In [4]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Set up 

In [5]:
# Load the Spotify credentials object from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must only ever be one you created yourself; a plain JSON or .env
# file would be a safer container for a mapping of strings.
with open('../credentials/spotify_creds.pkl', 'rb') as hnd:
    credentials = pickle.load(hnd)

In [6]:
# Export every entry of `credentials` as an environment variable.
# presumably these are the SPOTIPY_* settings that spotipy reads from the
# environment — TODO confirm against the keys stored in the pickle.
os.environ.update(credentials)

In [7]:
# OAuth scope string handed to SpotifyOAuth when the client is created.
scope = "user-read-recently-played"

In [8]:
# Module-level Spotify client; get_recently_played() calls it directly.
# Authentication is delegated to a SpotifyOAuth manager built with `scope`.
# NOTE(review): the username is hard-coded here — consider moving it next to
# the other credentials so the notebook is not tied to one account.
spotify = spotipy.Spotify(client_credentials_manager= SpotifyOAuth(scope=scope
                                                                   , username='malchemist02'))

## Functions

In [9]:
def split_utc_time_str(time_str, utc_offset_hours=-4):
    """Split a UTC timestamp string into shifted date and time strings.

    Parameters
    ----------
    time_str : str
        Timestamp of the form ``'YYYY-MM-DDTHH:MM:SS.ffffffZ'``. The
        fractional-seconds part is optional, so ``'YYYY-MM-DDTHH:MM:SSZ'``
        is accepted as well.
    utc_offset_hours : int or float, optional
        Hours added to the parsed UTC time before splitting. Defaults to
        ``-4``, the fixed shift this function has always applied.
        NOTE(review): a fixed offset does not follow daylight-saving
        changes — confirm this is acceptable for the stored history.

    Returns
    -------
    tuple of (str, str)
        ``(date, time)`` formatted as ``'YYYY-MM-DD'`` and ``'HH:MM'``.

    Raises
    ------
    ValueError
        If ``time_str`` matches neither accepted format.
    """
    try:
        utc_dt = datetime.datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    except ValueError:
        # '%f' requires at least one digit, so whole-second timestamps
        # (no '.fff' part) need their own format.
        utc_dt = datetime.datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%SZ')

    shifted = utc_dt + datetime.timedelta(hours=utc_offset_hours)
    return (str(shifted.date()), shifted.strftime('%H:%M'))

In [10]:
def get_recently_played(after=None):
    """Fetch the current user's recently played tracks as a flat DataFrame.

    Calls ``current_user_recently_played`` on the module-level ``spotify``
    client, prints a short summary of the response, and flattens every
    returned item into one row.

    Parameters
    ----------
    after : optional
        Forwarded unchanged as the ``after`` argument of the API call.
        ``None`` (the default) sends no lower bound.

    Returns
    -------
    pandas.DataFrame
        One row per played item with the columns ``name``, ``artist_name``,
        ``played_at_date``, ``played_at_time``, ``duration_min``,
        ``popularity``, ``song_uri``, ``artist_id``, ``playlist_id`` and
        ``after_ts``. ``after_ts`` holds the response's ``after`` cursor,
        repeated on every row. When the response has no cursors or no
        items, an empty frame with the same columns is returned, so that
        every result shares one schema.
    """
    columns = ['name', 'artist_name', 'played_at_date', 'played_at_time',
               'duration_min', 'popularity',
               'song_uri', 'artist_id', 'playlist_id', 'after_ts']

    recently_played = spotify.current_user_recently_played(after=after)
    # Describe json
    print('Getting songs at ', datetime.datetime.now())

    cursors = recently_played['cursors']
    if cursors is None:
        # Nothing new was reported: keep the schema, return no rows.
        return pd.DataFrame(columns=columns)

    # Kept under separate names so the `after` argument is not overwritten.
    before_ts = cursors['before']
    after_ts = cursors['after']
    items = recently_played['items']
    n_items = len(items)
    # The cursors are divided by 1000 before fromtimestamp(), i.e. they are
    # treated as epoch milliseconds.
    print(f'There are {n_items} songs between '
          , datetime.datetime.fromtimestamp(int(before_ts)/1000)
          , '  and  '
          , datetime.datetime.fromtimestamp(int(after_ts)/1000))

    if n_items == 0:
        return pd.DataFrame(columns=columns)

    # Parse recently played output
    rows = []
    for song in items:
        track = song['track']
        # Take first artist information
        first_artist = track['artists'][0]
        played_at_date, played_at_time = split_utc_time_str(song['played_at'])
        # An item without a context gets no playlist id.
        context = song.get('context')

        rows.append({
            'name': track['name'],
            'artist_name': first_artist['name'],
            'played_at_date': played_at_date,
            'played_at_time': played_at_time,
            'duration_min': track['duration_ms'] / 60000.0,
            'popularity': track['popularity'],
            'song_uri': track['uri'],
            'artist_id': first_artist['uri'],
            'playlist_id': context['uri'] if context is not None else None,
            'after_ts': after_ts,
        })

    # Export cleanly
    return pd.DataFrame(rows, columns=columns)

## Get time of last Spotify Pull

In [11]:
# Path of the SQLite database file, relative to the working directory.
db_location = 'data/listening_history.db'

In [12]:
# Open the database connection that the lookup cursor is created from.
# NOTE(review): `con` is rebound by a second sqlite3.connect() call further
# down before this connection is closed, so this one is never closed explicitly.
con = sqlite3.connect(db_location)

In [13]:
# Cursor used to query the Listening_History table for the last stored pull.
cursor = con.cursor()

In [14]:
# Look up the largest stored `after_ts`; it is passed to get_recently_played()
# as the lower bound of the next pull. None means "no previous pull".
try:
    rslt = cursor.execute('select max(after_ts) from Listening_History').fetchone()
    # max() over an empty table yields a single NULL, i.e. rslt == (None,)
    latest_time_pull = rslt[0]
except sqlite3.OperationalError as e:
    # Mainly for first run through when table doesn't exist yet
    ## Will take the latest recent history available
    print(e)
    latest_time_pull = None

if latest_time_pull is None:
    print("No previous pull found; taking the latest recent history available")
else:
    print("Latest pull from ",
          datetime.datetime.fromtimestamp(int(latest_time_pull)/1000))

Latest pull from  2020-11-09 16:27:03.584000


## Pull from Spotify and Store to DB 

In [15]:
# Fetch the plays reported after the last stored cursor (or with no lower
# bound when latest_time_pull is None).
newly_played = get_recently_played(latest_time_pull)

Getting songs at  2020-11-11 14:29:24.285296
There are 44 songs between  2020-11-09 21:45:18.284000   and   2020-11-10 20:21:45.755000


In [16]:
# Preview the first rows of the freshly pulled plays before storing them.
newly_played.head()

Unnamed: 0,name,artist_name,played_at_date,played_at_time,duration_min,popularity,song_uri,artist_id,playlist_id,after_ts
0,You Will Still Be Mine,Nick Cordero,2020-11-10,16:21,2.304217,47,spotify:track:3BZYRhdrqH7jL2OGInv31y,spotify:artist:0r6iaGl6gF2uysRPWRgsxM,spotify:playlist:1vXTNJWpsSr8myE328rsDj,1605039705755
1,What Baking Can Do,Jessie Mueller,2020-11-10,14:56,3.504433,55,spotify:track:0h77yfaXxrLMLz31uNDigN,spotify:artist:4okEmrM2O6CEW6en3fRWUy,spotify:playlist:1vXTNJWpsSr8myE328rsDj,1605039705755
2,You Matter to Me,Drew Gehling,2020-11-10,12:39,4.3531,55,spotify:track:3OIoLMnnWzAUORFQxRAhdF,spotify:artist:7ejlBSnnDMNIhEcL7MN868,spotify:album:1E1tdqqLmyi03P0TJhGuw8,1605039705755
3,Bad Idea - Reprise,Drew Gehling,2020-11-10,12:35,1.054,46,spotify:track:4oUvqlTyJeSkJ0XjbkKVRk,spotify:artist:7ejlBSnnDMNIhEcL7MN868,spotify:album:1E1tdqqLmyi03P0TJhGuw8,1605039705755
4,I Didn't Plan It,Keala Settle,2020-11-10,12:34,2.661333,49,spotify:track:2HhPYh9wefr3DkFKWFLeV7,spotify:artist:7HV2RI2qNug4EcQqLbCAKS,spotify:album:1E1tdqqLmyi03P0TJhGuw8,1605039705755


In [17]:
# Append the new plays to the Listening_History table.
# NOTE(review): this rebinds `con` to a fresh connection while the one opened
# earlier is still open — confirm whether the earlier connection can be reused
# or should be closed first.
# NOTE(review): to_sql is called without `index=`, so presumably the frame's
# row index is stored as an extra column — confirm that is intended.
con = sqlite3.connect(db_location)
newly_played.to_sql('Listening_History', con, if_exists='append' )

In [18]:
# Commit the pending transaction, then close the connection used for the append.
con.commit()
con.close()