In [1]:
import json
from authorization import SpotifyAuth

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
with open('client-secrets-rnr.json','r+') as secrets_file:
    secrets = json.load(secrets_file)

client_id = secrets["client_id"]
client_secret = secrets["client_secret"]
redirect_url = secrets["redirect_url"]

In [5]:
spotify_authorisation = SpotifyAuth(client_id=client_id, client_secret=client_secret, redirect_url=redirect_url)
spotify_authorisation.set_access_token_from_file('access_token.json')

# Get User Details

In [6]:
import spotify_functions as spotify

In [7]:
spotify.get_user_details(spotify_authorisation.get_access_token())

{'display_name': 'sdspot2034',
 'external_urls': {'spotify': 'https://open.spotify.com/user/sdspot2034'},
 'href': 'https://api.spotify.com/v1/users/sdspot2034',
 'id': 'sdspot2034',
 'images': [{'url': 'https://i.scdn.co/image/ab67757000003b8280153af8b79b3144b24f1291',
   'height': 64,
   'width': 64},
  {'url': 'https://i.scdn.co/image/ab6775700000ee8580153af8b79b3144b24f1291',
   'height': 300,
   'width': 300}],
 'type': 'user',
 'uri': 'spotify:user:sdspot2034',
 'followers': {'href': None, 'total': 9},
 'country': 'IN',
 'product': 'premium',
 'explicit_content': {'filter_enabled': False, 'filter_locked': False},
 'email': 'shreyandas2034@gmail.com'}

In [8]:
import datetime
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
# print(yesterday.timestamp()*1e3)
cdc_time = int(yesterday.timestamp()*1e3)

# Get All Songs Played in the last one day

In [9]:
token = spotify_authorisation.get_access_token()
history = spotify.get_listening_history(token, cdc_time)['items']

## EDA over recent history

### Number of songs played

In [10]:
len(history)

11

### Fetch details of a song

#### See available data for a song

In [11]:
history[0].keys()

dict_keys(['track', 'played_at', 'context'])

In [12]:
list(history[0]['track'].keys())

['album',
 'artists',
 'disc_number',
 'duration_ms',
 'explicit',
 'external_ids',
 'external_urls',
 'href',
 'id',
 'is_local',
 'name',
 'popularity',
 'preview_url',
 'track_number',
 'type',
 'uri']

In [13]:
history[0]['played_at']

'2024-08-05T04:29:40.078Z'

In [14]:
print(history[0]['context'])

None


# Transform

## Dimensions

### DIM_ARTIST

In [15]:
import pandas as pd
from sqlalchemy import Table, MetaData, Column, Integer, String
import sql_functions

In [16]:
# stacked list-comprehension
artist_list = [artist for artists in [play['track']['artists'] for play in history] for artist in artists]

In [17]:
df_artists = pd.DataFrame(artist_list)
df_artists = df_artists.drop(columns=['href','uri','external_urls'])
df_artists = df_artists.rename(columns={'id':'spotify_id','name':'artist_name','type':'artist_type'})
df_artists.drop_duplicates(inplace=True)
df_artists

Unnamed: 0,spotify_id,artist_name,artist_type
0,2FXC3k01G6Gw61bmprjgqS,Hozier,artist
1,4gzpq5DPGxSnKTe4SA8HAU,Coldplay,artist
2,2uFUBdaVGtyMqckSeCl0Qj,Ludovico Einaudi,artist
3,3e1kd1A7mKQmTs7fqX3GPI,Franco Feruglio,artist
4,11IgISNFApjwaLP6Y64tEz,Marco Decimo,artist
5,7eqjIlpFqiZ9iPfIz9LmgX,Gabriele Baffero,artist
6,21oSJUaw7ESADBnS8I93yK,Mauro Loguercio,artist
7,7FxEy78P0oIVEVxdaL9npy,Emily James,artist
8,0fauHpmSHwodVYIjTqOGHz,Yiruma,artist
9,0jrFMgW018F1XVnLtCXOKi,Fabrizio Paterlini,artist


#### Load (Type 1 SCD)

In [18]:
metadata = MetaData()

# Define the dimension table schema
dim_table = Table('DIM_ARTIST', metadata,
    Column('artist_wid', Integer, primary_key=True, autoincrement=True),
    Column('spotify_id', String(255), nullable=False, unique=True),
    Column('artist_name', String(255)),
    Column('artist_type', String(255))
)

In [19]:
sql_functions.save_to_sql(df_artists, dim_table, key_cols=['spotify_id'], wid=['artist_wid'], mode='upsert')

### DIM_ALBUM

In [20]:
album_list = [play['track']['album'] for play in history]

In [37]:
def get_url(x,height=640):
    for elem in x:
        if elem['height'] == height: return elem['url']
    return None

In [44]:
df_album = pd.DataFrame(album_list)
df_album['cover_image_url'] = df_album['images'].apply(get_url)
df_album = df_album.drop(columns=['href','uri','external_urls', 'images','artists','type'])
df_album = df_album.drop_duplicates()
df_album = df_album.rename(columns={'id':'album_id','name':'album_name'})
df_album

Unnamed: 0,album_type,album_id,album_name,release_date,release_date_precision,total_tracks,cover_image_url
0,album,3P4SQqMMgjqfqVxgLwtYRk,Unreal Unearth: Unheard,2024-03-22,day,20,https://i.scdn.co/image/ab67616d0000b2733e1b21...
1,album,6ZG5lRT77aJ3btmArcykra,Parachutes,2000-07-10,day,10,https://i.scdn.co/image/ab67616d0000b2739164ba...
2,album,5jRyFJtnadQ4XkSwL8mUzM,Eden Roc,1999-10-22,day,15,https://i.scdn.co/image/ab67616d0000b2739db983...
3,single,5eOSvWRX9rbKH0qoqSd7RN,Dreaming,2019-10-25,day,6,https://i.scdn.co/image/ab67616d0000b273a705ca...
4,album,1N6p8qmf8PJgYQdktQQYYh,First Love (The Original & the Very First Reco...,2001-11-27,day,15,https://i.scdn.co/image/ab67616d0000b2734bdb66...
5,single,6Q7oNLHJBqsVw5gjbw3wRV,Every Single Moment,2022-05-27,day,1,https://i.scdn.co/image/ab67616d0000b27395ae4d...
6,single,2BpRxtAl6IrHI2OQtd60PK,Idea 9,2023-05-26,day,1,https://i.scdn.co/image/ab67616d0000b2734e8fc9...
7,album,3YGh4CN0JocLK0SwvJgMWc,Piano,2017-09-29,day,21,https://i.scdn.co/image/ab67616d0000b273997e20...
9,album,7uPWpTHYqHEQTpZIlgELQK,Pinô,2014-01-31,day,18,https://i.scdn.co/image/ab67616d0000b27367e468...


#### Load (Type 1 SCD)

In [46]:
from sqlalchemy import MetaData, Table, Column, Integer, String, Text, Date

metadata = MetaData()

dim_album = Table(
    'DIM_ALBUM', metadata,
    Column('album_wid', Integer, primary_key=True, autoincrement=True),
    Column('album_id', String(62), nullable=False, unique=True),
    Column('album_type', String(20), nullable=False),
    Column('album_name', Text, nullable=False),
    Column('release_date', Date),
    Column('release_date_precision', String(20)),
    Column('total_tracks', Integer),
    Column('cover_image_url', Text)
)

In [47]:
sql_functions.save_to_sql(df_album, dim_album, key_cols = ['album_id'], wid = ['album_wid'], mode = 'upsert')

### DIM_SONG

In [48]:
history[0]['track'].keys()

dict_keys(['album', 'artists', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])

In [49]:
history[0]['track']

{'album': {'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2FXC3k01G6Gw61bmprjgqS'},
    'href': 'https://api.spotify.com/v1/artists/2FXC3k01G6Gw61bmprjgqS',
    'id': '2FXC3k01G6Gw61bmprjgqS',
    'name': 'Hozier',
    'type': 'artist',
    'uri': 'spotify:artist:2FXC3k01G6Gw61bmprjgqS'}],
  'external_urls': {'spotify': 'https://open.spotify.com/album/3P4SQqMMgjqfqVxgLwtYRk'},
  'href': 'https://api.spotify.com/v1/albums/3P4SQqMMgjqfqVxgLwtYRk',
  'id': '3P4SQqMMgjqfqVxgLwtYRk',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab67616d0000b2733e1b212546dac35443735241',
    'width': 640},
   {'height': 300,
    'url': 'https://i.scdn.co/image/ab67616d00001e023e1b212546dac35443735241',
    'width': 300},
   {'height': 64,
    'url': 'https://i.scdn.co/image/ab67616d000048513e1b212546dac35443735241',
    'width': 64}],
  'name': 'Unreal Unearth: Unheard',
  'release_date': '2024-03-22',
  'release_date_precision': 'day',

In [50]:
track_list = [play['track'] for play in history]

In [51]:
df_songs = pd.DataFrame(track_list)
df_songs = df_songs.explode('artists')
df_songs['artist_id'] = df_songs['artists'].apply(lambda x: x['id'])
df_songs['isrc_id'] = df_songs['external_ids'].apply(lambda x: x['isrc'])
df_songs = df_songs.drop(columns=['href','uri','external_urls','artists','external_ids', 'artist_id', 'popularity'])
df_songs['album'] = None
df_songs = df_songs.drop_duplicates()
df_songs = df_songs.rename(columns={'id':'song_id','album':'album_wid'})
df_songs

Unnamed: 0,album_wid,disc_number,duration_ms,explicit,song_id,is_local,name,preview_url,track_number,type,isrc_id
0,,1,251424,False,5Z0UnEtpLDQyYlWwgi8m9C,False,Too Sweet,https://p.scdn.co/mp3-preview/e526daac5ec6c459...,1,track,IEACJ2300335
1,,1,266773,False,3AJwUDP919kvQ9QcozQPxg,False,Yellow,https://p.scdn.co/mp3-preview/c0d9119dc69cae75...,5,track,GBAYE0000267
2,,1,89413,False,3cOGmT4WmEFfv9H5Pipcj5,False,Ultimi Fuochi II,https://p.scdn.co/mp3-preview/12437283c2e6f13d...,11,track,ITC490700072
3,,1,214087,False,01LEUIGH4Mup0hV3gHNgx0,False,Dreaming,https://p.scdn.co/mp3-preview/6d34e0fe3659d926...,4,track,QZ4BX1900004
4,,1,188786,False,2agBDIr9MYDUducQPC1sFU,False,River Flows In You,https://p.scdn.co/mp3-preview/3cd81468938f7c9d...,4,track,HKOA10616988
5,,1,153000,False,4klLHDMK96dfrIMwUaGBg6,False,Every Single Moment,https://p.scdn.co/mp3-preview/e561745546149b4e...,1,track,SE5ZV2227301
6,,1,137547,False,3TdROUy5JZz1OcBKMZjeux,False,Idea 9,https://p.scdn.co/mp3-preview/d1409e22384bc33a...,1,track,QM3DF2206675
7,,1,199493,False,5jvNtVu9fmqe7DMJRmCc3Q,False,Anthem,https://p.scdn.co/mp3-preview/eecd58db33a1475c...,13,track,SEAUA1700113
9,,1,165995,False,2Ai5UUXyxaulVbompgrzlo,False,Pinô,https://p.scdn.co/mp3-preview/a6fb39ce6ab6e6f4...,7,track,DEX261381907


#### Load (Type 1 SCD)

In [52]:
from sqlalchemy import BigInteger, Boolean, Text


metadata = MetaData()

dim_song = Table(
    'DIM_SONG', metadata,
    Column('song_wid', Integer, primary_key=True, autoincrement=True),
    Column('song_id', String(62), nullable=False, unique=True),
    Column('album_wid', Integer),
    Column('disc_number', Integer),
    Column('duration_ms', BigInteger, nullable=False),
    Column('explicit', Boolean),
    Column('is_local', Boolean),
    Column('name', Text, nullable=False),
    Column('preview_url', Text),
    Column('track_number', Integer),
    Column('type', String(50)),
    Column('isrc_id', String(100))
)

In [53]:
sql_functions.save_to_sql(df_songs, dim_song, key_cols = ['song_id'], wid = ['song_wid'], mode = 'upsert')