In [8]:
import json
from authorization import SpotifyAuth

In [9]:
# Load the Spotify application credentials from the local secrets file.
# The file is only read here, so open it read-only ('r') rather than 'r+',
# which would also require write permission on the secrets file.
with open('client-secrets-rnr.json', 'r') as secrets_file:
    secrets = json.load(secrets_file)

# Pull out the three values the SpotifyAuth constructor is given below.
client_id = secrets["client_id"]
client_secret = secrets["client_secret"]
redirect_url = secrets["redirect_url"]

In [10]:
# Build the auth helper from the credentials read from the secrets file,
# then pass it the name of the access-token file.
spotify_authorisation = SpotifyAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_url=redirect_url,
)
spotify_authorisation.set_access_token_from_file('access_token.json')

In [11]:
import datetime

# Cutoff passed to every load below: one day before this cell runs,
# expressed as integer milliseconds since the Unix epoch.
# (cdc_time presumably stands for the change-data-capture cutoff.)
one_day = datetime.timedelta(days=1)
yesterday = datetime.datetime.now() - one_day
cdc_time = int(yesterday.timestamp() * 1e3)

In [12]:
import spotify_functions as spotify

# Extract, Transform, Load (ETL)

## Dimensions

In [13]:
from dw_etl_scripts.dim import *

In [14]:
# Request the listening history from the cutoff onwards and keep only the
# 'items' entry of the response; the FACT_PLAY section builds on `history`.
token = spotify_authorisation.get_access_token()
listening_history = spotify.get_listening_history(token, cdc_time)
history = listening_history['items']

### DIM_USER

#### Load (Type 0 SCD)

In [15]:
# dim_user_load(spotify, spotify_authorisation.get_access_token())

### DIM_ARTIST

#### Load (Type 1 SCD)

In [16]:
# DIM_ARTIST load: pass the Spotify helper module, an access token requested
# at call time, and the cdc_time cutoff.
dim_artist_load(
    spotify,
    spotify_authorisation.get_access_token(),
    cdc_time,
)

### DIM_ALBUM

#### Load (Type 1 SCD)

In [17]:
# DIM_ALBUM load: pass the Spotify helper module, an access token requested
# at call time, and the cdc_time cutoff.
dim_album_load(
    spotify,
    spotify_authorisation.get_access_token(),
    cdc_time,
)

### BRIDGE_ALBUM_ARTIST

In [18]:
from dw_etl_scripts.bridge import *

#### Load (Type 1 SCD)

In [19]:
# BRIDGE_ALBUM_ARTIST load: pass the Spotify helper module, an access token
# requested at call time, and the cdc_time cutoff.
bridge_album_artist_load(
    spotify,
    spotify_authorisation.get_access_token(),
    cdc_time,
)

### DIM_SONG

#### Load (Type 1 SCD)

In [20]:
# DIM_SONG load: pass the Spotify helper module, an access token requested
# at call time, and the cdc_time cutoff.
dim_song_load(
    spotify,
    spotify_authorisation.get_access_token(),
    cdc_time,
)

### FACT_PLAY

In [165]:
# Build the FACT_PLAY rows (song_wid, played_at, context_playlist) from the
# listening history fetched earlier in the notebook.
#
# NOTE(review): `pd` and `sql_functions` are not imported in any visible cell;
# presumably they arrive through the `import *` cells above -- confirm.

# Offset added to every `played_at` before it is formatted for storage.
# NOTE(review): looks like a UTC -> UTC+05:30 local-time shift -- confirm.
PLAYED_AT_OFFSET = pd.Timedelta(hours=5, minutes=30)
PLAYED_AT_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
FACT_PLAY_COLUMNS = ['song_wid', 'played_at', 'context_playlist']

dim_song = sql_functions.read_from_sql('DIM_SONG')

if history:
    df_play = pd.DataFrame(history)

    # Resolve each played track to its warehouse row via the Spotify song id.
    # A left join keeps every play, even one with no match in DIM_SONG
    # (such a row would get a missing song_wid).
    df_play['song_id'] = df_play['track'].apply(lambda track: track['id'])
    df_play = df_play.merge(dim_song, 'left', on='song_id')

    # `context` can be empty or lack an 'href'; those plays get None.
    df_play['context_playlist'] = df_play['context'].apply(
        lambda context: context['href'] if context and 'href' in context else None
    )

    # Parse, shift and format the timestamp in one vectorized pass. Doing it
    # before the column selection avoids assigning into a sliced frame.
    df_play['played_at'] = (
        pd.to_datetime(df_play['played_at']) + PLAYED_AT_OFFSET
    ).dt.strftime(PLAYED_AT_FORMAT)

    df_play = df_play[FACT_PLAY_COLUMNS]
else:
    # No plays since the cutoff: without this guard the 'track' lookup above
    # raises KeyError on a column-less frame. Produce an empty frame with the
    # expected columns instead.
    df_play = pd.DataFrame(columns=FACT_PLAY_COLUMNS)

df_play

Unnamed: 0,song_wid,played_at,context_playlist
0,89,2024-08-11 16:33:43.891000,https://api.spotify.com/v1/playlists/1TkXrIRsO...
1,90,2024-08-11 16:31:13.712000,https://api.spotify.com/v1/playlists/1TkXrIRsO...
2,90,2024-08-11 16:30:54.486000,https://api.spotify.com/v1/playlists/1TkXrIRsO...
3,50,2024-08-11 16:13:44.672000,
4,36,2024-08-11 12:12:53.419000,
5,87,2024-08-11 12:03:50.717000,


#### Load

In [128]:
# NOTE(review): this cell sits under the FACT_PLAY "Load" heading but writes the
# album/artist bridge, and its execution count (128) is out of order with the
# surrounding cells. `df_bridge_album_artist` is not defined in any visible
# cell, so this looks like a leftover from before the bridge load moved into
# bridge_album_artist_load() -- confirm and remove if so.
sql_functions.save_to_sql(
    df_bridge_album_artist,
    bridge_album_artist,
    key_cols=['album_wid', 'artist_wid'],
    wid=['album_artist_map_wid'],
    mode='ignore',
)

In [166]:
# MetaData is imported explicitly: it is used below but was missing from this
# import line, so the cell only worked if the name leaked in from elsewhere.
from sqlalchemy import Table, Column, Integer, String, Text, DateTime, MetaData

# Container the FACT_PLAY table definition is registered on.
metadata = MetaData()

# FACT_PLAY table definition, passed to save_to_sql together with df_play.
fact_play = Table(
    'FACT_PLAY', metadata,
    # Surrogate key, generated by the database.
    Column('play_wid', Integer, primary_key=True, autoincrement=True),
    # Warehouse id of the played song (df_play takes it from DIM_SONG).
    Column('song_wid', Integer, nullable=False),
    Column('played_at', DateTime, nullable=False),
    # Nullable: the transform sets context_playlist to None for plays without
    # a context href, so a NOT NULL constraint contradicts the data.
    Column('context_playlist', Text, nullable=True),
)

In [169]:
# Write the prepared plays to FACT_PLAY in 'append' mode, passing
# (song_wid, played_at) as the key columns and play_wid as the wid column.
sql_functions.save_to_sql(
    df_play,
    fact_play,
    key_cols=['song_wid', 'played_at'],
    wid=['play_wid'],
    mode='append',
)