In [1]:
# imports
from connection import create_spotify_oauth, get_audio_features, get_token, get_tracks, \
    tracks_to_df, audio_features_to_df, get_artist_info, artist_info_to_df, get_album_info, \
        album_info_to_df, get_similar_artists

from collections import Counter
from itertools import chain

import json

import pandas as pd
import numpy as np

In [2]:
# token and authentication variables
# Both helpers are imported from the project-local `connection` module; what
# they return is not visible from this notebook.

# NOTE(review): `sp_ouath` looks like a typo of "oauth" and is not referenced by
# any later cell shown here — confirm nothing else depends on the name before
# renaming or removing it.
sp_ouath = create_spotify_oauth()
# `token_info` is passed to every `connection` helper called in the cells below.
token_info = get_token()

In [3]:
# getting playlist items

# playlists id's to play around with:
### 5a4unKpRA7mYi2PAgKTuNW
### 7jfRuO7rW49RJnafrxsJeu
playlist_source = '7jfRuO7rW49RJnafrxsJeu'  # id of the playlist being analysed

# get_tracks parses Spotipy.playlist_items() to fetch the playlist items
tracks = get_tracks(playlist_source, token_info)

# tracks_to_df extracts the relevant features from the messy list into a df.
# Duplicate songs are then dropped based on track ID (first occurrence kept);
# this is useful if a song gets added from both the single and the album
# release, or in the case of deluxe versions of albums, etc.
tracks_df = tracks_to_df(tracks).drop_duplicates(subset=['track_id'])


In [4]:
# getting tracks' features

# get_audio_features parses Spotipy.audio_features() to fetch one track's
# features; the per-track results are flattened into a single list, in the
# same order as the track IDs in tracks_df.
tracks_features = list(chain.from_iterable(
    get_audio_features(track_id, token_info) for track_id in tracks_df['track_id']
))

# converts nested data from the API calls to a simple df
tracks_features_df = audio_features_to_df(tracks_features)


In [43]:
# getting tracks' artists

# An artist who appears on several tracks is requested only once: responses are
# memoised by artist ID. One entry is still appended per (track, artist) pair,
# so tracks_artists has the same length and order as with one request per pair
# (assuming the API returns the same payload for the same ID).
artist_info_by_id = {}
tracks_artists = []

for artist_ids in tracks_df['artist_id']:
    # since one track can have multiple artists, each row holds a list of IDs
    for artist_id in artist_ids:
        if artist_id not in artist_info_by_id:
            artist_info_by_id[artist_id] = get_artist_info(artist_id, token_info)
        tracks_artists.append(artist_info_by_id[artist_id])

artists_df = artist_info_to_df(tracks_artists)  # converts nested data to df

In [6]:
# getting tracks' albums

# Several tracks of a playlist often come from the same album, so responses are
# memoised by album ID and each album is requested only once. One entry is
# still appended per track, so tracks_albums keeps the same length and order
# (assuming the API returns the same payload for the same ID).
album_info_by_id = {}
tracks_albums = []

for album_id in tracks_df['album_id']:
    if album_id not in album_info_by_id:
        album_info_by_id[album_id] = get_album_info(album_id, token_info)
    tracks_albums.append(album_info_by_id[album_id])

albums_df = album_info_to_df(tracks_albums)  # converts nested data to df

In [46]:
# transforming artist genres

def _split_genres(raw):
    """Return the genres stored in one ``artist_genres`` cell as a list of strings.

    Accepts either a real list/tuple of genres or a string such as
    ``"['indie pop', 'bedroom r&b']"`` / ``"indie pop, bedroom r&b"``.
    Surrounding whitespace is removed from every genre; for string input the
    enclosing brackets and the quotes left over from a list repr are removed
    as well.

    A cell without any genre (empty list, empty string, None or NaN) yields
    ``[""]`` so that, after ``explode``, the artist keeps exactly one row whose
    genre is the empty string.
    """
    if isinstance(raw, (list, tuple)):
        # Already a sequence: use the elements directly, so a genre is never
        # split on a comma inside its name and no repr quotes are introduced.
        genres = [str(genre).strip() for genre in raw]
    elif raw is None or (isinstance(raw, float) and raw != raw):
        # None / NaN would otherwise become the literal genres "None" / "nan".
        genres = []
    else:
        tokens = str(raw).strip('[]').split(',')
        # Strip the padding and the quote characters a list repr leaves behind.
        genres = [token.strip().strip('\'"').strip() for token in tokens]

    genres = [genre for genre in genres if genre]
    return genres or [""]


# turn every artist_genres cell into a clean list of genre strings
artists_df['artist_genres'] = artists_df['artist_genres'].apply(_split_genres)

# unnest the list of genres into separate rows; the rest of the features are duplicated
artists_df = artists_df.explode('artist_genres')
# The index is intentionally left as produced by explode (rows coming from the
# same artist entry share an index label):
# artists_df.reset_index(inplace=True)

In [49]:
# collecting candidate genres for artists that have none of their own

# Artists without genres are the rows whose artist_genres is the empty string.
artists_missing_genres = artists_df.loc[artists_df['artist_genres'] == ""].copy()
artists_missing_genres['genres_list'] = None

# The same artist can occupy several rows, so similar-artist lookups are cached
# by artist ID to avoid repeating the request.
similar_artists_by_id = {}

for index, artist in artists_missing_genres.iterrows():
    artist_id = artist['artist_id']
    if artist_id not in similar_artists_by_id:
        # `or []` keeps the loop going if the helper returns None / nothing.
        similar_artists_by_id[artist_id] = list(get_similar_artists(artist_id, token_info) or [])
    similar_artists = similar_artists_by_id[artist_id]

    # keep only the similar artists that actually carry genres
    genres_list = [similar['genres'] for similar in similar_artists if similar['genres']]

    if genres_list:
        # one flat list with every genre occurrence, repeats included, so the
        # Counter below shows how often each genre appears among similar artists
        flattened_genres = list(chain.from_iterable(genres_list))
        artists_missing_genres.at[index, 'genres_list'] = flattened_genres

        print(Counter(flattened_genres))

Counter({'bedroom r&b': 1, 'modern indie pop': 1, 'pov: indie': 1})
Counter({'bedroom r&b': 11, 'korean dream pop': 2, 'korean city pop': 2, 'korean indie rock': 1, 'modern dream pop': 1, 'modern indie pop': 1})


In [None]:
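## REUSABLE TESTING CODE
# Uncomment to dump a raw API payload to a JSON file for inspection.
# NOTE: `all_tracks_albums` is not defined in the cells shown above (the list of
# album payloads built there is `tracks_albums`); adjust the name before running.

# with open('all_tracks_albums.json', 'w') as outfile:
#     json.dump(all_tracks_albums, outfile, indent=4)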

