In [None]:
#Environment setup


#Libraries to extract music data
!pip install spotipy -q
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

#Libraries to deal with special data types and automatization process
import time
from IPython.core.display import clear_output
from collections import defaultdict

#Libraries to deal with text from wikipedia and extract music genre words
import pandas as pd 
import numpy as np 
from statistics import mode


!pip install metadata-filter  
from metadata_filter import *
# Combined filter rule set. It is passed as the first argument to
# apply_filters() later in the notebook to clean up album names.
# The individual *_FILTER_RULES collections are presumably provided by the
# star import from metadata_filter above; they are concatenated in the order
# listed here.
rules = (
    REMASTERED_FILTER_RULES
    + SUFFIX_FILTER_RULES
    + VERSION_FILTER_RULES
    + ORIGIN_FILTER_RULES
    + FEATURE_FILTER_RULES
    + CLEAN_EXPLICIT_FILTER_RULES
    + LIVE_FILTER_RULES
    + TRIM_WHITESPACE_FILTER_RULES
)
# Metadata filtering procedure from https://github.com/YodaEmbedding/metadata-filter

from tqdm import notebook



# --- Spotify API configuration -------------------------------------------
# Credentials are taken from the environment (SPOTIPY_CLIENT_ID /
# SPOTIPY_CLIENT_SECRET) and, if absent, requested interactively with
# getpass so they are never stored in the notebook source or its outputs.
# NOTE: the client ID/secret that used to be hardcoded in this cell must be
# treated as compromised and rotated in the Spotify developer dashboard.
import os
from getpass import getpass

redirectUri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8888/callback/')
clientId = os.environ.get('SPOTIPY_CLIENT_ID') or getpass('Spotify client ID: ')
clientSecret = os.environ.get('SPOTIPY_CLIENT_SECRET') or getpass('Spotify client secret: ')
clientUsername = os.environ.get('SPOTIPY_CLIENT_USERNAME', 'shlederman')
scope = 'user-library-modify user-library-read playlist-read-private'

# Export the settings so the spotipy auth managers below can pick them up
# without explicit arguments. os.environ is used instead of the %env magic
# because %env echoes "env: NAME=value" into the cell output, which would
# persist the client secret in the saved notebook.
os.environ['SPOTIPY_CLIENT_ID'] = clientId
os.environ['SPOTIPY_CLIENT_SECRET'] = clientSecret
os.environ['SPOTIPY_REDIRECT_URI'] = redirectUri
os.environ['SPOTIPY_CLIENT_USERNAME'] = clientUsername


#For Spotipy calls which require OAuth
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(open_browser=False, scope=scope))

#For Spotipy calls which require Client Credentials
auth_manager_cc = SpotifyClientCredentials()
spt = spotipy.Spotify(auth_manager=auth_manager_cc)


In [4]:
# Containers that the following cells fill in while walking the track list.

# One scratch dictionary per kind of Spotify object (each is a distinct dict)
track_features, audio_features, album_features, artist_features = {}, {}, {}, {}

# Running lists of Spotify IDs (each is a distinct list)
track_ids, artist_ids, album_ids = [], [], []

# Column names for the output dataframe: the four track metadata fields come
# first, followed by the audio-feature fields. Later code slices this list
# with [4:], so the order matters.
library_list = (
    ["artist", "album", "track_name", "track_id"]
    + ["danceability", "energy", "key", "loudness", "mode", "speechiness"]
    + ["instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]
)

# Empty dataframe that only carries the column layout
library_df = pd.DataFrame(columns=library_list)

In [None]:

### Get track IDs

# Alternative sources for `results`:
#   - a playlist of a specific user:
#       results = sp.user_playlist_tracks(creator, playlist_id)["items"]
#   - a playlist addressed only by its ID:
#       results = spt.playlist_tracks("playlist_id")["items"]
#   - all Saved Tracks of the authenticated user:
#       results = sp.current_user_saved_tracks()

# Since the playlist_tracks method only gets up to 100 tracks at a time, keep
# making calls until no more 'next' values appear (meaning that it is the last
# set of tracks). 'extend' is used rather than 'append' so that each track is
# added to the list individually.
# Every page is requested through the same client-credentials client (spt)
# that fetched the first page; paging with the OAuth client (sp) would start
# an interactive login even though the first request did not need one.
results = spt.playlist_tracks("4NSMn9ypXFNjueSCmjzPUx", market="US")
tracks = results['items']
while results['next']:
    results = spt.next(results)
    tracks.extend(results['items'])

# Get the number of tracks in the list
total_songs = len(tracks)


  

In [None]:
# Disabled alternative: collect the IDs from the user's Saved Tracks, 50 at a
# time, instead of from the playlist items held in `tracks`.
#
# for i in notebook.tqdm(range(0,total_songs,50)):
#   library_tracks = sp.current_user_saved_tracks(limit=50,offset=i)
#   for library_track in library_tracks['items']:
#     track_ids.append(library_track['track']['id'])
#     artist_ids.append(library_track['track']['artists'][0]['id'])
#     album_ids.append(library_track['track']['album']['id'])


# Loop through all the tracks to get the track_ids, artist_ids, and album_ids.
# Put the set of IDs each in their own list.
# The lists are rebuilt from scratch here so that re-running this cell does
# not append the same IDs a second time.
track_ids = []
artist_ids = []
album_ids = []
skipped_items = 0

for playlist_item in notebook.tqdm(tracks):
    playlist_track = playlist_item.get('track')
    # An entry without a track object or without a track ID cannot be looked
    # up later, so it is skipped instead of raising a TypeError here.
    if not playlist_track or not playlist_track.get('id'):
        skipped_items += 1
        continue

    # Fall back to None for a missing artist/album so the three lists stay
    # index-aligned with each other.
    track_artists = playlist_track.get('artists') or [{}]
    track_album = playlist_track.get('album') or {}

    track_ids.append(playlist_track['id'])
    artist_ids.append(track_artists[0].get('id'))
    album_ids.append(track_album.get('id'))


# Confirm the number of items in the lists is what is expected
print("tracks = ",len(track_ids))
print("artists = ",len(artist_ids))
print("albums = ",len(album_ids))
if skipped_items:
    print("skipped = ",skipped_items)



  

In [None]:
# Build the library dataframe: one row per entry in track_ids, with the
# columns named in library_list, then write it to library2.csv.
# Rows are accumulated in a plain list and turned into a dataframe once at the
# end; concatenating inside the loop re-copies the whole frame on every pass.
library_records = []

for track_id in notebook.tqdm(track_ids):
    # Get the next track
    library_track = spt.track(track_id)

    # Get metadata for the track. A fresh dict is created for every track so
    # that audio features of a previous track can never leak into a track for
    # which Spotify returns no audio features.
    track_features = {
        "artist": library_track["artists"][0]["name"],
        "album": library_track["album"]["name"],
        "track_name": library_track["name"],
        "track_id": library_track["id"],
    }

    # Get audio features for the track. If Spotify has none (a None entry),
    # the audio-feature columns are left empty for this row.
    audio_features = spt.audio_features(library_track["id"])
    if audio_features and audio_features[0] is not None:
        for feature in library_list[4:]:
            track_features[feature] = audio_features[0][feature]

    # Get artist specific information.
    # NOTE: artist_features / album_features are only kept for inspection in
    # the notebook; they are not written to the dataframe or the CSV.
    artist_object = spt.artist(library_track["artists"][0]["id"])
    artist_features = {
        "artist_id": artist_object["id"],
        "artist_name": artist_object["name"],
        "artist_spotify_popularity": artist_object["popularity"],
        "artist_spotify_genres": artist_object["genres"],
    }

    # Get album specific information
    album_object = spt.album(library_track["album"]["id"])
    album_features = {
        "album_id": album_object["id"],
        "album_name": album_object["name"],
        # key was previously misspelled as "albun_spotify_popularity"
        "album_spotify_popularity": album_object["popularity"],
        "album_spotify_genres": album_object["genres"],
        "album_release_date": album_object["release_date"],
        "album_release_date_precision": album_object["release_date_precision"],
    }

    library_records.append(track_features)

# Passing columns= fixes the column order and yields the same empty layout
# when track_ids is empty; keys missing from a record become NaN.
library_df = pd.DataFrame(library_records, columns=library_list)

# Create a csv file with the dataframe
library_df.to_csv('library2.csv', index=False)

In [None]:
# Display the album metadata dictionary. The feature-extraction loop assigns
# it anew for every track, so it only shows the most recently processed album.
album_features

In [None]:
# Design note: everything in the triple-quoted string below is inert text, so
# get_playlist_tracks is NOT actually defined by this cell. It sketches how the
# paginated playlist fetch could be wrapped in a reusable function.
'''
#To set up where the user can decide which type of search to perform, use functions
#which will be called based on what is needed.  Example below.

def get_playlist_tracks(username,playlist_id):
    results = sp.user_playlist_tracks(username,playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

'''

In [5]:
# Search Spotify (US market) for tracks by title and keep the first page of hits.
artistName = "Crosby"
trackName = "Southern Cross"

# Query variants that were tried; only the track title is used at the moment,
# so artistName does not take part in the search.
#q="track:"southern cross" AND artist:Crosby"
#q="track:"+trackName+" AND artist:"+artistName
q = f"track:{trackName}"  # AND artist:"+artistName

songsearch = sp.search(q=q, type='track', market="US", limit=50, offset=0)
songsearchitems = songsearch['tracks']['items']
numSongs = len(songsearchitems)

# Show how many tracks came back
numSongs

Go to the following URL: https://accounts.spotify.com/authorize?client_id=53feb49d834447c49204942bb4c6e700&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8888%2Fcallback%2F&scope=playlist-read-private+user-library-modify+user-library-read
Enter the URL you were redirected to: http://localhost:8888/callback/?code=AQDzTc13Mrhx5aKzAINzM4CG4AFGcNm_YCgvSTFnJjQ15Y3jaMVyQrC1qnd9UbbAmWAgDEcVOCn6AXTdV1q4fwWgTD-rBKwNQI1NCyGoR8Ctc2g5NWG9ErXLVqOhFyBCRJY1B4zJP0IMMwCC-8HxeMKzeogqsMP1MuixcwWwV4fsSZMYed6FNW6QBtFhMhDM2iYTRn91Vt_y8cKLgyMZW8HquoIwYVph2NgxyEg7JPWr7fIgTxMkYC6fdRCGvJW_xHM


50

In [None]:
# For every search hit that sits on a full album (album_type == 'album'),
# print the album name and the album's first listed artist.
for idx in range(numSongs):
    hit_album = songsearchitems[idx]['album']
    if hit_album['album_type'] != 'album':
        continue
    print("   ".join([hit_album['name'], hit_album['artists'][0]['name']]))


In [None]:
# Run each search hit's album name through the metadata filter rules and
# print the cleaned-up result.
for idx in range(numSongs):
    raw_album_name = songsearchitems[idx]['album']['name']
    print(apply_filters(rules, raw_album_name))
