# Spotify Recommendation - Finding your music taste!

## Import necessary libraries

In [1]:
import pandas as pd
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import BackendApplicationClient
import requests
import time
from pandas import json_normalize

## Using Credentials to access OAuth for spotify

In [2]:
# Your credentials
# Read the credentials from the environment so that secrets are never stored
# in (or committed with) the notebook.
# NOTE(review): the client secret that used to be hard-coded in this cell has
# been exposed and should be rotated in the Spotify developer dashboard.
import os

client_id = os.environ.get('SPOTIFY_CLIENT_ID')
client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
if not client_id or not client_secret:
    raise ValueError(
        "Set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment variables before running this cell."
    )

# Create a session
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)

# Get token for the session
token = oauth.fetch_token(
    token_url='https://accounts.spotify.com/api/token',
    client_id=client_id,
    client_secret=client_secret,
    include_client_id=True
)

## Ideas for model

Before we start, we need to consider a couple of things when recommending songs:

- Is the track available for specified country?

- Did the user dislike similar tracks or this specified track?

Features for the model:

- Maybe consider getting input from users of language/country of song you want?

## Initializing Token and Getting API Request

In [3]:
# The headers to provide the access token for authentication
# Default request headers: the bearer token authenticates the call and the
# content type declares a JSON payload.
headers = {
    'Authorization': 'Bearer {}'.format(token["access_token"]),
    'Content-Type': 'application/json',
}

In [4]:
token_expiry = token['expires_at'] # expiry time in seconds; compared against time.time() in refresh_token_if_needed
# Optional conversion to a readable time format (left disabled):
#token_expiry_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(token_expiry)) # converting to readable time format

We need to check whether `time.time()` is about to exceed `token_expiry`. If so, renew the token!

### Function `refresh_token_if_needed` 

In [71]:
# client_id and client_secret are expected to be defined in the global scope
# (module-level names are already global, so no `global` statement is needed here).

def refresh_token_if_needed():
    """
    Refresh the Spotify API token when it is missing, expired, or about to expire.

    The function takes no parameters and returns nothing. It reads the global
    `client_id` and `client_secret` and, when a refresh is needed, rebinds the
    global `token` and `token_expiry`.

    Parameters:
        None

    Returns:
        None

    Raises:
        ValueError: If `client_id` or `client_secret` is not defined in the global scope.
    """
    global token, token_expiry  # Rebound below when a new token is fetched

    module_globals = globals()

    # Ensure we have current values for client_id and client_secret
    if 'client_id' not in module_globals or 'client_secret' not in module_globals:
        raise ValueError("client_id and client_secret must be set in the global scope before calling this function.")

    # A token_expiry that was never assigned is treated like an expired token
    # instead of failing with NameError.
    current_expiry = module_globals.get('token_expiry')

    # Nothing to do while the token stays valid for more than 60 more seconds
    if current_expiry and time.time() <= current_expiry - 60:
        return

    client = BackendApplicationClient(client_id=client_id)
    oauth = OAuth2Session(client=client)

    # Fetch a new token and update the global token variable
    token = oauth.fetch_token(
        token_url='https://accounts.spotify.com/api/token',
        client_id=client_id,
        client_secret=client_secret,
        include_client_id=True
    )

    # Calculate and update the global token_expiry variable
    token_expiry = time.time() + token['expires_in']

## Finding songs based on user chosen track

In [6]:
# Sample song/artist pair used while exploring the search endpoint
song_title = "Self Care"
artist_name = "Mac Miller"
# Open question at the time of writing: is the search case sensitive?

Function that searches for a user-defined song.

The search is not case sensitive.

Ex: artist_name = mac miller, song_title='whats the use'

It will still give the song. Original name is "Mac Miller", "What's the use?"

### Function `search_song`

In [7]:
def search_song(artist_name="", song_title=""):
    """
    Searches for a song on Spotify based on the given artist name and/or song title
    and returns the most relevant result.

    Parameters:
        artist_name (str): The name of the artist. May be empty if song_title is given.
        song_title (str): The title of the song. May be empty if artist_name is given.

    Returns:
        DataFrame/None: A pandas DataFrame containing the song information if found, otherwise None.

    Raises:
        ValueError: If both artist_name and song_title are empty (or only whitespace).
        Exception: If the Spotify API response contains an 'error' entry.
    """
    # Normalise inputs so that None or whitespace-only values count as "not provided"
    artist_name = (artist_name or "").strip()
    song_title = (song_title or "").strip()
    if not artist_name and not song_title:
        raise ValueError("Both artist_name and song_title cannot be empty")

    url = "https://api.spotify.com/v1/search"
    refresh_token_if_needed()

    # `token` is the module-level token dict kept fresh by refresh_token_if_needed()
    headers = {
        'Authorization': f'Bearer {token["access_token"]}',
        'Content-Type': 'application/json'
    }

    # Only include the filters that were actually supplied; otherwise the query
    # would carry a dangling "track:" or "artist:" filter with no value.
    query_parts = []
    if song_title:
        query_parts.append(f'track:{song_title}')
    if artist_name:
        query_parts.append(f'artist:{artist_name}')

    search_params = {
        'q': ' '.join(query_parts),
        'type': 'track',  # specifies that you are searching for tracks
        'limit': 1  # limits the response to the top 1 most relevant result
    }

    response = requests.get(url, headers=headers, params=search_params).json()

    # Error handling to check for API errors
    if 'error' in response:
        raise Exception(f"Spotify API error: {response['error'].get('message')}")

    # Check if any items were found; otherwise return None
    items = response.get('tracks', {}).get('items', [])
    if not items:
        return None

    # Normalize and return the first item as a DataFrame
    return json_normalize(items[0])

#### Example of `search_song`

In [8]:
# Lower-case, unpunctuated input; the output below shows it resolves to "What's the Use?"
df = search_song('mac miller', 'whats the use')
df

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,...,album.id,album.images,album.name,album.release_date,album.release_date_precision,album.total_tracks,album.type,album.uri,external_ids.isrc,external_urls.spotify
0,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,288640,True,https://api.spotify.com/v1/tracks/2dgrYdgguVZK...,2dgrYdgguVZKeCsrVb9XEs,False,What's the Use?,69,...,5wtE5aLX5r7jOosmPhJhhk,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Swimming,2018-08-03,day,13,album,spotify:album:5wtE5aLX5r7jOosmPhJhhk,USWB11801213,https://open.spotify.com/track/2dgrYdgguVZKeCs...


## Recommending songs based on user specified playlist

Removed items will be shown like `this`.

`preview_url`: URL of a 30-second preview MP3 file of the track. Not all tracks have previews available.

available_markets: List of ISO 3166-1 alpha-2 country codes indicating the markets in which the track is available.

explicit: Boolean flag indicating whether the track has explicit content or not; true means there is explicit content.

`type`: The object type, which is "track" for track objects.

`episode`: Boolean flag indicating if the object is an episode of a show; typically false for music tracks.

`track`: Boolean flag indicating if the object is a track (true) or not. This can be used to differentiate between tracks and other types of content like episodes.

album: Information about the album on which the track appears. This would usually be a nested object containing details like album name, album type, available markets for the album, album release date, etc.

artists: List of artist objects involved with the track. Each artist object typically contains the artist's name, Spotify ID, and URL to the artist's Spotify page.

`disc_number`: The disc number (in the album) on which the track appears.

`track_number`: The number of the track on its disc.

duration_ms: The track length in milliseconds.

`external_ids`: External identifiers for the track, such as the ISRC code.

`external_urls`: An external URL object pointing to more information about the track on Spotify's official website.

`href`: A link to the Spotify Web API endpoint providing full details of the track.

id: The Spotify ID for the track.

name: The name of the track.

popularity: A measure from 0 to 100, calculated based on the number of plays the track has had and how recent those plays are.

`uri`: The Spotify URI for the track.

is_local: A boolean indicating whether the track is from a local file.


### Function to extract featured artist

In [59]:
def featured_artists(artists: list) -> list:
    """
    Return the names of every artist listed after the first one.

    Parameters:
        artists (list): A list of dictionaries, one per artist; each dictionary
            after the first must provide a 'name' key.

    Returns:
        list: The 'name' values of all artists except the first, in order.
    """
    # Index 0 is treated as the main artist, so names are collected from index 1 onward
    return [entry['name'] for entry in artists[1:]]


#### Example of how the function `featured_artists` is used

In [68]:
# Hand-made fixture with four artist dictionaries; the first entry ('test')
# plays the role of the main artist and is skipped by featured_artists.
artists_test= [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3hvinNZRzTLoREmqFiKr1b'},
  'href': 'https://api.spotify.com/v1/artists/3hvinNZRzTLoREmqFiKr1b',
  'id': '3hvinNZRzTLoREmqFiKr1b',
  'name': 'test',
  'type': 'artist',
  'uri': 'spotify:artist:3hvinNZRzTLoREmqFiKr1b'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0tkHE1pQ5ZCgQb8WZ0ba79'},
  'href': 'https://api.spotify.com/v1/artists/0tkHE1pQ5ZCgQb8WZ0ba79',
  'id': '0tkHE1pQ5ZCgQb8WZ0ba79',
  'name': 'test2',
  'type': 'artist',
  'uri': 'spotify:artist:0tkHE1pQ5ZCgQb8WZ0ba79'},
  
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0tkHE1pQ5ZCgQb8WZ0ba79'},
  'href': 'https://api.spotify.com/v1/artists/0tkHE1pQ5ZCgQb8WZ0ba79',
  'id': '0tkHE1pQ5ZCgQb8WZ0ba79',
  'name': 'test3',
  'type': 'artist',
  'uri': 'spotify:artist:0tkHE1pQ5ZCgQb8WZ0ba79'},
  
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0tkHE1pQ5ZCgQb8WZ0ba79'},
  'href': 'https://api.spotify.com/v1/artists/0tkHE1pQ5ZCgQb8WZ0ba79',
  'id': '0tkHE1pQ5ZCgQb8WZ0ba79',
  'name': 'test4',
  'type': 'artist',
  'uri': 'spotify:artist:0tkHE1pQ5ZCgQb8WZ0ba79'}]
# Expected result: the names of every entry except the first
featured_artists(artists_test)

['test2', 'test3', 'test4']

### Function `get_playlist`

Data frame will have these columns:

`available_markets`	= country where the songs are available

`explicit` = whether the song contains explicit language

`track_id`	= unique id of the song

`song_name` = song name

`popularity` = 	A measure from 0 to 100, calculated based on the number of plays the track has had and how recent those plays are

`is_local` = A boolean indicating whether the track is from a local file

`duration_sec` = total duration of the song in seconds

`album_type` = *Album* represents a standard album with multiple tracks. *Single* is used for shorter releases with one or only a few tracks. *Compilation* contains tracks by multiple artists

`release_date` = Released date of the song

`album_name` = Name of album

`artist_name` = Name of the first artist listed on the track (treated as the main artist)

`featured_artist(s)` = Name(s) of the remaining artists listed on the track after the first one


In [69]:
def get_playlist(playlist_url: str) -> pd.DataFrame:
    """
    Fetches all tracks from a Spotify playlist and returns them as a pandas DataFrame.

    Pages are followed through the 'next' link of each response until it is
    empty. Only rows whose 'type' is 'track' are kept.

    Parameters:
        playlist_url (str): The actual Spotify playlist URL.

    Returns:
        pd.DataFrame: DataFrame containing simplified track's data, or an empty
            DataFrame when no tracks could be fetched.

    Raises:
        ValueError: If playlist_url is empty.
    """
    if not playlist_url:
        raise ValueError("Playlist URL cannot be empty")

    # Drop the query string first (e.g. "?si=...") and tolerate a trailing slash
    playlist_id = playlist_url.split('?')[0].rstrip('/').split('/')[-1]
    refresh_token_if_needed()

    # The headers do not change between pages, so build them once
    request_headers = {
        'Authorization': f'Bearer {token["access_token"]}',
        'Content-Type': 'application/json'
    }

    tracks = []
    url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'

    while url:
        response = requests.get(url, headers=request_headers).json()

        if 'error' in response:
            print("Failed to fetch data:", response.get('error', {}))
            break
        if not response.get('items'):
            print("No items found in response.")
            break

        # Skip playlist entries whose 'track' payload is missing or null
        tracks.extend(item['track'] for item in response['items'] if item.get('track'))
        url = response.get('next')

    if not tracks:
        return pd.DataFrame()  # Return an empty DataFrame if no tracks were found

    output = pd.DataFrame(tracks)

    # Filter to real tracks BEFORE deriving album/artist columns, so rows of
    # other types cannot break the lookups below. copy() gives an independent
    # frame to add columns to.
    if 'type' in output.columns:
        output = output[output['type'] == 'track'].copy()
    if output.empty:
        return pd.DataFrame()

    # Data processing
    output['duration_sec'] = (output['duration_ms'] / 1000).round().astype(int)
    output['album_type'] = output['album'].apply(lambda album: album['album_type'])
    output['release_date'] = output['album'].apply(lambda album: album['release_date'])
    output['album_name'] = output['album'].apply(lambda album: album['name'])
    # An empty artist list yields None instead of raising IndexError
    output['artist_name'] = output['artists'].apply(lambda artists: artists[0]['name'] if artists else None)
    output['featured_artist(s)'] = output['artists'].apply(featured_artists)

    output = output.rename(columns={'name': 'song_name', 'id': 'track_id'})

    # errors='ignore': a column missing from the response must not abort the call
    unused_columns = [
        "preview_url", "episode", "disc_number", "track_number", "uri", "href",
        "external_ids", "external_urls", 'album', 'artists', 'duration_ms', 'type', 'track',
    ]
    return output.drop(columns=unused_columns, errors='ignore')

#### Example of `get_playlist`

In [70]:
# Share link including its "?si=..." query string; get_playlist strips the query part
playlist_url = "https://open.spotify.com/playlist/5Al2k5OqoDoE2SRfKl9I45?si=9f4700ba812b4eb9"
df = get_playlist(playlist_url)
df

Unnamed: 0,available_markets,explicit,track_id,song_name,popularity,is_local,duration_sec,album_type,release_date,album_name,artist_name,featured_artist(s)
0,[],False,06eq0M1eZRcIIRfx6d00nu,METEOR,0,False,197,album,2019-11-29,Boyhood,CHANGMO,[]
1,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,2Mwy8p7paYnL2BvnFrQVxK,Free My Mind,15,False,256,single,2018-10-16,BETWEEN US,Lee Moon Sae,[Gaeko]
2,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,7rHxGL9g6jqybXKw3K2478,What's Wrong (Feat. YUNHWAY),28,False,197,album,2019-12-02,Fatal Album Ⅲ,GIRIBOY,[YUNHWAY]
3,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,1iIhGHzzrzqQfuNkFI2qAn,Any song,65,False,227,single,2020-01-13,Any song,ZICO,[]
4,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,1kQcX6cLpC57HQqqSRrHi4,RING RING,33,False,207,single,2017-06-05,H.A.L.F (Have.A.Little.Fun),Sik-K,[Gaeko]
...,...,...,...,...,...,...,...,...,...,...,...,...
268,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,0mVvkepe2sQUa0j8NWukaZ,"Flower (Feat. Jay Park, Woo, GIRIBOY)",42,False,273,album,2020-04-02,PEOPLE,CODE KUNST,"[Jay Park, Woo, GIRIBOY]"
269,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",True,20e0EjOZrM0AEMSepdh2fU,love me (feat. Hoody),39,False,201,album,2020-01-06,"u n u, Pt. 1",nafla,[Hoody]
270,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,5MmDbUVKsC7GyFx03PYqUY,Lee Rohan (Prod. by Padi),31,False,280,compilation,2018-04-14,School Rapper 2 Final,Rohann,"[ELO, Jessi]"
271,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,6aGFlx1shAP1KidFY4AiT8,Skip And Kiss,35,False,191,single,2018-07-11,youth.wit.purpose,Sik-K,[]


## Audio Features

### Function `get_audio_features`

**Description of columns:**

`Danceability`:

Measures how suitable a track is for dancing based on tempo, rhythm stability, beat strength, and overall regularity. Ranges from 0.0 (least danceable) to 1.0 (most danceable).

`Energy`:

A measure from 0.0 to 1.0 representing a perceptual measure of intensity and activity. Higher values indicate more energetic tracks.

`Key`:

Indicates the key the track is in, using standard Pitch Class notation. Values range from 0 (C) to 11 (B), with -1 indicating no key detected.

`Loudness`:

The overall loudness of the track in decibels (dB). Typically ranges between -60 dB and 0 dB.

`Mode`:

The modality of a track, where 1 represents a major scale and 0 represents a minor scale.

`Speechiness`:

Measures the presence of spoken words in a track. A value closer to 1.0 indicates more spoken word content (e.g., talk shows, audiobooks).

`Acousticness`:

A confidence measure from 0.0 to 1.0 indicating whether the track is acoustic. Higher values represent more acoustic tracks.

`Instrumentalness`:

Indicates whether a track contains no vocals. Values close to 1.0 suggest the track is likely instrumental.

`Liveness`:

Detects the presence of an audience in the recording. Higher values suggest the recording is live. Value ranges from 0 to 1

`Valence`:

A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Higher values suggest a more positive, happier track.

`Tempo`:

The overall estimated tempo of the track in beats per minute (BPM).

`Type`:

The type of the object, usually "audio_features".

`Duration_ms`:

The duration of the track in milliseconds.

`time_signature`:

An estimated time signature of the track, indicating how many beats are in each bar (measure). Values typically range from 3 to 7, indicating different musical meters.

In [76]:
def get_audio_features(track_id):
    """
    Fetch audio features for a given track using Spotify's Web API.

    Parameters:
        track_id: Spotify track id of the song.

    Returns:
        pd.DataFrame/None: One-row DataFrame with the track's audio features
            ('id' first; 'uri', 'track_href', 'analysis_url' and 'duration_ms'
            removed), or None when the response status is not 200.
    """
    # Renew the token first, as search_song and get_playlist do
    refresh_token_if_needed()

    url = f'https://api.spotify.com/v1/audio-features/{track_id}'
    headers = {
        # Double quotes inside the f-string: reusing the outer quote character
        # is a syntax error on Python versions before 3.12.
        'Authorization': f'Bearer {token["access_token"]}'
    }
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.json()}")
        # The token is deliberately not printed: doing so would write a live
        # access token into the saved notebook output.
        return None

    df_audio_feat = json_normalize(response.json())
    df_audio_feat = df_audio_feat.drop(columns=['uri', 'track_href', 'analysis_url', 'duration_ms'])
    # Move 'id' to the front and keep the remaining columns in their original order
    df_audio_feat = df_audio_feat[['id'] + [col for col in df_audio_feat if col != 'id']]
    return df_audio_feat

#### Example `get_audio_features`

In [77]:
# Same track id as the first row (METEOR) of the playlist output shown earlier
get_audio_features("06eq0M1eZRcIIRfx6d00nu")

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,time_signature
0,06eq0M1eZRcIIRfx6d00nu,0.728,0.642,8,-4.989,1,0.0727,0.316,0,0.0834,0.755,185.797,audio_features,4


### Function `get_audio_analysis`

In [79]:
def get_audio_analysis(track_id):
    """
    Fetch the audio analysis for a given track using Spotify's Web API.

    Parameters:
        track_id: Spotify track id of the song.

    Returns:
        pd.DataFrame/None: The JSON response flattened with json_normalize,
            or None when the response status is not 200.
    """
    # Renew the token first, as search_song and get_playlist do
    refresh_token_if_needed()

    url = f'https://api.spotify.com/v1/audio-analysis/{track_id}'
    headers = {
        # Double quotes inside the f-string: reusing the outer quote character
        # is a syntax error on Python versions before 3.12.
        'Authorization': f'Bearer {token["access_token"]}'
    }
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.json()}")
        # The token is deliberately not printed: doing so would write a live
        # access token into the saved notebook output.
        return None

    df_audio_analysis = json_normalize(response.json())
    return df_audio_analysis

Currently building the `get_audio_analysis` function. Need to look into all columns and extract the important data features about the song. The features should describe the music as much as possible.

In [85]:
# Fetch the analysis for one track and preview the single flattened row
test = get_audio_analysis("06eq0M1eZRcIIRfx6d00nu")
test.head(1)

Unnamed: 0,bars,beats,sections,segments,tatums,meta.analyzer_version,meta.platform,meta.detailed_status,meta.status_code,meta.timestamp,...,track.mode,track.mode_confidence,track.codestring,track.code_version,track.echoprintstring,track.echoprint_version,track.synchstring,track.synch_version,track.rhythmstring,track.rhythm_version
0,"[{'start': 0.66713, 'duration': 1.28984, 'conf...","[{'start': 0.34496, 'duration': 0.32217, 'conf...","[{'start': 0.0, 'duration': 10.0273, 'confiden...","[{'start': 0.0, 'duration': 0.32331, 'confiden...","[{'start': 0.34496, 'duration': 0.16108, 'conf...",4.0.0,Linux,OK,0,1575289972,...,1,0.347,eJxVmolxZDcMRFOZEHgf-Sfm95qjlVxlq5Ycfh4g0GgAvH...,3.15,eJztnQuW3DpyRLdE4o_l4Lv_JfhG1ljUa0-hLffrkezxnD...,4.12,eJxVmIl17TgMQ1txCaZ29d_YEBf0m5-TxLFlieICkpBbe-...,1.0,eJxtW4lxI7sOTEUhkOCdf2IffZCS9_2qddkrzQwvoNFoYG...,1.0


In [84]:
# Look at the nested 'bars' value of the first row (a list of start/duration/confidence dicts)
test['bars'][0]

[{'start': 0.66713, 'duration': 1.28984, 'confidence': 0.826},
 {'start': 1.95697, 'duration': 1.29059, 'confidence': 0.303},
 {'start': 3.24756, 'duration': 1.29135, 'confidence': 0.801},
 {'start': 4.53891, 'duration': 1.29017, 'confidence': 0.143},
 {'start': 5.82908, 'duration': 1.29097, 'confidence': 0.825},
 {'start': 7.12005, 'duration': 1.28941, 'confidence': 0.451},
 {'start': 8.40946, 'duration': 0.96974, 'confidence': 0.751},
 {'start': 9.3792, 'duration': 1.29532, 'confidence': 0.29},
 {'start': 10.67452, 'duration': 1.29183, 'confidence': 0.927},
 {'start': 11.96635, 'duration': 1.29264, 'confidence': 0.62},
 {'start': 13.25899, 'duration': 1.28973, 'confidence': 0.674},
 {'start': 14.54871, 'duration': 1.29117, 'confidence': 0.497},
 {'start': 15.83989, 'duration': 1.2915, 'confidence': 0.807},
 {'start': 17.13139, 'duration': 1.29157, 'confidence': 0.855},
 {'start': 18.42296, 'duration': 1.28962, 'confidence': 0.762},
 {'start': 19.71258, 'duration': 1.28764, 'confidenc

In [28]:
# Manual check of the audio-features endpoint, bypassing get_audio_features.
# `token` is the dictionary holding the access token and related details.
access_token = token['access_token']  # plain access-token string

headers = {'Authorization': 'Bearer {}'.format(access_token)}

track_id = "06eq0M1eZRcIIRfx6d00nu"
url = 'https://api.spotify.com/v1/audio-features/{}'.format(track_id)
response = requests.get(url, headers=headers)

# Show the response object first, then its decoded JSON body
print(response)
print(response.json())


<Response [200]>
{'danceability': 0.728, 'energy': 0.642, 'key': 8, 'loudness': -4.989, 'mode': 1, 'speechiness': 0.0727, 'acousticness': 0.316, 'instrumentalness': 0, 'liveness': 0.0834, 'valence': 0.755, 'tempo': 185.797, 'type': 'audio_features', 'id': '06eq0M1eZRcIIRfx6d00nu', 'uri': 'spotify:track:06eq0M1eZRcIIRfx6d00nu', 'track_href': 'https://api.spotify.com/v1/tracks/06eq0M1eZRcIIRfx6d00nu', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/06eq0M1eZRcIIRfx6d00nu', 'duration_ms': 197097, 'time_signature': 4}


In [13]:
from datetime import datetime

# token['expires_at'] is assumed to be a Unix timestamp; convert it to a
# local datetime for display.
expires_at_timestamp = token['expires_at']
expires_at_datetime = datetime.fromtimestamp(expires_at_timestamp)

print(f"Expiration datetime: {expires_at_datetime}")


Expiration datetime: 2024-05-01 13:52:26.062016


## Next Steps

Need to find a suitable model for building the recommender.

Spotify already offers a recommender via its API, so it is not yet clear what to build. Maybe we can use that recommender to find suitable features for the API?