# Find Genres for Tracks

This notebook allows adding genres to individual tracks.

In [5]:
# Imports here
import ast
import os

import pandas as pd
import requests

In [None]:
# Last.fm API key. It is read from the LASTFM_API_KEY environment variable so
# the secret never has to be saved inside the notebook; when the variable is
# unset the key stays empty (assign it here only for local, uncommitted use).
API_KEY = os.environ.get('LASTFM_API_KEY', '')

About `get_track_genre()`: <br>
Using the Deezer API (Documentation: https://developers.deezer.com/api).
The rate limit is (I believe) 5 requests per second (and 50 per second for
authenticated accounts).
Given that most APIs do not provide genres for individual tracks, this function first
tries to extract the genres from the album and, if none are found there, from the artist.

About `get_track_tags()`: <br>
Using the Last.fm API (Documentation: https://www.last.fm/api).
Rate limits are enforced automatically. I could not find a specific number.
According to DeepSeek R1, it is 5 requests per second (free tier).

In [None]:
# Functions here
def _fetch_deezer_json(url, params=None):
    '''
    Sends a GET request to a Deezer endpoint and decodes the JSON body.

    Args:
        url (str): The endpoint URL.
        params (dict, optional): Query parameters. They are handed to
            `requests`, which takes care of URL-encoding them.

    Returns:
        dict: The decoded response.
        Returns {} if the request fails or the body is not a JSON object.
    '''
    try:
        # The timeout keeps one stalled connection from blocking a whole
        # DataFrame run; 10 seconds is an arbitrary but generous bound.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f'An error occurred while making the API request: {e}')
        return {}
    except ValueError as e:
        print(f'An error occurred while parsing the JSON response: {e}')
        return {}
    return payload if isinstance(payload, dict) else {}


def _genre_names(payload):
    '''
    Collects the genre names found under payload['genres']['data'].

    Returns [] if that structure is missing or empty.
    '''
    genres = payload.get('genres')
    if not isinstance(genres, dict):
        return []
    return [entry['name'] for entry in genres.get('data') or []
            if isinstance(entry, dict) and 'name' in entry]


def get_track_genre(artist_name, track_name):
    '''
    Fetches the genre of a track using the Deezer API
    (https://developers.deezer.com/api).

    The best-ranked search hit is used. Genres are taken from that hit's
    album and, if the album lists none, from its artist.

    Args:
        artist_name (str): The name of the artist.
        track_name (str): The name of the track.

    Returns:
        list: All genres associated with the track.
        Returns [] if the genre cannot be determined, including when a
        request fails (the error is printed, not raised).

    Example:
        >>> get_track_genre('Ed Sheeran', 'Shape of you')
        ['Pop', 'Singer & Songwriter']
        >>> get_track_genre('Unknown Artist', 'Unknown Track')
        []
    '''
    # Search for the track and order results by ranking. Passing the query
    # via `params` URL-encodes it and keeps `order` a separate parameter.
    # Double quotes follow the advanced-search syntax shown in Deezer's docs
    # and do not clash with apostrophes in names.
    search_response = _fetch_deezer_json(
        'https://api.deezer.com/search/track',
        params={
            'q': f'artist:"{artist_name}" track:"{track_name}"',
            'order': 'RANKING',
        })

    results = search_response.get('data')
    if not results:
        return []

    # Fetch best fitting result
    best_match = results[0]
    album_id = (best_match.get('album') or {}).get('id')
    artist_id = (best_match.get('artist') or {}).get('id')

    # Try the album first, then fall back to the artist.
    # NOTE(review): confirm that the artist endpoint actually exposes a
    # `genres` field; if it does not, this fallback never yields anything.
    for resource, resource_id in (('album', album_id), ('artist', artist_id)):
        if resource_id is None:
            continue
        names = _genre_names(
            _fetch_deezer_json(f'https://api.deezer.com/{resource}/{resource_id}'))
        if names:
            return names

    return []

def get_track_tags(artist_name, track_name, api_key=None):
    '''
    Fetches the tags of a track using the Last.fm API (method track.getInfo).

    Args:
        artist_name (str): The name of the artist.
        track_name (str): The name of the track.
        api_key (str, optional): Last.fm API key. When omitted, the
            notebook-level API_KEY is looked up at call time, so changing
            API_KEY takes effect without redefining this function.

    Returns:
        list: All tags associated with the track.
        Returns [] if the tags cannot be determined, including when the
        request or the JSON decoding fails (the error is printed).

    Example:
        >>> get_track_tags('Ed Sheeran', 'Shape of you')
        ['pop', '2017', 'dancehall', 'dance', '2010s']
        >>> get_track_tags('Unknown Artist', 'Unknown Track')
        []
    '''
    if api_key is None:
        api_key = API_KEY

    # Passing the values via `params` URL-encodes them, so names containing
    # '&', '#', '+' or spaces no longer corrupt the query string.
    request_params = {
        'method': 'track.getInfo',
        'api_key': api_key,
        'artist': artist_name,
        'track': track_name,
        'format': 'json',
    }

    try:
        # HTTPS keeps the API key out of clear text; the timeout (an
        # arbitrary 10 seconds) stops one stalled request from blocking
        # a whole DataFrame run.
        response = requests.get(
            'https://ws.audioscrobbler.com/2.0/',
            params=request_params,
            timeout=10)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the JSON response
        data = response.json()
    except requests.exceptions.RequestException as e:
        # Handle any errors that occur during the request
        print(f'An error occurred while making the API request: {e}')
        return []
    except ValueError as e:
        # Handle JSON decoding errors
        print(f'An error occurred while parsing the JSON response: {e}')
        return []

    # Walk data['track']['toptags']['tag'] step by step so an unexpected
    # shape (e.g. an error payload) results in [] rather than an exception.
    track = data.get('track') if isinstance(data, dict) else None
    toptags = track.get('toptags') if isinstance(track, dict) else None
    tag_list = toptags.get('tag') if isinstance(toptags, dict) else None

    # Defensive: a lone tag might arrive as a single object instead of a
    # list - TODO confirm against live responses.
    if isinstance(tag_list, dict):
        tag_list = [tag_list]
    if not isinstance(tag_list, list):
        return []

    return [tag['name'] for tag in tag_list
            if isinstance(tag, dict) and 'name' in tag]

def add_tags(row):
    '''
    Adds found tags to genres

    Args:
        row: A mapping or DataFrame row providing 'artist_names',
            'track_name' and 'genres' (a list).

    Returns:
        list: The row's genres followed by the tags found for the track.
        If no tags are found, the row's genres are returned as they are.
    '''
    tags = get_track_tags(row['artist_names'], row['track_name'])
    if not tags:
        return row['genres']
    # Build a new list instead of extending the existing one in place, so
    # the caller's data is only changed through the returned value.
    return row['genres'] + tags

def replace_with_tag(row):
    '''
    Swaps the row's genres for the tags found for its track.

    The tags are looked up via get_track_tags() using the row's
    'artist_names' and 'track_name'. If the lookup yields nothing,
    the row's current 'genres' value is returned instead.
    '''
    found_tags = get_track_tags(row['artist_names'], row['track_name'])
    return found_tags or row['genres']

In [None]:
# Add genres using ONLY the Deezer API

# Specify input file
input_file = ''
# splitext removes only the final extension, so dots elsewhere in the path
# (e.g. '../data/tracks.csv') no longer cut the output name short
output_file = os.path.splitext(input_file)[0] + '_with_genres.csv'

# Read the input CSV file
df = pd.read_csv(input_file)

# Add a new column for genre (one Deezer lookup per row)
df['genres'] = df.apply(
    lambda row: get_track_genre(row['artist_names'], row['track_name']), axis=1)

# Save the updated data frame
df.to_csv(output_file, index=False)

print(f'Genres added and saved to {output_file}')

In [None]:
# Add tags as genres using ONLY the Last.fm API
# The input file must already contain a 'genres' column, e.g. the file
# written by the Deezer-only cell.

# Specify input file
input_file = ''
# splitext removes only the final extension, so dots elsewhere in the path
# (e.g. '../data/tracks.csv') no longer cut the output name short
output_file = os.path.splitext(input_file)[0] + '_with_tags.csv'

# Read the input CSV file
df = pd.read_csv(input_file)

# Clean data from missing values
df['genres'] = df['genres'].fillna('[]')

# Convert strings representing lists in 'genres'
# to actual lists for later list operations
df['genres'] = df['genres'].apply(ast.literal_eval)

# Choose method (replace_with_tag or add_tags)
method = add_tags

# Apply chosen method on all genres
df['genres'] = df.apply(method, axis=1)

# Save data
df.to_csv(output_file, index=False)

print(f'Genres added and saved to {output_file}')

In [None]:
# Add genres using BOTH the Deezer API AND add tags as genres using the Last.fm API

# Specify input file
input_file = ''
# splitext removes only the final extension, so dots elsewhere in the path
# (e.g. '../data/tracks.csv') no longer cut the output name short
output_file = os.path.splitext(input_file)[0] + '_with_genres+tags.csv'

# Read the input CSV file
df = pd.read_csv(input_file)

# Build the 'genres' column from scratch: Deezer genres followed by Last.fm tags.
# Any existing 'genres' column is overwritten, so it needs no prior cleaning
# and the input file does not have to contain one.
df['genres'] = df.apply(
    lambda row: get_track_genre(row['artist_names'], row['track_name']) +
                get_track_tags(row['artist_names'], row['track_name']), axis=1)

# Save data
df.to_csv(output_file, index=False)

print(f'Genres added and saved to {output_file}')

In [23]:
# Preview the frame left in `df` by whichever processing cell above ran last
df

Unnamed: 0,artist_names,track_name,peak_streams,total_streams,peak_rank_max,max_days_on_chart,best_day_streams,first_appearance,uri,source,best_day_date,genres
0,The Weeknd,Blinding Lights,8453567,4552239347,8,1858,8453567,2023-01-01,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,Republic Records,2020-03-20,"['R&B', 'synthpop', 'synthwave', 'pop', '2019'..."
1,Harry Styles,As It Was,16103849,3640335748,40,1002,16103849,2023-01-01,spotify:track:4Dvkj6JhhA12EX05fT7y2e,Columbia,2022-04-01,"['Pop', 'pop', 'synthpop', 'rock', 'indie pop'..."
2,Lewis Capaldi,Someone You Loved,3974554,3625415177,198,2166,3974554,2023-01-01,spotify:track:7qEHsqek33rTcFNT9PFqLf,Vertigo Berlin,2019-10-30,"['Alternative', '2010s', 'pop', 'alternative',..."
3,"The Kid LAROI, Justin Bieber",STAY (with Justin Bieber),10629302,3233689138,4,1202,10629302,2023-01-01,spotify:track:5PjdY0CKGZdEuoNab3yDmX,Columbia,2021-08-20,[]
4,"Post Malone, Swae Lee",Sunflower - Spider-Man: Into the Spider-Verse,5033261,3232496846,2,2187,5033261,2023-01-02,spotify:track:0RiRZpuVRbi7oqRdSMwhQY,Republic Records,2019-01-18,"['Filme/Videospiele', 'Filmmusik', 'Soundtrack..."
...,...,...,...,...,...,...,...,...,...,...,...,...
7006,Sia,Cheap Thrills,554971,554971,27,242,554971,2019-01-01,spotify:track:27SdWb2rFzO6GWiYDBTD9j,Monkey Puzzle Records/RCA Records,2019-01-01,"['Electro', 'Dance', 'Pop', 'International Pop..."
7007,"LX, Maxwell, Bonez MC, Gzuz, Sa4",HaifischNikez Allstars,544655,544655,143,4,544655,2019-01-02,spotify:track:0kLA41xkWzzYzMIEGFuDkP,187 Strassenbande,2019-01-02,['Rap/Hip Hop']
7008,Bee Gees,"Stayin' Alive - From ""Saturday Night Fever"" So...",536886,536886,119,3,536886,2019-01-01,spotify:track:79hJaqmVdohltPBNN6BULM,Bee Gees Catalog,2019-01-01,"['Pop', 'Disco', '70s', 'Soundtrack', 'funk', ..."
7009,"MC Kevin o Chris, DENNIS",Medley da Gaiola - DENNIS Remix,536447,536447,194,1,536447,2019-01-01,spotify:track:7rLegaz7zKB6EtDNzcslKE,Sony Music Entertainment,2019-01-01,[]
