In [1]:
# !pip install tabulate
import pandas as pd
import re
from collections import defaultdict

In [2]:
df = pd.read_csv("Project_3_cleaned.csv")

In [3]:
df.head()

Unnamed: 0,Track URI,Track Name,Artist URI(s),Artist Name(s),Album URI,Album Artist Name(s),Album Release Date,Track Duration (ms),Track Preview URL,Explicit,Popularity,Artist Genres
0,spotify:track:0vNPJrUrBnMFdCs8b2MTNG,Fader,spotify:artist:4W48hZAnAHVOC2c8WH8pcq,The Temper Trap,spotify:album:0V59MMtgoruvEqMv18KAOH,The Temper Trap,2009,192373,https://p.scdn.co/mp3-preview/14264bd1501d2723...,False,0,"indietronica,modern rock,shimmer pop"
1,spotify:track:0NpvdCO506uO58D4AbKzki,Sherry,spotify:artist:6mcrZQmgzFGRWf7C0SObou,Frankie Valli & The Four Seasons,spotify:album:0NUEQILaBzavnzcMEs4buZ,Frankie Valli & The Four Seasons,1/14/2003,152160,https://p.scdn.co/mp3-preview/e3f765262ebc349e...,False,54,"adult standards,bubblegum pop,doo-wop,lounge,n..."
2,spotify:track:1MtUq6Wp1eQ8PC6BbPCj8P,I Took A Pill In Ibiza - Seeb Remix,"spotify:artist:2KsP6tYLJlTBvSUxnwlVWa, spotify...","Mike Posner, Seeb",spotify:album:1Tz3Ai1guEFf4hV3d9i17K,Mike Posner,5/6/2016,197933,https://p.scdn.co/mp3-preview/7bae6aac6d699135...,True,63,"dance pop,pop,pop dance,pop rap,pop dance"
3,spotify:track:59lq75uFIqzUZcgZ4CbqFG,Let Go for Tonight,spotify:artist:7qRll6DYV06u2VuRPAVqug,Foxes,spotify:album:5AQ7uKRSpAv7SNUl4j24ru,Foxes,5/12/2014,238413,https://p.scdn.co/mp3-preview/84a003d72f9f1468...,False,39,"electropop,metropopolis,uk pop"
4,spotify:track:7KdcZQ3GJeGdserhK61kfv,The Way I Want To Touch You,spotify:artist:7BEfMxbaqx6dOpbtlEqScm,Captain & Tennille,spotify:album:3GUxesVyOehInaxJyCTh6d,Captain & Tennille,1/1/1975,163586,https://p.scdn.co/mp3-preview/9e7a4a7b7dc56dc3...,False,35,"mellow gold,soft rock,yacht rock"


In [4]:
# Predefined list of genres
genres_list = ['rap', 'edm', 'pop', 'alternative', 'classical', 'rock', 'jazz', 'latin', 'indie', 'country', 'soul', 'blues', 'r&b', 'euro', 'urban']

In [5]:
# Predefined list of genres (convert to lowercase for case-insensitive matching)
genres_list = [genre.lower() for genre in genres_list]  # Make sure all genres in the list are lowercase

In [6]:
# Initialize the genre count dictionary
genre_counts = defaultdict(int)

In [7]:
# Function to filter genres and handle special cases for 'rap'/'hip hop' and 'dance'/'edm'
def filter_and_count_genres(genre_string, genres_list):
    # Check if genre_string is a valid string
    if isinstance(genre_string, str):
        # Create a regex pattern based on the genres_list
        pattern = r'\b(?:' + '|'.join([re.escape(genre) for genre in genres_list]) + r')\b'
        
        # Find all matching genres in the genre_string
        matched_genres = re.findall(pattern, genre_string)
        
        # Handle the special case for 'rap' and 'hip hop' (add only one of them)
        if 'rap' in matched_genres and 'hip hop' in matched_genres:
            matched_genres = [genre for genre in matched_genres if genre != 'hip hop']
        
        # Handle the special case for 'dance' and 'edm' (add only one of them)
        if ('dance' in matched_genres or 'dance pop' in matched_genres) and 'edm' in matched_genres:
            matched_genres = [genre for genre in matched_genres if genre not in ['dance', 'dance pop']]
        
        # Remove duplicates by converting to a set
        matched_genres = list(set(matched_genres))
        
        # Debugging: Print the matched genres for each iteration
        # print(f"Matched Genres: {matched_genres[:20]}")
        
        # Update the genre counts for each matched genre
        for genre in matched_genres:
            genre_counts[genre] += 1
        
        # Debugging: Print the genre count after updating
        # print(f"Updated Genre Counts: {dict(genre_counts)}")
        
        # Return the matched genres as a comma-separated string
        return ', '.join(matched_genres)
    else:
        # Return an empty string if genre_string is not valid
        return ''

In [8]:
# Apply the function to the "Artist Genres" column
df['Filtered Artist Genres'] = df['Artist Genres'].apply(lambda x: filter_and_count_genres(x, genres_list))
df.head(10)

Unnamed: 0,Track URI,Track Name,Artist URI(s),Artist Name(s),Album URI,Album Artist Name(s),Album Release Date,Track Duration (ms),Track Preview URL,Explicit,Popularity,Artist Genres,Filtered Artist Genres
0,spotify:track:0vNPJrUrBnMFdCs8b2MTNG,Fader,spotify:artist:4W48hZAnAHVOC2c8WH8pcq,The Temper Trap,spotify:album:0V59MMtgoruvEqMv18KAOH,The Temper Trap,2009,192373,https://p.scdn.co/mp3-preview/14264bd1501d2723...,False,0,"indietronica,modern rock,shimmer pop","pop, rock"
1,spotify:track:0NpvdCO506uO58D4AbKzki,Sherry,spotify:artist:6mcrZQmgzFGRWf7C0SObou,Frankie Valli & The Four Seasons,spotify:album:0NUEQILaBzavnzcMEs4buZ,Frankie Valli & The Four Seasons,1/14/2003,152160,https://p.scdn.co/mp3-preview/e3f765262ebc349e...,False,54,"adult standards,bubblegum pop,doo-wop,lounge,n...","pop, soul, rock"
2,spotify:track:1MtUq6Wp1eQ8PC6BbPCj8P,I Took A Pill In Ibiza - Seeb Remix,"spotify:artist:2KsP6tYLJlTBvSUxnwlVWa, spotify...","Mike Posner, Seeb",spotify:album:1Tz3Ai1guEFf4hV3d9i17K,Mike Posner,5/6/2016,197933,https://p.scdn.co/mp3-preview/7bae6aac6d699135...,True,63,"dance pop,pop,pop dance,pop rap,pop dance","pop, rap"
3,spotify:track:59lq75uFIqzUZcgZ4CbqFG,Let Go for Tonight,spotify:artist:7qRll6DYV06u2VuRPAVqug,Foxes,spotify:album:5AQ7uKRSpAv7SNUl4j24ru,Foxes,5/12/2014,238413,https://p.scdn.co/mp3-preview/84a003d72f9f1468...,False,39,"electropop,metropopolis,uk pop",pop
4,spotify:track:7KdcZQ3GJeGdserhK61kfv,The Way I Want To Touch You,spotify:artist:7BEfMxbaqx6dOpbtlEqScm,Captain & Tennille,spotify:album:3GUxesVyOehInaxJyCTh6d,Captain & Tennille,1/1/1975,163586,https://p.scdn.co/mp3-preview/9e7a4a7b7dc56dc3...,False,35,"mellow gold,soft rock,yacht rock",rock
5,spotify:track:000xQL6tZNLJzIrtIgxqSl,Still Got Time (feat. PARTYNEXTDOOR),"spotify:artist:5ZsFI1h6hIdQRw2ti0hz81, spotify...","ZAYN, PARTYNEXTDOOR",spotify:album:2kGUeTGnkLOYlinKRJe47G,ZAYN,3/23/2017,188490,https://p.scdn.co/mp3-preview/765e08c9b8930c6d...,False,53,"pop,uk pop,r&b,rap,urban contemporary","pop, r&b, rap, urban"
6,spotify:track:46xkXPGjR9Ig9BcaTUNus3,Your Song,spotify:artist:5CCwRZC6euC8Odo6y9X8jr,Rita Ora,spotify:album:6Vn8F3hERVHYYz5RfKmsAN,Rita Ora,11/23/2018,180160,https://p.scdn.co/mp3-preview/d57008f417a1466a...,False,60,"dance pop,pop,uk pop",pop
7,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,Fix You,spotify:artist:4gzpq5DPGxSnKTe4SA8HAU,Coldplay,spotify:album:4E7bV0pzG0LciBSWTszra6,Coldplay,6/7/2005,295533,https://p.scdn.co/mp3-preview/4ee64ea0fcfc1e61...,False,83,"permanent wave,pop",pop
8,spotify:track:1s2khOWzC99udpUaPICLJI,There You'll Be - 2007 Remaster,spotify:artist:25NQNriVT2YbSW80ILRWJa,Faith Hill,spotify:album:7fvl3dOnDrv9rq5IBmLbAa,Faith Hill,2007,220840,https://p.scdn.co/mp3-preview/a3b43c90cfdf09ed...,False,44,"contemporary country,country,country dawn,coun...",country
9,spotify:track:2sXp9Qmvc7mRaDBjBgcGGi,Every Breath You Take - Remastered 2003,spotify:artist:5NGO30tJxFlKixkPSgXcFE,The Police,spotify:album:7yDxJXFPl88Dt9kBo0dDD6,The Police,6/1/1983,253886,https://p.scdn.co/mp3-preview/92d7997134bb18b9...,False,3,"album rock,classic rock,permanent wave,rock",rock


In [9]:
# Now, genre_counts will hold the running total of each genre
print("Final Genre Counts:", dict(genre_counts))

Final Genre Counts: {'pop': 5239, 'rock': 3929, 'soul': 491, 'rap': 799, 'r&b': 558, 'urban': 435, 'country': 397, 'edm': 394, 'alternative': 767, 'indie': 448, 'latin': 54, 'blues': 221, 'classical': 3, 'jazz': 124, 'euro': 15}


In [10]:
# Show the updated DataFrame
print(df.head())

                              Track URI                           Track Name  \
0  spotify:track:0vNPJrUrBnMFdCs8b2MTNG                                Fader   
1  spotify:track:0NpvdCO506uO58D4AbKzki                               Sherry   
2  spotify:track:1MtUq6Wp1eQ8PC6BbPCj8P  I Took A Pill In Ibiza - Seeb Remix   
3  spotify:track:59lq75uFIqzUZcgZ4CbqFG                   Let Go for Tonight   
4  spotify:track:7KdcZQ3GJeGdserhK61kfv          The Way I Want To Touch You   

                                       Artist URI(s)  \
0              spotify:artist:4W48hZAnAHVOC2c8WH8pcq   
1              spotify:artist:6mcrZQmgzFGRWf7C0SObou   
2  spotify:artist:2KsP6tYLJlTBvSUxnwlVWa, spotify...   
3              spotify:artist:7qRll6DYV06u2VuRPAVqug   
4              spotify:artist:7BEfMxbaqx6dOpbtlEqScm   

                     Artist Name(s)                             Album URI  \
0                   The Temper Trap  spotify:album:0V59MMtgoruvEqMv18KAOH   
1  Frankie Valli & The Four 

In [11]:
from tabulate import tabulate

# Pretty print the first 20 rows of the DataFrame using tabulate
print(tabulate(df.head(20), headers='keys', tablefmt='pretty', showindex=False))


+--------------------------------------+-------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+--------------------------------------------+--------------------------------------+--------------------------------------------+--------------------+---------------------+-------------------------------------------------------------------------------------------------------------+----------+------------+-------------------------------------------------------------------------------------+------------------------+
|              Track URI               |                            Track Name                             |                                                    Artist URI(s)                                                    |               Artist Name(s)               |              Album URI               |            Album Artist Name(s)            | 