## Find Artist Genres

## Create List of Artists

In [1]:
import pandas as pd

# load the cleaned music tracks data from the VS Code project directory
file_path = '../Cleaned_Data/Music_Streaming_History.csv'
df = pd.read_csv(file_path)

# find unique artists
unique_artists = df['artist_name'].dropna().unique()

# convert to a DataFrame
unique_artists_df = pd.DataFrame(unique_artists, columns=['artist_name'])

# save the unique artists to a new CSV
output_path = '../Cleaned_Data/Artist_List.csv'
unique_artists_df.to_csv(output_path, index=False, encoding='utf-8-sig')

#### Import Spotipy module

In [2]:
# find artist genres
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import os
from dotenv import load_dotenv
import pandas as pd

#### Store Credentials in .env file

In [121]:
# enter spotify credentials
load_dotenv()
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#### Loop Through Artist List and Add Genre to List

In [3]:
# read artist csv
input_path = '../Cleaned_Data/Artist_List.csv'
artist_df = pd.read_csv(input_path)
artist_names = artist_df['artist_name'].tolist()

# create a list to store results
artist_genre =[]

# find genres for each artist
for artist_name in artist_names:
    try:
        # search for artist on Spotify
        results = sp.search(q=artist_name,type='artist', limit =1)
        if results['artists']['items']:
            artist = results['artists']['items'][0]
            genres = ', '.join(artist['genres']) if artist['genres'] else 'No genres found'
        else:
            genres = 'Not Found'
    except Exception as e:
        genres = f'Error: {e}'
    artist_genre.append(genres)

# create DataFrame with the results
artist_genre_data = pd.DataFrame({
    'artist_name': artist_names,
    'genres': artist_genre
})

In [95]:
# save the data as new file
artist_genre_data.to_csv('../Cleaned_Data/Artist_Genre_List.csv', index=False, encoding='utf-8-sig')

#### Normalize Genre Labels

In [149]:
file_path = '../Cleaned_Data/Artist_Genre_List.csv'
artist_genre_data = pd.read_csv(file_path)

### Clean the genres

In [153]:
# define genre mapping
# used regular expression \b (word boundaries) for precise genre replacement to avoid partial matches 
# define genre mapping
# used regular expression \b (word boundaries) for precise genre replacement to avoid partial matches 
genre_mapping = {
    r'\bc-pop\b': 'mandopop',
    r'\bclassic mandopop\b': 'mandopop',
    r'\bmando pop\b': 'mandopop',
    r'\bmandarin pop\b': 'mandopop',
    r'\bchinese pop\b': 'mandopop',
    r'\btaiwanese pop\b': 'mandopop',
    r'\bmainland chinese pop\b': 'mandopop',
    r'\btaiwan pop\b': 'mandopop',
    r'\bpop mainland mandopop\b': 'mandopop',
    r'\bmandopop, taiwan singer-songwriter\b': 'mandopop',
    r'\bmalaysian mandopop, mandopop\b': 'mandopop',
    r'\bmalaysian mandopop\b': 'mandopop',
    r'\bsingaporean mandopop\b': 'mandopop',
    r'\bzhongguo feng\b': 'mandopop',
    r'\bmainland mandopop\b': 'mandopop',
    r'\bsingaporean mandopop\b': 'mandopop',
    r'\bsingaporean mandopop\b': 'mandopop',
    r'\bdance pop\b': 'pop',
    r'\bchinese reggae\b': 'reggae',
    r'\breggae flow\b': 'reggae',
    r'\breggae colombiano\b': 'reggae',
    r'\breggae rock\b': 'reggae',
    r'\bhk-pop\b': 'cantopop',
    r'\bkorean pop\b': 'k-pop',
    r'\bclassic k-pop\b': 'k-pop',
    r'\bpop r&b\b': 'r&b',
    r'\bcontemporary r&b\b': 'r&b',
    r'\bcanadian contemporary r&b\b': 'r&b',
    r'\bkorean r&b\b': 'r&b',
    r'\bchinese r&b\b': 'r&b',
    r'\bchill r&b\b': 'r&b',
    r'\buk contemporary r&b\b': 'r&b',
    r'\bdark r&b\b': 'r&b',
    r'\bboy band,contemporary r&b\b': 'r&b',
    r'\bindie r&b\b': 'r&b',
    r'\bbedroom r&b\b': 'r&b',
    r'\bcanadian r&b\b': 'r&b',
    r'\buk r&b\b': 'r&b',
    r'\bhouse\b': 'edm',
    r'\bprogressive house\b': 'edm',
    r'\bfilter house\b': 'edm',
    r'\bswedish tropical house\b': 'edm',
    r'\bchill house\b': 'edm',
    r'\btropical house\b': 'edm',
    r'\bdutch house\b': 'edm',
    r'\bjazz house\b': 'edm',
    r'\bnordic house\b': 'edm',
    r'\bstutter house\b': 'edm',
    r'\btech house\b': 'edm',
    r'\bgerman house\b': 'edm',
    r'\bg-house\b': 'edm',
    r'\bprogressive edm\b': 'edm',
    r'\bgaming edm\b': 'edm',
    r'\bdutch edm\b': 'edm',
    r'\bg-edm\b': 'edm',
    r'\btropical edm\b': 'edm',
    r'\bbelgian edm\b': 'edm',
    r'\bfilter edm\b': 'edm',
    r'\bbrostep\b': 'edm',
    r'\bclassic dubstep\b': 'edm',
    r'\btech edm\b': 'edm',
    r'\bpop rap\b': 'rap',
    r'\btrap\b': 'hip hop',
    r'\bdetroit hip hop\b': 'hip hop',
    r'\bviral hip hop\b': 'hip hop',
    r'\bhip hop latino\b': 'hip hop',
    r'\blgbtq+ hip hop\b': 'hip hop',
    r'\bcanadian hip hop\b': 'hip hop',
    r'\bhip hop queen\b': 'hip hop',
    r'\bdark hip hop\b': 'hip hop',
    r'\bmalaysian hip hop\b': 'hip hop',
    r'\belectronic hip hop\b': 'hip hop',
    r'\barab hip hop\b': 'hip hop',
    r'\bturkish hip hop\b': 'hip hop',
    r'\baustralian hip hop\b': 'hip hop',
    r'\batl hip hop\b': 'hip hop',
    r'\bhip hop brasileiro\b': 'hip hop',
    r'\bhip hop baiano\b': 'hip hop',
    r'\bhip hop soul\b': 'hip hop',
    r'\bsouth carolina hip hop\b': 'hip hop',
    r'\bpittsburgh rap\b': 'rap',
    r'\bflorida rap\b': 'rap',
    r'\bgangster rap\b': 'rap',    
    r'\bst louis rap\b': 'rap',   
    r'\bcali rap\b': 'rap',   
    r'\brap latina\b': 'rap',   
    r'\bk-rap\b': 'rap',   
    r'\bchicago rap\b': 'rap',   
    r'\bdfw rap\b': 'rap',   
    r'\bviral rap\b': 'rap',   
    r'\bwest coast rap\b': 'rap',   
    r'\bhouston rap\b': 'rap',   
    r'\brhode island rap\b': 'rap',   
    r'\bphilly rap\b': 'rap',   
    r'\brage rap\b': 'rap',   
    r'\bchinese rap\b': 'rap',   
    r'\bsad rap\b': 'rap',   
    r'\bnyc rap\b': 'rap',   
    r'\blondon rap\b': 'rap',   
    r'\bbaton rouge rap\b': 'rap',   
    r'\brap dominicano\b': 'rap',   
    r'\bnew orleans rap\b': 'rap',   
    r'\bdirty south rap\b': 'rap',   
    r'\brap conscient\b': 'rap',   
    r'\bemo rap\b': 'rap',
    r'\bmeme rap\b': 'rap',   
    r'\bigbo rap\b': 'rap',   
    r'\bchicano rap\b': 'rap',   
    r'\bnew jersey rap\b': 'rap',
    r'\bsan diego rap\b': 'rap',   
    r'\bnew jersey underground rap\b': 'rap',   
    r'\btexas latin rap\b': 'rap',   
    r'\brap rock\b': 'rap',
    r'\bcloud rap\b': 'rap',   
    r'\bdmv rap\b': 'rap',   
    r'\bscam rap\b': 'rap',   
    r'\bpost-teen pop\b': 'pop',
    r'\bpost-teen pop\b': 'pop',
    r'\bviral pop\b': 'pop',
    r'\bmelodic rap\b': 'rap',
    r'\bindietronica\b': 'indie',
    r'\btaiwan indie\b': 'indie',
    r'\bindie rap\b': 'indie',
    r'\bhong kong indie\b': 'indie',
    r'\bchinese indie\b': 'indie',
    r'\bnorthwest china indie\b': 'indie',
    r'\btaiwanese indie\b': 'indie',
    r'\bmodern indie pop\b': 'indie',
    r'\bindie folk\b': 'indie',
    r'\bindie rap\b': 'indie',
    r'\bindie pop\b': 'indie',
    r'\bchinese indie rock\b': 'indie',
    r'\bindie poptimism\b': 'indie',
    r'\baustralian indie\b': 'indie',
    r'\bauckland indie\b': 'indie',
    r'\bpov: indie\b': 'indie',
    r'\bindie rockism\b': 'indie',
    r'\bnorwegian indie\b': 'indie',
    r'\bbergen indie\b': 'indie',
    r'\bindie electropop\b': 'indie',
    r'\bindie soul\b': 'indie',
    r'\bchinese indie pop\b': 'indie',
    r'\bsouthern china indie\b': 'indie',
    r'\bchannel islands indie\b': 'indie',
    r'\bnorth east england indie\b': 'indie',
    r'\bsheffield indie\b': 'indie'   
    r'\bmodern rock\b': 'rock',
    r'\bmodern alternative rock\b': 'rock',
    r'\bpop rock\b': 'rock',
    r'\bdance rock\b': 'rock',
    r'\bmodern blues rock\b': 'rock',
    r'\bceltic rock\b': 'rock',
    r'\bmodern country rock\b': 'rock',
    r'\bpiano rock\b': 'rock',
    r'\bhong kong rock\b': 'rock',
    r'\bchinese post-rock\b': 'rock',
    r'\bsoft rock\b': 'rock',
    r'\bmexican rock\b': 'rock',
    r'\bj-rock\b': 'rock',
    r'\bthai rock\b': 'rock',
    r'\balternative pop rock\b': 'rock',
    r'\bglam rock\b': 'rock',
    r'\bclassic rock\b': 'rock',
    r'\bhard rock\b': 'rock',
    r'\bheartland rock\b': 'rock',
    r'\balbum rock\b': 'rock',
    r'\bcountry rock\b': 'rock',
    r'\bfolk rock\b': 'rock',
    r'\broots rock\b': 'rock',
    r'\btaiwan rock\b': 'rock',
    r'\bchristian alternative rock\b': 'rock',
    r'\byacht rock\b': 'rock',
    r'\bmodern folk rock\b': 'rock',
    r'\bbritish alternative rock\b': 'rock',
    r'\bfunk rock\b': 'rock',
    r'\bacoustic rock\b': 'rock',
    r'\bcanadian rock\b': 'rock',
    r'\bkiwi rock\b': 'rock',
    r'\bsouthern rock\b': 'rock',
    r'\bswamp rock\b': 'rock',
    r'\bpsychedelic rock\b': 'rock',
    r'\bdanish pop rock\b': 'rock'
    
    
    

}


# apply genre replacements
for old_genre, new_genre in genre_mapping.items():
    artist_genre_data['genres'] = artist_genre_data['genres'].str.replace(old_genre, new_genre, regex=True)


SyntaxError: ':' expected after dictionary key (3084599750.py, line 101)

In [None]:
# apply genre replacements
for old_genre, new_genre in genre_mapping.items():
    artist_genre_data['genres'] = artist_genre_data['genres'].str.replace(old_genre, new_genre, regex=True)

In [151]:
#  normalize the 'genres' column
artist_genre_data['genres'] = artist_genre_data['genres'].str.lower().str.strip()

In [152]:
# Remove duplicates in genres
artist_genre_data['genres'] = artist_genre_data['genres'].str.split(',').apply(lambda x:','.join(sorted(set(x))))

In [None]:
# split genres into list
artist_genre_data['genres'] = artist_genre_data['genres'].str.split(',')

# explode the DataFrame to one genre per row
exploded_data = artist_genre_data.explode('genres')

# drop duplicate rows
exploded_data = exploded_data.drop_duplicates(subset=['artist_name','genres'])

# format DataFrame
formatted_data = exploded_data.reset_index(drop=True)
formatted_data['genres'] = formatted_data['genres'].str.strip()

# save the updated data
formatted_data.to_csv('./Cleaned_Data/Artist_Genre_List.csv', index=False, encoding='utf-8-sig')