In [98]:
from bs4 import BeautifulSoup
import requests, sys
import pandas as pd
import os
import re
import time

from threading import get_ident
from multiprocessing import cpu_count
import asyncio
from concurrent.futures import ThreadPoolExecutor as PoolExecuter

from joblib.externals.loky import set_loky_pickler
from joblib import parallel_backend, Parallel, delayed, wrap_non_picklable_objects

def chunkIt(seq, num):
    """Split ``seq`` into ``num`` consecutive slices of near-equal length.

    Each boundary is computed directly as ``int(k * len(seq) / num)`` instead
    of by repeatedly adding a float average. Accumulating the average lets
    rounding error build up (ten additions of 0.1 give 0.9999999999999999),
    which could produce an extra trailing chunk.

    Args:
        seq: A sliceable sequence that supports ``len()``.
        num: Number of chunks to produce; must be greater than zero.

    Returns:
        A list of slices of ``seq`` which, concatenated in order, reproduce
        ``seq``. An empty ``seq`` yields ``[]``. When ``num`` is larger than
        ``len(seq)`` some of the slices are empty.

    Raises:
        ValueError: If ``num`` is zero or negative. A negative value used to
            loop forever appending empty slices.
    """
    if num <= 0:
        raise ValueError(
            "num must be a positive number of chunks, got {!r}".format(num)
        )

    total = len(seq)
    out = []
    if total == 0:
        return out

    index = 0
    while index < num:
        # The end of chunk k and the start of chunk k + 1 come from the same
        # expression, so the slices can never overlap or leave a gap.
        start = int(index * total / num)
        stop = int((index + 1) * total / num)
        out.append(seq[start:stop])
        index += 1
    return out

# Fetch the genre index page. The timeout keeps the cell from hanging forever
# on an unresponsive server, and raise_for_status() stops an HTTP error page
# from being parsed later as if it were the genre map.
r = requests.get("http://everynoise.com/engenremap.html", timeout=10)
r.raise_for_status()

In [110]:
import re
import sys
import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
from multiprocessing import cpu_count

def fetch_artists(soup):
    """Collect artist names and their weights from a parsed genre page.

    Artists are the ``<div>`` elements whose class is ``"genre scanme"``.
    The weight is the last whitespace-separated token of the div's ``style``
    attribute with ``%`` removed (presumably the font-size percentage --
    confirm against the page markup).

    Args:
        soup: Parsed page exposing ``find_all(name, class_)``.

    Returns:
        ``(artist_list, art_weights)``: two parallel lists in document order.
        Weights are kept as strings. Divs whose name is empty after cleaning
        are skipped.
    """
    # dict.fromkeys drops duplicate divs just like set() did, but keeps the
    # order in which they appear in the page, so results are reproducible.
    artist_divs = dict.fromkeys(soup.find_all("div", "genre scanme"))

    artist_list = []
    art_weights = []

    for artist in artist_divs:
        # Remove the "»" link marker *before* stripping, otherwise whitespace
        # sitting in front of the marker survives as a trailing space.
        artist_name = artist.text.replace("»", "").strip()

        # A stripped string is never all-whitespace, so the emptiness check
        # is what actually filters out blank entries.
        if not artist_name:
            continue

        art_weights.append(artist['style'].split()[-1].replace('%', ''))
        artist_list.append(artist_name)

    return artist_list, art_weights


def fetch_similar_and_opposite_genres(soup, subgenre_name):
    """Collect the genres listed as similar and opposite on a genre page.

    Candidate divs are those with class ``"genre"`` that are not also
    ``"genre scanme"`` (artist) divs. A div whose ``id`` contains ``"nearby"``
    counts as a similar genre; one whose ``id`` contains ``"mirror"`` counts
    as an opposite genre. The weight is the last whitespace-separated token
    of the div's ``style`` attribute with ``%`` removed.

    Args:
        soup: Parsed page exposing ``find_all(name, class_)``.
        subgenre_name: Name of the genre the page belongs to; it is left out
            of the similar list.

    Returns:
        ``(similar_genres, sim_weights, opposite_genres, opp_weights)``: four
        lists in document order; each weights list is parallel to its names
        list and holds strings.
    """
    artist_divs = set(soup.find_all("div", "genre scanme"))
    # Ordered, de-duplicated replacement for the previous set difference, so
    # the output order follows the page instead of hash order.
    related_divs = [
        div
        for div in dict.fromkeys(soup.find_all("div", "genre"))
        if div not in artist_divs
    ]

    similar_genres = []
    opposite_genres = []
    sim_weights = []
    opp_weights = []

    for other_genre in related_divs:
        # Divs without an id are neither "nearby" nor "mirror"; skip them
        # rather than failing the whole page with a KeyError.
        div_id = other_genre.get('id') or ''
        is_similar = 'nearby' in div_id
        if not is_similar and 'mirror' not in div_id:
            continue

        # Strip after removing "»" so whitespace in front of the marker does
        # not leave a trailing space (which also defeated the self-filter).
        genre = other_genre.text.replace("»", '').strip()
        # Only read the style of divs that are actually used.
        weight = other_genre['style'].split()[-1].replace('%', '')

        if is_similar:
            if genre != subgenre_name:
                sim_weights.append(weight)
                similar_genres.append(genre)
        else:
            opp_weights.append(weight)
            opposite_genres.append(genre)

    return similar_genres, sim_weights, opposite_genres, opp_weights


def _fetch_page(url, max_attempts, timeout):
    """Return the response for ``url``, or ``None`` once every attempt failed."""
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, timeout=timeout)
            # Treat HTTP error statuses as failures so an error page is never
            # parsed as if it were genre data.
            response.raise_for_status()
            return response
        except RequestException as e:
            if attempt < max_attempts - 1:
                print(f"Request {url} failed with error '{e}', retrying... (attempt {attempt+1})")
            else:
                print(f"Max retries exceeded for {url}. Skipping...")
    return None


def genre_scraper(subgenre_subset, max_attempts=5, timeout=10):
    """Scrape the detail page of every genre label in ``subgenre_subset``.

    For each label the page
    ``http://everynoise.com/engenremap-<slug>.html`` is requested, where the
    slug is the label with ``: ' + » & -`` and whitespace removed. The
    playlist link, the artists and the similar/opposite genres are extracted
    from it.

    A genre whose page cannot be fetched is skipped entirely, so all eight
    returned lists stay index-aligned: entry ``i`` of every list describes
    the same genre.

    Args:
        subgenre_subset: Iterable of genre label strings as they appear on
            the genre map page.
        max_attempts: Maximum number of requests tried per page.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        A list of eight parallel lists, in this order: genre names, playlist
        links (``None`` when no link was found), similar genres, similar
        weights, opposite genres, opposite weights, artists, artist weights.
    """
    subgenre_list_artist = []
    subgenre_list_opp = []
    subgenre_list_sim = []
    subgenre_list = []
    playlists = []
    sim_weights = []
    opp_weights = []
    artist_weights = []

    split_genrecount = 0

    for genreDiv in subgenre_subset:
        # Raw strings: "\s" inside a normal string literal is an invalid
        # escape sequence and triggers a warning on recent Python versions.
        subgenre = re.sub(r"[:'+»&\s-]", '', genreDiv)
        subgenre_name = re.sub(r"[:+»\s]", ' ', genreDiv).strip()

        # flush=True so progress lines from parallel workers show up promptly.
        print('Pulling genre # {} - Subgenre: {}\n'.format(split_genrecount, subgenre_name), flush=True)

        subgenre_page = 'http://everynoise.com/engenremap-' + subgenre + '.html'
        response = _fetch_page(subgenre_page, max_attempts, timeout)
        if response is None:
            # Nothing has been recorded for this genre yet, so skipping here
            # cannot leave the result lists with different lengths.
            continue

        soup2 = BeautifulSoup(response.text, 'html.parser')

        # Spotify playlist link: the first anchor whose text is "playlist".
        spotify_link = soup2.find_all('a', text='playlist')
        playlist = spotify_link[0].get('href') if spotify_link else None

        artist_list, art_weights = fetch_artists(soup2)
        sim_genres, sim_gen_weights, opp_genres, opp_gen_weights = (
            fetch_similar_and_opposite_genres(soup2, subgenre_name)
        )

        # Record everything for this genre in one go, only after all of the
        # extraction steps have succeeded.
        subgenre_list.append(subgenre_name)
        playlists.append(playlist)
        subgenre_list_artist.append(artist_list)
        artist_weights.append(art_weights)
        subgenre_list_sim.append(sim_genres)
        subgenre_list_opp.append(opp_genres)
        sim_weights.append(sim_gen_weights)
        opp_weights.append(opp_gen_weights)

        split_genrecount += 1

    return [subgenre_list, playlists, subgenre_list_sim, sim_weights, subgenre_list_opp, opp_weights, subgenre_list_artist, artist_weights]

def parallel_wrapper(allGenreDivs, n_jobs=cpu_count()):
    """Scrape all genres in parallel and merge the per-worker results.

    The labels are split into ``n_jobs`` chunks, each chunk is handed to
    ``genre_scraper`` through joblib, and the fields returned for the
    individual chunks are concatenated in chunk order.

    Args:
        allGenreDivs: Sequence of genre label strings.
        n_jobs: Number of workers/chunks. ``None`` means 1. Negative values
            follow the joblib convention (``-1`` = all CPUs, ``-2`` = all but
            one, ...). They are resolved here because the value is also used
            as the chunk count, where a negative number is meaningless.

    Returns:
        One merged list per field returned by ``genre_scraper``, or ``[]``
        when there is nothing to scrape.

    Raises:
        ValueError: If ``n_jobs`` is 0.
    """
    if n_jobs is None:
        n_jobs = 1
    elif n_jobs < 0:
        n_jobs = max(cpu_count() + 1 + n_jobs, 1)
    elif n_jobs == 0:
        raise ValueError("n_jobs == 0 has no meaning")

    # Drop empty chunks (more workers than labels) so that no task is
    # scheduled just to return empty lists.
    subgenre_subsets = [
        subset for subset in chunkIt(allGenreDivs, n_jobs) if len(subset) > 0
    ]
    if not subgenre_subsets:
        return []

    results = Parallel(n_jobs=n_jobs)(delayed(genre_scraper)(subset) for subset in subgenre_subsets)

    # zip(*results) groups the same field across all chunks; flattening each
    # group yields one list per field covering every genre.
    return [
        [entry for chunk_values in field for entry in chunk_values]
        for field in zip(*results)
    ]

In [112]:
# Parse the genre map page held in `r` (fetched in an earlier cell) and select
# every <div> with class "genre scanme".
soup = BeautifulSoup(r.text,"html.parser")
allGenreDivs = soup.find_all("div", "genre scanme")
# Keep only the text of each div: plain strings are what genre_scraper
# expects and are cheap to send to the joblib workers.
agd = []
for i in allGenreDivs:
    agd.append(i.text)
# Time the full parallel scrape (wall-clock seconds).
t1 = time.time()
music_data = parallel_wrapper(agd)
t2 = time.time()
print('Time Taken to Gather Data: {:.02f}s'.format(t2-t1))

Pulling genre # 0 - Subgenre: musica feirense

Pulling genre # 0 - Subgenre: comedy

Pulling genre # 0 - Subgenre: rap salvadoreno

Pulling genre # 0 - Subgenre: dangdut

Pulling genre # 0 - Subgenre: galician rock

Pulling genre # 0 - Subgenre: pop

Pulling genre # 0 - Subgenre: south sudanese pop

Pulling genre # 0 - Subgenre: tallava

Pulling genre # 1 - Subgenre: fort worth indie

Pulling genre # 1 - Subgenre: bisaya worship

Pulling genre # 1 - Subgenre: garifuna folk

Pulling genre # 1 - Subgenre: street punk espanol

Pulling genre # 1 - Subgenre: german boom bap

Pulling genre # 1 - Subgenre: chicago house

Pulling genre # 1 - Subgenre: rap

Pulling genre # 1 - Subgenre: trancecore

Pulling genre # 2 - Subgenre: meenawati

Pulling genre # 2 - Subgenre: zim gospel

Pulling genre # 2 - Subgenre: jazz puertorriqueno

Pulling genre # 2 - Subgenre: congolese gospel

Pulling genre # 2 - Subgenre: pianissimo

Pulling genre # 2 - Subgenre: shabad

Pulling genre # 2 - Subgenre: rock

Pul

Pulling genre # 23 - Subgenre: canadian black metal

Pulling genre # 23 - Subgenre: danzon

Pulling genre # 23 - Subgenre: vincy soca

Pulling genre # 23 - Subgenre: cumbia peruana

Pulling genre # 21 - Subgenre: album rock

Pulling genre # 24 - Subgenre: ugandan hip hop

Pulling genre # 23 - Subgenre: french tech house

Pulling genre # 23 - Subgenre: czech punk

Pulling genre # 24 - Subgenre: polyphonies corses

Pulling genre # 24 - Subgenre: beach music

Pulling genre # 24 - Subgenre: rock chapin

Pulling genre # 24 - Subgenre: intelligent dance music

Pulling genre # 25 - Subgenre: candombe

Pulling genre # 24 - Subgenre: lesen

Pulling genre # 22 - Subgenre: contemporary country

Pulling genre # 24 - Subgenre: touhou

Pulling genre # 25 - Subgenre: samba gospel

Pulling genre # 25 - Subgenre: arkansas indie

Pulling genre # 25 - Subgenre: trival

Pulling genre # 25 - Subgenre: ocean

Pulling genre # 26 - Subgenre: finnish electronic

Pulling genre # 25 - Subgenre: russian edm

Pull

Pulling genre # 46 - Subgenre: trap maroc

Pulling genre # 45 - Subgenre: drone metal

Pulling genre # 45 - Subgenre: arab electronic

Pulling genre # 46 - Subgenre: serbian electronic

Pulling genre # 47 - Subgenre: streichquartett

Pulling genre # 44 - Subgenre: folklore argentino

Pulling genre # 47 - Subgenre: lao pop

Pulling genre # 42 - Subgenre: sertanejo universitario

Pulling genre # 47 - Subgenre: rhode island indie

Pulling genre # 46 - Subgenre: dream trance

Pulling genre # 46 - Subgenre: drain

Pulling genre # 47 - Subgenre: sheilat

Pulling genre # 48 - Subgenre: folclore castilla y leon

Pulling genre # 45 - Subgenre: boston rock

Pulling genre # 48 - Subgenre: indie pop rock

Pulling genre # 43 - Subgenre: alternative rock

Pulling genre # 48 - Subgenre: pittsburgh rock

Pulling genre # 47 - Subgenre: christian afrobeat

Pulling genre # 47 - Subgenre: classic french pop

Pulling genre # 48 - Subgenre: berlin minimal techno

Pulling genre # 49 - Subgenre: grim death me

Pulling genre # 65 - Subgenre: musica infantil

Pulling genre # 62 - Subgenre: gangster rap

Pulling genre # 69 - Subgenre: japanese techno

Pulling genre # 70 - Subgenre: danish folk

Pulling genre # 68 - Subgenre: british modern classical

Pulling genre # 69 - Subgenre: contemporary folk

Pulling genre # 69 - Subgenre: korean indie folk

Pulling genre # 68 - Subgenre: rap boricua

Pulling genre # 66 - Subgenre: gujarati pop

Pulling genre # 63 - Subgenre: latin alternative

Pulling genre # 71 - Subgenre: oromo pop

Pulling genre # 69 - Subgenre: hokkaido indie

Pulling genre # 70 - Subgenre: guam indie

Pulling genre # 70 - Subgenre: worcester ma indie

Pulling genre # 70 - Subgenre: west australian hip hop

Pulling genre # 69 - Subgenre: rock independant francais

Pulling genre # 67 - Subgenre: rock urbano mexicano

Pulling genre # 64 - Subgenre: ranchera

Pulling genre # 70 - Subgenre: modern hardcore

Pulling genre # 72 - Subgenre: anime game

Pulling genre # 71 - Subgenre: sesoth

Pulling genre # 90 - Subgenre: indonesian blues

Pulling genre # 83 - Subgenre: latin rock

Pulling genre # 91 - Subgenre: pinoy praise

Pulling genre # 93 - Subgenre: luxembourgian hip hop

Pulling genre # 89 - Subgenre: pop nacional antigas

Pulling genre # 92 - Subgenre: serbian indie

Pulling genre # 91 - Subgenre: drift

Pulling genre # 91 - Subgenre: ottawa indie

Pulling genre # 87 - Subgenre: italian underground hip hop

Pulling genre # 84 - Subgenre: rap metal

Pulling genre # 92 - Subgenre: malayalam indie

Pulling genre # 93 - Subgenre: hip hop galsen

Pulling genre # 94 - Subgenre: romanian contemporary classical

Pulling genre # 90 - Subgenre: parody

Pulling genre # 92 - Subgenre: kingston on indie

Pulling genre # 92 - Subgenre: french garage rock

Pulling genre # 88 - Subgenre: toronto indie

Pulling genre # 85 - Subgenre: modern alternative rock

Pulling genre # 95 - Subgenre: tuna

Pulling genre # 94 - Subgenre: rap abc paulista

Pulling genre # 93 - Subgenre: undergr

Pulling genre # 110 - Subgenre: easycore

Pulling genre # 114 - Subgenre: honky-tonk piano

Pulling genre # 115 - Subgenre: rap sergipano

Pulling genre # 112 - Subgenre: new mexico music

Pulling genre # 112 - Subgenre: medieval folk

Pulling genre # 104 - Subgenre: talent show

Pulling genre # 113 - Subgenre: memphis indie

Pulling genre # 108 - Subgenre: dangdut koplo

Pulling genre # 115 - Subgenre: taiwan metal

Pulling genre # 116 - Subgenre: belarusian metal

Pulling genre # 111 - Subgenre: indie rockism

Pulling genre # 113 - Subgenre: norsk lovsang

Pulling genre # 113 - Subgenre: japanese chill rap

Pulling genre # 105 - Subgenre: boy band

Pulling genre # 114 - Subgenre: ambient trance

Pulling genre # 116 - Subgenre: italian post-hardcore

Pulling genre # 109 - Subgenre: rock alternativo brasileiro

Pulling genre # 117 - Subgenre: norwegian blues

Pulling genre # 112 - Subgenre: american folk revival

Pulling genre # 114 - Subgenre: chinese traditional

Pulling genre # 114 

Pulling genre # 124 - Subgenre: tropical

Pulling genre # 129 - Subgenre: korean ost

Pulling genre # 135 - Subgenre: singing bowl

Pulling genre # 139 - Subgenre: kinderliedjies

Pulling genre # 138 - Subgenre: lapland hip hop

Pulling genre # 135 - Subgenre: peruvian indie

Pulling genre # 136 - Subgenre: indonesian metal

Pulling genre # 125 - Subgenre: argentine hip hop

Pulling genre # 130 - Subgenre: indonesian folk

Pulling genre # 136 - Subgenre: guatemalan pop

Pulling genre # 140 - Subgenre: deep free jazz

Pulling genre # 139 - Subgenre: kritika

Pulling genre # 136 - Subgenre: viral afropop

Pulling genre # 137 - Subgenre: nz gangsta rap

Pulling genre # 126 - Subgenre: psychedelic rock

Pulling genre # 131 - Subgenre: g-house

Pulling genre # 137 - Subgenre: australian ska

Pulling genre # 141 - Subgenre: guitar case

Pulling genre # 140 - Subgenre: dansk lovsang

Pulling genre # 138 - Subgenre: bouyon

Pulling genre # 137 - Subgenre: popgaze

Pulling genre # 127 - Subgenr

Pulling genre # 157 - Subgenre: trad quebecois

Pulling genre # 160 - Subgenre: mongolian folk

Pulling genre # 144 - Subgenre: german pop

Pulling genre # 161 - Subgenre: indie arequipeno

Pulling genre # 156 - Subgenre: kawaii edm

Pulling genre # 150 - Subgenre: duranguense

Pulling genre # 142 - Subgenre: melodic techno

Pulling genre # 157 - Subgenre: cape verdean folk

Pulling genre # 158 - Subgenre: modern psychedelic folk

Pulling genre # 161 - Subgenre: rap norteno chileno

Pulling genre # 162 - Subgenre: sinhala edm

Pulling genre # 157 - Subgenre: danseband

Pulling genre # 145 - Subgenre: mexican hip hop

Pulling genre # 151 - Subgenre: british folk

Pulling genre # 158 - Subgenre: retro metal

Pulling genre # 143 - Subgenre: colombian rock

Pulling genre # 159 - Subgenre: afrobeat fusion

Pulling genre # 162 - Subgenre: iowa indie

Pulling genre # 163 - Subgenre: javanese gamelan

Pulling genre # 158 - Subgenre: cowpunk

Pulling genre # 146 - Subgenre: desi hip hop

Pullin

Pulling genre # 178 - Subgenre: scottish folk

Pulling genre # 183 - Subgenre: norwegian experimental

Pulling genre # 164 - Subgenre: indie folk

Pulling genre # 184 - Subgenre: japanese buddhist chant

Pulling genre # 178 - Subgenre: neo-pagan

Pulling genre # 180 - Subgenre: erotica

Pulling genre # 163 - Subgenre: chilean indie

Pulling genre # 172 - Subgenre: j-pop boy group

Pulling genre # 184 - Subgenre: german renaissance

Pulling genre # 179 - Subgenre: taiwan idol pop

Pulling genre # 185 - Subgenre: jazz tuba

Pulling genre # 165 - Subgenre: disco

Pulling genre # 181 - Subgenre: musica potiguar

Pulling genre # 173 - Subgenre: russian drain

Pulling genre # 164 - Subgenre: brega funk

Pulling genre # 185 - Subgenre: chilean metal

Pulling genre # 180 - Subgenre: japanese math rock

Pulling genre # 186 - Subgenre: musica ecuatoguineana

Pulling genre # 166 - Subgenre: pluggnb

Pulling genre # 182 - Subgenre: swiss alternative rock

Pulling genre # 174 - Subgenre: brighton i

Pulling genre # 184 - Subgenre: mariachi

Pulling genre # 184 - Subgenre: darksynth

Pulling genre # 187 - Subgenre: beatdown

Pulling genre # 202 - Subgenre: malawian pop

Pulling genre # 193 - Subgenre: variete francaise

Pulling genre # 200 - Subgenre: musica juiz-forana

Pulling genre # 207 - Subgenre: macedonian rock

Pulling genre # 206 - Subgenre: burundian pop

Pulling genre # 185 - Subgenre: trap italiana

Pulling genre # 185 - Subgenre: alternative roots rock

Pulling genre # 203 - Subgenre: spanish indie rock

Pulling genre # 188 - Subgenre: musica para criancas

Pulling genre # 194 - Subgenre: early modern classical

Pulling genre # 201 - Subgenre: freak folk

Pulling genre # 208 - Subgenre: rap gasy

Pulling genre # 207 - Subgenre: russian folk metal

Pulling genre # 186 - Subgenre: melodic metalcore

Pulling genre # 204 - Subgenre: jazz violin

Pulling genre # 186 - Subgenre: cowboy western

Pulling genre # 202 - Subgenre: folkmusik

Pulling genre # 189 - Subgenre: louisi

Pulling genre # 204 - Subgenre: barbadian pop

Pulling genre # 205 - Subgenre: russian metal

Pulling genre # 229 - Subgenre: kolo

Pulling genre # 221 - Subgenre: czech classical

Pulling genre # 213 - Subgenre: new age piano

Pulling genre # 224 - Subgenre: pakistani rock

Pulling genre # 228 - Subgenre: polish psychedelia

Pulling genre # 209 - Subgenre: nz christian

Pulling genre # 205 - Subgenre: australian pop

Pulling genre # 206 - Subgenre: atmosphere

Pulling genre # 230 - Subgenre: dub product

Pulling genre # 222 - Subgenre: bolero cubano

Pulling genre # 214 - Subgenre: st louis rap

Pulling genre # 225 - Subgenre: reiki

Pulling genre # 229 - Subgenre: limerick indie

Pulling genre # 210 - Subgenre: telugu worship

Pulling genre # 206 - Subgenre: melancholia

Pulling genre # 207 - Subgenre: greek underground rap

Pulling genre # 231 - Subgenre: israeli classical

Pulling genre # 223 - Subgenre: classic bangla pop

Pulling genre # 215 - Subgenre: hel

Pulling genre # 226 -

Pulling genre # 249 - Subgenre: marinera

Pulling genre # 234 - Subgenre: scam rap

Pulling genre # 246 - Subgenre: euskal indie

Pulling genre # 251 - Subgenre: classical saxophone

Pulling genre # 243 - Subgenre: trot

Pulling genre # 231 - Subgenre: voetbal

Pulling genre # 227 - Subgenre: classic norwegian pop

Pulling genre # 226 - Subgenre: cali rap

Pulling genre # 235 - Subgenre: japanese classical

Pulling genre # 250 - Subgenre: izvorna muzika

Pulling genre # 244 - Subgenre: funk melody

Pulling genre # 247 - Subgenre: halloween

Pulling genre # 252 - Subgenre: serbian alternative rock

Pulling genre # 227 - Subgenre: rock-and-roll

Pulling genre # 232 - Subgenre: drill beats

Pulling genre # 228 - Subgenre: metallic hardcore

Pulling genre # 236 - Subgenre: haryanvi hip hop

Pulling genre # 251 - Subgenre: middle eastern traditional

Pulling genre # 245 - Subgenre: brazilian classical

Pulling genre # 253 - Subgenre: spanish renaissance

Pulling genre # 248 - Subgenre: roma

Pulling genre # 265 - Subgenre: spanish death metal

Pulling genre # 242 - Subgenre: emo mexicano

Pulling genre # 267 - Subgenre: siberian folk

Pulling genre # 250 - Subgenre: corridos alternativos

Pulling genre # 259 - Subgenre: chanson quebecois

Pulling genre # 234 - Subgenre: polish death metal

Pulling genre # 262 - Subgenre: madrigal

Pulling genre # 229 - Subgenre: francoton

Pulling genre # 266 - Subgenre: korean punk

Pulling genre # 243 - Subgenre: argentine punk

Pulling genre # 268 - Subgenre: hard glam

Pulling genre # 251 - Subgenre: hk-pop

Pulling genre # 260 - Subgenre: musica sarda

Pulling genre # 235 - Subgenre: chinese talent show

Pulling genre # 263 - Subgenre: deep soul house

Pulling genre # 230 - Subgenre: dream pop

Pulling genre # 267 - Subgenre: alandsk musik

Pulling genre # 269 - Subgenre: ukrainian ccm

Pulling genre # 244 - Subgenre: orgcore

Pulling genre # 252 - Subgenre: ukrainian viral pop

Pulling genre # 236 - Subgenre: musica eletronica gospel

Pulling genre # 289 - Subgenre: zambian gospel

Pulling genre # 271 - Subgenre: indie quebecois

Pulling genre # 280 - Subgenre: orebro indie

Pulling genre # 249 - Subgenre: sertanejo pop

Pulling genre # 288 - Subgenre: lincoln ne indie

Pulling genre # 256 - Subgenre: dub reggae

Pulling genre # 284 - Subgenre: moderne ludovky

Pulling genre # 264 - Subgenre: ambient guitar

Pulling genre # 290 - Subgenre: classic psychedelic rock

Pulling genre # 272 - Subgenre: soul jazz

Pulling genre # 281 - Subgenre: dutch americana

Pulling genre # 250 - Subgenre: neon pop punk

Pulling genre # 289 - Subgenre: early french punk

Pulling genre # 265 - Subgenre: peruvian hip hop

Pulling genre # 285 - Subgenre: detske pesnicky

Pulling genre # 257 - Subgenre: soviet synthpop

Pulling genre # 273 - Subgenre: electro swing

Pulling genre # 291 - Subgenre: yorkshire folk

Pulling genre # 282 - Subgenre: south african jazz

Pulling genre # 290 - Subgenre: bikutsi

Pulling genre # 251 - Subgenre: gru

Pulling genre # 301 - Subgenre: southern soul blues

Pulling genre # 277 - Subgenre: polish black metal

Pulling genre # 311 - Subgenre: rock pernambucano

Pulling genre # 269 - Subgenre: australian rock

Pulling genre # 312 - Subgenre: poetry

Pulling genre # 292 - Subgenre: swedish drill

Pulling genre # 286 - Subgenre: deutschrock

Pulling genre # 307 - Subgenre: lancashire indie

Pulling genre # 278 - Subgenre: gothenburg hip hop

Pulling genre # 302 - Subgenre: apostolic worship

Pulling genre # 312 - Subgenre: welsh choir

Pulling genre # 293 - Subgenre: cyberpunk

Pulling genre # 313 - Subgenre: protest folk

Pulling genre # 308 - Subgenre: french classical piano

Pulling genre # 270 - Subgenre: texas country

Pulling genre # 313 - Subgenre: indie nica

Pulling genre # 303 - Subgenre: galante era

Pulling genre # 279 - Subgenre: puerto rican folk

Pulling genre # 287 - Subgenre: lagu timur

Pulling genre # 294 - Subgenre: british indie rock

Pulling genre # 314 - Subgenre: anthe

Pulling genre # 312 - Subgenre: rap maroc

Pulling genre # 334 - Subgenre: downtempo fusion

Pulling genre # 323 - Subgenre: spacewave

Pulling genre # 287 - Subgenre: baroque pop

Pulling genre # 329 - Subgenre: french folk

Pulling genre # 307 - Subgenre: egyptian alternative

Pulling genre # 335 - Subgenre: kosovan folk

Pulling genre # 300 - Subgenre: progressive alternative

Pulling genre # 335 - Subgenre: ugandan gospel

Pulling genre # 313 - Subgenre: haryanvi pop

Pulling genre # 324 - Subgenre: dublin indie

Pulling genre # 288 - Subgenre: girl group

Pulling genre # 330 - Subgenre: new wave of speed metal

Pulling genre # 308 - Subgenre: ukrainian indie

Pulling genre # 336 - Subgenre: czsk black metal

Pulling genre # 301 - Subgenre: neofolk

Pulling genre # 336 - Subgenre: australian classical piano

Pulling genre # 314 - Subgenre: livetronica

Pulling genre # 289 - Subgenre: anime rock

Pulling genre # 331 - Subgenre: rock tico

Pulling genre # 337 - Subgenre: prank

Pulli

Pulling genre # 321 - Subgenre: utah indie

Pulling genre # 357 - Subgenre: cuatro puertorriqueno

Pulling genre # 345 - Subgenre: minimal wave

Pulling genre # 357 - Subgenre: garage rock mexicano

Pulling genre # 307 - Subgenre: canadian rock

Pulling genre # 334 - Subgenre: vegas indie

Pulling genre # 351 - Subgenre: kenyan drill

Pulling genre # 328 - Subgenre: russian rave

Pulling genre # 322 - Subgenre: bouzouki

Pulling genre # 358 - Subgenre: german choir

Pulling genre # 346 - Subgenre: uk bass

Pulling genre # 358 - Subgenre: british classical piano

Pulling genre # 308 - Subgenre: rockabilly

Pulling genre # 335 - Subgenre: chicha

Pulling genre # 352 - Subgenre: deep east coast hip hop

Pulling genre # 329 - Subgenre: shanty

Pulling genre # 323 - Subgenre: gyerekdalok

Pulling genre # 359 - Subgenre: soda pop

Pulling genre # 347 - Subgenre: salsa international

Pulling genre # 359 - Subgenre: irish fiddle

Pulling genre # 336 - Subgenre: asian american hip hop

Pulling 

Pulling genre # 349 - Subgenre: shonen

Pulling genre # 327 - Subgenre: pixie

Pulling genre # 355 - Subgenre: forro de favela

Pulling genre # 373 - Subgenre: latvian indie

Pulling genre # 367 - Subgenre: indie siciliano

Pulling genre # 380 - Subgenre: bulgarian metal

Pulling genre # 379 - Subgenre: scottish drill

Pulling genre # 344 - Subgenre: pennsylvania hardcore

Pulling genre # 328 - Subgenre: vapor trap

Pulling genre # 356 - Subgenre: traphall

Pulling genre # 350 - Subgenre: classic tollywood

Pulling genre # 368 - Subgenre: deep dnb

Pulling genre # 374 - Subgenre: japanese girl punk

Pulling genre # 380 - Subgenre: techno argentina

Pulling genre # 345 - Subgenre: ghanaian gospel

Pulling genre # 381 - Subgenre: bury st edmunds indie

Pulling genre # 357 - Subgenre: organic electronic

Pulling genre # 329 - Subgenre: hyperpop

Pulling genre # 351 - Subgenre: birmingham grime

Pulling genre # 375 - Subgenre: house argentino

Pulling genre # 382 - Subgenre: dweilorkest

P

Pulling genre # 365 - Subgenre: korean superband

Pulling genre # 376 - Subgenre: pet calming

Pulling genre # 402 - Subgenre: rock abc paulista

Pulling genre # 348 - Subgenre: pop edm

Pulling genre # 388 - Subgenre: eugene indie

Pulling genre # 401 - Subgenre: macedonian electronic

Pulling genre # 395 - Subgenre: musica paranaense

Pulling genre # 371 - Subgenre: thai folk rock

Pulling genre # 366 - Subgenre: polish prog

Pulling genre # 377 - Subgenre: argentine alternative rock

Pulling genre # 403 - Subgenre: rap paraense

Pulling genre # 349 - Subgenre: tejano

Pulling genre # 389 - Subgenre: israeli indie

Pulling genre # 402 - Subgenre: alternative metalcore

Pulling genre # 396 - Subgenre: deep southern trap

Pulling genre # 372 - Subgenre: post-metal

Pulling genre # 367 - Subgenre: devon indie

Pulling genre # 378 - Subgenre: black metal

Pulling genre # 404 - Subgenre: folklore panameno

Pulling genre # 403 - Subgenre: arab metal

Pulling genre # 350 - Subgenre: german 

Pulling genre # 368 - Subgenre: korean r&b

Pulling genre # 416 - Subgenre: technical grindcore

Pulling genre # 424 - Subgenre: african experimental

Pulling genre # 409 - Subgenre: dutch indie rock

Pulling genre # 397 - Subgenre: rap cearense

Pulling genre # 392 - Subgenre: cumbia lagunera

Pulling genre # 387 - Subgenre: musica yucateca

Pulling genre # 423 - Subgenre: uk post-metal

Pulling genre # 369 - Subgenre: pop flamenco

Pulling genre # 417 - Subgenre: mbira

Pulling genre # 410 - Subgenre: progressive death metal

Pulling genre # 425 - Subgenre: brazilian surf rock

Pulling genre # 398 - Subgenre: virginia hip hop

Pulling genre # 393 - Subgenre: progressive deathcore

Pulling genre # 388 - Subgenre: sound art

Pulling genre # 424 - Subgenre: forest black metal

Pulling genre # 418 - Subgenre: vaudeville

Pulling genre # 370 - Subgenre: meme rap

Pulling genre # 426 - Subgenre: folklore quebecois

Pulling genre # 411 - Subgenre: mgpjr

Pulling genre # 399 - Subgenre: oc r

Pulling genre # 430 - Subgenre: japanese rockabilly

Pulling genre # 413 - Subgenre: rock alternativo espanol

Pulling genre # 444 - Subgenre: pohadky

Pulling genre # 418 - Subgenre: polish classical

Pulling genre # 438 - Subgenre: italian post-punk

Pulling genre # 408 - Subgenre: birmingham indie

Pulling genre # 389 - Subgenre: spanish rock

Pulling genre # 447 - Subgenre: metal paraguayo

Pulling genre # 431 - Subgenre: schweizer rap

Pulling genre # 414 - Subgenre: japanese instrumental

Pulling genre # 445 - Subgenre: korean jazz

Pulling genre # 419 - Subgenre: sky room

Pulling genre # 439 - Subgenre: austrian black metal

Pulling genre # 390 - Subgenre: la pop

Pulling genre # 409 - Subgenre: uppsala indie

Pulling genre # 448 - Subgenre: pinoy traditional

Pulling genre # 415 - Subgenre: boston punk

Pulling genre # 446 - Subgenre: british brass band

Pulling genre # 432 - Subgenre: yaoi

Pulling genre # 440 - Subgenre: musique acadienne

Pulling genre # 420 - Subgenre: aus

Pulling genre # 466 - Subgenre: cypriot hip hop

Pulling genre # 409 - Subgenre: pop emo

Pulling genre # 452 - Subgenre: melodipop

Pulling genre # 460 - Subgenre: judaica

Pulling genre # 435 - Subgenre: smutny rap

Pulling genre # 469 - Subgenre: french orchestra

Pulling genre # 430 - Subgenre: german prog

Pulling genre # 467 - Subgenre: northwest china indie

Pulling genre # 440 - Subgenre: cumbia ranchera

Pulling genre # 410 - Subgenre: country pop

Pulling genre # 453 - Subgenre: j-core

Pulling genre # 461 - Subgenre: galician folk

Pulling genre # 436 - Subgenre: uk post-punk revival

Pulling genre # 470 - Subgenre: moorish traditional

Pulling genre # 431 - Subgenre: venda pop

Pulling genre # 468 - Subgenre: rez country

Pulling genre # 441 - Subgenre: canadian electropop

Pulling genre # 454 - Subgenre: drill and bass

Pulling genre # 411 - Subgenre: electro latino

Pulling genre # 471 - Subgenre: thai traditional

Pulling genre # 462 - Subgenre: brazilian tech house

Pul

Pulling genre # 481 - Subgenre: rautalanka

Pulling genre # 451 - Subgenre: montenegrin pop

Pulling genre # 429 - Subgenre: flamenco urbano

Pulling genre # 456 - Subgenre: eurobeat

Pulling genre # 491 - Subgenre: pansori

Pulling genre # 482 - Subgenre: blackened crust

Pulling genre # 452 - Subgenre: rap femenino mexicano

Pulling genre # 474 - Subgenre: arunachal indie

Pulling genre # 489 - Subgenre: war metal

Pulling genre # 461 - Subgenre: indian edm

Pulling genre # 430 - Subgenre: tollywood

Pulling genre # 457 - Subgenre: harmonica blues

Pulling genre # 483 - Subgenre: jewish a capella

Pulling genre # 453 - Subgenre: charango

Pulling genre # 492 - Subgenre: experimental poetry

Pulling genre # 490 - Subgenre: lastelaulud

Pulling genre # 462 - Subgenre: israeli rock

Pulling genre # 475 - Subgenre: new orleans soul

Pulling genre # 458 - Subgenre: finnish alternative rock

Pulling genre # 431 - Subgenre: video game music

Pulling genre # 454 - Subgenre: sepedi pop

Pulli

Pulling genre # 450 - Subgenre: cantopop

Pulling genre # 510 - Subgenre: afghan traditional

Pulling genre # 512 - Subgenre: turkmen hip hop

Pulling genre # 480 - Subgenre: vapor pop

Pulling genre # 478 - Subgenre: witch house

Pulling genre # 496 - Subgenre: korean underground rap

Pulling genre # 474 - Subgenre: austin rock

Pulling genre # 451 - Subgenre: chillhop

Pulling genre # 503 - Subgenre: atlantic canada hip hop

Pulling genre # 511 - Subgenre: swedish fiddle

Pulling genre # 481 - Subgenre: kazakh pop

Pulling genre # 513 - Subgenre: ethnomusicology

Pulling genre # 479 - Subgenre: louvor

Pulling genre # 475 - Subgenre: rock of gibraltar

Pulling genre # 497 - Subgenre: tone

Pulling genre # 452 - Subgenre: latin worship

Pulling genre # 482 - Subgenre: australian house

Pulling genre # 504 - Subgenre: jalsat

Pulling genre # 512 - Subgenre: romanian metal

Pulling genre # 514 - Subgenre: turkish hardcore

Pulling genre # 476 - Subgenre: icelandic hip hop

Pulling genre

Pulling genre # 523 - Subgenre: swedish post-hardcore

Pulling genre # 534 - Subgenre: danish contemporary classical

Pulling genre # 502 - Subgenre: focus beats

Pulling genre # 500 - Subgenre: surf music

Pulling genre # 530 - Subgenre: latvian folk

Pulling genre # 518 - Subgenre: russian trance

Pulling genre # 472 - Subgenre: canadian singer-songwriter

Pulling genre # 524 - Subgenre: southern china indie

Pulling genre # 496 - Subgenre: berlin school

Pulling genre # 503 - Subgenre: musica tradicional cubana

Pulling genre # 535 - Subgenre: inuit traditional

Pulling genre # 531 - Subgenre: indie poblano

Pulling genre # 501 - Subgenre: manchester indie

Pulling genre # 519 - Subgenre: carimbo

Pulling genre # 525 - Subgenre: musique touareg

Pulling genre # 473 - Subgenre: classic swedish pop

Pulling genre # 504 - Subgenre: afghan pop

Pulling genre # 497 - Subgenre: fallen angel

Pulling genre # 532 - Subgenre: oulu indie

Pulling genre # 536 - Subgenre: tibetan folk pop

Pull

Pulling genre # 492 - Subgenre: japanese vgm

Pulling genre # 545 - Subgenre: nyc metal

Pulling genre # 523 - Subgenre: russian electronic

Pulling genre # 556 - Subgenre: portuguese early music

Pulling genre # 517 - Subgenre: indie psych-pop

Pulling genre # 551 - Subgenre: jug band

Pulling genre # 493 - Subgenre: uk metalcore

Pulling genre # 539 - Subgenre: uk worship

Pulling genre # 520 - Subgenre: vocal harmony group

Pulling genre # 557 - Subgenre: suomisaundi

Pulling genre # 546 - Subgenre: psybreaks

Pulling genre # 552 - Subgenre: shanghai indie

Pulling genre # 518 - Subgenre: russian ccm

Pulling genre # 524 - Subgenre: south african rock

Pulling genre # 494 - Subgenre: jam band

Pulling genre # 540 - Subgenre: neru

Pulling genre # 521 - Subgenre: new comedy

Pulling genre # 547 - Subgenre: darbuka

Pulling genre # 558 - Subgenre: metal baiano

Pulling genre # 553 - Subgenre: lovecraftian metal

Pulling genre # 541 - Subgenre: zamba

Pulling genre # 522 - Subgenre: de

Pulling genre # 565 - Subgenre: traditional english folk

Pulling genre # 512 - Subgenre: edmonton indie

Pulling genre # 575 - Subgenre: canadian shoegaze

Pulling genre # 562 - Subgenre: kundiman

Pulling genre # 579 - Subgenre: austrian choir

Pulling genre # 540 - Subgenre: irish folk

Pulling genre # 513 - Subgenre: lo-fi jazzhop

Pulling genre # 576 - Subgenre: adelaide punk

Pulling genre # 538 - Subgenre: sound effects

Pulling genre # 580 - Subgenre: yiddish folk

Pulling genre # 542 - Subgenre: venezuelan indie

Pulling genre # 566 - Subgenre: nordic shoegaze

Pulling genre # 563 - Subgenre: granada indie

Pulling genre # 514 - Subgenre: etherpop

Pulling genre # 577 - Subgenre: turkish experimental

Pulling genre # 541 - Subgenre: celtic punk

Pulling genre # 543 - Subgenre: ethnotronica

Pulling genre # 581 - Subgenre: italian orchestra

Pulling genre # 567 - Subgenre: punk galego

Pulling genre # 564 - Subgenre: tipico

Pulling genre # 515 - Subgenre: pop rap brasileiro

P

Pulling genre # 582 - Subgenre: funana

Pulling genre # 601 - Subgenre: romanian classical piano

Pulling genre # 560 - Subgenre: viking metal

Pulling genre # 533 - Subgenre: taiwan pop

Pulling genre # 559 - Subgenre: nordic contemporary classical

Pulling genre # 599 - Subgenre: spanish post-rock

Pulling genre # 566 - Subgenre: slovak rock

Pulling genre # 589 - Subgenre: norwegian gospel

Pulling genre # 583 - Subgenre: melodic hard rock

Pulling genre # 561 - Subgenre: dub

Pulling genre # 534 - Subgenre: swamp rock

Pulling genre # 602 - Subgenre: czech contemporary classical

Pulling genre # 560 - Subgenre: bagpipe

Pulling genre # 590 - Subgenre: tribute

Pulling genre # 567 - Subgenre: mundart

Pulling genre # 600 - Subgenre: hungarian contemporary classical

Pulling genre # 535 - Subgenre: rap dominicano

Pulling genre # 562 - Subgenre: post-rock

Pulling genre # 603 - Subgenre: hawaiian punk

Pulling genre # 584 - Subgenre: merengue tipico

Pulling genre # 561 - Subgenre: f

Pulling genre # 587 - Subgenre: rock quebecois

Pulling genre # 580 - Subgenre: cosmic post-rock

Pulling genre # 604 - Subgenre: children's story

Pulling genre # 609 - Subgenre: sevilla indie

Pulling genre # 623 - Subgenre: kazakh traditional

Pulling genre # 620 - Subgenre: modern chamber music

Pulling genre # 582 - Subgenre: japanese vtuber

Pulling genre # 555 - Subgenre: musica chihuahuense

Pulling genre # 588 - Subgenre: turkish modern jazz

Pulling genre # 581 - Subgenre: jazz catala

Pulling genre # 621 - Subgenre: folk metal latinoamericano

Pulling genre # 610 - Subgenre: euroska

Pulling genre # 605 - Subgenre: deep techno

Pulling genre # 624 - Subgenre: baltic classical piano

Pulling genre # 556 - Subgenre: lo-fi product

Pulling genre # 583 - Subgenre: french romanticism

Pulling genre # 589 - Subgenre: malian blues

Pulling genre # 611 - Subgenre: dark psytrance

Pulling genre # 622 - Subgenre: melodic progressive metal

Pulling genre # 625 - Subgenre: necrogrind

P

Pulling genre # 624 - Subgenre: tibetan pop

Pulling genre # 604 - Subgenre: thai trap

Pulling genre # 643 - Subgenre: kompa chretien

Pulling genre # 645 - Subgenre: peruvian death metal

Pulling genre # 609 - Subgenre: british jazz

Pulling genre # 631 - Subgenre: old school nederhop

Pulling genre # 601 - Subgenre: rap metal espanol

Pulling genre # 575 - Subgenre: crunk

Pulling genre # 625 - Subgenre: rap inde

Pulling genre # 605 - Subgenre: deep turkish pop

Pulling genre # 644 - Subgenre: vintage norwegian pop

Pulling genre # 646 - Subgenre: ethereal gothic

Pulling genre # 632 - Subgenre: experimental psych

Pulling genre # 610 - Subgenre: musica alagoana

Pulling genre # 602 - Subgenre: deep idm

Pulling genre # 576 - Subgenre: axe

Pulling genre # 626 - Subgenre: bayerischer rap

Pulling genre # 645 - Subgenre: faroese folk

Pulling genre # 606 - Subgenre: punk urbano

Pulling genre # 611 - Subgenre: new italo disco

Pulling genre # 647 - Subgenre: taarab

Pulling genre # 

Pulling genre # 622 - Subgenre: hungarian underground rap

Pulling genre # 629 - Subgenre: turkce trap metal

Pulling genre # 653 - Subgenre: musica de intervencao

Pulling genre # 667 - Subgenre: cantonese traditional

Pulling genre # 666 - Subgenre: neo honky tonk

Pulling genre # 626 - Subgenre: indie jazz

Pulling genre # 646 - Subgenre: keroncong

Pulling genre # 623 - Subgenre: breton folk

Pulling genre # 654 - Subgenre: komedi

Pulling genre # 630 - Subgenre: disco polo

Pulling genre # 596 - Subgenre: old school hip hop

Pulling genre # 668 - Subgenre: boy soprano

Pulling genre # 667 - Subgenre: deep classic garage rock

Pulling genre # 627 - Subgenre: mexican pop punk

Pulling genre # 624 - Subgenre: neo-kraut

Pulling genre # 647 - Subgenre: symphonic death metal

Pulling genre # 631 - Subgenre: bangla pop

Pulling genre # 655 - Subgenre: jazz venezolano

Pulling genre # 668 - Subgenre: vintage swing

Pulling genre # 597 - Subgenre: uk alternative hip hop

Pulling genre # 6

Pulling genre # 616 - Subgenre: smooth jazz

Pulling genre # 647 - Subgenre: swedish hard rock

Pulling genre # 689 - Subgenre: musica mogiana

Pulling genre # 666 - Subgenre: indonesian indie pop

Pulling genre # 674 - Subgenre: deep discofox

Pulling genre # 688 - Subgenre: dutch underground hip hop

Pulling genre # 651 - Subgenre: fingerstyle

Pulling genre # 644 - Subgenre: hungarian classical performance

Pulling genre # 617 - Subgenre: jazz funk

Pulling genre # 667 - Subgenre: bleep techno

Pulling genre # 648 - Subgenre: washington indie

Pulling genre # 675 - Subgenre: thai instrumental

Pulling genre # 652 - Subgenre: memphis blues

Pulling genre # 645 - Subgenre: marimba de guatemala

Pulling genre # 689 - Subgenre: russian power metal

Pulling genre # 690 - Subgenre: draaiorgel

Pulling genre # 618 - Subgenre: trova

Pulling genre # 668 - Subgenre: bogor indie

Pulling genre # 676 - Subgenre: szanty

Pulling genre # 653 - Subgenre: experimental classical

Pulling genre # 64

Pulling genre # 709 - Subgenre: karen pop

Pulling genre # 710 - Subgenre: peruvian experimental

Pulling genre # 665 - Subgenre: music hall

Pulling genre # 668 - Subgenre: chihuahua indie

Pulling genre # 637 - Subgenre: albanian hip hop

Pulling genre # 697 - Subgenre: estonian indie

Pulling genre # 688 - Subgenre: danish post-punk

Pulling genre # 711 - Subgenre: vintage swoon

Pulling genre # 710 - Subgenre: jazz dominicano

Pulling genre # 672 - Subgenre: sinhala indie

Pulling genre # 666 - Subgenre: afrikaans hip hop

Pulling genre # 669 - Subgenre: christian metalcore

Pulling genre # 638 - Subgenre: nashville sound

Pulling genre # 698 - Subgenre: indian metal

Pulling genre # 689 - Subgenre: spanish folk rock

Pulling genre # 712 - Subgenre: wandelweiser

Pulling genre # 711 - Subgenre: grunge revival

Pulling genre # 673 - Subgenre: magyar alternative

Pulling genre # 667 - Subgenre: thall

Pulling genre # 639 - Subgenre: rap genovese

Pulling genre # 670 - Subgenre: melod

Pulling genre # 730 - Subgenre: russian black metal

Pulling genre # 688 - Subgenre: gospel r&b

Pulling genre # 732 - Subgenre: chinese opera

Pulling genre # 659 - Subgenre: chicano rap

Pulling genre # 719 - Subgenre: australian hardcore

Pulling genre # 694 - Subgenre: atlanta bass

Pulling genre # 709 - Subgenre: dutch prog

Pulling genre # 731 - Subgenre: classic ukrainian pop

Pulling genre # 733 - Subgenre: deep breakcore

Pulling genre # 660 - Subgenre: finnish hip hop

Pulling genre # 687 - Subgenre: southampton indie

Pulling genre # 689 - Subgenre: taiwan hip hop

Pulling genre # 720 - Subgenre: sean-nos singing

Pulling genre # 695 - Subgenre: futuristic swag

Pulling genre # 710 - Subgenre: musica istmena

Pulling genre # 661 - Subgenre: disco house

Pulling genre # 734 - Subgenre: papuan traditional

Pulling genre # 688 - Subgenre: dub punk

Pulling genre # 732 - Subgenre: pops orchestra

Pulling genre # 690 - Subgenre: christian trap

Pulling genre # 721 - Subgenre: sou

Pulling genre # 680 - Subgenre: new jersey rap

Pulling genre # 754 - Subgenre: himene tarava

Pulling genre # 707 - Subgenre: norwegian americana

Pulling genre # 730 - Subgenre: persian melodic rap

Pulling genre # 710 - Subgenre: chiptune

Pulling genre # 715 - Subgenre: straight edge

Pulling genre # 741 - Subgenre: canadian death metal

Pulling genre # 752 - Subgenre: chinese experimental

Pulling genre # 681 - Subgenre: covertronica

Pulling genre # 755 - Subgenre: yunnan traditional

Pulling genre # 708 - Subgenre: uk beatdown

Pulling genre # 731 - Subgenre: tampa indie

Pulling genre # 711 - Subgenre: afrikaans

Pulling genre # 742 - Subgenre: hampton roads indie

Pulling genre # 682 - Subgenre: british singer-songwriter

Pulling genre # 756 - Subgenre: classical piano quartet

Pulling genre # 716 - Subgenre: vgm remix

Pulling genre # 709 - Subgenre: czsk reggae

Pulling genre # 732 - Subgenre: musica ayacuchana

Pulling genre # 753 - Subgenre: hungarian classical piano

Pull

Pulling genre # 733 - Subgenre: canadian experimental

Pulling genre # 706 - Subgenre: small room

Pulling genre # 758 - Subgenre: chinese indie rock

Pulling genre # 741 - Subgenre: collage pop

Pulling genre # 737 - Subgenre: future rock

Pulling genre # 734 - Subgenre: rap galego

Pulling genre # 707 - Subgenre: philly indie

Pulling genre # 759 - Subgenre: cyber metal

Pulling genre # 738 - Subgenre: ethereal wave

Pulling genre # 742 - Subgenre: ontario indie

Pulling genre # 735 - Subgenre: fijian pop

Pulling genre # 708 - Subgenre: japanese emo

Pulling genre # 760 - Subgenre: modern funk

Pulling genre # 743 - Subgenre: russian chanson

Pulling genre # 739 - Subgenre: northern irish indie

Pulling genre # 736 - Subgenre: deep deep house

Pulling genre # 709 - Subgenre: gothic metal

Pulling genre # 761 - Subgenre: smooth soul

Pulling genre # 744 - Subgenre: kurdish hip hop

Pulling genre # 740 - Subgenre: spanish folk metal

Pulling genre # 710 - Subgenre: indonesian rock

Pu

In [5]:
import sys
import re
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

class GenreProcessingError(Exception):
    """Error raised while fetching or processing a single genre page.

    Attributes:
        genre: The genre that was being processed when the error occurred.
    """

    def __init__(self, message, genre, *args, **kwargs):
        # Remember which genre failed so callers can report it, then let
        # Exception store the message and any extra positional arguments.
        self.genre = genre
        super().__init__(message, *args, **kwargs)

def fetch_genre_page(subgenre, timeout=30):
    """Download and parse the everynoise.com page for one subgenre.

    Args:
        subgenre: Genre slug inserted into the page URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the whole scrape indefinitely.

    Returns:
        A BeautifulSoup document built from the response body.

    Raises:
        GenreProcessingError: If the request fails, times out, or the server
            answers with an HTTP error status.
    """
    genre_page_url = f"http://everynoise.com/engenremap-{subgenre}.html"
    try:
        response = requests.get(genre_page_url, timeout=timeout)
        # Without this, a 404/500 error page would be parsed as if it were a
        # genre page and silently yield empty results.
        response.raise_for_status()
    except requests.RequestException as request_err:
        raise GenreProcessingError("Error fetching genre page.", subgenre) from request_err
    return BeautifulSoup(response.text, "html.parser")

def process_genre(genre_div):
    """Scrape one genre page and collect its playlist, artists and related genres.

    Args:
        genre_div: Element whose ``.text`` holds the genre name; punctuation
            and whitespace are stripped from it to build the page slug.

    Returns:
        A dict with the keys ``genre``, ``playlist``, ``artist_weights``,
        ``artists``, ``sim_weights``, ``opp_weights``, ``sim_genres`` and
        ``opp_genres``. Each ``*_weights`` list is index-aligned with its
        matching name list.

    Raises:
        GenreProcessingError: Propagated from ``fetch_genre_page``.
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    genre = re.sub(r"[:'+»&\s-]", '', genre_div.text)
    soup2 = fetch_genre_page(genre)

    spotify_link = soup2.find_all("a", text='playlist')
    playlist = spotify_link[0]['href'] if len(spotify_link) > 0 else None

    all_artist_divs = set(soup2.find_all("div", "genre scanme"))
    all_genres_related = set(soup2.find_all("div", "genre")) - all_artist_divs

    def label_of(div):
        # Visible text of the div without the trailing "»" marker.
        return div.text.strip().replace("»", "")

    def weight_of(div):
        # Last token of the inline style attribute with the "%" sign removed.
        return div['style'].split()[-1].replace('%', '')

    # Build names and weights in a single pass so a skipped artist can never
    # leave the two lists with different lengths.
    artists = []
    artist_weights = []
    for artist_div in all_artist_divs:
        name = label_of(artist_div)
        if not name or name.isspace():
            continue
        artists.append(name)
        artist_weights.append(weight_of(artist_div))

    sim_genres, sim_weights = [], []
    opp_genres, opp_weights = [], []
    for related_div in all_genres_related:
        # Divs without an id are neither "nearby" nor "mirror"; skip them
        # instead of failing with a KeyError.
        div_id = related_div.get('id', '')
        if 'nearby' in div_id:
            sim_genres.append(label_of(related_div))
            sim_weights.append(weight_of(related_div))
        if 'mirror' in div_id:
            opp_genres.append(label_of(related_div))
            opp_weights.append(weight_of(related_div))

    return {
        'genre': genre,
        'playlist': playlist,
        'artist_weights': artist_weights,
        'artists': artists,
        'sim_weights': sim_weights,
        'opp_weights': opp_weights,
        'sim_genres': sim_genres,
        'opp_genres': opp_genres,
    }
    
def process_genres_and_update_csv(filename):
    """Fill in CSV rows that have no Spotify URL by re-scraping their genre.

    Reads ``filename`` into a DataFrame, calls ``process_genre`` for every row
    whose ``SPOTIFY_URL`` is null, writes the scraped values into that row and
    finally saves the DataFrame back to ``filename`` without the index.
    Genres that raise ``GenreProcessingError`` are reported and left as-is.
    """
    df = pd.read_csv(filename)

    # (CSV column, key in the process_genre result); values are comma-joined.
    joined_columns = (
        ('SIM_GENRES', 'sim_genres'),
        ('SIM_WEIGHTS', 'sim_weights'),
        ('OPP_GENRES', 'opp_genres'),
        ('OPP_WEIGHTS', 'opp_weights'),
        ('REL_ARTISTS', 'artists'),
        ('ARTIST_WEIGHTS', 'artist_weights'),
    )

    for idx, row in df.iterrows():
        # Rows that already have a playlist need no work.
        if not pd.isnull(row['SPOTIFY_URL']):
            continue

        # process_genre only reads `.text`, so wrap the name in a throwaway div.
        genre_div = BeautifulSoup(f'<div>{row["GENRE"]}</div>', "html.parser")
        try:
            result = process_genre(genre_div)
            print(f"Processed genre #{idx}: {result['genre']}")

            for column, key in joined_columns:
                df.at[idx, column] = ', '.join(result[key])
            df.at[idx, 'SPOTIFY_URL'] = result['playlist']
        except GenreProcessingError as gpe:
            print(f"Error processing genre #{idx} ({gpe.genre}): {gpe}")

    # Persist the updated table over the original file.
    df.to_csv(filename, index=False)

# Fill in the CSV rows that still lack a Spotify playlist URL.
filename = "all_genres1.csv"
process_genres_and_update_csv(filename)

# Fetch the full genre map and collect every genre div listed on it.
response = requests.get("http://everynoise.com/engenremap.html")
soup = BeautifulSoup(response.text, "html.parser")
all_genre_divs = soup.find_all("div", "genre scanme")

results = []

for index, genre_div in enumerate(all_genre_divs):
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    genre = re.sub(r"[:'+»&\s-]", '', genre_div.text)

    # NOTE(review): genres_to_process is not defined in this cell; presumably
    # it is set by another cell -- confirm before running on a fresh kernel.
    if genre in genres_to_process:
        try:
            result = process_genre(genre_div)
            results.append(result)
            print(f"Processed genre #{index}: {result['genre']}")
        except GenreProcessingError as gpe:
            print(f"Error processing genre #{index} ({gpe.genre}): {gpe}")


Processed genre #208: hardcorehiphop
Processed genre #211: popvenezolano
Processed genre #212: frenchpop
Processed genre #215: popreggaeton
Processed genre #216: classicsoul
Processed genre #219: dutchhouse
Processed genre #220: orchestralsoundtrack
Processed genre #222: popsoul
Processed genre #224: outlawcountry
Processed genre #225: braziliangospel
Processed genre #226: calirap
Processed genre #434: viralrap
Processed genre #438: indonesiansingersongwriter
Processed genre #441: ambientpop
Processed genre #442: ukalternativepop
Processed genre #443: kentuckyhiphop
Processed genre #448: brazilianreggae
Processed genre #449: czskhiphop
Processed genre #452: latinworship
Processed genre #454: jazzpop
Processed genre #458: pagodebaiano
Processed genre #465: mexicanindie
Processed genre #466: earlymusic
Processed genre #467: germandrill
Processed genre #468: frenchrock
Processed genre #469: elpasoindie
Processed genre #470: countryrap
Processed genre #477: drillespanol
Processed genre #47

Processed genre #1265: afghanpop
Processed genre #1266: russianpostpunk
Processed genre #1267: boombapbrasileiro
Processed genre #1269: chicagosoul
Processed genre #1271: canzonenapoletana
Processed genre #1275: ghanaianpop
Processed genre #1276: classiccantopop
Processed genre #1280: psychedelictrance
Processed genre #1287: swedishindierock
Processed genre #1290: russianpunk
Processed genre #1296: cancioninfantillatinoamericana
Processed genre #1297: riotgrrrl
Processed genre #1300: australianalternativepop
Processed genre #1302: celticpunk
Processed genre #1305: ukrainianrock
Processed genre #1307: mexicanclassicrock
Processed genre #1312: russiandrill
Processed genre #1315: sludgemetal
Processed genre #1318: neueneuedeutschewelle
Processed genre #1321: vikingmetal
Processed genre #1324: mariachicristiano
Processed genre #1325: canadianccm
Processed genre #1332: classicrussianpop
Processed genre #1334: anthememo
Processed genre #1336: folkrockitaliano
Processed genre #1337: kiwirock


Processed genre #2084: frenchdeathmetal
Processed genre #2087: polishpunk
Processed genre #2088: indianrock
Processed genre #2092: deepchill
Processed genre #2095: musicaperbambini
Processed genre #2097: jreggae
Processed genre #2098: balkanbeats
Processed genre #2100: bostonmetal
Processed genre #2101: ocindie
Processed genre #2102: worldfusion
Processed genre #2108: grooveroom
Processed genre #2109: acidhouse
Processed genre #2110: rockquebecois
Processed genre #2112: malianblues
Processed genre #2117: southafricantrap
Processed genre #2118: estonianhiphop
Processed genre #2119: fremantleindie
Processed genre #2120: neotraditionalbluegrass
Processed genre #2122: japanesedancepop
Processed genre #2124: alaskaindie
Processed genre #2126: salsachoke
Processed genre #2129: nzhiphop
Processed genre #2130: bongoflava
Processed genre #2131: deeptechhouse
Processed genre #2133: musicaalagoana
Processed genre #2134: newitalodisco
Processed genre #2135: vlaamsekinderliedje
Processed genre #213

Processed genre #2923: danishjazz
Processed genre #2924: rapundergroundcolombiano
Processed genre #2926: autonomousblackmetal
Processed genre #2929: kannadapop
Processed genre #2931: symphonicdeathmetal
Processed genre #2932: nordnorskmusikk
Processed genre #2938: classicsinhalapop
Processed genre #2946: cypriotpop
Processed genre #2947: vietnamesebolero
Processed genre #2948: mongolianalternative
Processed genre #2950: indonesianindiepop
Processed genre #2951: bleeptechno
Processed genre #2952: bogorindie
Processed genre #2954: wufam
Processed genre #2955: bassmusic
Processed genre #2959: munichelectronic
Processed genre #2964: victorianbritain
Processed genre #2967: horrorsynth
Processed genre #2968: indienapoletano
Processed genre #2970: coveracustico
Processed genre #2972: danishpostpunk
Processed genre #2973: spanishfolkrock
Processed genre #2976: spanishreggae
Processed genre #2977: atmosphericdnb
Processed genre #2978: okcindie
Processed genre #2979: windsoronindie
Processed gen

Processed genre #3743: armenianfolk
Processed genre #3745: singaporeanhiphop
Processed genre #3746: indiefolkitaliano
Processed genre #3747: musicalombarda
Processed genre #3748: halifaxindie
Processed genre #3750: mississippiindie
Processed genre #3751: frenchblackmetal
Processed genre #3753: norwegianamericana
Processed genre #3754: ukbeatdown
Processed genre #3755: czskreggae
Processed genre #3756: aarhusindie
Processed genre #3757: deepbrazilianpop
Processed genre #3758: segamauricien
Processed genre #3759: histoirepourenfants
Processed genre #3761: tucsonindie
Processed genre #3762: nyroots
Processed genre #3763: musicaevangelicainstrumental
Processed genre #3764: deepdubstep
Processed genre #3765: northdakotaindie
Processed genre #3766: swedishprog
Processed genre #3767: ryukyuongaku
Processed genre #3770: flemishfolk
Processed genre #3771: garagepsych
Processed genre #3773: britishindustrial
Processed genre #3774: bahamianpop
Processed genre #3775: spanishrockabilly
Processed ge

Processed genre #4560: swedishexperimental
Processed genre #4561: czechalternativerap
Processed genre #4563: czskelectropop
Processed genre #4565: deeprb
Processed genre #4566: dessinanime
Processed genre #4568: japanesedeathmetal
Processed genre #4801: hawaiianindie
Processed genre #4802: organicambient
Processed genre #4806: geininsong
Processed genre #4810: mexicanpostrock
Processed genre #4811: sufichant
Processed genre #4812: faroeserock
Processed genre #4814: progressivethrash
Processed genre #4818: southcarolinametal
Processed genre #4819: australianpostpunk
Processed genre #5058: warmetal
Processed genre #5060: hammereddulcimer
Processed genre #5062: derryindie
Processed genre #5064: hotjazz
Processed genre #5066: musicamadeirense
Processed genre #5068: norwegiantechno
Processed genre #5069: newhampshireindie
Processed genre #5071: italianblackmetal
Processed genre #5075: namibianpop
Processed genre #5077: danishclassical
Processed genre #5085: bernindie
Processed genre #5087: 

Processed genre #5894: accordionband
Processed genre #5895: metaluruguayo
Processed genre #5897: balticblackmetal
Processed genre #5899: fifeanddrum
Processed genre #5900: koreanhardcore
Processed genre #5901: alternativehardcore
Processed genre #5905: classicalguitarquartet
Processed genre #5906: marathibalgeet
Processed genre #5908: balticchoir
Processed genre #5909: chaoticblackmetal
Processed genre #5910: austrianchoir
Processed genre #5911: yiddishfolk
Processed genre #5914: chineseblackmetal
Processed genre #5916: mexicanblackmetal
Processed genre #5923: maliantraditional
Processed genre #5927: musicasinfonica
Processed genre #5928: montanametal
Processed genre #5930: estonianjazz
Processed genre #5933: czechcontemporaryclassical
Processed genre #5936: iranianmetal
Processed genre #5943: uyghurfolk
Processed genre #5945: cosmicupliftingtrance
Processed genre #5946: metalbalear
Processed genre #5948: polishexperimental
Processed genre #5949: industrialnoise
Processed genre #5952: 

In [1]:
import pandas as pd
# [subgenre_list, playlists, subgenre_list_sim, sim_weights, subgenre_list_opp, opp_weights, subgenre_list_artist, artist_weights]
#df = pd.DataFrame({"GENRE":music_data[0], 
                  # "SIM_GENRES":music_data[2],
                  # "SIM_WEIGHTS":music_data[3],
                  # "OPP_GENRES":music_data[4],
                  # "OPP_WEIGHTS":music_data[5],
                  # "REL_ARTISTS":music_data[6], 
                  # "ARTIST_WEIGHTS":music_data[7],
                  # "SPOTIFY_URL":music_data[1]})
#df.to_csv('all_genres1.csv')
# Load the scraped genre table, keeping only rows with every column populated.
df = pd.read_csv("all_genres1.csv").dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6091 entries, 0 to 6092
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   GENRE           6091 non-null   object
 1   SIM_GENRES      6091 non-null   object
 2   SIM_WEIGHTS     6091 non-null   object
 3   OPP_GENRES      6091 non-null   object
 4   OPP_WEIGHTS     6091 non-null   object
 5   REL_ARTISTS     6091 non-null   object
 6   ARTIST_WEIGHTS  6091 non-null   object
 7   SPOTIFY_URL     6091 non-null   object
dtypes: object(8)
memory usage: 428.3+ KB


In [1]:
import spotipy
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API client using the client-credentials flow.
# NOTE(review): the id/secret are blank here; they presumably have to be
# supplied before this cell can authenticate -- avoid committing real values.
sp_client_id = ''
sp_client_secret = ''
credentials = SpotifyClientCredentials(
    client_id=sp_client_id,
    client_secret=sp_client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=credentials)

# Names of the per-track attributes of interest.
features = [
    'popularity', 'danceability', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'time_signature',
]

In [5]:
import time
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import json

def get_track_ids_from_playlist(sp, playlist_id):
    """Return the track IDs found in one playlist.

    Only the items returned by a single ``sp.playlist_tracks`` call are read.
    Items without a track object, or whose track has no ID, are skipped so
    the result contains only usable string IDs.

    Args:
        sp: Client object exposing ``playlist_tracks(playlist_id)``.
        playlist_id: Identifier of the playlist to read.

    Returns:
        List of track IDs in the order they appear in the response.
    """
    results = sp.playlist_tracks(playlist_id)

    track_ids = []
    for item in results['items']:
        track = item.get('track')
        # `track` can be null and its id can be null; both would break the
        # later batch lookups, so leave them out.
        if track and track.get('id'):
            track_ids.append(track['id'])
    return track_ids


def get_track_ids_for_playlists(sp, playlists):
    """Collect the track IDs of many playlists, using a JSON file as a cache.

    Previously fetched results are loaded from ``track_ids.json``; playlists
    already present there are not fetched again. Remaining playlists are
    fetched concurrently, and the combined mapping is written back to
    ``track_ids.json`` even when the run is cut short by an error.

    Args:
        sp: Spotify client passed through to ``get_track_ids_from_playlist``.
        playlists: Iterable (with ``len``) of playlist URLs containing
            ``'playlist/'`` followed by the playlist ID; ``None`` entries are
            recorded with a ``None`` value.

    Returns:
        Dict mapping playlist URL to its list of track IDs. Playlists that
        could not be fetched are absent from the dict.
    """
    track_ids_dict = {}

    # Load existing track IDs
    try:
        with open('track_ids.json', 'r') as f:
            track_ids_dict = json.load(f)
    except FileNotFoundError:
        pass

    max_attempts = 2  # Attempt once and retry once

    # Define a function for processing a single playlist URL in a separate thread
    def process_playlist_url(playlist_url):
        if playlist_url is None:
            track_ids_dict[playlist_url] = None
            return
        if playlist_url in track_ids_dict:
            return

        playlist_id = playlist_url.split('playlist/')[1]

        for attempt in range(max_attempts):
            try:
                track_ids_dict[playlist_url] = get_track_ids_from_playlist(sp, playlist_id)
                return
            except Exception as e:
                if attempt + 1 < max_attempts:
                    # Exponential back-off before the next attempt.
                    time.sleep(2 ** attempt)
                else:
                    # Report the failure instead of dropping it silently;
                    # tqdm.write keeps the progress bar intact.
                    tqdm.write(f"Failed to fetch tracks for {playlist_url}: {e}")

    try:
        # Process all playlist URLs concurrently
        with ThreadPoolExecutor() as executor:
            list(tqdm(executor.map(process_playlist_url, playlists), total=len(playlists), desc='Processing playlists'))
    finally:
        # Save track IDs to a file, including partial results if the run failed
        with open('track_ids.json', 'w') as f:
            json.dump(track_ids_dict, f)

    return track_ids_dict

# Only string URLs are usable; anything else (e.g. NaN) is dropped.
playlists = list(filter(lambda url: isinstance(url, str), df['SPOTIFY_URL'].tolist()))
playlist_track_ids = get_track_ids_for_playlists(sp, playlists)

Processing playlists: 100%|█████████████████| 6091/6091 [02:46<00:00, 36.65it/s]


In [8]:
from ratelimiter import RateLimiter
# Build a fresh Spotify client for this cell (credentials left blank here).
sp = spotipy.Spotify(
    client_credentials_manager=spotipy.oauth2.SpotifyClientCredentials(
        client_id='',
        client_secret='',
    )
)

def get_track_popularity(sp, track_ids):
    """Look up the popularity score of a batch of tracks.

    Args:
        sp: Client object exposing ``tracks(track_ids)``.
        track_ids: Track IDs to look up in a single call.

    Returns:
        Dict mapping track ID to its ``popularity`` value. IDs for which the
        response holds no track object are left out. An empty ``track_ids``
        returns ``{}`` without calling the API.
    """
    if not track_ids:
        return {}

    tracks = sp.tracks(track_ids)
    # The response list may contain null placeholders; indexing those would
    # raise TypeError, so skip them.
    return {
        track['id']: track['popularity']
        for track in tracks['tracks']
        if track is not None
    }

def get_audio_features_for_tracks(sp, track_ids_dict):
    """Fetch audio features and popularity for all tracks, cached in a JSON file.

    Features fetched by an earlier run are loaded from ``audio_features.json``.
    Tracks missing from that cache are fetched in batches; tracks already in
    the cache only get their ``popularity`` refreshed. The combined mapping is
    written back to ``audio_features.json`` even when the run is cut short by
    an error.

    Args:
        sp: Spotify client exposing ``audio_features`` and ``tracks``.
        track_ids_dict: Mapping whose values are lists of track IDs (``None``
            values are ignored).

    Returns:
        Dict mapping track ID to its audio-feature dict, with a
        ``popularity`` entry added where the lookup returned one.
    """
    audio_features_dict = {}
    # Load existing audio features
    try:
        with open('audio_features.json', 'r') as f:
            audio_features_dict = json.load(f)
    except FileNotFoundError:
        pass

    # Adjust the rate limit as needed
    rate_limiter = RateLimiter(max_calls=30, period=1)
    batch_size = 50
    max_attempts = 10

    def fetch_new_features(batch):
        # Fetch features + popularity for tracks not yet cached, with back-off.
        for attempt in range(max_attempts):
            try:
                with rate_limiter:
                    fetched = sp.audio_features(batch)
                    popularity = get_track_popularity(sp, batch)
            except Exception as e:
                if attempt + 1 < max_attempts:
                    time.sleep(2 ** attempt)
                else:
                    tqdm.write(f"Giving up on a batch of {len(batch)} tracks: {e}")
                continue

            for feature in fetched or []:
                # Entries can be null; treating that as an error would send
                # the whole batch through every retry for no benefit.
                if feature is None:
                    continue
                if feature['id'] in popularity:
                    feature['popularity'] = popularity[feature['id']]
                audio_features_dict[feature['id']] = feature
            return

    def refresh_popularity(batch):
        # Update popularity for tracks whose features are already cached.
        with rate_limiter:
            popularity = get_track_popularity(sp, batch)
        for tid in batch:
            if tid in popularity:
                audio_features_dict[tid]['popularity'] = popularity[tid]

    def process_track_ids(track_ids):
        # Every batch is handled inside the loop; nothing runs after it, so
        # an empty input is simply a no-op.
        for start in range(0, len(track_ids), batch_size):
            batch = track_ids[start:start + batch_size]

            # Split before fetching so freshly fetched tracks are not
            # refreshed a second time.
            unprocessed_batch = [tid for tid in batch if tid not in audio_features_dict]
            processed_batch = [tid for tid in batch if tid in audio_features_dict]

            if unprocessed_batch:
                fetch_new_features(unprocessed_batch)
            if processed_batch:
                refresh_popularity(processed_batch)

    track_ids_list = [ids for ids in track_ids_dict.values() if ids is not None]
    # Drop missing ids and duplicates (first occurrence wins) so the same
    # track is never requested twice.
    all_track_ids = list(dict.fromkeys(
        tid for ids in track_ids_list for tid in ids if tid is not None
    ))
    chunks = [all_track_ids[i:i + batch_size] for i in range(0, len(all_track_ids), batch_size)]

    try:
        # Process all track IDs concurrently
        with ThreadPoolExecutor(max_workers=6) as executor:
            list(tqdm(executor.map(process_track_ids, chunks), total=len(chunks), desc='Processing audio features'))
    finally:
        # Save audio features to a file, including partial results on failure
        with open('audio_features.json', 'w') as f:
            json.dump(audio_features_dict, f)

    return audio_features_dict

# Fetch (or load from audio_features.json) the features for every playlist track.
audio_features = get_audio_features_for_tracks(sp, playlist_track_ids)

Processing audio features:  26%|██▊        | 3069/11805 [05:25<15:25,  9.43it/s]

KeyboardInterrupt



In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

# Map every genre to the track IDs of its playlist(s).
genre_track_ids = {}
for genre, playlist_url in zip(df['GENRE'], df['SPOTIFY_URL']):
    track_ids = playlist_track_ids.get(playlist_url, [])
    if track_ids is None:
        continue
    genre_track_ids.setdefault(genre, []).extend(track_ids)

# Average each numeric audio feature over the tracks of every genre;
# genres without any known track features are recorded as None.
genre_audio_features_avg = {}
for genre, track_ids in tqdm(genre_track_ids.items(), desc='Processing genres'):
    features_list = [audio_features[tid] for tid in track_ids if tid in audio_features]
    if not features_list:
        genre_audio_features_avg[genre] = None
        continue
    features_df = pd.DataFrame(features_list)
    genre_audio_features_avg[genre] = features_df.mean(numeric_only=True).to_dict()

In [19]:
# Display the per-genre averages (genre -> dict of mean feature values, or None).
genre_audio_features_avg

{'pop': {'danceability': 0.6620600000000002,
  'energy': 0.6511099999999999,
  'key': 5.12,
  'loudness': -5.726880000000001,
  'mode': 0.64,
  'speechiness': 0.07093500000000001,
  'acousticness': 0.1975984,
  'instrumentalness': 0.006247488700000001,
  'liveness': 0.17319500000000002,
  'valence': 0.512612,
  'tempo': 123.58468999999997,
  'duration_ms': 204923.68,
  'time_signature': 3.94},
 'rap': {'danceability': 0.7286400000000001,
  'energy': 0.63693,
  'key': 5.77,
  'loudness': -6.236810000000002,
  'mode': 0.56,
  'speechiness': 0.15241000000000002,
  'acousticness': 0.14459235000000004,
  'instrumentalness': 0.0012794415999999996,
  'liveness': 0.196954,
  'valence': 0.4643110000000001,
  'tempo': 114.62312999999999,
  'duration_ms': 223874.0,
  'time_signature': 3.96},
 'rock': {'danceability': 0.5032700000000001,
  'energy': 0.71774,
  'key': 4.68,
  'loudness': -7.786729999999999,
  'mode': 0.7,
  'speechiness': 0.058156,
  'acousticness': 0.12671333900000004,
  'instrume

In [15]:
import pandas as pd
import numpy as np

# Relies on `df` and `genre_audio_features_avg` built by the earlier cells.

# Keep only the genres for which averaged features could be computed.
valid_genres = [genre for genre, averages in genre_audio_features_avg.items() if averages is not None]
filtered_df = df[df['GENRE'].isin(valid_genres)].copy()

# Take the feature names from the averages themselves (first-seen order)
# rather than from a loop variable left over from another cell.
numeric_features = list(dict.fromkeys(
    feature_name
    for genre in valid_genres
    for feature_name in genre_audio_features_avg[genre]
))

# Add one column per averaged feature; a genre lacking a feature gets NaN
# instead of raising KeyError.
for feature_name in numeric_features:
    filtered_df[f'{feature_name}'] = filtered_df['GENRE'].map(
        lambda genre, name=feature_name: genre_audio_features_avg[genre].get(name, np.nan)
    )

# Save the result, then show a preview as the cell's output.
filtered_df.to_csv("avg_genres.csv", index=False)
filtered_df.head()
    

