In [113]:
import re
import sys

import numpy as np
import pandas as pd
import requests as req
import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

# OAuth scope requested for the user-authorised client created on the next line.
scope = "user-library-read"
# Client whose auth manager is SpotifyOAuth, using the scope above.
# NOTE(review): `sp` is not referenced by any cell visible here — confirm it is still needed.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))
# Client whose auth manager is SpotifyClientCredentials; every search / album / track
# request in the visible cells goes through this object.
# NOTE(review): no credentials are passed explicitly, so presumably they come from the
# environment — verify before running on a new machine.
spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

# Get Chicago Rap Artists from [Every Noise](https://everynoise.com/)

In [22]:
def generate_beautifulSoup(url, timeout=30):
    """Download `url` and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would hang the notebook.

    Returns
    -------
    BeautifulSoup
        The response text parsed with the 'lxml' parser.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or the server answers with a
        4xx/5xx status.
    """
    response = req.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of handing an error page to the parser,
    # which would only surface later as a confusing "element not found" failure.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')

def _font_size_value(style):
    """Return the number in the last `font-size` declaration of an inline style string, or 0.0."""
    size = 0.0
    for declaration in (style or '').split(';'):
        prop, sep, value = declaration.partition(':')
        if not sep or prop.strip().lower() != 'font-size':
            continue
        number = re.match(r'\d*\.?\d+', value.strip())
        if number:
            size = float(number.group())
    return size


def top_n_artists(S, n=10):
    """Return the names of the `n` artists drawn with the largest font size.

    The page is expected to hold a `<div class="canvas">` whose child elements
    each carry an inline `style` (with a `font-size`) and an `onclick` whose
    second argument is the double-quoted artist name. Font size is used as the
    measure of artist importance.

    Parameters
    ----------
    S : BeautifulSoup
        Parsed page, e.g. the result of `generate_beautifulSoup`.
    n : int, optional
        Maximum number of names to return (default 10).

    Returns
    -------
    list of str
        Artist names ordered from largest to smallest font size; artists with
        equal sizes keep their order of appearance on the page.

    Raises
    ------
    ValueError
        If the page has no `<div class="canvas">` element.
    """
    # finding the div where all of the names are stored
    canvas = S.find('div', class_="canvas")
    if canvas is None:
        raise ValueError('no <div class="canvas"> element found in the parsed page')

    ranked = []
    for node in canvas:
        # The children of canvas alternate between elements and bare text;
        # text nodes have no attribute lookup, so they are skipped here.
        if not callable(getattr(node, 'get', None)):
            continue

        onclick = node.get('onclick')
        if not onclick:
            continue

        # Split off only the first argument: a plain split(', ') would cut
        # artist names that themselves contain ", ".
        arguments = onclick.split(', ', 1)
        if len(arguments) < 2:
            continue
        quoted = arguments[1].split('"')
        if len(quoted) < 2:
            continue

        # Importance is stored as a number so '160%' ranks above '98%';
        # comparing the raw strings would order them alphabetically.
        ranked.append((quoted[1], _font_size_value(node.get('style'))))

    # sorting artists by "importance", largest first (stable for ties)
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [name for name, _ in ranked[:n]]

In [25]:
# running both functions with chicagorap
S = generate_beautifulSoup('https://everynoise.com/engenremap-chicagorap.html')
top_chicago_artists = top_n_artists(S, n=10)
# Print the variable assigned above; the previous `top_artists` is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
print(top_chicago_artists)

['Kanye West', 'Juice WRLD', 'Polo G', 'Lil Durk', 'King Von', 'Chief Keef', 'Jeremih', 'Chance the Rapper', 'G Herbo', 'Lupe Fiasco']


In [31]:
# look up each scraped artist name on Spotify and keep the first search hit
artists = []

for name in top_chicago_artists:
    results = spotify.search(q=f'artist:{name}', type='artist')
    items = results['artists']['items']

    # nothing found for this name: move on to the next one
    if not items:
        continue

    artist = items[0]
    artists.append(artist)
    summary = (artist['name'], artist['uri'], artist['genres'])
    print(summary)

('Kanye West', 'spotify:artist:5K4W6rqBFWDnAN6FQUkS6x', ['chicago rap', 'rap'])
('Juice WRLD', 'spotify:artist:4MCBfE4596Uoi2O4DtmEMz', ['chicago rap', 'melodic rap'])
('Polo G', 'spotify:artist:6AgTAQt8XS6jRWi4sX7w49', ['chicago rap'])
('Lil Durk', 'spotify:artist:3hcs9uc56yIGFCSy9leWe7', ['chicago drill', 'chicago rap', 'drill', 'hip hop', 'rap', 'trap'])
('King Von', 'spotify:artist:6QtgPSJPSzcnn7dPZ4VINp', ['chicago rap'])
('Chief Keef', 'spotify:artist:15iVAtD3s3FsQR4w1v6M0P', ['chicago bop', 'chicago drill', 'chicago rap', 'drill', 'hip hop', 'rap', 'southern hip hop', 'trap', 'underground hip hop'])
('Jeremih', 'spotify:artist:3KV3p5EY4AvKxOlhGHORLg', ['chicago rap', 'dance pop', 'hip hop', 'pop', 'pop rap', 'r&b', 'southern hip hop', 'trap', 'urban contemporary'])
('Chance the Rapper', 'spotify:artist:1anyVhU62p31KFi8MEzkbf', ['chicago rap', 'conscious hip hop', 'hip hop', 'pop rap', 'rap'])
('G Herbo', 'spotify:artist:5QdEbQJ3ylBnc3gsIASAT5', ['chicago drill', 'chicago rap', '

In [84]:
# collect every album of every artist, keeping only the first copy of each

albums = []
albums_set = set()

for artist in artists:
    print(artist['name'], end="... ")  # progress indicator

    results = spotify.artist_albums(artist['uri'], album_type='album')

    # walk the result pages one by one; each page gets the same de-duplication
    while True:
        for album in results['items']:
            # key = album name followed by its artists' names
            # (relies on the artist order being the same for duplicates)
            credited = ' '.join(album_artist['name'] for album_artist in album['artists'])
            album_and_artist = f"{album['name']} {credited}"

            if album_and_artist in albums_set:
                continue
            albums_set.add(album_and_artist)
            albums.append(album)

        # stop once there is no further page to load
        if not results['next']:
            break
        results = spotify.next(results)

print("\n\nAll Albums")
for album in albums:
    print(album['name'])

Kanye West... Juice WRLD... Polo G... Lil Durk... King Von... Chief Keef... Jeremih... Chance the Rapper... G Herbo... Lupe Fiasco... 

All Albums
Donda (Deluxe)
Donda
JESUS IS KING
KIDS SEE GHOSTS
ye
The Life Of Pablo
Yeezus
Watch The Throne (Deluxe)
Watch The Throne
My Beautiful Dark Twisted Fantasy
808s & Heartbreak
Graduation
Late Orchestration
Late Registration
The College Dropout
Fighting Demons (Deluxe)
Goodbye & Good Riddance (Anniversary)
Legends Never Die
Death Race For Love (Bonus Track Version)
Goodbye & Good Riddance
Future & Juice WRLD Present... WRLD ON DRUGS
Hall of Fame 2.0
Hall of Fame
THE GOAT
Die A Legend
7220 (Reloaded)
7220
The Voice of the Heroes
The Voice (Deluxe)
The Voice
Just Cause Y'all Waited 2 (Deluxe)
Just Cause Y'all Waited 2
Family Over Everything
Love Songs 4 the Streets 2
Signed To The Streets 3
Just Cause Y'all Waited
Bloodas
Signed to the Streets 2.5
Love Songs for the Streets
They Forgot
Lil Durk 2X (Deluxe)
Lil Durk 2X
300 Days 300 Nights
Remember

In [111]:
# gather all tracks of the collected albums and the names of every artist on them
features = []
tracks = []
track_set = set()

for album in albums:
    results = spotify.album_tracks(album['uri'])

    # handle each page of tracks identically, following 'next' until it runs out
    while True:
        tracks.extend(results['items'])

        # record the name of every artist listed on each track of this page
        for info in results['items']:
            features.extend(credited['name'] for credited in info['artists'])

        if not results['next']:
            break
        results = spotify.next(results)

# collapse the collected names to distinct values
features = set(features)
print('\nFeatures')
print(features)


features
{'Lil Reese', 'Key Glock', 'Alex Wiley', 'Freeway', 'YungLiV', 'Logic', 'Snoop Dogg', 'Raekwon', 'Sir Michael Rocks', 'YNW Melly', 'C3', 'Johnny May Cash', 'Boss Top', 'Gucci Mane', 'Crystal Torres', 'Sway', 'G Herbo', 'Pusha T', 'Buk', 'Lil Durk', 'Pooh Bear', 'Victoria Monét', 'MDMA', 'Peter CottonTale', 'Sonyae', 'Simon Sayz', 'Beyoncé', 'Common', 'Lil 40', 'YFN Lucci', 'Pop Smoke', 'Chance the Rapper', 'Soulja Boy', 'Migos', 'Booka600', 'UNKLE', 'Justin Bieber', 'MadeinTYO', 'Ludacris', 'Zona Man', 'Cash Out', 'Lil Yachty', '2 Chainz', 'Fabolous', 'Crystal "Røvél" Torres', 'whoiskeithjames', 'Francis and the Lights', 'Chris Brown', 'Noname', 'GLC', 'C. Mula', 'Ant Clemons', 'Jeremih', 'Glasses Malone', 'Southside', 'Otis Redding', 'Mozzy', 'Benji Glo', 'Bishop G', 'Swizz Beatz', 'Mike WiLL Made-It', 'Stefflon Don', 'DeJ Loaf', 'Troi', 'Sydny August', 'Megan Thee Stallion', 'Calboy', 'Moneybagg Yo', 'Rondo', 'Eryn Allen Kane', 'Young Thug', 'Bon Iver', 'Billy Blue', 'Trey 

In [112]:
# number of distinct artist names collected in `features`
distinct_features = set(features)
print(len(distinct_features))

304
