In [1]:
import pandas as pd
from bs4 import BeautifulSoup as bs
from urllib.parse import unquote
from dotenv import load_dotenv
import os 
from requests import post, get
import requests
import json
import base64

In [2]:
html_file = "get_albums.html"

In [3]:
with open(html_file, 'r', encoding="utf-8-sig") as f:
    soup = bs(f, 'html.parser')
    artist_album = []
    full_list = soup.find_all("button", class_="chartlist-play-button js-playlink-station desktop-playlink")
    scrobble_count = soup.find_all("span", class_="chartlist-count-bar-value")
    for data, count in zip(full_list, scrobble_count):
        artist = data["data-station-url"].split('/')[4]
        album = data["data-station-url"].split('/')[5]
        count_value = int(count.text.strip().split()[0].replace(',',''))
        artist_album.append((artist, album, count_value))


In [4]:
df = pd.DataFrame(artist_album, columns =['Artist', 'Album','Stream Count'])

In [5]:
df.to_csv('top50.csv')

In [20]:
load_dotenv()
client_id = os.getenv('clientId')
client_secret = os.getenv('clientSecret')

In [21]:
def get_token():
    auth_string = client_id + ":" + client_secret
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = str(base64.b64encode(auth_bytes),"utf-8")
    url = "https://accounts.spotify.com/api/token"
    headers = {"Authorization":"Basic "+auth_base64,
               "Content-Type":"application/x-www-form-urlencoded"}
    data = {"grant_type":"client_credentials"}
    result = post(url, headers=headers, data=data)
    json_result = json.loads(result.content)
    token = json_result["access_token"]
    return token

In [22]:
token = get_token()

In [23]:
def get_auth_header(token):
    return {"Authorization":"Bearer "+token}

In [24]:
def get_ids(df):
    artist_dict = {}
    token = get_token()  
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)  
    for index, row in df.iterrows():
        artist = row['Artist']
        formatted_artist = artist.replace('+', ' ')
        query = f"?q={artist}&type=artist&limit=1&offset=0"
        query_url = url + query
        result = requests.get(query_url, headers=headers)
        json_result = json.loads(result.content)
        if 'artists' in json_result and 'items' in json_result['artists']:
            artist_data = json_result['artists']['items'][0]  
            artist_id = artist_data['id']
            artist_name = artist_data['name']
            artist_dict[artist_name] = artist_id
    return artist_dict


In [25]:
ids = get_ids(df)

In [26]:
def get_album_data(ids):
    albums = {}
    token = get_token()
    headers = get_auth_header(token)
    for artist,id in ids.items():
        url = "https://api.spotify.com/v1/artists/{}/albums?limit=50".format(id)
        result = get(url, headers=headers)
        json_result = json.loads(result.content)
        albums[artist] = json_result
    return albums

In [27]:
album_data  = get_album_data(ids)

In [28]:
def filtering_albums(album_data):
    filtered_lst = []
    for artist,albums in album_data.items():
        for type in albums['items']:
            if type['album_group'] not in ['album', 'compilation','single']:
                pass
            else:
                filtered_lst.append(type)
    return filtered_lst

In [29]:
data_dict = filtering_albums(album_data)

In [30]:
def get_album_id(df): 
    album_dict = {}
    for data in data_dict:
        for index, row in df.iterrows():
            artist = row['Artist']
            album = unquote(row['Album'].replace('+', ' ')).replace('%2B', '+')
            if album.lower() == unquote(data['name'].replace('+', ' ')).replace('%2B', '+').lower():
                if artist in album_dict:
                    if data['id'] not in album_dict[artist]:
                        album_dict[artist].append(data['id'])
                else:
                    album_dict[artist] = [data['id']]
    return album_dict


In [31]:
albums_data = get_album_id(df)

In [32]:
def album_details(albums_data):
    token = get_token()
    headers = get_auth_header(token)  
    details = []
    for artist,album_id in albums_data.items():
        for id in album_id:
            url = "https://api.spotify.com/v1/albums/{}".format(id)
            result = requests.get(url, headers=headers)
            json_result = json.loads(result.content)
            details.append(json_result)
    return details

In [33]:
albums_all = album_details(albums_data)

In [34]:
albums_all

[{'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6kGMx9MqwnbKR2EYvZvvrG'},
    'href': 'https://api.spotify.com/v1/artists/6kGMx9MqwnbKR2EYvZvvrG',
    'id': '6kGMx9MqwnbKR2EYvZvvrG',
    'name': 'Alex Cameron',
    'type': 'artist',
    'uri': 'spotify:artist:6kGMx9MqwnbKR2EYvZvvrG'}],
  'available_markets': ['AR',
   'AU',
   'AT',
   'BE',
   'BO',
   'BR',
   'BG',
   'CA',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DK',
   'DO',
   'DE',
   'EC',
   'EE',
   'SV',
   'FI',
   'FR',
   'GR',
   'GT',
   'HN',
   'HK',
   'HU',
   'IS',
   'IE',
   'IT',
   'LV',
   'LT',
   'LU',
   'MY',
   'MT',
   'MX',
   'NL',
   'NZ',
   'NI',
   'NO',
   'PA',
   'PY',
   'PE',
   'PH',
   'PL',
   'PT',
   'SG',
   'SK',
   'ES',
   'SE',
   'CH',
   'TW',
   'TR',
   'UY',
   'US',
   'GB',
   'AD',
   'LI',
   'MC',
   'ID',
   'JP',
   'TH',
   'VN',
   'RO',
   'IL',
   'ZA',
   'SA',
   'AE',
   'BH',
   'QA',
   'OM',
   'KW',

In [35]:
def get_album_deets(albums, df):
    deets = {}
    
    for album in albums:
        name = album['name']
        cover = album['images'][0]['url']
        # album_link = album['external_urls']['spotify']
        for album, streams in zip(df['Album'], df['Stream Count']):
             if album.lower().replace("%27", "'").split('+') == name.lower().split():

                album_data = {
                    # 'release_date': release,
                    # 'album_type': type,
                    # 'track_count': track_count,
                    'cover_url': cover,
                    # 'album_link': album_link,
                    # 'scrobbles': streams,
                    # 'tracks': []
                }

                # for track in tracks:
                #     track_name = track['name']
                #     track_duration = track['duration_ms']
                #     track_number = track['track_number']
                #     explicit = track['explicit']
                #     preview = track['preview_url']
                #     artist_lst = [artist['name'] for artist in track['artists']]
                    
                #     album_data['tracks'].append({
                #         'name': track_name,
                #         'duration_minutes': track_duration,
                #         'artist(s)': artist_lst,
                #         'track_number': track_number,
                #         'explicit': explicit,
                #         'preview': preview
                #     })

                deets[name] = album_data

    return deets


In [36]:
test = get_album_deets(albums_all,df)

In [37]:
def get_album_details(albums, df):
    details = {}
    
    for album in albums:
        name = album['name']
        release = album['release_date']
        type = album['album_type']
        tracks = album['tracks']['items']
        track_count = album['total_tracks']
        cover = album['images'][0]['url']
        album_link = album['external_urls']['spotify']
        for album, streams in zip(df['Album'], df['Stream Count']):
             if album.lower().replace("%27", "'").split('+') == name.lower().split():

                album_data = {
                    'release_date': release,
                    'album_type': type,
                    'track_count': track_count,
                    'cover_url': cover,
                    'album_link': album_link,
                    'scrobbles': streams,
                    'tracks': []
                }

                for track in tracks:
                    track_name = track['name']
                    track_duration = track['duration_ms']
                    track_number = track['track_number']
                    explicit = track['explicit']
                    preview = track['preview_url']
                    artist_lst = [artist['name'] for artist in track['artists']]
                    
                    album_data['tracks'].append({
                        'name': track_name,
                        'duration_minutes': track_duration,
                        'artist(s)': artist_lst,
                        'track_number': track_number,
                        'explicit': explicit,
                        'preview': preview
                    })

                details[name] = album_data

    return details


In [38]:
test

{'Miami Memory': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b273497ffb57475bc98492b54bd4'},
 'Forced Witness': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b273a9f9f499d82111f429413264'},
 "God's Favorite Customer": {'cover_url': 'https://i.scdn.co/image/ab67616d0000b273b775f9a1a6f232ffc7c56a00'},
 'Pure Comedy': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b2732e74e5926952e461ea94f0e3'},
 'I Love You, Honeybear': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b2731c483b9ccbabc97c8835fc94'},
 'Fear Fun': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b273f81572e9a17c58d81b0f70e9'},
 'Live at Electric Lady': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b27303bcb8e9958d3f4b8942ac0a'},
 'Off-Key in Hamburg': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b273d6e3dd9579327376ecac4706'},
 'AURORA': {'cover_url': 'https://i.scdn.co/image/ab67616d0000b27303d0025f96528545ca9a921d'},
 "Speak Now (Taylor's Version)": {'cover_url': 'https://i.scdn.co/image/ab

In [39]:
album_details = get_album_details(albums_all,df)

In [40]:
album_details

{'Miami Memory': {'release_date': '2019-09-13',
  'album_type': 'album',
  'track_count': 10,
  'cover_url': 'https://i.scdn.co/image/ab67616d0000b273497ffb57475bc98492b54bd4',
  'album_link': 'https://open.spotify.com/album/48DbemfLnt57uu0Nd3EsKc',
  'scrobbles': 371,
  'tracks': [{'name': 'Stepdad',
    'duration_minutes': 255000,
    'artist(s)': ['Alex Cameron'],
    'track_number': 1,
    'explicit': False,
    'preview': 'https://p.scdn.co/mp3-preview/54ce58b7cb1acc38ac3543f207ec092f0e587300?cid=4dd98c178ff74d3a82dc42f537ad5736'},
   {'name': 'Miami Memory',
    'duration_minutes': 265813,
    'artist(s)': ['Alex Cameron'],
    'track_number': 2,
    'explicit': False,
    'preview': 'https://p.scdn.co/mp3-preview/3bc591f88e966569bedd73315a278fe1d3a82c2c?cid=4dd98c178ff74d3a82dc42f537ad5736'},
   {'name': 'Far From Born Again',
    'duration_minutes': 256239,
    'artist(s)': ['Alex Cameron'],
    'track_number': 3,
    'explicit': False,
    'preview': 'https://p.scdn.co/mp3-pre

In [41]:

data = []
for album_name, album_data in album_details.items():
    for track_data in album_data['tracks']:
        row = {
            'Album': album_name,
            'Release Date': album_data['release_date'],
            'Album Type': album_data['album_type'],
            'Track Count': album_data['track_count'],
            'Cover URL': album_data['cover_url'],
            'Album Link': album_data['album_link'],
            'Scrobbles': album_data['scrobbles'],
            'Track Name': track_data['name'],
            'Duration (minutes)': track_data['duration_minutes'],
            'Artists': ', '.join(track_data['artist(s)']),
            'Track Number': track_data['track_number'],
            'Explicit': track_data['explicit'],
            'Preview URL': track_data['preview']
        }
        data.append(row)

df_album_details = pd.DataFrame(data)

In [42]:
df_album_details.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Album</th>\n      <th>Release Date</th>\n      <th>Album Type</th>\n      <th>Track Count</th>\n      <th>Cover URL</th>\n      <th>Album Link</th>\n      <th>Scrobbles</th>\n      <th>Track Name</th>\n      <th>Duration (minutes)</th>\n      <th>Artists</th>\n      <th>Track Number</th>\n      <th>Explicit</th>\n      <th>Preview URL</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Miami Memory</td>\n      <td>2019-09-13</td>\n      <td>album</td>\n      <td>10</td>\n      <td>https://i.scdn.co/image/ab67616d0000b273497ffb57475bc98492b54bd4</td>\n      <td>https://open.spotify.com/album/48DbemfLnt57uu0Nd3EsKc</td>\n      <td>371</td>\n      <td>Stepdad</td>\n      <td>255000</td>\n      <td>Alex Cameron</td>\n      <td>1</td>\n      <td>False</td>\n      <td>https://p.scdn.co/mp3-preview/54ce58b7cb1acc38ac3543f207ec092f0e587300?cid=4dd98c178ff7

In [43]:
df_album_details 

Unnamed: 0,Album,Release Date,Album Type,Track Count,Cover URL,Album Link,Scrobbles,Track Name,Duration (minutes),Artists,Track Number,Explicit,Preview URL
0,Miami Memory,2019-09-13,album,10,https://i.scdn.co/image/ab67616d0000b273497ffb...,https://open.spotify.com/album/48DbemfLnt57uu0...,371,Stepdad,255000,Alex Cameron,1,False,https://p.scdn.co/mp3-preview/54ce58b7cb1acc38...
1,Miami Memory,2019-09-13,album,10,https://i.scdn.co/image/ab67616d0000b273497ffb...,https://open.spotify.com/album/48DbemfLnt57uu0...,371,Miami Memory,265813,Alex Cameron,2,False,https://p.scdn.co/mp3-preview/3bc591f88e966569...
2,Miami Memory,2019-09-13,album,10,https://i.scdn.co/image/ab67616d0000b273497ffb...,https://open.spotify.com/album/48DbemfLnt57uu0...,371,Far From Born Again,256239,Alex Cameron,3,False,https://p.scdn.co/mp3-preview/6b72900e8c014e1c...
3,Miami Memory,2019-09-13,album,10,https://i.scdn.co/image/ab67616d0000b273497ffb...,https://open.spotify.com/album/48DbemfLnt57uu0...,371,Gaslight,265400,Alex Cameron,4,False,https://p.scdn.co/mp3-preview/97baaf76df69b64b...
4,Miami Memory,2019-09-13,album,10,https://i.scdn.co/image/ab67616d0000b273497ffb...,https://open.spotify.com/album/48DbemfLnt57uu0...,371,Bad For The Boys,278733,Alex Cameron,5,False,https://p.scdn.co/mp3-preview/4b62e7a160b1cd48...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,Traumazine,2022-08-12,album,18,https://i.scdn.co/image/ab67616d0000b27394229b...,https://open.spotify.com/album/4YP0h2KGDb20eJu...,117,Star (feat. Lucky Daye),210160,"Megan Thee Stallion, Lucky Daye",14,True,https://p.scdn.co/mp3-preview/933a2d7787e28f85...
775,Traumazine,2022-08-12,album,18,https://i.scdn.co/image/ab67616d0000b27394229b...,https://open.spotify.com/album/4YP0h2KGDb20eJu...,117,Pressurelicious (feat. Future),173061,"Megan Thee Stallion, Future",15,True,https://p.scdn.co/mp3-preview/767cda2a4cb0ef42...
776,Traumazine,2022-08-12,album,18,https://i.scdn.co/image/ab67616d0000b27394229b...,https://open.spotify.com/album/4YP0h2KGDb20eJu...,117,Plan B,163764,Megan Thee Stallion,16,True,https://p.scdn.co/mp3-preview/63624771090f26a7...
777,Traumazine,2022-08-12,album,18,https://i.scdn.co/image/ab67616d0000b27394229b...,https://open.spotify.com/album/4YP0h2KGDb20eJu...,117,Southside Royalty Freestyle (feat. Sauce Walka...,243062,"Megan Thee Stallion, Sauce Walka, Lil' Keke, B...",17,True,https://p.scdn.co/mp3-preview/ec2fdd5e98311630...


In [44]:
def get(row):
    index = (row.name)+1  # Retrieve the index of the row
    album = row["Album"].replace("%27", "'").replace('+', " ")
    artist = row['Artist'].replace("%27", "'").replace('+', " ")
    print(f"<p>{index}: {album} by {artist}</p>")

df.apply(get, axis=1)


<p>1: Forced Witness by Alex Cameron</p>
<p>2: Off-Key In Hamburg by Father John Misty</p>
<p>3: I Love You, Honeybear by Father John Misty</p>
<p>4: Fear Fun by Father John Misty</p>
<p>5: AURORA by Daisy Jones & The Six</p>
<p>6: 1989 (Deluxe Edition) by Taylor Swift</p>
<p>7: Camp by Childish Gambino</p>
<p>8: Miami Memory by Alex Cameron</p>
<p>9: Paul's Boutique (20th Anniversary Remastered Edition) by Beastie Boys</p>
<p>10: God's Favorite Customer by Father John Misty</p>
<p>11: folklore by Taylor Swift</p>
<p>12: Midnights (3am Edition) by Taylor Swift</p>
<p>13: Lover by Taylor Swift</p>
<p>14: Norman Fucking Rockwell! by Lana Del Rey</p>
<p>15: Popstar: Never Stop Never Stopping by The Lonely Island</p>
<p>16: Ultraviolence (Deluxe) by Lana Del Rey</p>
<p>17: FOUR (Deluxe) by One Direction</p>
<p>18: The Loneliest Time by Carly Rae Jepsen</p>
<p>19: evermore by Taylor Swift</p>
<p>20: Fearless (Taylor's Version) by Taylor Swift</p>
<p>21: Pure Comedy by Father John Misty</p>


0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
19    None
20    None
21    None
22    None
23    None
24    None
25    None
26    None
27    None
28    None
29    None
30    None
31    None
32    None
33    None
34    None
35    None
36    None
37    None
38    None
39    None
40    None
41    None
42    None
43    None
44    None
45    None
46    None
47    None
48    None
49    None
dtype: object

In [45]:
def get(df):
    album = df["Album"].replace("%27", "'").replace('+'," ")
    artist = df['Artist'].replace("%27", "'").replace('+'," ")
    print(album.index)
    print(f"<p>{album} by {artist}</p>")

df.apply(get,axis=1)

<built-in method index of str object at 0x149693eb0>
<p>Forced Witness by Alex Cameron</p>
<built-in method index of str object at 0x146008300>
<p>Off-Key In Hamburg by Father John Misty</p>
<built-in method index of str object at 0x1460080d0>
<p>I Love You, Honeybear by Father John Misty</p>
<built-in method index of str object at 0x145f64db0>
<p>Fear Fun by Father John Misty</p>
<built-in method index of str object at 0x14532a070>
<p>AURORA by Daisy Jones & The Six</p>
<built-in method index of str object at 0x146008300>
<p>1989 (Deluxe Edition) by Taylor Swift</p>
<built-in method index of str object at 0x14532a1b0>
<p>Camp by Childish Gambino</p>
<built-in method index of str object at 0x145f64db0>
<p>Miami Memory by Alex Cameron</p>
<built-in method index of str object at 0x1487ba2c0>
<p>Paul's Boutique (20th Anniversary Remastered Edition) by Beastie Boys</p>
<built-in method index of str object at 0x144cca510>
<p>God's Favorite Customer by Father John Misty</p>
<built-in method 

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
19    None
20    None
21    None
22    None
23    None
24    None
25    None
26    None
27    None
28    None
29    None
30    None
31    None
32    None
33    None
34    None
35    None
36    None
37    None
38    None
39    None
40    None
41    None
42    None
43    None
44    None
45    None
46    None
47    None
48    None
49    None
dtype: object

In [46]:
df.apply(get,axis=1)

<built-in method index of str object at 0x149692cf0>
<p>Forced Witness by Alex Cameron</p>
<built-in method index of str object at 0x145fc0b20>
<p>Off-Key In Hamburg by Father John Misty</p>
<built-in method index of str object at 0x145fc0b20>
<p>I Love You, Honeybear by Father John Misty</p>
<built-in method index of str object at 0x149692cf0>
<p>Fear Fun by Father John Misty</p>
<built-in method index of str object at 0x14532a070>
<p>AURORA by Daisy Jones & The Six</p>
<built-in method index of str object at 0x145fc0b20>
<p>1989 (Deluxe Edition) by Taylor Swift</p>
<built-in method index of str object at 0x14532a1b0>
<p>Camp by Childish Gambino</p>
<built-in method index of str object at 0x149692cf0>
<p>Miami Memory by Alex Cameron</p>
<built-in method index of str object at 0x14964fc20>
<p>Paul's Boutique (20th Anniversary Remastered Edition) by Beastie Boys</p>
<built-in method index of str object at 0x146008080>
<p>God's Favorite Customer by Father John Misty</p>
<built-in method 

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
19    None
20    None
21    None
22    None
23    None
24    None
25    None
26    None
27    None
28    None
29    None
30    None
31    None
32    None
33    None
34    None
35    None
36    None
37    None
38    None
39    None
40    None
41    None
42    None
43    None
44    None
45    None
46    None
47    None
48    None
49    None
dtype: object