In [None]:
#!pip install spotipy

import os
import warnings

import pandas as pd
import requests
import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm
warnings.filterwarnings("ignore")

Сначала мы парсим альбомный чарт Billboard 200, чтобы получить списки альбомов в чарте и их исполнителей:

In [None]:
# Source: https://www.billboard.com/charts/billboard-200/
# Scrapes album titles and artist names from the Billboard 200 chart page.
# The first chart entry is selected with different tag/class selectors than the
# remaining entries, so it is fetched separately and inserted at position 0.
# NOTE(review): the CSS class strings mirror the page markup at the time this
# notebook was written; if Billboard changes its markup they must be updated.

def _first_text(tag):
    """Return the first child of a parsed tag with surrounding whitespace removed.

    Raises ValueError when the tag was not found (``None``), which means the
    selector no longer matches the page markup.
    """
    if tag is None:
        raise ValueError('Billboard markup changed: expected element was not found')
    return list(tag)[0].strip()


url = 'https://www.billboard.com/charts/billboard-200/'
res = requests.get(url, timeout=30)
res.raise_for_status()  # fail loudly instead of parsing an HTTP error page
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed.
tree = BeautifulSoup(res.content, 'html.parser')

# --- album titles ---
first_album = _first_text(tree.find('a', {'class': "c-title__link lrv-a-unstyle-link"}))
albums = tree.find_all('h3', {'id':"title-of-a-story",
                              'class':"c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only"})
albums = [_first_text(album) for album in albums]
albums.insert(0, first_album)

# --- artist names ---
first_artist = tree.find('p', {'class': "c-tagline a-font-primary-l a-font-primary-m@mobile-max lrv-u-color-black u-color-white@mobile-max lrv-u-margin-tb-00 lrv-u-padding-t-025 lrv-u-margin-r-150"})
# Strip the first artist like every other entry; otherwise surrounding
# whitespace from the markup ends up inside the Spotify search query.
first_artist = _first_text(first_artist)
artists = tree.find_all('span', {'class':"c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only"})
artists = [_first_text(artist) for artist in artists]
artists.insert(0, first_artist)

Здесь мы авторизируемся через аккаунт в Spotify for Developers, чтобы получить доступ к API:

In [None]:
# Authenticate with the Spotify Web API using the client-credentials flow.
# The credentials are read from the environment so that the notebook runs on a
# fresh kernel and no secret is stored in the notebook itself.
# A KeyError naming the missing variable is raised if either one is not set.
client_credentials_manager = SpotifyClientCredentials(
    client_id=os.environ['SPOTIPY_CLIENT_ID'],
    client_secret=os.environ['SPOTIPY_CLIENT_SECRET'],
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

Получаем уникальные коды (URI) каждого альбома:

In [None]:
# Look up every scraped (artist, album) pair on Spotify and keep the ID of the
# top search hit. Pairs with no search result are skipped, so albums_uri may be
# shorter than the chart.
# Iterating over the scraped lists (instead of a fixed range(200)) avoids an
# IndexError when the scrape returned fewer entries than expected.
albums_uri = []
for artist, album in zip(artists, albums):
  result = sp.search(q='artist:' + artist + ', album:' + album, type="album", limit=1)
  found = result['albums']['items']
  if found:
    # URI looks like 'spotify:album:<id>'; keep only the trailing ID part.
    albums_uri.append(found[0]['uri'].split(':')[-1])
len(albums_uri)

['6i7mF7whyRJuLJ4ogbH2wh',
 '151w1FgRZfnKZA9FEcg9Z3',
 '07w0rG5TETcyihsEIZR3qG',
 '6JlCkqkqobGirPsaleJpFr',
 '5aDEezKnOqyQo0qvTFhpkM',
 '5Uly85dJHHDfHQCsyUQ8gw',
 '1ep4OEfNOhvcY85STfEtKy',
 '7txGsnDSqVMoRl6RQ9XyZP',
 '1NAmidJlEaVgA3MpcPFYGq',
 '3RQQmkQEvNCY4prGKE6oc5',
 '446ROKmKfpEwkbi2SjELVX',
 '5Mc6uebYtKnRc5I7bjlNB6',
 '4kS7bSuU0Jm9LYMosFU2x5',
 '2fenSS68JI1h4Fo296JfGr',
 '5L5evi5tJPh8WaEFAQp7Tp',
 '7IouDrXPdAZwT1NzVV3vef',
 '2QJmrSgbdM35R67eoGQo4j',
 '2ODvWsOgouMbaA5xf0RkJe',
 '6uhEg4ASZYLIilZFCBFy1r',
 '6DEjYFkNZh67HP7R9PSZvv',
 '6kZ42qRrzov54LcAk4onW9',
 '0FYvMdfTfYJxnJnKs1wDb0',
 '4kI7ZZF6CgDGFTjZNFwXYG',
 '0HiZ8fNXwJOQcrf5iflrdz',
 '5r36AJ6VOJtp00oxSkBZ5h',
 '2WSeZEFsZKPJQ09abZCIaz',
 '2Xoteh7uEpea4TohMxjtaq',
 '3SpBlxme9WbeQdI9kx7KAV',
 '45F4Pmp3EJi3T6qYonm4Ml',
 '1GG6U2SSJPHO6XsFiBzxYv',
 '0BwWUstDMUbgq2NYONRqlu',
 '1IR2nlwX6YVTXXeu2qzoWO',
 '53Oa5Bu0UTU8o8qCTaHKoz',
 '7rq68qYz66mNdPfidhIEFa',
 '5qENHeCSlwWpEzb25peRmQ',
 '6PBZN8cbwkqm1ERj2BGXJ1',
 '4eLPsYPBmXABThSJ821sqY',
 

Далее распаковываем каждый альбом и формируем таблицу песен со следующими признаками:

*   **name** - название песни
*   **duration** - продолжительность песни в миллисекундах
*   **explicit** - наличие ненормативной лексики в тексте песни (True - есть, False - нет)
*   **track number** - номер трека в альбоме
*   **countries** - количество стран, в которых можно прослушать эту песню
* **popularity** - популярность песни (относительная величина, основанная на количестве прослушиваний)
* **danceability** - переменная, которая показывает, насколько песня подходит для танцев (0 - наименее подходит, 1 - наиболее подходит)
* **energy** - мера интенсивности и активности песни (0 - наименее интенсивная, 1 - наиболее интенсивная)
* **key** - закодированная переменная тональности песни (например, 0 = C, 1 = C♯/D♭, 2 = D, и так далее)
* **loudness** - уровень громкости песни в децибелах
* **mode** - лад (1 - мажорный, 0 - минорный)
* **speechiness** - доля речитатива в песне
* **acousticness** - мера того, насколько песня акустическая
* **instrumentalness** - оценка того, насколько в песне отсутствует вокал (чем ближе к 1, тем вероятнее, что трек инструментальный)
* **liveness** - оценка вероятности того, что песня записана вживую (присутствие аудитории в записи)
* **valence** - мера музыкальной позитивности трека (0 - самая непозитивная, 1 - самая позитивная)
* **tempo** - темп песни в битах в минуту

In [None]:
# Build one row per track for every album in albums_uri.
# Rows are collected in a plain list and converted to a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and re-copied the whole frame
# on every call.
AUDIO_FEATURE_KEYS = (
  'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
)

track_rows = []
for album_uri in tqdm(albums_uri):
  # NOTE(review): only the 'items' of the first response are read; if the API
  # paginates long albums, later pages are not fetched - confirm.
  tracks = sp.album_tracks(album_uri)['items']
  for track in tracks:
    track_uri = track["uri"]
    track_features = sp.audio_features(track_uri)[0]
    track_info = sp.track(track_uri)  # full track object; used for 'popularity'
    track_dict = {
      'name': track["name"],
      'duration': track["duration_ms"],
      'explicit': track["explicit"],
      'track number': track["track_number"],
      'countries': len(track['available_markets']),
      'popularity': track_info['popularity'],
    }
    # audio_features yields None for a track without analysis data; such rows
    # keep only the basic columns and get NaN for the audio features.
    if track_features is not None:
      for feature in AUDIO_FEATURE_KEYS:
        track_dict[feature] = track_features[feature]
    track_rows.append(track_dict)
df_tracks = pd.DataFrame(track_rows)
df_tracks





100%|██████████| 179/179 [09:48<00:00,  3.29s/it]


Unnamed: 0,name,duration,explicit,track number,countries,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Born With A Beer In My Hand,188523,False,1,183,77,0.531,0.810,0.0,-5.475,1.0,0.0401,0.05020,0.000039,0.3570,0.608,148.026
1,Last Night,163854,True,2,183,89,0.492,0.675,6.0,-5.456,1.0,0.0389,0.46700,0.000000,0.1420,0.478,203.759
2,Everything I Love,187047,False,3,183,80,0.568,0.841,8.0,-3.802,1.0,0.0261,0.00514,0.000503,0.1470,0.668,103.983
3,Man Made A Bar (feat. Eric Church),191231,False,4,183,81,0.498,0.764,4.0,-5.006,1.0,0.0309,0.12300,0.000031,0.1190,0.489,147.984
4,Devil Don’t Know,205182,False,5,183,77,0.530,0.664,0.0,-5.648,1.0,0.0272,0.37700,0.000038,0.0937,0.323,125.002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3199,Unbroken,241426,False,8,184,27,0.595,0.792,1.0,-6.979,0.0,0.0286,0.08650,0.000000,0.0769,0.536,119.800
3200,Where The Green Grass Grows,201600,False,9,184,50,0.479,0.698,7.0,-7.914,1.0,0.0277,0.19800,0.000000,0.1260,0.673,159.699
3201,Not A Moment Too Soon,227866,False,10,184,22,0.707,0.363,7.0,-14.798,1.0,0.0288,0.03850,0.000014,0.2300,0.621,120.626
3202,Indian Outlaw - Dance Mix,260306,False,11,184,32,0.698,0.885,4.0,-6.593,1.0,0.0484,0.00110,0.003650,0.3120,0.781,110.179


Проверим, сколько есть пустых значений:

In [None]:
# Number of missing values in each column of the track table.
df_tracks.isna().sum()

In [None]:
# Show the rows whose 'key' value is missing.
df_tracks.loc[df_tracks['key'].isna()]

Поскольку эта запись несет мало информации (почти все признаки пустые), мы ее удалим:

In [None]:
# Remove tracks without audio features by condition rather than by the
# hard-coded row label 863: the label depends on one particular scrape of the
# chart, and an in-place drop fails with KeyError when the cell is re-run.
df_tracks = df_tracks.dropna(subset=['key'])

In [None]:
# Save the track table to disk. to_csv returns None when given a path, so the
# result is not assigned (the old `csv` name also shadowed the stdlib module).
df_tracks.to_csv('tracks_spotify.csv')

***ПАМЯТНИК РУЧНОМУ ТРУДУ*** (не пригодилось)👺

In [None]:
# Hand-collected Spotify album share links. Each URL has the form
# https://open.spotify.com/album/<album id>?si=<share token>.
# Not referenced by any other code visible in this notebook section.
albums_uri1 = ['https://open.spotify.com/album/6i7mF7whyRJuLJ4ogbH2wh?si=tG3HhIYMRpCo2aYGE_NbPg',
              'https://open.spotify.com/album/1JBzeeCJ3axQMVkqWbKh0I?si=KfL-3TXZR5iLlOTFHHfbAg',
              'https://open.spotify.com/album/3lS1y25WAhcqJDATJK70Mq?si=2SU5ZPxgQtarv6aM13xgTw',
              'https://open.spotify.com/album/07w0rG5TETcyihsEIZR3qG?si=qfvXaDRaTy-3Fg21i_qMOg',
              'https://open.spotify.com/album/6JlCkqkqobGirPsaleJpFr?si=59OKyfKZT_yLBFr7ckpVKQ',
              'https://open.spotify.com/album/5aDEezKnOqyQo0qvTFhpkM?si=ZxPWflKZRKGVs1dDBprlYg',
              'https://open.spotify.com/album/5Uly85dJHHDfHQCsyUQ8gw?si=ZwdasyTYRJywlFxNQWQoAQ',
              'https://open.spotify.com/album/1ep4OEfNOhvcY85STfEtKy?si=bY7ejj6uS6CZCVKCVMGafQ',
              'https://open.spotify.com/album/7txGsnDSqVMoRl6RQ9XyZP?si=QdPPo-6AQiWzLxZ8Wuwykw',
              'https://open.spotify.com/album/1NAmidJlEaVgA3MpcPFYGq?si=ky35u3olQ8iOWM0a8G-Jfg',
              'https://open.spotify.com/album/3RQQmkQEvNCY4prGKE6oc5?si=gPEBTXGTQYWA4uDT1jWfWg',
              'https://open.spotify.com/album/6mS81Go5P5wQKoybVT68NW?si=zgu1zRW5QNijapLpjyXg9Q',
              'https://open.spotify.com/album/446ROKmKfpEwkbi2SjELVX?si=k4OIaSsXQBON9G2r01zQTQ',
              'https://open.spotify.com/album/5Mc6uebYtKnRc5I7bjlNB6?si=tlu9HAHURhqfItgS_9q34Q',
              'https://open.spotify.com/album/4kS7bSuU0Jm9LYMosFU2x5?si=U9olxBbwSgOHv5Qkhs8JAg',
              'https://open.spotify.com/album/1pzvBxYgT6OVwJLtHkrdQK?si=VfdK6AIdRS6so9c_gcwivA',
              'https://open.spotify.com/album/5MS3MvWHJ3lOZPLiMxzOU6?si=NFIWIE01SKGMYHnuTggUkg',
              'https://open.spotify.com/album/5L5evi5tJPh8WaEFAQp7Tp?si=5o7YCBwNTWm0yW3WBXfGoA',
              'https://open.spotify.com/album/7IouDrXPdAZwT1NzVV3vef?si=hezN5H1UQYaaoUEyNXx5nA',
              'https://open.spotify.com/album/1yGbNOtRIgdIiGHOEBaZWf?si=Kjzy8yLoTwGILCR5BlYV0w',
              'https://open.spotify.com/album/2ODvWsOgouMbaA5xf0RkJe?si=vK7KvKBXTK-xqQJ-nUfmcw',
              'https://open.spotify.com/album/6uhEg4ASZYLIilZFCBFy1r?si=mxq6z6MYR6K0rFQ_IXr11Q',
              'https://open.spotify.com/album/6DEjYFkNZh67HP7R9PSZvv?si=bC6LvUhJTRmIEi5NryaJMw',
              'https://open.spotify.com/album/6kZ42qRrzov54LcAk4onW9?si=VdltWM9TSlWIGobOHEI4Lw',
              'https://open.spotify.com/album/0FYvMdfTfYJxnJnKs1wDb0?si=jxs95sYlRxiQ5cOuYoX8yA',
              'https://open.spotify.com/album/4kI7ZZF6CgDGFTjZNFwXYG?si=n93lsFN1Rd-qGRPx7VwP3w',
              'https://open.spotify.com/album/0HiZ8fNXwJOQcrf5iflrdz?si=cMznQl02QraQaTR7ite98A',
              'https://open.spotify.com/album/5r36AJ6VOJtp00oxSkBZ5h?si=CHxg9pX-TlK3DlT4IS3n1A',
              'https://open.spotify.com/album/2WSeZEFsZKPJQ09abZCIaz?si=r6AdsHFRS7C8hof-m84WRA',
              'https://open.spotify.com/album/2Xoteh7uEpea4TohMxjtaq?si=xGi1meLRT6GF-fwOv_ZKHA',
              'https://open.spotify.com/album/3SpBlxme9WbeQdI9kx7KAV?si=gBAJU-MgSz25FrJ34l3KfA',
              'https://open.spotify.com/album/45F4Pmp3EJi3T6qYonm4Ml?si=IEDMY4DxTkC8mwB2cFFmtw',
              'https://open.spotify.com/album/45ba6QAtNrdv6Ke4MFOKk9?si=ZS1MpeJ9T7q9SfkQodFT5g',
              'https://open.spotify.com/album/1bt6q2SruMsBtcerNVtpZB?si=GDje4vy7SGCPiPtiZfB5ZA',
              'https://open.spotify.com/album/1IR2nlwX6YVTXXeu2qzoWO?si=Zw2RxVNZSYOPZFAt5jg4Bw',
              'https://open.spotify.com/album/1lhNch5NkOONvFhRPh8qaj?si=CBs1P3fqRGupTJRdSL69pA',
              'https://open.spotify.com/album/3VWrUk4vBznMYXGMPc7dRB?si=rFQ0kIZtTZKQ1I5xazOVhg',
              'https://open.spotify.com/album/5qENHeCSlwWpEzb25peRmQ?si=FxmFxuCYRVeI0IVD4exGNg',
              'https://open.spotify.com/album/748dZDqSZy6aPXKcI9H80u?si=9FVz57BbQdmmPgURGHtbLA',
              'https://open.spotify.com/album/4eLPsYPBmXABThSJ821sqY?si=OvBu7xWDRNOdL031Dwf20g',
              'https://open.spotify.com/album/0ValflNlWce21wm5PlwV60?si=RayQ5vZkTIi1_8JT4g4E1A',
              'https://open.spotify.com/album/6tE9Dnp2zInFij4jKssysL?si=ZzzvO2YmSECCD3lH9qw3QQ',
              'https://open.spotify.com/album/76290XdXVF9rPzGdNRWdCh?si=TYcMVCyhSTqcsdxtN02XRQ',
              'https://open.spotify.com/album/6zaisPwfcIAfdUGPj3mmGY?si=cs1qk9vNTO-5q7v48d_QJQ',
              'https://open.spotify.com/album/4g1ZRSobMefqF6nelkgibi?si=hhXz9sieRhmxJLSeJf6UqA',
              'https://open.spotify.com/album/0Nur9t7M0miEBoAnQ3rPv2?si=oSkbjxPJQqu2-oQaT_Nslg',
              'https://open.spotify.com/album/6UwjRSX9RQyNgJ3LwYhr9i?si=MEz9Dw4FTk21yWrBIHqxjg',
              'https://open.spotify.com/album/5FGZeAnAbmC0MJD597764o?si=B2E11lTURhmL13f6pukbxA',
              'https://open.spotify.com/album/4hDok0OAJd57SGIT8xuWJH?si=o8o2gQsfRj-H7FJk1XsUAA',
              'https://open.spotify.com/album/6s84u2TUpR3wdUv4NgKA2j?si=PjvE2biFTwilqrIP-yHSUw',
              'https://open.spotify.com/album/03GKkfyog7hnllilFS3jIV?si=uJ5aeh1ARsSYq7oH67p6Mg',
              'https://open.spotify.com/album/0S9D5NIDp2YXhYwlvuJzqx?si=UF8TBSiaS--Nm8ww28OTZA',
              'https://open.spotify.com/album/5zi7WsKlIiUXv09tbGLKsE?si=JN9ZmaBMST6keXVWG6csyA',
              'https://open.spotify.com/album/4Gfnly5CzMJQqkUFfoHaP3?si=LJ6rCI_USZOa5VCYmE1OCg',
              'https://open.spotify.com/album/2nkto6YNI4rUYTLqEwWJ3o?si=TZAyWSiaS2OtPvj9SIjjVw',
              'https://open.spotify.com/album/3aTuW4BtsyyyyQa9LKqj8n?si=2cfvNJrzStOpVU0e-nWjXw',
              'https://open.spotify.com/album/6Ar2o9KCqcyYF9J0aQP3au?si=7KmgG4LoRz-of-ecxO7KFA',
              'https://open.spotify.com/album/4UlGauD7ROb3YbVOFMgW5u?si=zulqj_2jQ9OhlEDtQbCL2g',
              'https://open.spotify.com/album/5OZ44LaqZbpP3m9B3oT8br?si=R7_U5vM2RjeRTmmW37CC3w',
              'https://open.spotify.com/album/6X1x82kppWZmDzlXXK3y3q?si=RhwTvXlxQYujQVrFJpD2Gw',
              'https://open.spotify.com/album/1WBZyULtlANBKed7Zf9cDP?si=wYDmnRQ2QByQ8Gmp5sG1dQ',
              'https://open.spotify.com/album/6tkjU4Umpo79wwkgPMV3nZ?si=7BHCtnu8SR6KU8_5e7vCcA',
              'https://open.spotify.com/album/0UMMIkurRUmkruZ3KGBLtG?si=yLwPdsIBQ0qUcCnOE3tXqA',
              'https://open.spotify.com/album/1ATL5GLyefJaxhQzSPVrLX?si=xXGWLnM4SoCfj6DCuAUiBQ',
              'https://open.spotify.com/album/4vXt6IpMcSnqonljffWlMI?si=eohPhJevRXWqWHh02751DA',
              'https://open.spotify.com/album/0CPD6yP11H86RohYYNBD76?si=bZvu0LddSmSNlhqUGQU7NQ',
              'https://open.spotify.com/album/6Oynwy3OABCiAqN9w9UqBN?si=Vme-j3efRSOtV3O5zMkLrQ',
              'https://open.spotify.com/album/53cTBnSWosJkQCDPHzumvg?si=F_his1c_T3GF1T0fYE9PQg',
              'https://open.spotify.com/album/2MDU46hcBn3u94s46BOSdv?si=88vEp42ER5eN6vPQijWp7w',
              'https://open.spotify.com/album/6izHNYgyKzGdRpYwtYFaaG?si=vjMN9yotQ-eEI6IXj2Sf3w',
              'https://open.spotify.com/album/7idxAlo8GuUKWV7RhOlptp?si=_MSnlPIlTSWljbxqMuu3LA',
              'https://open.spotify.com/album/7l49L64CFakYuaV4EL11sP?si=FhHjniTwTDqP_pY_OTDX5w',
              'https://open.spotify.com/album/40GMAhriYJRO1rsY4YdrZb?si=2f8nMOFKSsG0cFpxqeFcUQ',
              'https://open.spotify.com/album/1m9DVgV0kEBiVZ4ElhJEte?si=POQBU_9eSdK88a8GeWSzkA',
              'https://open.spotify.com/album/3mH6qwIy9crq0I9YQbOuDf?si=DQMA_y1YQ5qMuBXdGSA5ng',
              'https://open.spotify.com/album/7lxHnls3yQNl8B9bILmHj7?si=FL5cb5dtQRiNFcKbk66PKw',
              'https://open.spotify.com/album/6FJxoadUE4JNVwWHghBwnb?si=LX7_8mz3RTGIuaxdY2QPjg',
              'https://open.spotify.com/album/0PHMNbcgHfzSUALlfk7wGg?si=4DovWfqZS5WPD-3KjZwFMg',
              'https://open.spotify.com/album/6wTyGUWGCilBFZ837k5aRi?si=hyaq6F-fTWKCuGz04rnZcQ',
              'https://open.spotify.com/album/0xrTH9uvOL1BoFAOR61zTG?si=l9kmnjnLRcOetl0HQxFejw',
              'https://open.spotify.com/album/0vRN2oePynCSTspdY9NDsM?si=D15XcxQcSxOaPijXYFvlmA',
              'https://open.spotify.com/album/4jKeipwuUTjlx9USNYdhZn?si=snD7GxuESgyFQwgYdfjYJQ',
              'https://open.spotify.com/album/5HOHne1wzItQlIYmLXLYfZ?si=9w9_hdnoQ96DGO-8NjPMHw',
              'https://open.spotify.com/album/4XLPYMERZZaBzkJg0mkdvO?si=Fchh3SohRay_h8_FWUbuVw',
              'https://open.spotify.com/album/41GuZcammIkupMPKH2OJ6I?si=Vffh_YQQQsm64ZKYnDKKyw',
              'https://open.spotify.com/album/4SZko61aMnmgvNhfhgTuD3?si=HXyar9MfRAm8kI1su9xVew',
              'https://open.spotify.com/album/1Gtf2hZQlOGVER16uemmzR?si=wLDgltlXTo2gOUrCrKnSpA',
              'https://open.spotify.com/album/7fJJK56U9fHixgO0HQkhtI?si=EqXOuLKITfuvqhLZ1uDzOA',
              'https://open.spotify.com/album/1kCHru7uhxBUdzkm4gzRQc?si=K3HAlCtaTXqLgbKE5rqCpQ',
              'https://open.spotify.com/album/0IOYlkPLTNWcByZGPwf9gm?si=iuVx6QHXSMWogEM65aX5Cg',
              ]
