# Lab | Web Scraping Single Page (GNOD part 1) 

`Question` Scrape the current top 100 songs and their respective artists, and put the information into a pandas dataframe.

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
url = "https://www.popvortex.com/music/charts/top-100-songs.php"

In [3]:
response = requests.get(url)

In [4]:
response.status_code # 200 status code means OK!

200

In [5]:
response.content

b'<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><title>iTunes Top 100 Songs Chart 2024</title><meta name="viewport" content="width=device-width, initial-scale=1"><meta name="description" content="iTunes top 100 songs chart list. The most popular hit music and trending songs of 2024. Chart of today\'s current iTunes top 100 songs is updated daily."><meta property="og:title" content="iTunes Top 100 Songs Chart 2024"/><meta property="og:description" content="Chart of the top 100 songs on iTunes. Chart list of the top 100 song downloads of 2024 is updated daily."/><meta property="og:type" content="article"/><meta property="og:image" content="https://www.popvortex.com/images/logo-facebook.png"/><meta property="og:site_name" content="PopVortex"/><meta property="og:url" content="https://www.popvortex.com/music/charts/top-100-songs.php"/><meta property="fb:admins" content="100000239962942"/><meta property="fb:app_id" content="178831188827052"/><link rel="shortcut icon" href="/favi

In [6]:
# Parse html (create the 'soup')
soup = BeautifulSoup(response.content, "html.parser")

In [7]:
# soup.find_all('cite')

In [8]:
#with the following line I'm able to get all titles, yet not clean
#soup.select("cite.title")

In [9]:
titles = []
for t in soup.select("cite.title"):
    titles.append(t.get_text())

titles

['Not Like Us',
 'A Bar Song (Tipsy)',
 'Lose Control',
 'meet the grahams',
 'euphoria',
 'Where That Came From',
 'Beautiful Things',
 'MILLION DOLLAR BABY',
 'Too Sweet',
 'Family Matters',
 'A Bar Song (Tipsy)',
 'Espresso',
 'Not Today',
 'Hollywood Nights',
 'Miles On It',
 'Fortnight (feat. Post Malone)',
 'Like That',
 'Kendrick Lamar 6:16 In LA Drake Diss',
 'The Door',
 'Live Like You Were Dying',
 'Training Season',
 'Hollywood Nights',
 'Wondering Why',
 'Illusion',
 'Push Ups',
 'i like the way you kiss me',
 'Save Me (with Lainey Wilson)',
 'Wildflowers and Wild Horses (Single Version)',
 'Lil Boo Thang',
 'Stick Season',
 'Where the Wild Things Are',
 'Cowgirls (feat. ERNEST)',
 'Halfway To Hell',
 'TRUCK BED',
 'Need a Favor',
 'GOOD DAY',
 'I Can Do It With a Broken Heart',
 'Austin',
 'The Sound of Silence (CYRIL Remix)',
 'Praise (feat. Brandon Lake, Chris Brown & Chandler Moore)',
 'GO HOME W U',
 'Unwritten',
 'Dirt Cheap',
 'White Horse',
 'Austin',
 'Last Night',

In [10]:
#with the following line I'm able to get all artist, yet not clean
# soup.select("em.artist")

In [11]:
artist = []
for a in soup.select("em.artist"):
    artist.append(a.get_text())
    
artist

['Kendrick Lamar',
 'Shaboozey',
 'Teddy Swims',
 'Kendrick Lamar',
 'Kendrick Lamar',
 'Randy Travis',
 'Benson Boone',
 'Tommy Richman',
 'Hozier',
 'Drake',
 'Shaboozey',
 'Sabrina Carpenter',
 'BTS',
 'Black Oxygen',
 'Marshmello & Kane Brown',
 'Taylor Swift',
 'Future, Metro Boomin & Kendrick Lamar',
 'Gloc23',
 'Teddy Swims',
 'Tim McGraw',
 'Dua Lipa',
 'Black Oxygen',
 'The Red Clay Strays',
 'Dua Lipa',
 'Drake',
 'Artemas',
 'Jelly Roll',
 'Lainey Wilson',
 'Paul Russell',
 'Noah Kahan',
 'Luke Combs',
 'Morgan Wallen',
 'Jelly Roll',
 'HARDY',
 'Jelly Roll',
 'Forrest Frank',
 'Taylor Swift',
 'Dasha',
 'Disturbed',
 'Elevation Worship',
 'Keith Urban & Lainey Wilson',
 'Natasha Bedingfield',
 'Cody Johnson',
 'Chris Stapleton',
 'Dasha',
 'Morgan Wallen',
 'Beyoncé',
 'Jelly Roll',
 'Hank Williams, Jr.',
 'Luke Combs',
 'Jack Harlow',
 'Nate Smith',
 'Dua Lipa',
 'August Moon',
 'Ariana Grande',
 'Benson Boone',
 'Benson Boone',
 'Sukihana',
 'Zach Bryan',
 'Miley Cyrus',


In [12]:
df = pd.DataFrame({"title":titles,
                           "artist":artist
                          })
df

Unnamed: 0,title,artist
0,Not Like Us,Kendrick Lamar
1,A Bar Song (Tipsy),Shaboozey
2,Lose Control,Teddy Swims
3,meet the grahams,Kendrick Lamar
4,euphoria,Kendrick Lamar
...,...,...
95,MILLION DOLLAR BABY (VHS),Tommy Richman
96,Been Like This,Meghan Trainor & T-Pain
97,Angels (Don’t Always Have Wings),Thomas Rhett
98,Saturn,SZA


2) Create an input to search a song title from the dataframe created and propose a random suggestion the list, in case is in the list

In [17]:
#Creating function with conditional to propose most popular song
import random

def proposed_title(input_title, top_100):
    if input_title in top_100['title'].values:
        proposal = top_100[top_100['title'] != input_title].iloc[random.randint(0, len(top_100)-1)]
    else:
        proposal = 'There is no recommendation at this time'          
    return proposal

#Testing 
input_title = input("Enter a song title: ")  

reco = proposed_title(input_title, df)  
print(reco)


Enter a song title: aaaa
There is no recommendation at this time


# Lab | API wrappers - Create your collection of songs & audio features

In [18]:
#Authentication
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [19]:
secrets_file = open("secrets.txt","r")

In [20]:
string = secrets_file.read()

In [21]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        #print(line.split(':'))
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [22]:
#authentication with secrets
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

### Collecting a playlist of songs

In [23]:
#Collecting a playlist of songs
playlist = sp.user_playlist_tracks("spotify", "5S8SJdl1BDc0ugpkEvFsIL")

In [24]:
#Printing playlist
playlist

{'href': 'https://api.spotify.com/v1/playlists/5S8SJdl1BDc0ugpkEvFsIL/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2017-11-20T02:52:18Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/twgeb7mzdcv4u8h191dxrvlpc'},
    'href': 'https://api.spotify.com/v1/users/twgeb7mzdcv4u8h191dxrvlpc',
    'id': 'twgeb7mzdcv4u8h191dxrvlpc',
    'type': 'user',
    'uri': 'spotify:user:twgeb7mzdcv4u8h191dxrvlpc'},
   'is_local': False,
   'primary_color': None,
   'track': {'preview_url': None,
    'available_markets': [],
    'explicit': False,
    'type': 'track',
    'episode': False,
    'track': True,
    'album': {'available_markets': [],
     'type': 'album',
     'album_type': 'single',
     'href': 'https://api.spotify.com/v1/albums/0HG8fMDhvN2tH5uPHFsyZP',
     'id': '0HG8fMDhvN2tH5uPHFsyZP',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b2734322e9bd7d57d061d0e19e1f',
       'width': 640},
      {

In [25]:
# Looking at items:
playlist.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [26]:
# Looking at total:
playlist['total']

10000

In [27]:
#Checks for understanding steps completed in mornings lesson
playlist["next"]

'https://api.spotify.com/v1/playlists/5S8SJdl1BDc0ugpkEvFsIL/tracks?offset=100&limit=100&additional_types=track'

In [28]:
#Checks for understanding steps completed in mornings lesson
# playlist['items'] contains the tracks on the playlist
playlist['items']

[{'added_at': '2017-11-20T02:52:18Z',
  'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/twgeb7mzdcv4u8h191dxrvlpc'},
   'href': 'https://api.spotify.com/v1/users/twgeb7mzdcv4u8h191dxrvlpc',
   'id': 'twgeb7mzdcv4u8h191dxrvlpc',
   'type': 'user',
   'uri': 'spotify:user:twgeb7mzdcv4u8h191dxrvlpc'},
  'is_local': False,
  'primary_color': None,
  'track': {'preview_url': None,
   'available_markets': [],
   'explicit': False,
   'type': 'track',
   'episode': False,
   'track': True,
   'album': {'available_markets': [],
    'type': 'album',
    'album_type': 'single',
    'href': 'https://api.spotify.com/v1/albums/0HG8fMDhvN2tH5uPHFsyZP',
    'id': '0HG8fMDhvN2tH5uPHFsyZP',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/ab67616d0000b2734322e9bd7d57d061d0e19e1f',
      'width': 640},
     {'height': 300,
      'url': 'https://i.scdn.co/image/ab67616d00001e024322e9bd7d57d061d0e19e1f',
      'width': 300},
     {'height': 64,
      'url': 'ht

In [29]:
#Checks for understanding steps completed in mornings lesson
#Shows total number of items in each page
len(playlist['items'])

100

In [30]:
#Defining function to collect all item details of the 10,000 songs
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [31]:
#Calling the function to collect the 10,000 songs
all_tracks = get_playlist_tracks("5S8SJdl1BDc0ugpkEvFsIL")
#Double checking we collected the 10,000 songs
len(all_tracks)

10000

In [32]:
#Checking what has we got inside the function (a list, with nested dictionaries)
all_tracks

[{'added_at': '2017-11-20T02:52:18Z',
  'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/twgeb7mzdcv4u8h191dxrvlpc'},
   'href': 'https://api.spotify.com/v1/users/twgeb7mzdcv4u8h191dxrvlpc',
   'id': 'twgeb7mzdcv4u8h191dxrvlpc',
   'type': 'user',
   'uri': 'spotify:user:twgeb7mzdcv4u8h191dxrvlpc'},
  'is_local': False,
  'primary_color': None,
  'track': {'preview_url': None,
   'available_markets': [],
   'explicit': False,
   'type': 'track',
   'episode': False,
   'track': True,
   'album': {'available_markets': [],
    'type': 'album',
    'album_type': 'single',
    'href': 'https://api.spotify.com/v1/albums/0HG8fMDhvN2tH5uPHFsyZP',
    'id': '0HG8fMDhvN2tH5uPHFsyZP',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/ab67616d0000b2734322e9bd7d57d061d0e19e1f',
      'width': 640},
     {'height': 300,
      'url': 'https://i.scdn.co/image/ab67616d00001e024322e9bd7d57d061d0e19e1f',
      'width': 300},
     {'height': 64,
      'url': 'ht

In [34]:
#Normalizing list to creates a data frame, opening key nested dictionaries, with items of songs we want to collect
import pandas as pd
from pandas import json_normalize
tracks = json_normalize(all_tracks)

In [37]:
#Displaying tracks
pd.set_option("display.max_columns", 0)
tracks

Unnamed: 0,added_at,is_local,primary_color,added_by.external_urls.spotify,added_by.href,added_by.id,added_by.type,added_by.uri,track.preview_url,track.available_markets,track.explicit,track.type,track.episode,track.track,track.album.available_markets,track.album.type,track.album.album_type,track.album.href,track.album.id,track.album.images,track.album.name,track.album.release_date,track.album.release_date_precision,track.album.uri,track.album.artists,track.album.external_urls.spotify,track.album.total_tracks,track.artists,track.disc_number,track.track_number,track.duration_ms,track.external_ids.isrc,track.external_urls.spotify,track.href,track.id,track.name,track.popularity,track.uri,track.is_local,video_thumbnail.url
0,2017-11-20T02:52:18Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,,[],False,track,False,True,[],album,single,https://api.spotify.com/v1/albums/0HG8fMDhvN2t...,0HG8fMDhvN2tH5uPHFsyZP,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",...Ready For It?,2017-09-03,day,spotify:album:0HG8fMDhvN2tH5uPHFsyZP,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/0HG8fMDhvN2tH5u...,1.0,[{'external_urls': {'spotify': 'https://open.s...,1,1,208198,USCJY1750003,https://open.spotify.com/track/7zgqtptZvhf8GEm...,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,7zgqtptZvhf8GEmdsM2vp2,...Ready For It?,0,spotify:track:7zgqtptZvhf8GEmdsM2vp2,False,
1,2017-11-20T02:42:15Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/4w5Jvreahp3y...,4w5Jvreahp3yvLqc4vCr9I,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Life Changes,2017-09-08,day,spotify:album:4w5Jvreahp3yvLqc4vCr9I,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/4w5Jvreahp3yvLq...,14.0,[{'external_urls': {'spotify': 'https://open.s...,1,8,190226,USLXJ1707376,https://open.spotify.com/track/4Vxu50qVrQcycjR...,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,4Vxu50qVrQcycjRyJQaZLC,Life Changes,60,spotify:track:4Vxu50qVrQcycjRyJQaZLC,False,
2,2017-11-20T02:36:40Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,https://p.scdn.co/mp3-preview/3a76820d510fa5f8...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/4PgleR09JVnm...,4PgleR09JVnm3zY1fW3XBA,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",24K Magic,2016-11-17,day,spotify:album:4PgleR09JVnm3zY1fW3XBA,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/4PgleR09JVnm3zY...,9.0,[{'external_urls': {'spotify': 'https://open.s...,1,1,225983,USAT21602944,https://open.spotify.com/track/6b8Be6ljOzmkOmF...,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,6b8Be6ljOzmkOmFslEb23P,24K Magic,80,spotify:track:6b8Be6ljOzmkOmFslEb23P,False,
3,2017-12-06T01:26:36Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,https://p.scdn.co/mp3-preview/9b7635464dc2caea...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/3T4tUhGYeRNV...,3T4tUhGYeRNVUGevb0wThu,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",÷ (Deluxe),2017-03-03,day,spotify:album:3T4tUhGYeRNVUGevb0wThu,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/3T4tUhGYeRNVUGe...,16.0,[{'external_urls': {'spotify': 'https://open.s...,1,6,170826,GBAHS1700026,https://open.spotify.com/track/0afhq8XCExXpqaz...,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,0afhq8XCExXpqazXczTSve,Galway Girl,78,spotify:track:0afhq8XCExXpqazXczTSve,False,
4,2017-11-20T02:36:14Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,https://p.scdn.co/mp3-preview/d90f4e5f15d8ed41...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/1xn54DMo2qIq...,1xn54DMo2qIqBuMqHtUsFd,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",x (Deluxe Edition),2014-06-21,day,spotify:album:1xn54DMo2qIqBuMqHtUsFd,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/1xn54DMo2qIqBuM...,16.0,[{'external_urls': {'spotify': 'https://open.s...,1,6,258986,GBAHS1400094,https://open.spotify.com/track/1HNkqx9Ahdgi1Ix...,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,1HNkqx9Ahdgi1Ixy2xkKkL,Photograph,83,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2021-08-24T02:02:35Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,,[],False,track,False,True,[],album,album,https://api.spotify.com/v1/albums/6Gp6qSE1ywXC...,6Gp6qSE1ywXCLjal5NUWUE,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Goodbye Yellow Brick Road (40th Anniversary Ce...,1973-10-05,day,spotify:album:6Gp6qSE1ywXCLjal5NUWUE,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/6Gp6qSE1ywXCLja...,53.0,[{'external_urls': {'spotify': 'https://open.s...,1,1,666571,GBUM71304953,https://open.spotify.com/track/4UFlPCB4THnQ9Tl...,https://api.spotify.com/v1/tracks/4UFlPCB4THnQ...,4UFlPCB4THnQ9TlPHqIQow,Funeral For A Friend / Love Lies Bleeding,0,spotify:track:4UFlPCB4THnQ9TlPHqIQow,False,
9996,2021-08-24T02:02:43Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/4X87hQ57jTYQ...,4X87hQ57jTYQTcYTaJWK5w,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Even In The Quietest Moments,1977-01-01,day,spotify:album:4X87hQ57jTYQTcYTaJWK5w,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/4X87hQ57jTYQTcY...,7.0,[{'external_urls': {'spotify': 'https://open.s...,1,7,652560,USAM17774668,https://open.spotify.com/track/5pSSEkT0963muzz...,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,5pSSEkT0963muzzIjsVkrs,Fool's Overture,51,spotify:track:5pSSEkT0963muzzIjsVkrs,False,
9997,2021-08-24T02:02:45Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,https://p.scdn.co/mp3-preview/33479c82f0288da7...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",album,album,https://api.spotify.com/v1/albums/0dZF93WHyOhT...,0dZF93WHyOhTWjz5EWM7yG,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Fragile (Deluxe Edition),1971-11-26,day,spotify:album:0dZF93WHyOhTWjz5EWM7yG,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/0dZF93WHyOhTWjz...,11.0,[{'external_urls': {'spotify': 'https://open.s...,1,9,634440,USEE10251893,https://open.spotify.com/track/7gC6Rbllqf1yXNC...,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,7gC6Rbllqf1yXNC02e5jz2,Heart of the Sunrise - 2003 Remaster,44,spotify:track:7gC6Rbllqf1yXNC02e5jz2,False,
9998,2021-08-24T02:02:47Z,False,,https://open.spotify.com/user/twgeb7mzdcv4u8h1...,https://api.spotify.com/v1/users/twgeb7mzdcv4u...,twgeb7mzdcv4u8h191dxrvlpc,user,spotify:user:twgeb7mzdcv4u8h191dxrvlpc,,[],False,track,False,True,[],album,album,https://api.spotify.com/v1/albums/2GVLsiEMDZhx...,2GVLsiEMDZhxOMATIPBK4d,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Wings Over America (Remastered),1976-12-10,day,spotify:album:2GVLsiEMDZhxOMATIPBK4d,[{'external_urls': {'spotify': 'https://open.s...,https://open.spotify.com/album/2GVLsiEMDZhxOMA...,28.0,[{'external_urls': {'spotify': 'https://open.s...,1,1,620746,GBCCS1200188,https://open.spotify.com/track/6Ff77WXC58MkhLE...,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,6Ff77WXC58MkhLE5A1qgY1,Venus And Mars / Rock Show / Jet - Live / Rema...,0,spotify:track:6Ff77WXC58MkhLE5A1qgY1,False,


In [39]:
#From the data frame tracks, checking the data within the key 'track.artists'. This is where the info I want to collect is located
tracks['track.artists'][0]

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02'},
  'href': 'https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02',
  'id': '06HL4z0CvFAxyc27GXpf02',
  'name': 'Taylor Swift',
  'type': 'artist',
  'uri': 'spotify:artist:06HL4z0CvFAxyc27GXpf02'}]

In [40]:
#This is to understand what the for loop will iterate on below (from row 0 to 10,000)
tracks.index

RangeIndex(start=0, stop=10000, step=1)

In [43]:
artists_df = pd.DataFrame(columns=['href', 'id', 'name', 'type', 'uri', 'external_urls.spotify','song_id', 'song_name', 'popularity' ])
for i in tracks.index:
    artists_for_song = json_normalize(tracks.iloc[i]['track.artists'])
    artists_for_song['song_id']    = tracks.iloc[i]['track.id']         # we want to keep song_id, it is the sae for all artists
    artists_for_song['song_name']  = tracks.iloc[i]['track.name']       # we want to keep song_name, it is the sae for all artists
    artists_for_song['popularity'] = tracks.iloc[i]['track.popularity'] # same for popularity   
    artists_df = pd.concat([artists_df, artists_for_song], axis=0)

In [44]:
#Printing end result
artists_df

Unnamed: 0,href,id,name,type,uri,external_urls.spotify,song_id,song_name,popularity
0,https://api.spotify.com/v1/artists/06HL4z0CvFA...,06HL4z0CvFAxyc27GXpf02,Taylor Swift,artist,spotify:artist:06HL4z0CvFAxyc27GXpf02,https://open.spotify.com/artist/06HL4z0CvFAxyc...,7zgqtptZvhf8GEmdsM2vp2,...Ready For It?,0
0,https://api.spotify.com/v1/artists/6x2LnllRG5u...,6x2LnllRG5uGarZMsD4iO8,Thomas Rhett,artist,spotify:artist:6x2LnllRG5uGarZMsD4iO8,https://open.spotify.com/artist/6x2LnllRG5uGar...,4Vxu50qVrQcycjRyJQaZLC,Life Changes,60
0,https://api.spotify.com/v1/artists/0du5cEVh5yT...,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,artist,spotify:artist:0du5cEVh5yTK9QJze8zA0C,https://open.spotify.com/artist/0du5cEVh5yTK9Q...,6b8Be6ljOzmkOmFslEb23P,24K Magic,80
0,https://api.spotify.com/v1/artists/6eUKZXaKkcv...,6eUKZXaKkcviH0Ku9w2n3V,Ed Sheeran,artist,spotify:artist:6eUKZXaKkcviH0Ku9w2n3V,https://open.spotify.com/artist/6eUKZXaKkcviH0...,0afhq8XCExXpqazXczTSve,Galway Girl,78
0,https://api.spotify.com/v1/artists/6eUKZXaKkcv...,6eUKZXaKkcviH0Ku9w2n3V,Ed Sheeran,artist,spotify:artist:6eUKZXaKkcviH0Ku9w2n3V,https://open.spotify.com/artist/6eUKZXaKkcviH0...,1HNkqx9Ahdgi1Ixy2xkKkL,Photograph,83
...,...,...,...,...,...,...,...,...,...
0,https://api.spotify.com/v1/artists/3JsMj0DEzyW...,3JsMj0DEzyWc0VDlHuy9Bx,Supertramp,artist,spotify:artist:3JsMj0DEzyWc0VDlHuy9Bx,https://open.spotify.com/artist/3JsMj0DEzyWc0V...,5pSSEkT0963muzzIjsVkrs,Fool's Overture,51
0,https://api.spotify.com/v1/artists/7AC976RDJzL...,7AC976RDJzL2asmZuz7qil,Yes,artist,spotify:artist:7AC976RDJzL2asmZuz7qil,https://open.spotify.com/artist/7AC976RDJzL2as...,7gC6Rbllqf1yXNC02e5jz2,Heart of the Sunrise - 2003 Remaster,44
0,https://api.spotify.com/v1/artists/4STHEaNw4mP...,4STHEaNw4mPZ2tzheohgXB,Paul McCartney,artist,spotify:artist:4STHEaNw4mPZ2tzheohgXB,https://open.spotify.com/artist/4STHEaNw4mPZ2t...,6Ff77WXC58MkhLE5A1qgY1,Venus And Mars / Rock Show / Jet - Live / Rema...,0
1,https://api.spotify.com/v1/artists/3sFhA6G1N0g...,3sFhA6G1N0gG1pszb6kk1m,Wings,artist,spotify:artist:3sFhA6G1N0gG1pszb6kk1m,https://open.spotify.com/artist/3sFhA6G1N0gG1p...,6Ff77WXC58MkhLE5A1qgY1,Venus And Mars / Rock Show / Jet - Live / Rema...,0


In [45]:
#Creating a new data frame with only these cols: 'song_name', 'name', 'song_id', 'popularity'. Also resetting index
artists_final = artists_df[['song_name', 'name', 'song_id', 'popularity']].reset_index(drop=True)
artists_final

Unnamed: 0,song_name,name,song_id,popularity
0,...Ready For It?,Taylor Swift,7zgqtptZvhf8GEmdsM2vp2,0
1,Life Changes,Thomas Rhett,4Vxu50qVrQcycjRyJQaZLC,60
2,24K Magic,Bruno Mars,6b8Be6ljOzmkOmFslEb23P,80
3,Galway Girl,Ed Sheeran,0afhq8XCExXpqazXczTSve,78
4,Photograph,Ed Sheeran,1HNkqx9Ahdgi1Ixy2xkKkL,83
...,...,...,...,...
12173,Fool's Overture,Supertramp,5pSSEkT0963muzzIjsVkrs,51
12174,Heart of the Sunrise - 2003 Remaster,Yes,7gC6Rbllqf1yXNC02e5jz2,44
12175,Venus And Mars / Rock Show / Jet - Live / Rema...,Paul McCartney,6Ff77WXC58MkhLE5A1qgY1,0
12176,Venus And Mars / Rock Show / Jet - Live / Rema...,Wings,6Ff77WXC58MkhLE5A1qgY1,0


In [57]:
#Are there any rows with no 'song_id'? In fact, yes. Dropping them to avoid issues when collecting features, and when merging data frames
artists_final.isna().sum()

song_name      0
name           0
song_id       20
popularity     0
dtype: int64

In [58]:
artists_final = artists_final.dropna()
artists_final

Unnamed: 0,song_name,name,song_id,popularity
0,...Ready For It?,Taylor Swift,7zgqtptZvhf8GEmdsM2vp2,0
1,Life Changes,Thomas Rhett,4Vxu50qVrQcycjRyJQaZLC,60
2,24K Magic,Bruno Mars,6b8Be6ljOzmkOmFslEb23P,80
3,Galway Girl,Ed Sheeran,0afhq8XCExXpqazXczTSve,78
4,Photograph,Ed Sheeran,1HNkqx9Ahdgi1Ixy2xkKkL,83
...,...,...,...,...
12173,Fool's Overture,Supertramp,5pSSEkT0963muzzIjsVkrs,51
12174,Heart of the Sunrise - 2003 Remaster,Yes,7gC6Rbllqf1yXNC02e5jz2,44
12175,Venus And Mars / Rock Show / Jet - Live / Rema...,Paul McCartney,6Ff77WXC58MkhLE5A1qgY1,0
12176,Venus And Mars / Rock Show / Jet - Live / Rema...,Wings,6Ff77WXC58MkhLE5A1qgY1,0


### Collecting Audio features

In [53]:
sp.audio_features('7zgqtptZvhf8GEmdsM2vp2')

[{'danceability': 0.615,
  'energy': 0.779,
  'key': 2,
  'loudness': -6.454,
  'mode': 1,
  'speechiness': 0.135,
  'acousticness': 0.0665,
  'instrumentalness': 0,
  'liveness': 0.155,
  'valence': 0.453,
  'tempo': 160.0,
  'type': 'audio_features',
  'id': '7zgqtptZvhf8GEmdsM2vp2',
  'uri': 'spotify:track:7zgqtptZvhf8GEmdsM2vp2',
  'track_href': 'https://api.spotify.com/v1/tracks/7zgqtptZvhf8GEmdsM2vp2',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7zgqtptZvhf8GEmdsM2vp2',
  'duration_ms': 208198,
  'time_signature': 4}]

In [59]:
# Creating chunks to call sp.audio_features with chunck of 100 song_id's
# Following first the approach of the lesson

chunks = [(i, i+100) for i in range(0, len(artists_final), 100)] #this is a tuple
chunks

[(0, 100),
 (100, 200),
 (200, 300),
 (300, 400),
 (400, 500),
 (500, 600),
 (600, 700),
 (700, 800),
 (800, 900),
 (900, 1000),
 (1000, 1100),
 (1100, 1200),
 (1200, 1300),
 (1300, 1400),
 (1400, 1500),
 (1500, 1600),
 (1600, 1700),
 (1700, 1800),
 (1800, 1900),
 (1900, 2000),
 (2000, 2100),
 (2100, 2200),
 (2200, 2300),
 (2300, 2400),
 (2400, 2500),
 (2500, 2600),
 (2600, 2700),
 (2700, 2800),
 (2800, 2900),
 (2900, 3000),
 (3000, 3100),
 (3100, 3200),
 (3200, 3300),
 (3300, 3400),
 (3400, 3500),
 (3500, 3600),
 (3600, 3700),
 (3700, 3800),
 (3800, 3900),
 (3900, 4000),
 (4000, 4100),
 (4100, 4200),
 (4200, 4300),
 (4300, 4400),
 (4400, 4500),
 (4500, 4600),
 (4600, 4700),
 (4700, 4800),
 (4800, 4900),
 (4900, 5000),
 (5000, 5100),
 (5100, 5200),
 (5200, 5300),
 (5300, 5400),
 (5400, 5500),
 (5500, 5600),
 (5600, 5700),
 (5700, 5800),
 (5800, 5900),
 (5900, 6000),
 (6000, 6100),
 (6100, 6200),
 (6200, 6300),
 (6300, 6400),
 (6400, 6500),
 (6500, 6600),
 (6600, 6700),
 (6700, 6800),
 

In [61]:
audio_features_list = []
for chunk in chunks:
    id_list100 = artists_final['song_id'][chunk[0]:chunk[1]]
    audio_features_list = audio_features_list + sp.audio_features(id_list100)
    sleep(randint(1,3000)/1000)
len(audio_features_list)

12158

In [62]:
audio_features_df = json_normalize(audio_features_list)

In [63]:
audio_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.615,0.779,2.0,-6.454,1.0,0.1350,0.06650,0.000000,0.1550,0.453,160.000,audio_features,7zgqtptZvhf8GEmdsM2vp2,spotify:track:7zgqtptZvhf8GEmdsM2vp2,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,https://api.spotify.com/v1/audio-analysis/7zgq...,208198.0,4.0
1,0.687,0.845,7.0,-4.370,1.0,0.0575,0.10000,0.000000,0.0452,0.809,87.972,audio_features,4Vxu50qVrQcycjRyJQaZLC,spotify:track:4Vxu50qVrQcycjRyJQaZLC,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,https://api.spotify.com/v1/audio-analysis/4Vxu...,190227.0,4.0
2,0.818,0.803,1.0,-4.282,1.0,0.0797,0.03400,0.000000,0.1530,0.632,106.970,audio_features,6b8Be6ljOzmkOmFslEb23P,spotify:track:6b8Be6ljOzmkOmFslEb23P,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,https://api.spotify.com/v1/audio-analysis/6b8B...,225983.0,4.0
3,0.624,0.876,9.0,-3.374,1.0,0.1000,0.07350,0.000000,0.3270,0.781,99.943,audio_features,0afhq8XCExXpqazXczTSve,spotify:track:0afhq8XCExXpqazXczTSve,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,https://api.spotify.com/v1/audio-analysis/0afh...,170827.0,4.0
4,0.614,0.379,4.0,-10.480,1.0,0.0476,0.60700,0.000464,0.0986,0.201,107.989,audio_features,1HNkqx9Ahdgi1Ixy2xkKkL,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,https://api.spotify.com/v1/audio-analysis/1HNk...,258987.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12153,0.406,0.306,3.0,-10.482,1.0,0.0372,0.31300,0.007900,0.0727,0.073,135.272,audio_features,5pSSEkT0963muzzIjsVkrs,spotify:track:5pSSEkT0963muzzIjsVkrs,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,https://api.spotify.com/v1/audio-analysis/5pSS...,652560.0,4.0
12154,0.362,0.507,1.0,-11.229,1.0,0.0394,0.01740,0.216000,0.1130,0.456,146.641,audio_features,7gC6Rbllqf1yXNC02e5jz2,spotify:track:7gC6Rbllqf1yXNC02e5jz2,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,https://api.spotify.com/v1/audio-analysis/7gC6...,634440.0,3.0
12155,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0
12156,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0


In [65]:
audio_features_df.drop_duplicates(inplace=True) # duplicates because some songs have more artists

In [66]:
df_artists_ft = pd.merge(left=artists_final,
                        right=audio_features_df,
                        how='inner',
                        left_on='song_id',
                        right_on='id')
df_artists_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,...Ready For It?,Taylor Swift,7zgqtptZvhf8GEmdsM2vp2,0,0.615,0.779,2.0,-6.454,1.0,0.1350,0.06650,0.000000,0.1550,0.453,160.000,audio_features,7zgqtptZvhf8GEmdsM2vp2,spotify:track:7zgqtptZvhf8GEmdsM2vp2,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,https://api.spotify.com/v1/audio-analysis/7zgq...,208198.0,4.0
1,Life Changes,Thomas Rhett,4Vxu50qVrQcycjRyJQaZLC,60,0.687,0.845,7.0,-4.370,1.0,0.0575,0.10000,0.000000,0.0452,0.809,87.972,audio_features,4Vxu50qVrQcycjRyJQaZLC,spotify:track:4Vxu50qVrQcycjRyJQaZLC,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,https://api.spotify.com/v1/audio-analysis/4Vxu...,190227.0,4.0
2,24K Magic,Bruno Mars,6b8Be6ljOzmkOmFslEb23P,80,0.818,0.803,1.0,-4.282,1.0,0.0797,0.03400,0.000000,0.1530,0.632,106.970,audio_features,6b8Be6ljOzmkOmFslEb23P,spotify:track:6b8Be6ljOzmkOmFslEb23P,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,https://api.spotify.com/v1/audio-analysis/6b8B...,225983.0,4.0
3,Galway Girl,Ed Sheeran,0afhq8XCExXpqazXczTSve,78,0.624,0.876,9.0,-3.374,1.0,0.1000,0.07350,0.000000,0.3270,0.781,99.943,audio_features,0afhq8XCExXpqazXczTSve,spotify:track:0afhq8XCExXpqazXczTSve,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,https://api.spotify.com/v1/audio-analysis/0afh...,170827.0,4.0
4,Photograph,Ed Sheeran,1HNkqx9Ahdgi1Ixy2xkKkL,83,0.614,0.379,4.0,-10.480,1.0,0.0476,0.60700,0.000464,0.0986,0.201,107.989,audio_features,1HNkqx9Ahdgi1Ixy2xkKkL,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,https://api.spotify.com/v1/audio-analysis/1HNk...,258987.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12131,Fool's Overture,Supertramp,5pSSEkT0963muzzIjsVkrs,51,0.406,0.306,3.0,-10.482,1.0,0.0372,0.31300,0.007900,0.0727,0.073,135.272,audio_features,5pSSEkT0963muzzIjsVkrs,spotify:track:5pSSEkT0963muzzIjsVkrs,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,https://api.spotify.com/v1/audio-analysis/5pSS...,652560.0,4.0
12132,Heart of the Sunrise - 2003 Remaster,Yes,7gC6Rbllqf1yXNC02e5jz2,44,0.362,0.507,1.0,-11.229,1.0,0.0394,0.01740,0.216000,0.1130,0.456,146.641,audio_features,7gC6Rbllqf1yXNC02e5jz2,spotify:track:7gC6Rbllqf1yXNC02e5jz2,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,https://api.spotify.com/v1/audio-analysis/7gC6...,634440.0,3.0
12133,Venus And Mars / Rock Show / Jet - Live / Rema...,Paul McCartney,6Ff77WXC58MkhLE5A1qgY1,0,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0
12134,Venus And Mars / Rock Show / Jet - Live / Rema...,Wings,6Ff77WXC58MkhLE5A1qgY1,0,0.331,0.733,2.0,-8.671,1.0,0.0468,0.08870,0.001740,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0


In [67]:
df_artists_ft['name'].value_counts()

name
Taylor Swift             157
Charlie Brown             81
Lacuna Coil               73
Flyleaf                   54
Why Don't We              54
                        ... 
Brandon Miles Shelton      1
Insaneintherain            1
Local Natives              1
Lou Rawls                  1
Wings                      1
Name: count, Length: 4321, dtype: int64