# Capstone API Dataframe Construction

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import numpy as np
import time
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
cid='59c22e75b26c4b1297d500b0ed55e9a7'
secret= ***

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Creating our first Beatles catalog dataframe

In [3]:
## Request all Beatles albums

beatles_uri = 'spotify:artist:3WrFJ7ztbogyGnTHbHJFl2'

album_name = []
album_uri = []

results = sp.artist_albums(beatles_uri, album_type='album', limit=50)
albums = results['items']

for album in albums:
    album_name.append(album['name'])
    album_uri.append(album['id'])

In [4]:
album_name

['Get Back (Rooftop Performance)',
 'Let It Be (Super Deluxe)',
 'Abbey Road (Super Deluxe Edition)',
 'The Beatles',
 'Live At The Hollywood Bowl',
 'Let It Be... Naked (Remastered)',
 'Yellow Submarine Songtrack',
 'On Air - Live At The BBC (Vol.2)',
 '1 (Remastered)',
 'Live At The BBC (Remastered)',
 'Let It Be (Remastered)',
 'Abbey Road (Remastered)',
 'Yellow Submarine (Remastered)',
 'The Beatles (Remastered)',
 'Magical Mystery Tour (Remastered)',
 "Sgt. Pepper's Lonely Hearts Club Band (Deluxe Edition)",
 "Sgt. Pepper's Lonely Hearts Club Band (Deluxe Edition)",
 "Sgt. Pepper's Lonely Hearts Club Band (Remastered)",
 "Sgt. Pepper's Lonely Hearts Club Band (Super Deluxe Edition)",
 'Revolver (Remastered)',
 'Rubber Soul (Remastered)',
 'Help! (Remastered)',
 'Beatles For Sale (Remastered)',
 "A Hard Day's Night (Remastered)",
 'With The Beatles (Remastered)',
 'Please Please Me (Remastered)']

In [5]:
## remove albums that are compilations, rarities collections, deluxe edition, etc.
exclude_index =[0, 1, 2, 3, 4, 5, 6, 7, 9, 15, 16, 18]

for i in sorted(exclude_index, reverse=True):
    del album_name[i]
    del album_uri[i]

In [6]:
album_name

['1 (Remastered)',
 'Let It Be (Remastered)',
 'Abbey Road (Remastered)',
 'Yellow Submarine (Remastered)',
 'The Beatles (Remastered)',
 'Magical Mystery Tour (Remastered)',
 "Sgt. Pepper's Lonely Hearts Club Band (Remastered)",
 'Revolver (Remastered)',
 'Rubber Soul (Remastered)',
 'Help! (Remastered)',
 'Beatles For Sale (Remastered)',
 "A Hard Day's Night (Remastered)",
 'With The Beatles (Remastered)',
 'Please Please Me (Remastered)']

In [7]:
## request track ids for every album track
track_ids = []

for i in album_uri:
    tracks = sp.album_tracks(i)
    for t in range(len(tracks['items'])):
        track_ids.append(tracks['items'][t]['id'])    

In [None]:
## Request data on track_ids via 2 GET methods for pertinent info.
test1 = sp.track(track_ids[50])
test2 = sp.audio_features(track_ids[50])

In [None]:
test1['artists']

In [None]:
test2

In [None]:
test3 = sp.audio_analysis(track_ids[50])

In [None]:
test3['track']['key_confidence']

In [8]:
## Define function to collect all track data we want and put in list.
## Pertinent data exists in two different dictionary locations

def trackdata(id):
    info = sp.track(id)
    features = sp.audio_features(id)
    
    # sp.track data
    song = info['name']
    artist = info['artists'][0]['name']
    album = info['album']['name']
    duration = info['duration_ms']
    track_num = info['track_number']
    release_date = info['album']['release_date']
    popularity = info['popularity']
    
    # sp.audio_features data
    danceability = features[0]['danceability']
    energy = features[0]['energy']
    key = features[0]['key']
    mode = features[0]['mode']
    speechiness = features[0]['speechiness']
    acousticness = features[0]['acousticness']
    instrumentalness = features[0]['instrumentalness']
    liveness = features[0]['liveness']
    valence = features[0]['valence']
    tempo = features[0]['tempo']
    
    #sp.audio_analysis data
    key_confidence = sp.audio_analysis(id)['track']['key_confidence']
    
    track = [song, artist, album, duration, track_num, release_date, popularity, danceability, energy, key, key_confidence, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo]
    return track

In [9]:
## use for loop and function to collect track data from all album tracks
tracks = []

for i in range(len(track_ids)):
    track = trackdata(track_ids[i])
    tracks.append(track)

In [10]:
## transform our lists into a dataframe
columns = ['song', 'artist', 'album', 'duration', 'track_num', 'release_date', 'popularity', 'danceability', 'energy', 'key', 'key_confidence', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

beatles_precision = pd.DataFrame(tracks, columns=columns)

In [11]:
beatles_precision

Unnamed: 0,song,artist,album,duration,track_num,release_date,popularity,danceability,energy,key,key_confidence,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Love Me Do - Mono / Remastered,The Beatles,1 (Remastered),140173,1,2000-11-13,57,0.524,0.880,0,0.563,1,0.0987,0.404,0.000191,0.1540,0.758,148.058
1,From Me To You - Mono / Remastered,The Beatles,1 (Remastered),116160,2,2000-11-13,59,0.581,0.821,0,0.000,1,0.0318,0.507,0.000000,0.1080,0.968,136.145
2,She Loves You - Mono / Remastered,The Beatles,1 (Remastered),140920,3,2000-11-13,66,0.377,0.916,4,0.000,0,0.0481,0.260,0.000000,0.0740,0.912,75.533
3,I Want To Hold Your Hand - Remastered 2015,The Beatles,1 (Remastered),145746,4,2000-11-13,71,0.490,0.715,7,0.560,1,0.0476,0.386,0.000000,0.3110,0.866,130.726
4,Can't Buy Me Love - Remastered 2015,The Beatles,1 (Remastered),131213,5,2000-11-13,56,0.426,0.760,0,0.712,1,0.0414,0.118,0.000000,0.3250,0.955,171.838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,Baby It's You - Remastered 2009,The Beatles,Please Please Me (Remastered),160520,10,1963-03-22,58,0.608,0.494,4,0.144,0,0.0345,0.778,0.000000,0.0926,0.879,112.421
216,Do You Want To Know A Secret - Remastered 2009,The Beatles,Please Please Me (Remastered),117013,11,1963-03-22,61,0.673,0.349,4,0.380,1,0.0368,0.608,0.000000,0.3800,0.609,124.451
217,A Taste Of Honey - Remastered 2009,The Beatles,Please Please Me (Remastered),123480,12,1963-03-22,48,0.420,0.372,1,0.395,0,0.0327,0.698,0.000000,0.1040,0.412,101.408
218,There's A Place - Remastered 2009,The Beatles,Please Please Me (Remastered),110493,13,1963-03-22,48,0.455,0.582,4,0.631,1,0.0292,0.629,0.000004,0.1720,0.927,140.928


In [13]:
beatles_precision.to_csv("beatles_precision.csv", sep = ',')

In [None]:
## further cleaning to do:
## - remove all 'remastered' bits from song and album titles
## - extract only year from data
## - find duplicates songs from the greatist hits '1' album

### Pulling solo career catalogs

In [None]:
## let's start with Paul

mccartney_uri = 'spotify:artist:4STHEaNw4mPZ2tzheohgXB'

solo_album_name = []
solo_album_uri = []

results = sp.artist_albums(mccartney_uri, album_type='album', limit=50, offset=15)
albums = results['items']

for album in albums:
    solo_album_name.append(album['name'])
    solo_album_uri.append(album['id'])

In [None]:
solo_album_name

In [None]:
exclude_index =[0, 1, 2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 31, 32, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 48, 49]

for i in sorted(exclude_index, reverse=True):
    del solo_album_name[i]
    del solo_album_uri[i]

In [None]:
solo_album_name

In [None]:
## I also want to include Prolific Paul's band 'Wings'

wings_uri = 'spotify:artist:3sFhA6G1N0gG1pszb6kk1m'

solo2_album_name = []
solo2_album_uri = []

results = sp.artist_albums(wings_uri)
albums = results['items']

for album in albums:
    solo2_album_name.append(album['name'])
    solo2_album_uri.append(album['id'])

In [None]:
solo2_album_name

In [None]:
exclude_index = [1, 3, 4, 5, 6, 8, 9, 10, 12, 13, 16, 17, 18]

for i in sorted(exclude_index, reverse=True):
    del solo2_album_name[i]
    del solo2_album_uri[i]

In [None]:
solo2_album_name

In [None]:
solo_album_name.append(solo2_album_name)
solo_album_uri.append(solo2_album_uri)

In [None]:
## John's turn

lennon_uri = 'spotify:artist:4x1nvY2FN8jxqAFA0DA02H'

solo3_album_name = []
solo3_album_uri = []

results = sp.artist_albums(lennon_uri, album_type='album', limit=50)
albums = results['items']

for album in albums:
    solo3_album_name.append(album['name'])
    solo3_album_uri.append(album['id'])

In [None]:
solo3_album_name

In [None]:
exclude_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 18, 20, 21, 22]

for i in sorted(exclude_index, reverse=True):
    del solo3_album_name[i]
    del solo3_album_uri[i]

In [None]:
solo_album_name.append(solo3_album_name)
solo_album_uri.append(solo3_album_uri)

In [None]:
## George is up

harrison_uri = 'spotify:artist:7FIoB5PHdrMZVC3q2HE5MS'

solo4_album_name = []
solo4_album_uri = []

results = sp.artist_albums(harrison_uri, album_type='album', limit=50)
albums = results['items']

for album in albums:
    solo4_album_name.append(album['name'])
    solo4_album_uri.append(album['id'])

In [None]:
solo4_album_name

In [None]:
exclude_index = [0, 1, 2, 3, 4, 10, 12, 14, 17, 18, 19]

for i in sorted(exclude_index, reverse=True):
    del solo4_album_name[i]
    del solo4_album_uri[i]

In [None]:
solo_album_name.append(solo4_album_name)
solo_album_uri.append(solo4_album_uri)

In [None]:
## it would be rude to not include Ringo's solo career

ringo_uri = 'spotify:artist:6DbJi8AcN5ANdtvJcwBSw8'

solo5_album_name = []
solo5_album_uri = []

results = sp.artist_albums(ringo_uri, album_type='album', limit=50)
albums = results['items']

for album in albums:
    solo5_album_name.append(album['name'])
    solo5_album_uri.append(album['id'])

In [None]:
solo5_album_name

In [None]:
exclude_index = [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 13, 14, 22, 24]

for i in sorted(exclude_index, reverse=True):
    del solo5_album_name[i]
    del solo5_album_uri[i]

In [None]:
solo_album_name.append(solo5_album_name)
solo_album_uri.append(solo5_album_uri)

In [63]:
## flatten uri list (with function found somewhere on stack overflow) & we're ready for album tracks

def flatten_list(_2d_list):
    flat_list = []
    # Iterate through the outer list
    for element in _2d_list:
        if type(element) is list:
            # If the element is of type list, iterate through the sublist
            for item in element:
                flat_list.append(item)
        else:
            flat_list.append(element)
    return flat_list

solo_uri = flatten_list(solo_album_uri)

In [None]:
track_ids = []

for i in solo_uri:
    tracks = sp.album_tracks(i)
    for t in range(len(tracks['items'])):
        track_ids.append(tracks['items'][t]['id'])  

In [None]:
tracks = []

for i in range(len(track_ids)):
    time.sleep(.5)
    track = trackdata(track_ids[i])
    tracks.append(track)

solo_career = pd.DataFrame(tracks, columns=columns)

In [None]:
solo_career

In [None]:
solo_career.to_csv("solo_beatles.csv", sep = ',')

### Contemporaries of the Beatles

In [31]:
## the Rolling Stones
stones_uri = 'spotify:artist:22bE4uQ6baNwSHPVcDxLCe'

peer1_album_name = []
peer1_album_uri = []

results = sp.artist_albums(stones_uri, album_type='album', limit=50, offset=55)
albums = results['items']

for album in albums:
    peer1_album_name.append(album['name'])
    peer1_album_uri.append(album['id'])

In [32]:
peer1_album_name

['Goats Head Soup (Remastered 2009)',
 'Goats Head Soup',
 'Exile On Main Street (2010 Re-Mastered)',
 'Exile On Main Street (Bonus Track Version)',
 'Exile On Main Street (Deluxe Version)',
 'Jamming With Edward',
 'Sticky Fingers (Remastered)',
 'Sticky Fingers (Deluxe)',
 'Sticky Fingers (Super Deluxe)',
 'Get Yer Ya Yas Out',
 "Get Yer Ya-Ya's Out! (Remastered)",
 "Get Yer Ya-Ya's Out! The Rolling Stones In Concert (40th Anniversary Edition)",
 'Let It Bleed (50th Anniversary Edition / Remastered 2019)',
 'Let It Bleed',
 'Let It Bleed',
 'Beggars Banquet (50th Anniversary Edition)',
 'Beggars Banquet',
 'Their Satanic Majesties Request',
 'Their Satanic Majesties Request (50th Anniversary Special Edition / Remastered)',
 'Flowers',
 'Flowers',
 'Between The Buttons',
 'Between The Buttons (US track listing)',
 'Between The Buttons',
 'Between The Buttons (UK Version)',
 'Got Live if you want it!',
 'got LIVE if you want it!',
 'Aftermath',
 'Aftermath (UK Version)',
 'Aftermath (U

In [33]:
exclude_index = [0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 18, 19, 22, 23, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 39, 41]

for i in sorted(exclude_index, reverse=True):
    del peer1_album_name[i]
    del peer1_album_uri[i]

In [34]:
peer1_album_name

['Exile On Main Street (2010 Re-Mastered)',
 'Sticky Fingers (Remastered)',
 "Get Yer Ya-Ya's Out! (Remastered)",
 'Let It Bleed',
 'Beggars Banquet',
 'Their Satanic Majesties Request',
 'Flowers',
 'Between The Buttons',
 'Aftermath',
 'December’s Children (And Everybody’s)',
 'Now!',
 '12 X 5',
 'England’s Newest Hitmakers',
 'The Rolling Stones']

In [37]:
## the Beach Boys

beach_uri = 'spotify:artist:3oDbviiivRWhXwIE8hxkVV'

peer2_album_name = []
peer2_album_uri = []

results = sp.artist_albums(beach_uri, album_type='album', limit=50, offset=30)
albums = results['items']

for album in albums:
    peer2_album_name.append(album['name'])
    peer2_album_uri.append(album['id'])

In [40]:
peer2_album_name

['Love You (Remastered)',
 '15 Big Ones (Remastered)',
 'The Beach Boys In Concert',
 'Holland (2000 Remaster)',
 'Carl & The Passions - So Tough (Remastered)',
 "Surf's Up",
 'Sunflower',
 'Live In London (Live In London/2001 Remastered)',
 'Live in 1970',
 '20/20 (Remastered)',
 'Friends (Remastered)',
 'Wild Honey (Remastered)',
 'Smiley Smile (Mono & Stereo)',
 'Smiley Smile (Remastered)',
 'Pet Sounds (40th Anniversary / Stereo Mix)',
 'Pet Sounds (50th Anniversary Edition)',
 'Pet Sounds (Original Mono & Stereo Mix Versions)',
 'Beach Boys Party! (Remastered)',
 'Party! (Mono & Stereo)',
 'Summer Days (And Summer Nights) [Mono & Stereo]',
 'Summer Days (And Summer Nights) [Remastered]',
 'The Beach Boys Today! (Remastered)',
 'Today! (Mono & Stereo)',
 "The Beach Boys' Christmas Album",
 "The Beach Boys' Christmas Album (Mono & Stereo)",
 'Beach Boys Concert (Live / Remastered)',
 'All Summer Long',
 'All Summer Long (Mono & Stereo)',
 'Shut Down, Vol. 2 (Mono & Stereo)',
 'Shut 

In [41]:
exclude_index = [0, 1, 2, 3, 4, 7, 8, 12, 15, 16, 18, 20, 22, 24, 25, 26, 28, 30,32, 34, 36]

for i in sorted(exclude_index, reverse=True):
    del peer2_album_name[i]
    del peer2_album_uri[i]

In [43]:
peer2_album_name

["Surf's Up",
 'Sunflower',
 '20/20 (Remastered)',
 'Friends (Remastered)',
 'Wild Honey (Remastered)',
 'Smiley Smile (Remastered)',
 'Pet Sounds (40th Anniversary / Stereo Mix)',
 'Beach Boys Party! (Remastered)',
 'Summer Days (And Summer Nights) [Mono & Stereo]',
 'The Beach Boys Today! (Remastered)',
 "The Beach Boys' Christmas Album",
 'All Summer Long (Mono & Stereo)',
 'Shut Down, Vol. 2 (Remastered)',
 'Little Deuce Coupe (Remastered)',
 'Surfer Girl (Remastered)',
 "Surfin' USA (Remastered)"]

In [44]:
peer1_album_name.append(peer2_album_name)
peer1_album_uri.append(peer2_album_uri)

In [55]:
## the Kinks

kinks_uri = 'spotify:artist:1SQRv42e4PjEYfPhS0Tk9E'

peer3_album_name = []
peer3_album_uri = []

results = sp.artist_albums(kinks_uri, album_type='album', limit=50, offset=35)
albums = results['items']

for album in albums:
    peer3_album_name.append(album['name'])
    peer3_album_uri.append(album['id'])

In [59]:
len(peer3_album_name)

9

In [57]:
exclude_index = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 18, 19, 20, 21, 22, 23, 25, 30]

for i in sorted(exclude_index, reverse=True):
    del peer3_album_name[i]
    del peer3_album_uri[i]

In [58]:
peer3_album_name

['Muswell Hillbillies',
 'Lola Versus Powerman and the Moneygoround, Pt. One',
 'Arthur (Deluxe Edition)',
 'The Kinks Are The Village Green Preservation Society',
 'Something Else (Deluxe Edition)',
 'Face to Face (Deluxe Edition)',
 'The Kink Kontroversy',
 'Kinda Kinks',
 'Kinks']

In [60]:
peer1_album_name.append(peer3_album_name)
peer1_album_uri.append(peer3_album_uri)

In [61]:
peer1_album_name

['Exile On Main Street (2010 Re-Mastered)',
 'Sticky Fingers (Remastered)',
 "Get Yer Ya-Ya's Out! (Remastered)",
 'Let It Bleed',
 'Beggars Banquet',
 'Their Satanic Majesties Request',
 'Flowers',
 'Between The Buttons',
 'Aftermath',
 'December’s Children (And Everybody’s)',
 'Now!',
 '12 X 5',
 'England’s Newest Hitmakers',
 'The Rolling Stones',
 ["Surf's Up",
  'Sunflower',
  '20/20 (Remastered)',
  'Friends (Remastered)',
  'Wild Honey (Remastered)',
  'Smiley Smile (Remastered)',
  'Pet Sounds (40th Anniversary / Stereo Mix)',
  'Beach Boys Party! (Remastered)',
  'Summer Days (And Summer Nights) [Mono & Stereo]',
  'The Beach Boys Today! (Remastered)',
  "The Beach Boys' Christmas Album",
  'All Summer Long (Mono & Stereo)',
  'Shut Down, Vol. 2 (Remastered)',
  'Little Deuce Coupe (Remastered)',
  'Surfer Girl (Remastered)',
  "Surfin' USA (Remastered)"],
 ['Muswell Hillbillies',
  'Lola Versus Powerman and the Moneygoround, Pt. One',
  'Arthur (Deluxe Edition)',
  'The Kinks

In [64]:
peer_tracks = flatten_list(peer1_album_uri)

In [66]:
track_ids = []

for i in peer_tracks:
    tracks = sp.album_tracks(i)
    for t in range(len(tracks['items'])):
        track_ids.append(tracks['items'][t]['id'])

In [68]:
tracks = []

for i in range(len(track_ids)):
    time.sleep(.5)
    track = trackdata(track_ids[i])
    tracks.append(track)

peer_albums = pd.DataFrame(tracks, columns=columns)

In [69]:
peer_albums

Unnamed: 0,song,artist,album,duration,track_num,release_date,popularity,danceability,energy,key,key_confidence,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Rocks Off,The Rolling Stones,Exile On Main Street (2010 Re-Mastered),271933,1,1972-05-12,54,0.485,0.943,4,0.464,1,0.0416,0.0639,0.17900,0.1440,0.889,141.982
1,Rip This Joint,The Rolling Stones,Exile On Main Street (2010 Re-Mastered),143293,2,1972-05-12,46,0.548,0.952,2,0.809,1,0.0422,0.1180,0.83700,0.3010,0.906,98.040
2,Shake Your Hips,The Rolling Stones,Exile On Main Street (2010 Re-Mastered),179000,3,1972-05-12,43,0.374,0.787,2,0.043,1,0.0617,0.6900,0.83100,0.2030,0.763,180.496
3,Casino Boogie,The Rolling Stones,Exile On Main Street (2010 Re-Mastered),214466,4,1972-05-12,42,0.530,0.691,2,0.640,1,0.0307,0.3740,0.11100,0.2900,0.581,117.046
4,Tumbling Dice,The Rolling Stones,Exile On Main Street (2010 Re-Mastered),226333,5,1972-05-12,61,0.615,0.910,11,0.714,1,0.0347,0.5000,0.51100,0.2810,0.770,111.013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
627,I Gotta Go Now,The Kinks,Kinks,173226,22,1964-10-02,22,0.571,0.514,9,0.365,1,0.0481,0.2270,0.00001,0.0709,0.651,106.363
628,Things Are Getting Better,The Kinks,Kinks,114600,23,1964-10-02,21,0.559,0.869,2,0.443,1,0.1020,0.1890,0.00000,0.0716,0.451,118.696
629,I've Got That Feeling,The Kinks,Kinks,162600,24,1964-10-02,24,0.574,0.763,6,0.016,0,0.0389,0.1870,0.00000,0.0892,0.852,127.698
630,Too Much Monkey Business - Alternate Take,The Kinks,Kinks,127893,25,1964-10-02,20,0.562,0.878,5,0.451,1,0.1470,0.0213,0.00000,0.3690,0.958,117.194


In [71]:
peer_albums.to_csv("peer_groups.csv", sep = ',')