<img src="https://github.com/rjpost20/Onramp-Project/blob/main/data/pexels-vishnu-r-nair-1105666.jpg?raw=true">
Image by <a href="https://www.pexels.com/@vishnurnair/" >Vishnu R Nair</a> on <a href="https://www.pexels.com/photo/people-at-concert-1105666/" >Pexels.com</a>

# *Onramp x Vanguard Spotify Project*

## By Ryan Posternak

<br>

## Imports

In [148]:
import numpy as np
import pandas as pd
from pprint import pprint
import time
import re

# import http.client
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

<br>

# Section 1: Data Ingestion

### Establish connection to Spotify's API

In [29]:
# NOTE(review): leftover experiment with a raw http.client connection. It is fully commented out
# (as is the matching `import http.client` in the imports cell), so this cell does nothing when run.
# conn = http.client.HTTPConnection('https://api.spotify.com', 80, timeout=10)
# print(conn)

In [170]:
# Load the Spotify API credentials from a local text file (they are sensitive, so they are kept
# out of the notebook). The first three lines of the file are read as the client ID, the client
# secret and the redirect URI, in that order.
with open("API.txt") as f:
    text = f.readlines()

client_id, client_secret, redirect_uri = (text[line_no].strip() for line_no in range(3))

# Build the Spotipy credential manager from the API keys
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
    requests_timeout=100,  # Default timeout setting is too short
)

# Create the Spotipy client, authenticating through the credential manager
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Part 1: Artists

### Obtain artist data for top 20 favorite artists

In [73]:
# The 20 favorite artists whose data will be pulled from Spotify
artists = [
    'Beyoncé', 'Billie Eilish', 'blackbear', 'Bob Dylan', 'Bob Marley',
    'Cuco', 'Doja Cat', 'Drake', 'Ellie Goulding', 'J. Cole',
    'Jack Johnson', 'Khalid', 'Kid Cudi', 'Pink Floyd', 'Post Malone',
    'Simon & Garfunkel', 'The Beatles', 'The Chainsmokers', 'The Weeknd', 'Tove Lo',
]

# Sanity check: the list must hold exactly 20 names
assert len(artists) == 20

In [74]:
# Inspect the raw structure of an artist search response, using a single example artist
preview = sp.search(
    'The Beatles',
    type='artist',
    limit=1,
)
pprint(preview)

{'artists': {'href': 'https://api.spotify.com/v1/search?query=The+Beatles&type=artist&offset=0&limit=1',
             'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3WrFJ7ztbogyGnTHbHJFl2'},
                        'followers': {'href': None, 'total': 23541504},
                        'genres': ['beatlesque',
                                   'british invasion',
                                   'classic rock',
                                   'merseybeat',
                                   'psychedelic rock',
                                   'rock'],
                        'href': 'https://api.spotify.com/v1/artists/3WrFJ7ztbogyGnTHbHJFl2',
                        'id': '3WrFJ7ztbogyGnTHbHJFl2',
                        'images': [{'height': 640,
                                    'url': 'https://i.scdn.co/image/ab6761610000e5ebe9348cc01ff5d55971b22433',
                                    'width': 640},
                                   {'height': 

In [186]:
# Collect one record per favorite artist from the Spotify search endpoint.
# Each attribute is accumulated in its own list (one entry per artist, in the order of `artists`),
# and the lists are then gathered into `artists_dict`, keyed by column name.

# Define dictionary to contain all artist data
artists_dict = {}

# Define list containers for artists info
artist_ids = []
artist_names = []
external_urls = []
genres = []
image_urls = []
followers = []
popularities = []
types = []
artist_uris = []

# Append artist info to each respective list
for artist in artists:
    artist_info = sp.search(artist, limit=1, type='artist')
    search_hits = artist_info['artists']['items']
    if not search_hits:
        # Fail with a message naming the artist rather than a bare "list index out of range"
        raise IndexError('Spotify search returned no artist for {!r}'.format(artist))
    info_items = search_hits[0]

    artist_ids.append(info_items['id'])
    artist_names.append(info_items['name'])
    external_urls.append(info_items['external_urls']['spotify'])
    # Take first genre from list; the list can be empty for artists Spotify has not classified,
    # in which case None is stored so every list keeps one entry per artist
    genres.append(info_items['genres'][0] if info_items['genres'] else None)
    # Take first image url from list, or None when the artist has no images
    image_urls.append(info_items['images'][0]['url'] if info_items['images'] else None)
    followers.append(info_items['followers']['total'])
    popularities.append(info_items['popularity'])
    types.append(info_items['type'])
    artist_uris.append(info_items['uri'])

    # Set a delay after each artist call (don't want to get in trouble with the API!)
    time.sleep(0.5)

# Add lists to dictionary holding compiled artist data
artists_dict['artist_id'] = artist_ids
artists_dict['artist_name'] = artist_names
artists_dict['external_url'] = external_urls
artists_dict['genre'] = genres
artists_dict['image_url'] = image_urls
artists_dict['followers'] = followers
artists_dict['popularity'] = popularities
artists_dict['type'] = types
artists_dict['artist_uri'] = artist_uris

In [116]:
# Build the artists dataframe from the compiled dictionary (one column per key)
artists_df = pd.DataFrame(artists_dict)
assert len(artists_df) == 20  # Expect exactly one row per favorite artist

# Show the first few rows of the artists dataframe
artists_df.head()

Unnamed: 0,artist_id,artist_name,external_url,genre,image_url,followers,popularity,type,artist_uri
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,https://open.spotify.com/artist/6vWDO969PvNqNY...,dance pop,https://i.scdn.co/image/ab6761610000e5eb676338...,32114388,88,artist,spotify:artist:6vWDO969PvNqNYHIOW5v0m
1,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,https://open.spotify.com/artist/6qqNVTkY8uBg9c...,art pop,https://i.scdn.co/image/ab6761610000e5ebd8b998...,68569580,88,artist,spotify:artist:6qqNVTkY8uBg9cP3Jd7DAH
2,2cFrymmkijnjDg9SS92EPM,blackbear,https://open.spotify.com/artist/2cFrymmkijnjDg...,electropop,https://i.scdn.co/image/ab6761610000e5eb4f7d04...,4774278,80,artist,spotify:artist:2cFrymmkijnjDg9SS92EPM
3,74ASZWbe4lXaubB36ztrGX,Bob Dylan,https://open.spotify.com/artist/74ASZWbe4lXaub...,album rock,https://i.scdn.co/image/ab6772690000c46cf79ca0...,5774389,71,artist,spotify:artist:74ASZWbe4lXaubB36ztrGX
4,2QsynagSdAqZj3U9HgDzjD,Bob Marley & The Wailers,https://open.spotify.com/artist/2QsynagSdAqZj3...,reggae,https://i.scdn.co/image/b5aae2067db80f694a980e...,10849534,78,artist,spotify:artist:2QsynagSdAqZj3U9HgDzjD


## Part 2: Albums

### Obtain album data for up to 10 albums (before removing duplicates) for each of the top 20 favorite artists

In [164]:
# Inspect the raw structure of an artist-albums response, using Beyoncé's artist ID as the example
preview = sp.artist_albums(
    artist_id='6vWDO969PvNqNYHIOW5v0m',
    country='US',
    limit=10,
)
pprint(preview)

{'href': 'https://api.spotify.com/v1/artists/6vWDO969PvNqNYHIOW5v0m/albums?offset=0&limit=10&include_groups=album,single,compilation,appears_on&market=US',
 'items': [{'album_group': 'album',
            'album_type': 'album',
            'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6vWDO969PvNqNYHIOW5v0m'},
                         'href': 'https://api.spotify.com/v1/artists/6vWDO969PvNqNYHIOW5v0m',
                         'id': '6vWDO969PvNqNYHIOW5v0m',
                         'name': 'Beyoncé',
                         'type': 'artist',
                         'uri': 'spotify:artist:6vWDO969PvNqNYHIOW5v0m'}],
            'external_urls': {'spotify': 'https://open.spotify.com/album/6FJxoadUE4JNVwWHghBwnb'},
            'href': 'https://api.spotify.com/v1/albums/6FJxoadUE4JNVwWHghBwnb',
            'id': '6FJxoadUE4JNVwWHghBwnb',
            'images': [{'height': 640,
                        'url': 'https://i.scdn.co/image/ab67616d0000b2730e58a0f8308c1

In [169]:
# Print the Spotify URI of every album in the preview response
album_items = preview['items']
for album in album_items:
    album_uri = album['uri']
    pprint(album_uri)

'spotify:album:6FJxoadUE4JNVwWHghBwnb'
'spotify:album:3ctW8o8ABBCNWWkdIvEGgV'
'spotify:album:7kUuNU2LRmr9XbwLHXU9UZ'
'spotify:album:46356QHIb3lRX1M7SLydKF'
'spotify:album:552zi1M53PQAX5OH4FIdTx'
'spotify:album:35S1JCj5paIfElT2GODl6x'
'spotify:album:4tyEy1BUd2ZMgT3qP70T3F'
'spotify:album:7dK54iZuOxXFarGhXwEXfF'
'spotify:album:4X6b6POxbjX9inC7TWQd54'
'spotify:album:2UJwKSBUz6rtW4QLK74kQu'


**Remarks:**
- It looks like duplicate albums will be an issue. Further, slight variations in the name or edition (regular edition vs. deluxe edition, for example) will also be an issue. We'll address this by using a regular expression to strip any text inside square brackets or parentheses from each album name before checking for duplicates, which should catch most of them.

In [166]:
# Print each previewed album name with any parenthesised or square-bracketed text removed,
# to see how well that normalisation collapses edition variants onto one name.
# The pattern is a raw string so that the backslash escapes reach the regex engine untouched
# (in a plain string literal, `\(` and `\[` are invalid escape sequences and trigger a warning).
for album in preview['items']:
    pprint(re.sub(r"[\(\[].*?[\)\]]", "", album['name']).strip())

'RENAISSANCE'
'RENAISSANCE'
'The Lion King: The Gift'
'The Lion King: The Gift'
'The Lion King: The Gift'
'HOMECOMING: THE LIVE ALBUM'
'HOMECOMING: THE LIVE ALBUM'
'Lemonade'
'Lemonade'
'BEYONCÉ'


In [167]:
# Print the track count reported for every album in the preview response
album_items = preview['items']
for album in album_items:
    track_count = album['total_tracks']
    pprint(track_count)

16
16
17
17
27
40
40
13
13
20


In [203]:
# Collect album records for every artist gathered earlier (one API call per entry in `artist_ids`).
# Each attribute is accumulated in its own list, and the lists are then gathered into
# `albums_dict`, keyed by column name. Within one artist, albums whose names differ only by
# parenthesised or bracketed text (e.g. edition markers) are treated as duplicates and only the
# first one returned is kept.

# Define dictionary to contain all album data
albums_dict = {}

# Set up containers for albums info
album_ids = []
album_names = []
external_urls = []
image_urls = []
release_dates = []
total_tracks = []
types = []
album_uris = []
album_artist_ids = []

# Matches text wrapped in parentheses or square brackets, e.g. "(Deluxe)" or "[Platinum Edition]".
# Compiled once, and written as a raw string so the backslashes are not invalid string escapes.
bracketed_text = re.compile(r"[\(\[].*?[\)\]]")

# Append album info to each respective list
for artist_id in artist_ids:
    # API call for up to 10 albums by artist
    # NOTE(review): no album_type filter is passed, so the response may also contain singles,
    # compilations and "appears on" releases - confirm this is intended
    albums_info = sp.artist_albums(artist_id=artist_id, limit=10, country='US')

    # Normalised names already kept for this artist (prevents duplicate albums from being added)
    dup_album_check = set()

    # Retrieve info for each album
    for album in albums_info['items']:

        unique_album_name = bracketed_text.sub("", album['name']).strip()
        if unique_album_name in dup_album_check:
            continue
        dup_album_check.add(unique_album_name)

        album_ids.append(album['id'])
        album_names.append(album['name'])
        external_urls.append(album['external_urls']['spotify'])
        # Take first image url from list, or None when no cover art is returned
        image_urls.append(album['images'][0]['url'] if album['images'] else None)
        release_dates.append(album['release_date'])
        total_tracks.append(album['total_tracks'])
        types.append(album['type'])
        album_uris.append(album['uri'])
        album_artist_ids.append(artist_id)

    # Set a delay after each artist call
    time.sleep(0.5)

# Add lists to dictionary holding compiled albums data
albums_dict['album_id'] = album_ids
albums_dict['album_name'] = album_names
albums_dict['external_url'] = external_urls
albums_dict['image_url'] = image_urls
albums_dict['release_date'] = release_dates
albums_dict['total_tracks'] = total_tracks
albums_dict['type'] = types
albums_dict['album_uri'] = album_uris
albums_dict['artist_id'] = album_artist_ids

In [210]:
# Build the albums dataframe from the compiled dictionary (one column per key)
albums_df = pd.DataFrame(albums_dict)

# Report how many albums were collected, then show the first few rows
album_count = len(albums_df)
print('Albums:', album_count)
albums_df.head()

Albums: 135


Unnamed: 0,album_id,album_name,external_url,image_url,release_date,total_tracks,type,album_uri,artist_id
0,6FJxoadUE4JNVwWHghBwnb,RENAISSANCE,https://open.spotify.com/album/6FJxoadUE4JNVwW...,https://i.scdn.co/image/ab67616d0000b2730e58a0...,2022-07-29,16,album,spotify:album:6FJxoadUE4JNVwWHghBwnb,6vWDO969PvNqNYHIOW5v0m
1,7kUuNU2LRmr9XbwLHXU9UZ,The Lion King: The Gift [Deluxe Edition],https://open.spotify.com/album/7kUuNU2LRmr9Xbw...,https://i.scdn.co/image/ab67616d0000b27360e232...,2020-07-31,17,album,spotify:album:7kUuNU2LRmr9XbwLHXU9UZ,6vWDO969PvNqNYHIOW5v0m
2,35S1JCj5paIfElT2GODl6x,HOMECOMING: THE LIVE ALBUM,https://open.spotify.com/album/35S1JCj5paIfElT...,https://i.scdn.co/image/ab67616d0000b2738e5252...,2019-04-17,40,album,spotify:album:35S1JCj5paIfElT2GODl6x,6vWDO969PvNqNYHIOW5v0m
3,7dK54iZuOxXFarGhXwEXfF,Lemonade,https://open.spotify.com/album/7dK54iZuOxXFarG...,https://i.scdn.co/image/ab67616d0000b27389992f...,2016-04-23,13,album,spotify:album:7dK54iZuOxXFarGhXwEXfF,6vWDO969PvNqNYHIOW5v0m
4,2UJwKSBUz6rtW4QLK74kQu,BEYONCÉ [Platinum Edition],https://open.spotify.com/album/2UJwKSBUz6rtW4Q...,https://i.scdn.co/image/ab67616d0000b2730d1d6e...,2014-11-24,20,album,spotify:album:2UJwKSBUz6rtW4QLK74kQu,6vWDO969PvNqNYHIOW5v0m


In [208]:
# Display the albums dataframe (the notebook renders a truncated view of long frames)
albums_df

Unnamed: 0,album_id,album_name,external_url,image_url,release_date,total_tracks,type,album_uri,artist_id
0,6FJxoadUE4JNVwWHghBwnb,RENAISSANCE,https://open.spotify.com/album/6FJxoadUE4JNVwW...,https://i.scdn.co/image/ab67616d0000b2730e58a0...,2022-07-29,16,album,spotify:album:6FJxoadUE4JNVwWHghBwnb,6vWDO969PvNqNYHIOW5v0m
1,7kUuNU2LRmr9XbwLHXU9UZ,The Lion King: The Gift [Deluxe Edition],https://open.spotify.com/album/7kUuNU2LRmr9Xbw...,https://i.scdn.co/image/ab67616d0000b27360e232...,2020-07-31,17,album,spotify:album:7kUuNU2LRmr9XbwLHXU9UZ,6vWDO969PvNqNYHIOW5v0m
2,35S1JCj5paIfElT2GODl6x,HOMECOMING: THE LIVE ALBUM,https://open.spotify.com/album/35S1JCj5paIfElT...,https://i.scdn.co/image/ab67616d0000b2738e5252...,2019-04-17,40,album,spotify:album:35S1JCj5paIfElT2GODl6x,6vWDO969PvNqNYHIOW5v0m
3,7dK54iZuOxXFarGhXwEXfF,Lemonade,https://open.spotify.com/album/7dK54iZuOxXFarG...,https://i.scdn.co/image/ab67616d0000b27389992f...,2016-04-23,13,album,spotify:album:7dK54iZuOxXFarGhXwEXfF,6vWDO969PvNqNYHIOW5v0m
4,2UJwKSBUz6rtW4QLK74kQu,BEYONCÉ [Platinum Edition],https://open.spotify.com/album/2UJwKSBUz6rtW4Q...,https://i.scdn.co/image/ab67616d0000b2730d1d6e...,2014-11-24,20,album,spotify:album:2UJwKSBUz6rtW4QLK74kQu,6vWDO969PvNqNYHIOW5v0m
...,...,...,...,...,...,...,...,...,...
130,6YlDIxqEjvY63ffH6AwCjd,After Hours (Deluxe),https://open.spotify.com/album/6YlDIxqEjvY63ff...,https://i.scdn.co/image/ab67616d0000b27380880b...,2020-04-03,17,album,spotify:album:6YlDIxqEjvY63ffH6AwCjd,1Xyo4u8uXC1ZmMpatF05PJ
131,48I4Jtcqu5K5jZWadn035d,Sunshine Kitty (Paw Prints Edition),https://open.spotify.com/album/48I4Jtcqu5K5jZW...,https://i.scdn.co/image/ab67616d0000b27393d610...,2020-05-22,22,album,spotify:album:48I4Jtcqu5K5jZWadn035d,4NHQUGzhtTLFvgF5SZesLK
132,6jggnLM3SdDnjQ3GWmIZ4L,BLUE LIPS (lady wood phase II),https://open.spotify.com/album/6jggnLM3SdDnjQ3...,https://i.scdn.co/image/ab67616d0000b2735a032c...,2017-11-17,14,album,spotify:album:6jggnLM3SdDnjQ3GWmIZ4L,4NHQUGzhtTLFvgF5SZesLK
133,1tuekzsMZQOuiMejKP6t2Y,Lady Wood,https://open.spotify.com/album/1tuekzsMZQOuiMe...,https://i.scdn.co/image/ab67616d0000b2739f0c01...,2016-10-28,12,album,spotify:album:1tuekzsMZQOuiMejKP6t2Y,4NHQUGzhtTLFvgF5SZesLK
