In [1]:
import base64  # For converting to base64
import pandas as pd
import psycopg2 as pg2
import pyspark
from pyspark.sql import SparkSession
import requests
import time
import webbrowser

from urllib.parse import urlencode

In [2]:
# ID & Secret file
api_info = pd.read_excel('../logins.xlsx')

In [6]:
def request_user_auth(**kwargs):
    '''
    Requests authorization to access user's Spotify "app". Opens redirect_uri with code in url. Code is used for requesting authorization token. For example, given a redirect_uri of "http://localhost:7777/callback", a sucessful authorization request will open page 'http://localhost:7777/callback/code=XXXXX', with XXXX being the authorization code.  
    
    ARGUMENTS:
        keyword arguments, pass in the following keys:
            client_id: Spotify API project client id
            client_secret: Spotify API project client secret
            redirect_uri: Redirect url for obtaining authorization code
            scope: Scope of access
    '''
    webbrowser.open('https://accounts.spotify.com/authorize?' + urlencode(kwargs))

In [5]:
def encode_project_info(client_id, client_secret):
    '''
    Encodes string according to spotify web API documentation. "Basic + client_id:client_secret.

    ARGUMENTS:
        client_id: Client ID from Spotify API project session.
        client_secret: Client secret from Spotify API project session.
    '''
    encoded_client_info = client_id.encode() + b':' + client_secret.encode()
    return 'Basic ' + base64.b64encode(encoded_client_info).decode('utf-8')

In [7]:
def request_access_token(grant_type, code, redirect_uri, encoded_authorization, content_type):
    '''
    Exchanges the authorization code retrieved from requesting user access for an authentication token.

    ARGUMENTS:
        grant_type: Authorization token grant method 
        code: The auth code retrieved from request user auth for auth token exchange
        redirect_uri: Redirect URL according to spotify "app" redirect url
        encoded_authorization: Encoded client ID and secret according to Spotify documentation.
        content_type: "application/x-www-form-urlencoded" as required by Spotify
    '''
    # Request access token
    token_data = {
        'grant_type': grant_type,
        'code': code,
        'redirect_uri': redirect_uri
    }

    token_headers = {
        'Authorization': encoded_authorization,
        'Content-Type': content_type
    }
    
    # API token url, post request to /api/token endpoint
    return requests.post('https://accounts.spotify.com/api/token', data=token_data, headers=token_headers).json()

In [15]:
def get_recent_played_tracks(access_token):
    '''
    Uses access token to request most recently played songs from user.

    ARGUMENTS:    
      access_token: Access token for requesting access to user's Spotify information. 
    '''
    recent_songs_headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + access_token
    }

    return requests.get('https://api.spotify.com/v1/me/player/recently-played?limit=50', headers=recent_songs_headers).json()

In [16]:
get_recent_played_tracks(token)

{'error': {'status': 403, 'message': 'Insufficient client scope'}}

In [10]:
client_id = api_info['client_id'][0]
client_secret = api_info['client_secret'][0]
redirect_uri = 'http://localhost:7777/callback'
response_type = 'code'
scope = 'user-read-recently-played'

request_user_auth(client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri, response_type=response_type, scope=scope)

In [11]:
# From redirect uri; save authorization code
code = 'AQDVo-EJB4fkEhvRRSi9xyiT9yd5trXMqk1PgXrNx-3QGPWUEO_5UivguZT8euwH31PEJ_kluk943iMniWW0yOq6u2AZzMXpH1794xOqmk876uHbSuNO5ZHKo26EuRCRS7h03JYdtQqvZIB2QfQa-jHV7TTseR_2-k3OO0cVO6vTjvi9FzN3G0PisLm88PrWUcFwiw6z00uV4lNKQg'

In [12]:
r = request_access_token(grant_type='authorization_code', code=code, redirect_uri=redirect_uri, encoded_authorization=encode_project_info(client_id, client_secret), content_type='application/x-www-form-urlencoded')

In [31]:
token = r['access_token']
recent_songs_request = get_recent_played_tracks(token)

In [42]:
recent_songs_request['items'][0]['track']['album']['id']

'6QX9efMZlYa0HQic1iO3PX'

In [34]:
# artist_headers = {
# 'Content-Type': 'application/json',
# 'Authorization': 'Bearer ' + token
# }
# artist_info = requests.get('https://api.spotify.com/v1/artists/5na1LmEmK2VzNLje9snJYW', headers=artist_headers).json()


In [39]:
# # artist_followers = artist_info['followers']['total']
# artist_popularity = artist_info['popularity']

65

In [43]:
# album_headers = {
# 'Content-Type': 'application/json',
# 'Authorization': 'Bearer ' + token
# }
# album_info = requests.get('https://api.spotify.com/v1/albums/6QX9efMZlYa0HQic1iO3PX', headers=album_headers).json()

In [None]:
# def create_rdd_tables():
#     'Creates resilient distributed datasets and populates tables with the maximum number of most recent songs played by Spotify user into three tables. '

In [None]:

# fact table stuff
    # track_id = recent_songs_request['items'][i]['track']['id]
    # artist_id = recent_songs_request['items'][i]['track']['album']['artists'][0]['id']
    # album_id = recent_songs_request['items'][i]['track']['album']['id']
    # track_name = recent_songs_request['items'][i]['track']['name']
    # track_uri = recent_songs_request['items'][i]['track']['uri']
    # track_length_ms = recent_songs_request['items'][i]['track']['duration_ms']
    # track_popularity = recent_songs_request['items'][i]['track']['popularity']
    # played_at = recent_songs_request['items'][0][]'played_at']

# artist stuff
    # artist_id = recent_songs_request['items'][i]['track']['album']['artists'][0]['id']
    # artist_name = recent_songs_request['items'][i]['track']['album']['artists'][0]['name']
    # artist_url = recent_songs_request['items'][i]['track']['artists'][0]['uri']
    # artist_info_requests = requests.get('https://api.spotify.com/v1/artists/5na1LmEmK2VzNLje9snJYW', headers=artist_headers).json()
    # artist_followers = artist_info['followers']['total']
    # artist_popularity = artist_info['popularity']


# album stuff 
    # album_id = recent_songs_request['items'][i]['track']['album']['id']
    # album_name = recent_songs_request['items'][i]['track']['album']['name']
    # album_url = recent_songs_request['items'][i]['track']['album']['uri']
    # album_info = requests.get('https://api.spotify.com/v1/albums/6QX9efMZlYa0HQic1iO3PX', headers=album_headers).json()
    # album_popularity = album_info['popularity']
    # album_total_tracks = recent_songs_request['items'][i]['track']['album']['total_tracks']
    # album_release_date = recent_songs_request['items'][i]['track']['album']['release_date']

# 
# functions for transforming extracted data or use sql stored procedure to change this
    # if album, track, or artist then concatenate
    # 'open.spotify.com/' + '{type_of_data}/' + album/song/track_uri.split(':')[-1]
    # date time needs to be 


# within items []
# id -> song id
# name -> band name 
# json file format hierarchy
#   items -> track -> album / artists 

In [None]:
spark = pyspark.Session.builder.master('local[4]') \
                       .appName('spotipy_etl') \
                        .getOrCreate()

AttributeError: module 'pyspark' has no attribute 'Session'