# Connecting to Spotify API and Scraping from playlists.

---

### In this notebook, we will explore the spotify API, extract songs from playlists, which will be used to build a database of music.

---

## Import Libraries

In [17]:
import pandas as pd
import re
import random
from random import randint
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from time import sleep
from tqdm import tqdm

## Import Spotify API credentials. 

In [2]:
 secrets_file = open("secrets.txt","r")

In [3]:
#string = secrets_file.read()
#string.split('\n')

['cid:dc735e5f93a34ef284c10ceabecd270e',
 'csecret:d5b2ce46ac224e3bb0b06504ca5c2c83']

In [4]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1]

In [5]:
#InitializeSpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['csecret']))

## Extract song information from spotify "spotipy" api wrapper

In [9]:
def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3))
    return tracks

In [10]:
playlist1 = get_playlist_tracks("6tIxyT1Gq6O7DK7rIEUEZo")
playlist2 = get_playlist_tracks("4pbDDX7np7Q1H0ghL7U9o7")

## Extract and transform the features, transforming everything into a dataframe

In [21]:
def artist_song(x):
    
    artist = []
    song = []
    features = []
    counter = 0
    
    for i in tqdm(x):
        song.append(x[counter]['track']['name'])
        artist.append(x[counter]['track']['artists'][0]['name'])
        features.append(x[counter]["track"]["uri"])
        
        features.append(sp.audio_features(x[counter]["track"]["uri"]))
        counter += 1
        if counter == 300:
            sleep()
    return pd.DataFrame({'artist': artist, 'song': song, 'features': features})

In [22]:
#%%time
playlist1df = artist_song(playlist1)

  0%|                                                  | 0/9999 [00:00<?, ?it/s]Max Retries reached
  0%|                                                  | 0/9999 [00:02<?, ?it/s]


SpotifyException: http status: 429, code:-1 - /v1/audio-features/?ids=5RYLa5P4qweEAKq5U1gdcK:
 Max Retries, reason: too many 429 error responses

In [None]:
%%time
playlist2df = artist_song(playlist2)

In [None]:
display(playlist1df.head(2))
display(playlist2df.head(2))

 **We can see that the features are packed into a list. We will need to unpack these and disperse them into columns**

In [None]:
#unpacking the features 
playlist1df['features'] = json_normalize(playlist1df['features'])
playlist2df['features'] = json_normalize(playlist2df['features'])

In [None]:
#creating new dataframes with the features in their own columns
playlist1features = pd.DataFrame.from_records(playlist1df.features.dropna().tolist())
playlist2features = pd.DataFrame.from_records(playlist2df.features.dropna().tolist())

**Concatenating the features dataframes with that of the song names and artists.**

In [None]:
#reset indices of both dataframes
playlist1df = playlist1df.reset_index(drop = True)
playlist1features = playlist1features.reset_index(drop = True)

#concatenate dataframes and drop 'features' column
playlist_db1 = pd.concat([playlist1df,playlist1features], axis = 1)
playlist_db1 = playlist_db1.drop(columns = 'features')

playlist_db1.head() 

In [None]:
#reset indices of both dataframes
playlist2df = playlist2df.reset_index(drop = True)
playlist2features = playlist2features.reset_index(drop = True)

#concatenate dataframes and drop 'features' column
playlist_db2 = pd.concat([playlist2df,playlist2features], axis = 1)
playlist_db2 = playlist_db2.drop(columns = 'features')

playlist_db2.head()

**Concatenating the two, prepared dataframes together into one master dataframe.**

In [None]:
playlist_db1 = playlist_db1.reset_index(drop = True)
playlist_db2 = playlist_db2.reset_index(drop = True)
final_p = pd.concat([playlist_db1,playlist_db2], axis = 0)
final_p = final_p.drop(columns= ["type","track_href","analysis_url", 'key', 'id', 'uri', 'duration_ms','mode', 'time_signature'], axis=1)
final_p = final_p.drop_duplicates()
final_p = final_p.reset_index(drop = True)
final_p

## Export dataframe to csv.

In [None]:
final_p.to_csv('song_recommender_playlist.csv',index = False)