## Merge Flute World and Spotify to SQL Database for Solo Flute Music
The same workflow can be applied to other Flute World categories (e.g. flute and piano music).

In [1]:
# Import necessary packages
import json
import os
import pickle
import re
from time import sleep

import requests
import spotipy
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from spotipy.oauth2 import SpotifyClientCredentials

## Step 0
Scrape music from fluteworld.com with Selenium.

In [None]:
# Scrape FluteWorld for solo flute music. The text of the listing container is
# printed at the end; save that output as a txt file to be imported at Step 1.
path_to_firefoxDriver = 'yourPath_with_geckodriver'

# Pauses, in seconds: one after the initial page request, one after every scroll.
INITIAL_LOAD_PAUSE = 120
SCROLL_PAUSE_TIME = 60

driver = webdriver.Firefox(executable_path=path_to_firefoxDriver)
try:
    driver.get("https://www.fluteworld.com/Unaccompanied-Flute--03-.html")
    assert "Flute" in driver.title

    # wait for page to load
    sleep(INITIAL_LOAD_PAUSE)

    # Scroll-to-bottom loop from Stack Overflow: https://stackoverflow.com/a/28928684/1316860
    # Keep scrolling until the document height stops changing between two passes
    # (presumably the page keeps adding listings while it is scrolled - confirm).
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait to load page
        sleep(SCROLL_PAUSE_TIME)

        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    elem = driver.find_element_by_css_selector(".site-inner")
    print(elem.text)
finally:
    # Always end the WebDriver session, even when the assert or a Selenium call
    # above raises; quit() closes every window and stops the driver.
    driver.quit()

## Step 1

Import the Flute Solo Music listings that were scraped from fluteworld.com with Selenium and turn them into a usable list grouped by piece.

In [70]:
# Import previously scraped Flute Solo Music from Fluteworld.com using Selenium.
# readtext holds one string per line of the file (line endings included); the
# with-block closes the file as soon as the lines have been read.
with open('Flute World Flute Solo List') as myFile:
    readtext = myFile.readlines()

In [73]:
# Cut the scraped page into one chunk of lines per piece.
# A chunk is closed by the first line containing "Our Price"; lines after the
# last such line are never added to groups.
# Chunks may include extraneous data or miss data
# (ie no Grade, or a line with "DIGITAL DOWNLOAD ONLY").
# NOTE(review): any text that precedes the first listing ends up inside the
# first chunk - confirm against the saved file.

groups, pending = [], []
for text_line in readtext:
    pending.append(text_line)
    if re.search(r'Our Price', text_line) is None:
        continue
    # Price line reached: the current piece is complete.
    groups.append(pending)
    pending = []
print(len(groups))

731


In [74]:
# Create list of pieces with title, composer, and grade from FW.
# Every entry is [id, title, composer, grade]; id and title are strings, and a
# composer or grade that is not found in the group is stored as [].
pieces = []
id_num = 30000 # starts with 3 to indicate flute solo (naming scheme from FW)
for group in groups:
    # Start from the "not found" placeholder on every iteration, so a group
    # without a Composer/Grade line cannot hit an unbound name.
    composer = []
    grade = []

    # title of piece: first line of the group without its line ending
    title = group[0].split('\n')[0]

    for entry in group:
        # grade of piece: everything after "Grade :" on that line. The capture
        # group avoids indexing the result of split(': '), which fails for
        # lines the pattern accepts but that have no ": " (e.g. "Grade:3").
        matchGrade = re.search(r'Grade\s*:\s*(.*)', entry)
        if matchGrade:
            grade = matchGrade.group(1)

        # composer of piece: everything after "Composer(s) : " on that line
        matchComposer = re.search(r'Composer\(s\) : (.*)', entry)
        if matchComposer:
            composer = matchComposer.group(1)

    piece = [str(id_num), title, composer, grade]
    pieces.append(piece)

    id_num = id_num + 1


## Step 2
Use the list of pieces from Flute World to look up each piece on Spotify and fetch its audio information.

In [82]:
# Spotipy ID, secret for Spotify Web API.
# Taken from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables
# so real credentials never have to be written into the notebook; the original
# placeholders are used when a variable is not set.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'yourID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'yourSecret')

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [83]:
# To Find Spotify Track ID
def _spotify_query(title, composer):
    """Build the free-text Spotify search string for one piece.

    title    -- piece title; anything from the first '(' onwards is dropped.
    composer -- composer string; if it contains commas, only the first two
                comma-separated parts are kept, joined by a space. A value
                that is not a string (e.g. the [] placeholder) contributes
                nothing to the query.

    Returns title + ' ' + composer after those adjustments.
    """
    if '(' in title:
        title = title.split('(')[0]

    if not isinstance(composer, str):
        composer = ''
    elif ',' in composer:
        parts = composer.split(',')
        composer = parts[0] + ' ' + parts[1]

    return title + ' ' + composer


count = 0   # pieces for which the search returned no track
count2 = 0  # pieces for which a track was found

for piece in pieces:
    searchStr = _spotify_query(piece[1], piece[2])

    result = sp.search(searchStr)

    # The first returned track is taken as-is; nothing here checks that it is
    # really the same work.
    resultItem = result['tracks']['items']

    # Slice assignment (instead of append) keeps the cell idempotent: on a
    # re-run, positions 4 onwards are replaced rather than extended.
    if not resultItem:
        # No hit: [] marks both the track id and the track name as missing.
        piece[4:] = [[], []]
        count = count + 1
    else:
        resultItem1 = resultItem[0]
        piece[4:] = [resultItem1['id'], resultItem1['name']]
        count2 = count2 + 1
print(count)
print(count2)


453
278


In [92]:
# Preview entries 1-9 of pieces (the slice skips index 0).
print(pieces[1:10])

[['30001', 'Breath of Souls: Five Waiata for Solo Flute', 'ADAMS,PETER', '3-3+', [], []], ['30002', 'Vocalise for solo flute', 'ADASKIN', '2-2+', [], []], ['30003', 'A Bonnie Tune', 'ADLER', '3', [], []], ['30004', 'Flaunting', 'ADLER', '3', [], []], ['30005', 'Solo III', 'AHO', '4+', '0Mturv73oMZdljedKgYvHs', 'Oboe Sonata: III. —'], ['30006', 'Icicle', 'AITKEN', '3+', '5YgaSMeO5yHTVtdyPieEnd', 'Icicle'], ['30007', 'Plainsong', 'AITKEN', '5', '5cxz2J7FpgkK2ZYytaHcqZ', 'Plainsong'], ['30008', 'Sephardic Anthology', 'AKIVA', '2-3', [], []], ['30009', 'Three Pieces for Flute Solo', 'AKIVA', '3+', '10A3bOPmyGvN5tWD4MzGAZ', 'Three pieces for flute solo: III. Ven querida']]


In [93]:
# Extract Audio Data
# For every piece that has a Spotify track id, request the track's audio
# analysis and store the values listed in AUDIO_FIELDS (in that order) after
# the six fields the piece already has.
AUDIO_FIELDS = (
    'duration',
    'tempo', 'tempo_confidence',
    'time_signature', 'time_signature_confidence',
    'key', 'key_confidence',
    'mode', 'mode_confidence',
)

prev = []
piecesAudio = []

for piece in pieces:
    track_id = piece[4]

    # Skip pieces without a match ([]) and pieces whose track id equals the id
    # of the most recently analysed piece. Only that one previous id is
    # remembered, so the same id can still be analysed again later in the list.
    if track_id == [] or track_id == prev:
        continue
    prev = track_id

    track_info = sp.audio_analysis(track_id)
    trackDetails = track_info['track']

    # pieceAudio is the same list object as the entry in pieces. Assigning to
    # the slice from index 6 gives the same result as nine appends on a first
    # run, and overwrites instead of duplicating when the cell is re-run.
    pieceAudio = piece
    pieceAudio[6:] = [trackDetails[field] for field in AUDIO_FIELDS]

    piecesAudio.append(pieceAudio)
        


## Step 3
Save the data from above (`piecesAudio`) with pickle.

In [104]:
# Write piecesAudio to disk. The with-block closes the file even if dump raises.
with open("piecesAudioALLSOLOFLUTE.pickle", "wb") as pickling_on:
    pickle.dump(piecesAudio, pickling_on)

## Step 4
Retrieve saved data from pickle

In [3]:
def unpickle(pickleFile):
    """Load and return the object stored in a pickle file.

    pickleFile -- path of the file to read, e.g. "piecesAudioALLSOLOFLUTE.pickle".

    Returns whatever object was pickled into the file. The file is closed
    before returning, also when loading fails.

    Only use this on files you created yourself: pickle.load can execute
    arbitrary code contained in the file.
    """
    with open(pickleFile, "rb") as pickle_off:
        piecesAudio = pickle.load(pickle_off)

    return piecesAudio

In [4]:
# Reload the saved list through unpickle and print how many entries it holds.
pickleFile = "piecesAudioALLSOLOFLUTE.pickle"
piecesAudio = unpickle(pickleFile)
print(len(piecesAudio))

219
