<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Create Database Tables  </span>

In [44]:
import sqlite3
%run py_utils/sqlite_utils.ipynb

def create_tables(con, cur):
    """Create the ``artists``, ``albums`` and ``tracks`` tables when missing.

    Every ``CREATE TABLE IF NOT EXISTS`` statement is handed to ``run_query``;
    right after each one the table's column layout (``PRAGMA table_info``) is
    printed, one column per line.

    Args:
        con: Open database connection, forwarded to ``run_query``.
        cur: Cursor forwarded to ``run_query`` and used for the PRAGMA look-ups.
    """
    artists_ddl = """CREATE TABLE IF NOT EXISTS artists(
                        artist_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                        artist_name TEXT NOT NULL, 
                        artist_path TEXT NOT NULL,
                        created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                        updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                        UNIQUE(artist_name, artist_path)
                        )"""

    albums_ddl = """CREATE TABLE IF NOT EXISTS albums(
                       album_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                       album_name TEXT NOT NULL, 
                       album_year INTEGER,
                       artist_id INTEGER, 
                       created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, 
                       updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, 
                       FOREIGN KEY (artist_id) REFERENCES artists (artist_id),               
                       UNIQUE(album_name, artist_id)
                       )"""

    tracks_ddl = """CREATE TABLE IF NOT EXISTS tracks(
                        track_id INTEGER PRIMARY KEY AUTOINCREMENT, 
                        track_name TEXT NOT NULL, 
                        track_lyrics TEXT, 
                        track_credits TEXT, 
                        track_path TEXT NOT NULL,
                        artist_id INTEGER, 
                        album_id INTEGER, 
                        created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, 
                        updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, 
                        FOREIGN KEY (artist_id) REFERENCES artists (artist_id), 
                        FOREIGN KEY (album_id) REFERENCES albums (album_id),             
                        UNIQUE(track_name, track_path, artist_id, album_id)
                        )"""

    # (heading printed above the column listing, table name, DDL statement);
    # parents come first so referenced tables exist before their children.
    schema = (
        (">> Artists:", "artists", artists_ddl),
        ("\n>> Albums:", "albums", albums_ddl),
        ("\n>> Tracks:", "tracks", tracks_ddl),
    )
    for heading, table, ddl in schema:
        run_query(cur, con, ddl)
        column_rows = cur.execute("PRAGMA table_info({})".format(table))
        print(heading, *column_rows, sep="\n")
    
    
# Open the database, make sure the schema exists, and release the connection
# even when table creation fails part-way through.
con, cur = connect_sqlite("../database/azlyrics.db")
try:
    create_tables(con, cur)
finally:
    con.close()


>> Artists:
(0, 'artist_id', 'INTEGER', 0, None, 1)
(1, 'artist_name', 'TEXT', 1, None, 0)
(2, 'artist_path', 'TEXT', 1, None, 0)
(3, 'created_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)
(4, 'updated_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)

>> Albums:
(0, 'album_id', 'INTEGER', 0, None, 1)
(1, 'album_name', 'TEXT', 1, None, 0)
(2, 'album_year', 'INTEGER', 0, None, 0)
(3, 'artist_id', 'INTEGER', 0, None, 0)
(4, 'created_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)
(5, 'updated_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)

>> Tracks:
(0, 'track_id', 'INTEGER', 0, None, 1)
(1, 'track_name', 'TEXT', 1, None, 0)
(2, 'track_lyrics', 'TEXT', 0, None, 0)
(3, 'track_credits', 'TEXT', 0, None, 0)
(4, 'track_path', 'TEXT', 1, None, 0)
(5, 'artist_id', 'INTEGER', 0, None, 0)
(6, 'album_id', 'INTEGER', 0, None, 0)
(7, 'created_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)
(8, 'updated_at', 'DATETIME', 1, 'CURRENT_TIMESTAMP', 0)


<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Scrape Artists & Store in Database  </span>

In [None]:
%run crawlers/artist_scraping.ipynb
%run py_utils/scraping_utils.ipynb
%run py_utils/sqlite_utils.ipynb

def acquire_artists():
    """Fetch the artist listing by delegating to ``get_artists``.

    Returns:
        Whatever ``get_artists`` returns, unchanged.
    """
    scraped_artists = get_artists()
    return scraped_artists
    
def store_artists(artist_data, con, cur):
    """Insert every artist of ``artist_data`` into the ``artists`` table.

    Args:
        artist_data: Container that yields artist names when iterated and
            returns the matching path when indexed by name.
        con: Open database connection, forwarded to ``run_query``.
        cur: Cursor forwarded to ``run_query``.

    A failing insert is reported on stdout and the loop carries on with the
    next artist.
    """
    insert_sql = "INSERT INTO artists (artist_name, artist_path) values ( ?, ? )"
    for name in artist_data:
        try:
            row = (name, artist_data[name])
            run_query(cur, con, insert_sql, values=row)
            print(">> {} added to database.".format(name))
        except Exception as exc:
            print("! Exception: {}".format(exc))

# Scrape the artist index and persist it; the connection is released even if
# scraping or storing raises.
con, cur = connect_sqlite("../database/azlyrics.db")
try:
    artist_data = acquire_artists()
    store_artists(artist_data, con, cur)
finally:
    con.close()


<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Artist Selection Interface </span>

In [45]:
import string
%run py_utils/widgets_utils.ipynb
%run py_utils/sqlite_utils.ipynb

# Artists picked through the widgets of this cell.
user_artists = []
# Selectable initials; '#' collects names that do not start with A-Z.
alphabets = [*string.ascii_uppercase, '#']

# Load every artist name once; the connection is closed even if the query fails.
con, cur = connect_sqlite("../database/azlyrics.db")
try:
    all_artists = [data[0] for data in cur.execute("SELECT artist_name FROM artists")]
finally:
    con.close()

def retrieve_alphabet(change):
    """Dropdown observer: list the artists whose name starts with the chosen initial.

    Args:
        change: Change notification; ``change.new`` is the selected entry of
            ``alphabets`` (a letter, or '#' for names starting with anything else).

    The artist dropdown is refilled with the matching names and set to the
    first one. When nothing matches, it falls back to the single '' placeholder
    it was created with, instead of raising ``IndexError``.
    """
    selected_item = change.new
    widget_output(output, "Alphabet {} selected".format(selected_item))

    filter_artist = []
    for artist in all_artists:
        # Slicing yields '' for an empty name, where artist[0] would raise.
        initial = artist[:1].upper()
        if selected_item == initial:
            filter_artist.append(artist)
        elif selected_item == '#' and initial not in alphabets:
            filter_artist.append(artist)

    alphabet_dropdown.value = selected_item
    # '' is the "nothing selected" marker that retrieve_data checks for.
    options = filter_artist if filter_artist else ['']
    artist_dropdown.options = options
    artist_dropdown.value = options[0]
    
def retrieve_artist(change):
    """Dropdown observer: echo the newly selected artist in the output widget."""
    message = "{} selected".format(change.new)
    widget_output(output, message)
    
def retrieve_data(_):
    """Button callback: add the artist shown in the dropdown to ``user_artists``.

    The '' placeholder and a missing (``None``) value are ignored, because a
    non-string entry would break the ``join`` below. An artist already in the
    selection is not added a second time. The current selection is always
    echoed in the output widget.
    """
    chosen = artist_dropdown.value
    if chosen and chosen not in user_artists:
        user_artists.append(chosen)
    widget_output(output, 'Selection: • '+' • '.join(user_artists))

def clear_data(_):
    """Button callback: drop the most recently added artist from ``user_artists``.

    The remaining selection is echoed in the same bullet format that
    ``retrieve_data`` uses. Clearing an already empty selection reports the
    ``IndexError`` text in the output widget instead.
    """
    try:
        user_artists.pop()
    except IndexError as exc:
        widget_output(output, '! Exception: '+str(exc))
        return
    widget_output(output, 'Selection: • '+' • '.join(user_artists))
    
# Output area shared by all callbacks of this cell, seeded with a prompt.
output = create_output()
widget_output(output, "Please select an Artist:")
# Initial picker and artist picker; the artist list starts with the ''
# placeholder and is refilled by retrieve_alphabet.
# NOTE(review): observe=1 presumably registers arg_function as the change observer -- confirm in widgets_utils.
alphabet_dropdown = assign_dropdown(alphabets, "Alphabets", arg_function=retrieve_alphabet, observe=1)
artist_dropdown = assign_dropdown([''], "Artists", arg_function=retrieve_artist, observe=1)
# NOTE(review): widget_type='H'/'V' look like horizontal/vertical containers and
# gui_display=0 like "build without displaying" -- confirm in widgets_utils.
dropdowns = create_gui(alphabet_dropdown, artist_dropdown, num=[0,1], wd='300', widget_type='H', gui_display=0)

# Buttons that add to / remove from user_artists.
select_button = create_button("Add Artist to Selection", arg_function=retrieve_data)
clear_button = create_button("Clear Last Selection", arg_function=clear_data)
buttons = create_gui(select_button, clear_button, num=[0,1], wd='300', widget_type='H', gui_display=0)
# Final layout: output, dropdown row, button row.
create_gui(output, dropdowns, buttons, widget_type='V', gui_display=1) 


VBox(children=(Output(), HBox(children=(Dropdown(description='Alphabets', layout=Layout(width='300px'), option…

<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Scrape Track Albums & Store in Database </span>

In [46]:
%run crawlers/album_scraping.ipynb
# Site root; store_data stores an empty track path for a link equal to it.
root_url = "https://www.azlyrics.com"
# Artist page URLs collected by get_urls.
start_urls = []
# Album keys that store_data has already attempted to insert.
tmp_list = []

def get_urls(con, cur):
    """Queue the artist page URL of every selected artist in ``start_urls``.

    Args:
        con: Open database connection (not used here; kept for the existing signature).
        cur: Cursor used to look up ``artist_path`` by artist name.

    Names without a row in ``artists`` are reported and skipped instead of
    raising ``IndexError``. A URL is queued only once, so repeated names or
    repeated calls do not cause duplicate scraping.
    """
    for artist in user_artists:
        row = cur.execute("SELECT artist_path FROM artists WHERE artist_name=?", (artist,)).fetchone()
        if row is None:
            print("! Exception: artist '{}' not found in database.".format(artist))
            continue
        if row[0] not in start_urls:
            start_urls.append(row[0])
        
def scrape_albums():
    """Scrape the queued artist pages by delegating to ``get_albums``.

    Returns:
        The result of ``get_albums(start_urls)``, unchanged.
    """
    scraped = get_albums(start_urls)
    return scraped

def store_data(con, cur, artist_info):
    """Persist the scraped albums and tracks of every artist in ``artist_info``.

    Args:
        con: Open database connection, forwarded to ``run_query``.
        cur: Cursor used for look-ups and forwarded to ``run_query``.
        artist_info: Dict mapping an artist name to a dict with the parallel
            lists ``track_names``, ``album_names``, ``album_years`` and
            ``track_urls``; index ``i`` of each list describes one track.

    An album is inserted the first time its name/artist key is seen (keys are
    remembered in the module-level ``tmp_list``); every track is then inserted
    with the id of its album. Failing inserts are reported on stdout and do not
    stop the loop. An artist without a row in ``artists`` is reported and skipped.
    """
    for artist, info in artist_info.items():
        row = cur.execute("SELECT artist_id FROM artists WHERE artist_name=?", (artist,)).fetchone()
        if row is None:
            # Skip only this artist rather than aborting the whole run.
            print("! Exception: artist '{}' not found in database.".format(artist))
            continue
        artist_id = row[0]

        for i, track in enumerate(info['track_names']):
            album_name = info['album_names'][i]
            album_key = album_name+str(artist_id)
            if album_key not in tmp_list:
                tmp_list.append(album_key)
                # Reset per album so the handler never prints a stale or unbound tuple.
                values = None
                try:
                    query = "INSERT INTO albums (album_name, album_year, artist_id) values ( ?, ?, ? )"
                    values = (album_name, info['album_years'][i], artist_id)
                    run_query(cur, con, query, values=values)
                    print(">> Entries added:", values)
                except Exception as exc:
                    print("! Exception: {}".format(exc))
                    if values is not None:
                        print(values)
            try:
                cur.execute("SELECT album_id FROM albums WHERE album_name=? AND artist_id=?",
                            (album_name, artist_id, ))
                album_id = cur.fetchall()[0][0]
                # A link that is just the site root means there is no lyrics page.
                url = info['track_urls'][i]
                track_path = '' if url == root_url else url

                query = "INSERT INTO tracks (track_name, track_path, artist_id, album_id) values ( ?, ?, ?, ? )"
                values = (track, track_path, artist_id, album_id)
                run_query(cur, con, query, values=values)
                print(">> Entries added:", values)
            except Exception as exc:
                print("! Exception: {}".format(exc))

                
# Resolve the selected artists to URLs, scrape their albums and store the
# result; the connection is released even if any step raises.
con, cur = connect_sqlite("../database/azlyrics.db")
try:
    get_urls(con, cur)
    artist_info = scrape_albums()
    store_data(con, cur, artist_info)
finally:
    con.close()


['https://www.azlyrics.com/m/metallica.html', 'https://www.azlyrics.com/m/mauve.html']
>> Scraping https://www.azlyrics.com/m/metallica.html
! ValueError: invalid literal for int() with base 10: 'Reed'
! ValueError: invalid literal for int() with base 10: 'songs:'
>> Scraping https://www.azlyrics.com/m/mauve.html
! Exception: Invalid object type provided as 'soup': <class 'NoneType'>. Expected: bs4.BeautifulSoup or Tag.
>> Entries added: ("album: Kill 'Em All", 1983, 12420)
>> Entries added: ('Hit The Lights', 'https://www.azlyrics.com/lyrics/metallica/hitthelights.html', 12420, 29)
>> Entries added: ('The Four Horsemen', 'https://www.azlyrics.com/lyrics/metallica/thefourhorsemen.html', 12420, 29)
>> Entries added: ('Motorbreath', 'https://www.azlyrics.com/lyrics/metallica/motorbreath.html', 12420, 29)
>> Entries added: ('Jump In The Fire', 'https://www.azlyrics.com/lyrics/metallica/jumpinthefire.html', 12420, 29)
>> Entries added: ('Whiplash', 'https://www.azlyrics.com/lyrics/metallic

>> Entries added: ('Carpe Diem Baby', 'https://www.azlyrics.com/lyrics/metallica/carpediembaby.html', 12420, 36)
>> Entries added: ('Bad Seed', 'https://www.azlyrics.com/lyrics/metallica/badseed.html', 12420, 36)
>> Entries added: ('Where The Wild Things Are', 'https://www.azlyrics.com/lyrics/metallica/wherethewildthingsare.html', 12420, 36)
>> Entries added: ('Prince Charming', 'https://www.azlyrics.com/lyrics/metallica/princecharming.html', 12420, 36)
>> Entries added: ("Low Man's Lyric", 'https://www.azlyrics.com/lyrics/metallica/lowmanslyric.html', 12420, 36)
>> Entries added: ('Attitude', 'https://www.azlyrics.com/lyrics/metallica/attitude.html', 12420, 36)
>> Entries added: ('Fixxxer', 'https://www.azlyrics.com/lyrics/metallica/fixxxer.html', 12420, 36)
>> Entries added: ('compilation: Garage Inc.', 1998, 12420)
>> Entries added: ('Free Speech For The Dumb', 'https://www.azlyrics.com/lyrics/metallica/freespeechforthedumb.html', 12420, 37)
>> Entries added: ("It's Electric", 'http

>> Entries added: ('Murder One', 'https://www.azlyrics.com/lyrics/metallica/murderone.html', 12420, 42)
>> Entries added: ('Spit Out The Bone', 'https://www.azlyrics.com/lyrics/metallica/spitoutthebone.html', 12420, 42)
>> Entries added: ('Lords Of Summer(Deluxe Edition Bonus Track)', 'https://www.azlyrics.com/lyrics/metallica/lordsofsummer.html', 12420, 42)
>> Entries added: ('Ronnie Rising Medley(Deluxe Edition Bonus Track)', 'https://www.azlyrics.com/lyrics/metallica/ronnierisingmedley.html', 12420, 42)
>> Entries added: ('When A Blind Man Cries(Deluxe Edition Bonus Track)', 'https://www.azlyrics.com/lyrics/metallica/whenablindmancries.html', 12420, 42)
>> Entries added: ('Remember Tomorrow(Deluxe Edition Bonus Track)', 'https://www.azlyrics.com/lyrics/metallica/remembertomorrow.html', 12420, 42)
>> Entries added: ('album: 72 Seasons', 2023, 12420)
>> Entries added: ('72 Seasons', 'https://www.azlyrics.com/lyrics/metallica/72seasons.html', 12420, 43)
>> Entries added: ('Shadows Foll

<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Track Selection Interface </span>

In [47]:
%run py_utils/generic_utils.ipynb
%run py_utils/sqlite_utils.ipynb
%run py_utils/widgets_utils.ipynb

def retrieve_albums(change):
    """Dropdown observer: load the albums of the newly selected artist.

    Args:
        change: Change notification; ``change.new`` is the selected artist name.

    The '-Artist dropdown' placeholder is removed from the menu on first use.
    The selected artist's id is kept in the one-element list ``artist_id`` for
    ``retrieve_tracks``. The album dropdown is refilled and set to the first
    album. If the artist is unknown or has no albums, the album dropdown falls
    back to its '-Album dropdown' placeholder instead of raising ``IndexError``.
    The database connection is always closed.
    """
    if selection_menu and selection_menu[0] == '-Artist dropdown': selection_menu.pop(0)
    artist_dropdown.options=selection_menu

    albums = []
    con, cur = connect_sqlite(db)
    try:
        artist_id.clear()
        row = cur.execute("SELECT artist_id FROM artists WHERE artist_name=?", (change.new, )).fetchone()
        if row is not None:
            artist_id.append(row[0])
            albums = [data[0] for data in cur.execute("SELECT album_name FROM albums WHERE artist_id=?", (artist_id[0],))]
    finally:
        con.close()

    if not albums:
        albums = ['-Album dropdown']
    album_dropdown.options = albums
    album_dropdown.value = albums[0]

def retrieve_tracks(change):
    """Dropdown observer: load the tracks of the newly selected album.

    Args:
        change: Change notification; ``change.new`` is the selected album name.

    The album is resolved together with the artist id held in ``artist_id``.
    The track dropdown is refilled and set to the first track. If no artist is
    selected, the album is unknown, or it has no tracks, the track dropdown
    falls back to its '-Track dropdown' placeholder (which ``store_track``
    ignores) instead of raising ``IndexError``. The database connection is
    always closed.
    """
    tracks = []
    con, cur = connect_sqlite(db)
    try:
        if artist_id:
            row = cur.execute("SELECT album_id FROM albums WHERE album_name=? and artist_id=?",
                              (change.new, artist_id[0], )).fetchone()
            if row is not None:
                tracks = [data[0] for data in cur.execute("SELECT track_name FROM tracks WHERE album_id=?", (row[0],))]
    finally:
        con.close()

    if not tracks:
        tracks = ['-Track dropdown']
    track_dropdown.options = tracks
    track_dropdown.value = tracks[0]
    
def selected_track(change):
    """Dropdown observer: apply the new track to the dropdown and echo it."""
    new_track = change.new
    track_dropdown.value = new_track
    # Echo the value as read back from the widget, not the raw event payload.
    shown = track_dropdown.value
    widget_output(output, shown)

def store_track(change):
    """Button callback: remember the selected track together with its artist.

    The track name goes to ``user_tracks`` and the currently selected artist to
    ``selected_artists``, so both lists stay index-aligned. The
    '-Track dropdown' placeholder and a missing (``None``) value are ignored,
    because a non-string entry would break the ``join`` below.
    """
    chosen = track_dropdown.value
    if chosen is not None and chosen != '-Track dropdown':
        user_tracks.append(chosen)
        selected_artists.append(artist_dropdown.value)
        widget_output(output, 'Selection: • '+' • '.join(user_tracks))

def clear_data(change):
    """Button callback: remove the most recently stored track from the selection.

    ``selected_artists`` is index-aligned with ``user_tracks`` (``store_track``
    appends to both), so it is trimmed to the same length. Otherwise later
    tracks would be paired with the wrong artist. Clearing an empty selection
    does nothing apart from echoing it.
    """
    if user_tracks:
        user_tracks.pop()
    # Drop every artist entry that no longer has a matching track.
    del selected_artists[len(user_tracks):]
    widget_output(output, 'Selection: • '+' • '.join(user_tracks))


# State shared by the callbacks of this cell.
db = "../database/azlyrics.db"
user_tracks = []        # selected track names
selected_artists = []   # artist of each selected track, index-aligned with user_tracks
artist_id = []          # holds the id of the artist currently selected (0 or 1 element)
output = create_output()

# Build the menu from a copy of the artist selection, so the placeholder entry
# never ends up inside user_artists, and an empty selection cannot raise.
selection_menu = ['-Artist dropdown'] + [name for name in user_artists if name != '-Artist dropdown']
artist_dropdown = assign_dropdown(selection_menu, "Select Artist", arg_function=retrieve_albums, observe=1)
album_dropdown = assign_dropdown(['-Album dropdown'], "Select Album", arg_function=retrieve_tracks, observe=1)
track_dropdown = assign_dropdown(['-Track dropdown'], "Select Track", arg_function=selected_track, observe=1)

button_track = create_button("Store Track", arg_function=store_track)
button_reset = create_button("Clear Last Selection", arg_function=clear_data)
dropdowns = create_gui(artist_dropdown, album_dropdown, track_dropdown, num=[0,1,2], wd='350',
                       widget_type='H', gui_display=0)
buttons = create_gui(button_track, button_reset, num=[0,1], wd='250', widget_type='H', gui_display=0)
create_gui(dropdowns, buttons, output, widget_type='V', gui_display=1)


VBox(children=(HBox(children=(Dropdown(description='Select Artist', layout=Layout(width='350px'), options=('-A…

<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Scrape Lyrics and Update Database </span>

In [48]:
%run py_utils/sqlite_utils.ipynb
%run py_utils/widgets_utils.ipynb
%run crawlers/lyrics_scraping.ipynb
# Lyrics and credits gathered in this run, keyed by track name.
lyrics_data = {}
# Track name -> {'track_id': ..., 'artist_id': ...}, filled by search_db_for_lyrics.
track_info = {}

def search_db_for_lyrics(con, cur, track, artist):
    """Make sure lyrics for ``track`` by ``artist`` are in the database and in memory.

    Args:
        con: Open database connection, forwarded to the scrape/store helpers.
        cur: Cursor used for the look-ups.
        track: Track name as stored in ``tracks.track_name``.
        artist: Artist name as stored in ``artists.artist_name``.

    The ids of the track are recorded in ``track_info[track]``. When the row
    has no lyrics yet, they are scraped from the stored track path and written
    back through ``store_lyrics``. Otherwise the stored lyrics and credits are
    copied into ``lyrics_data[track]``. An unknown artist or track, or a path
    that ``scrape_lyrics`` cannot handle (it returns ``None``), is reported on
    stdout and skipped instead of raising.
    """
    artist_row = cur.execute("SELECT artist_id FROM artists WHERE artist_name=?", (artist, )).fetchone()
    if artist_row is None:
        print("! Exception: artist '{}' not found in database.".format(artist))
        return
    artist_id = artist_row[0]

    # One row carries everything needed below.
    track_row = cur.execute(
        "SELECT track_id, track_lyrics, track_credits, track_path FROM tracks WHERE track_name=? AND artist_id=?",
        (track, artist_id)).fetchone()
    if track_row is None:
        print("! Exception: track '{}' by '{}' not found in database.".format(track, artist))
        return
    track_id, lyrics, track_credits, track_path = track_row
    track_info[track] = {'track_id': track_id, 'artist_id': artist_id}

    if lyrics is None:
        scraped = scrape_lyrics(con, cur, track_path)
        if scraped is None:
            print("! Exception: no lyrics could be scraped for '{}' ({}).".format(track, track_path))
            return
        lyrics_data[track] = scraped
        store_lyrics(con, cur, track, track_id, scraped)
    else:
        print(">> Lyrics for '{}' already exists in database.".format(track))
        lyrics_data[track] = {'lyrics': lyrics, 'credits': track_credits}

def scrape_lyrics(con, cur, url):
    """Return lyrics data for ``url``.

    Args:
        con: Not used here.
        cur: Not used here.
        url: Stored track path.

    Returns:
        The result of ``get_lyrics(url)`` for an azlyrics lyrics URL, a
        placeholder dict with ``'lyrics'`` and ``'credits'`` for an empty
        path, and ``None`` for any other value.
    """
    if url.startswith('https://www.azlyrics.com/lyrics'):
        return get_lyrics(url)
    if url != '':
        return None
    placeholder = {'lyrics': 'Lyrics not available for this track.', 'credits': 'No credits mentioned.'}
    return placeholder

def store_lyrics(con, cur, track, track_id, lyrics_data):
    """Write the lyrics and credits of one track back to the ``tracks`` table.

    Args:
        con: Open database connection, forwarded to ``run_query``.
        cur: Cursor forwarded to ``run_query``.
        track: Track name, used only in the status message.
        track_id: Primary key of the row to update.
        lyrics_data: Dict with ``'lyrics'`` and ``'credits'``; its ``'credits'``
            entry is rewritten in place with the 'Submit Corrections' text removed.

    A failing update is reported on stdout instead of being raised.
    """
    cleaned_credits = lyrics_data['credits'].replace('Submit Corrections', '')
    lyrics_data['credits'] = cleaned_credits

    update_sql = "UPDATE tracks SET track_lyrics =?, track_credits =? WHERE track_id=?"
    try:
        params = (lyrics_data['lyrics'], lyrics_data['credits'], track_id)
        run_query(cur, con, update_sql, values=params)
        print(">> Lyrics for '{}' has been updated.".format(track))
    except Exception as exc:
        print("! Exception: {}".format(exc))
    
# Resolve lyrics for every selected track; the connection is released even if
# a look-up or scrape raises.
con, cur = connect_sqlite("../database/azlyrics.db")
try:
    # selected_artists[i] is the artist that was chosen when user_tracks[i] was stored.
    for i, track in enumerate(user_tracks):
        search_db_for_lyrics(con, cur, track, selected_artists[i])
finally:
    con.close()


>> Scraping: https://www.azlyrics.com/lyrics/metallica/itselectric.html
>> Lyrics for 'It's Electric' has been updated.
>> Scraping: https://www.azlyrics.com/lyrics/metallica/andjusticeforall.html
>> Lyrics for '...And Justice For All' has been updated.
>> Scraping: https://www.azlyrics.com/lyrics/mauve/falling.html
>> Lyrics for 'Falling' has been updated.


<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Lyrics Selection Interface </span>

In [55]:
%run py_utils/widgets_utils.ipynb
%run py_utils/sqlite_utils.ipynb

def retrieve_lyrics(change):
    """Dropdown observer: keep the track dropdown on the newly chosen value."""
    chosen_track = change.new
    track_dropdown.value = chosen_track

def load_lyrics(_):
    """Button callback: show the selected track's album info, lyrics and credits.

    The ids recorded in ``track_info`` for the selected track are used to fetch
    lyrics, credits, album name/year and artist name with a single joined
    query. The three resulting texts are passed to ``widget_sequence_output``.
    Missing lyrics or credits are replaced by a short notice. Any failure,
    including a track without an entry in ``track_info``, is reported on
    stdout. The database connection is always closed.
    """
    con, cur = connect_sqlite(db)
    try:
        # Looked up inside the try block so a missing entry cannot leak the connection.
        ids = track_info[track_dropdown.value]
        row = con.execute(
            """SELECT t.track_lyrics, t.track_credits, al.album_name, al.album_year, ar.artist_name
               FROM tracks t
               JOIN albums al ON al.album_id = t.album_id AND al.artist_id = t.artist_id
               JOIN artists ar ON ar.artist_id = t.artist_id
               WHERE t.track_id=? AND t.artist_id=?""",
            (ids['track_id'], ids['artist_id'], )).fetchone()
        if row is None:
            print("! Exception: no database entry found for '{}'.".format(track_dropdown.value))
            return
        lyrics, track_credits, album_name, album_year, artist_name = row
        # NULL columns would otherwise fail the string concatenation below.
        if lyrics is None:
            lyrics = 'Lyrics not available for this track.'
        if track_credits is None:
            track_credits = 'No credits mentioned.'
        widget_sequence_output(output, [">>Track: '{}' by {}\n>>{} [{}]"
                                        .format(track_dropdown.value, artist_name, album_name, album_year),
                                        '>>Lyrics:\n'+lyrics, track_credits])
    except Exception as exc:
        print("! Exception: {}".format(exc))
    finally:
        con.close()

    
# Database path read by load_lyrics, and the output area it renders into.
db = "../database/azlyrics.db"; output = create_output()
# Track picker over the tracks stored in user_tracks.
# NOTE(review): observe=1 presumably registers arg_function as the change observer -- confirm in widgets_utils.
track_dropdown = assign_dropdown(user_tracks, "Select Track", arg_function=retrieve_lyrics, observe=1)   
# Button that triggers load_lyrics for the track currently shown.
lyrics_button = create_button("Load Discography", arg_function=load_lyrics)
# NOTE(review): widget_type='H'/'V' look like horizontal/vertical containers and
# gui_display=1 like "display now" -- confirm in widgets_utils.
dropdown_button = create_gui(track_dropdown, lyrics_button, widget_type='H', num=[0,1], wd='400', gui_display=0)
create_gui(dropdown_button, output, widget_type='V', num=[1], wd='400', gui_display=1)  


VBox(children=(HBox(children=(Dropdown(description='Select Track', layout=Layout(width='400px'), options=("It'…

<span style="font-family:Lucida Sans Unicode; color:#a10a0a; font-size: 25px"> ▼ Generate Database Docs </span>

In [57]:
def generate_docs(db, docs, title):
    """Write a Markdown documentation template for every table of a database.

    Args:
        db: Path of the SQLite database, passed to ``connect_sqlite``.
        docs: Path of the Markdown file to create or overwrite.
        title: First line of the generated file.

    For each table listed in ``sqlite_master`` the file gets a ``###`` heading
    with the upper-cased table name, followed by a two-column table with one
    row per column name and an empty description cell. The schema is read
    first and the connection closed before the file is written, so neither
    handle is leaked if something fails.
    """
    schema = []
    con, cur = connect_sqlite(db)
    try:
        cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        for (name,) in cur.fetchall():
            # Quote the identifier, and fetch no rows: only the column names are needed.
            quoted = '"{}"'.format(name.replace('"', '""'))
            cur.execute("SELECT * FROM {} LIMIT 0;".format(quoted))
            columns = [description[0] for description in cur.description]
            schema.append((name.upper(), columns))
    finally:
        con.close()

    table_tmp = "|{}|{}|\n| :-: | :-: |\n".format("Column", "Description")
    with open(docs, "w", encoding="utf-8") as file:
        file.write(title + "\n")
        for table_name, columns in schema:
            file.write("### {}\n".format(table_name))
            file.write(table_tmp)
            for column in columns:
                file.write("| {} | |\n".format(column))
            file.write('\n')
    print(">> Docs generated.")
    
# Produce the Markdown template for the lyrics database.
generate_docs(
    db="../database/azlyrics.db",
    docs="../docs/AZlyrics_template.md",
    title="# AZlyrics Database Documentation",
)
    

>> Docs generated.
