In [None]:
import pandas as pd
# Lift pandas' display limits so the schema listings below are shown in full:
# every row, every column, and the complete text of each cell.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
import sqlite3

In [None]:
# Location of the SQLite database file. The cells below open it read-only to
# list the schema and read-write to build the "*_indexed" tables.
PATH_DB = './spotify.sqlite'

In [None]:
# List every object recorded in sqlite_master (name + the SQL that created it).
# The file is opened through a URI with mode=ro, i.e. as a read-only connection.
con = sqlite3.connect(f'file:{PATH_DB}?mode=ro', uri=True)
try:
    df_db = pd.read_sql_query('''
        SELECT
            name, sql
        FROM
            sqlite_master
    ''', con)
finally:
    # Close the connection even when the query raises, so a failed run does
    # not leave an open handle behind in the kernel.
    con.close()
# View the database before optimization
df_db

In [None]:
# Rebuild the "*_indexed" tables: each one is a copy of a source table with an
# explicit PRIMARY KEY, containing only the rows whose copied columns are all
# non-NULL.
#
# Every entry describes one target table:
#   target        - name of the table to (re)create
#   source        - table the rows are copied from
#   columns       - (column name, type/constraint declaration) pairs, in order
#   primary_key   - column(s) forming the PRIMARY KEY
#   source_names  - optional {target column: source column} for renamed columns
#   extra_filters - optional additional WHERE conditions on the source rows
#
# All names below are fixed constants of this notebook, which is why they are
# interpolated directly into the SQL text.
INDEXED_TABLES = (
    {
        'target': 'r_track_artist_indexed',
        'source': 'r_track_artist',
        'columns': (
            ('track_id', 'TEXT NOT NULL'),
            ('artist_id', 'TEXT NOT NULL'),
        ),
        'primary_key': ('track_id', 'artist_id'),
    },
    {
        'target': 'r_albums_tracks_indexed',
        'source': 'r_albums_tracks',
        'columns': (
            ('album_id', 'TEXT NOT NULL'),
            ('track_id', 'TEXT NOT NULL'),
        ),
        'primary_key': ('album_id', 'track_id'),
    },
    {
        'target': 'r_artist_genre_indexed',
        'source': 'r_artist_genre',
        'columns': (
            ('artist_id', 'TEXT NOT NULL'),
            ('genre', 'TEXT NOT NULL'),
        ),
        'primary_key': ('artist_id', 'genre'),
        # The source table calls this column genre_id.
        'source_names': {'genre': 'genre_id'},
    },
    {
        'target': 'artists_indexed',
        'source': 'artists',
        'columns': (
            ('id', 'TEXT'),
            ('name', 'TEXT NOT NULL'),
            ('followers', 'INTEGER NOT NULL'),
            ('popularity', 'INTEGER NOT NULL'),
        ),
        'primary_key': ('id',),
    },
    {
        'target': 'albums_indexed',
        'source': 'albums',
        'columns': (
            ('id', 'TEXT'),
            ('name', 'TEXT NOT NULL'),
            ('album_type', 'TEXT NOT NULL'),
            ('release_date', 'TEXT NOT NULL'),
            ('popularity', 'INTEGER NOT NULL'),
        ),
        'primary_key': ('id',),
    },
    {
        'target': 'tracks_indexed',
        'source': 'tracks',
        'columns': (
            ('id', 'TEXT'),
            ('name', 'TEXT NOT NULL'),
            ('duration', 'INTEGER NOT NULL'),
            ('explicit', 'INTEGER NOT NULL'),
            ('popularity', 'INTEGER NOT NULL'),
        ),
        'primary_key': ('id',),
        # Only tracks whose audio_feature_id equals their own id are copied.
        'extra_filters': ('audio_feature_id = id',),
    },
    {
        'target': 'audio_features_indexed',
        'source': 'audio_features',
        # NOTE: the hand-written DDL this replaces also declared
        # UNIQUE (analysis_url), but the table defines no analysis_url column
        # and the copy never selects one. SQLite rejects a constraint on an
        # undefined column, so the statement could not succeed; the constraint
        # is therefore omitted.
        'columns': (
            ('id', 'TEXT'),
            ('loudness', 'REAL NOT NULL'),
            ('tempo', 'REAL NOT NULL'),
            ('time_signature', 'INTEGER NOT NULL'),
            ('key', 'INTEGER NOT NULL'),
            ('mode', 'INTEGER NOT NULL'),
            ('acousticness', 'REAL NOT NULL'),
            ('danceability', 'REAL NOT NULL'),
            ('energy', 'REAL NOT NULL'),
            ('instrumentalness', 'REAL NOT NULL'),
            ('liveness', 'REAL NOT NULL'),
            ('speechiness', 'REAL NOT NULL'),
            ('valence', 'REAL NOT NULL'),
        ),
        'primary_key': ('id',),
    },
)


def build_create_sql(spec):
    """Return the CREATE TABLE statement for one INDEXED_TABLES entry.

    The statement lists every column declaration followed by the PRIMARY KEY
    clause, one per line, using the same line layout as the hand-written DDL
    this cell used previously.
    """
    definitions = [f'{name} {declaration}' for name, declaration in spec['columns']]
    definitions.append(f"PRIMARY KEY ({', '.join(spec['primary_key'])})")
    body = ',\n        '.join(definitions)
    return f"CREATE TABLE {spec['target']} (\n        {body}\n    )"


def build_copy_sql(spec):
    """Return the INSERT ... SELECT statement that fills one indexed table.

    Every copied source column must be non-NULL; any 'extra_filters' of the
    entry are appended to the WHERE clause with AND.
    """
    source_names = spec.get('source_names', {})
    target_columns = [name for name, _ in spec['columns']]
    source_columns = [source_names.get(name, name) for name in target_columns]
    conditions = [f'{column} IS NOT NULL' for column in source_columns]
    conditions.extend(spec.get('extra_filters', ()))
    return (
        f"INSERT INTO {spec['target']} ({', '.join(target_columns)}) "
        f"SELECT {', '.join(source_columns)} "
        f"FROM {spec['source']} "
        f"WHERE {' AND '.join(conditions)}"
    )


con = sqlite3.connect(PATH_DB)
try:
    # Kept from the original cell; no statement below fetches rows.
    con.text_factory = bytes
    cur = con.cursor()

    # Drop the indexed tables in reverse creation order so the cell can be
    # re-run from a clean state.
    print('Resetting database to initial state...')
    for table_spec in reversed(INDEXED_TABLES):
        cur.execute(f"DROP TABLE IF EXISTS {table_spec['target']}")
    con.commit()

    # Create and fill each table, committing after every copy.
    for table_spec in INDEXED_TABLES:
        print(f"\n{table_spec['target']}")
        cur.execute(build_create_sql(table_spec))
        print('Copying data...')
        cur.execute(build_copy_sql(table_spec))
        con.commit()

    # Clean up
    print('\nCleaning up database...')
    cur.execute('VACUUM')
    con.commit()

    cur.close()
finally:
    # Close the connection even if a statement above fails.
    con.close()

In [None]:
# List every object recorded in sqlite_master (name + the SQL that created it).
# The file is opened through a URI with mode=ro, i.e. as a read-only connection.
con = sqlite3.connect(f'file:{PATH_DB}?mode=ro', uri=True)
try:
    df_db = pd.read_sql_query('''
        SELECT
            name, sql
        FROM
            sqlite_master
    ''', con)
finally:
    # Close the connection even when the query raises, so a failed run does
    # not leave an open handle behind in the kernel.
    con.close()
# View the database after optimization
df_db