In [1]:
import sqlite3
import pandas as pd

In [2]:
# Path to the SQLite database file, relative to the current working directory.
PATH_DB = './spotify.sqlite'

# Turn off pandas' display truncation (row count, column count, cell width)
# so the full CREATE TABLE statements are visible when the schema is shown.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
):
    pd.set_option(_display_option, None)

In [3]:
# View database before optimization.
# The connection is opened read-only (URI `mode=ro`), so this inspection cell
# cannot modify the file. try/finally guarantees the connection is closed even
# if the query raises, so no handle is left open in the kernel.
con = sqlite3.connect(f'file:{PATH_DB}?mode=ro', uri=True)
try:
    # sqlite_master holds one row per schema object, including its CREATE statement.
    df_db = pd.read_sql_query('SELECT * FROM sqlite_master', con)
finally:
    con.close()
df_db

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,albums,albums,2,"CREATE TABLE albums ([id], [name], [album_group], [album_type], [release_date], [popularity])"
1,table,artists,artists,3,"CREATE TABLE artists ([name], [id], [popularity], [followers])"
2,table,audio_features,audio_features,4,"CREATE TABLE audio_features ([id], [acousticness], [analysis_url], [danceability], [duration], [energy], [instrumentalness], [key], [liveness], [loudness], [mode], [speechiness], [tempo], [time_signature], [valence])"
3,table,genres,genres,5,CREATE TABLE genres ([id])
4,table,r_albums_artists,r_albums_artists,6,"CREATE TABLE r_albums_artists ([album_id], [artist_id])"
5,table,r_albums_tracks,r_albums_tracks,7,"CREATE TABLE r_albums_tracks ([album_id], [track_id])"
6,table,r_artist_genre,r_artist_genre,8,"CREATE TABLE r_artist_genre ([genre_id], [artist_id])"
7,table,r_track_artist,r_track_artist,9,"CREATE TABLE r_track_artist ([track_id], [artist_id])"
8,table,tracks,tracks,11,"CREATE TABLE tracks ([id], [disc_number], [duration], [explicit], [audio_feature_id], [name], [preview_url], [track_number], [popularity], [is_playable])"


In [4]:
# Rebuild the "*_indexed" copies of the artists, albums, tracks and
# audio_features tables. Each copy declares column types and a PRIMARY KEY on
# `id`, and is filled from its untyped source table. Only the columns named in
# each INSERT are carried over.
#
# The cell is re-runnable: it first drops any "*_indexed" tables left over
# from a previous run.

# Statements that return the database to its initial state.
RESET_STATEMENTS = (
    'DROP TABLE IF EXISTS audio_features_indexed',
    'DROP TABLE IF EXISTS tracks_indexed',
    'DROP TABLE IF EXISTS albums_indexed',
    'DROP TABLE IF EXISTS artists_indexed',
)

# (CREATE TABLE, INSERT ... SELECT) statement pairs, listed in build order.
# The SQL text is kept verbatim because SQLite stores the CREATE statement
# as written in sqlite_master.
INDEXED_TABLE_STATEMENTS = (
    # artists_indexed
    ('''
    CREATE TABLE artists_indexed (
        id TEXT PRIMARY KEY,
        name BLOB NOT NULL,
        followers INTEGER,
        popularity INTEGER NOT NULL
    )
''', '''
    INSERT INTO artists_indexed (
        id,
        name,
        followers,
        popularity
    )
    SELECT
        id,
        name,
        followers,
        popularity
    FROM
        artists
'''),
    # albums_indexed
    ('''
    CREATE TABLE albums_indexed (
        id TEXT PRIMARY KEY,
        name BLOB NOT NULL,
        album_type TEXT,
        release_date TEXT,
        popularity INTEGER NOT NULL
    )
''', '''
    INSERT INTO albums_indexed (
        id,
        name,
        album_type,
        release_date,
        popularity
    )
    SELECT
        id,
        name,
        album_type,
        release_date,
        popularity
    FROM
        albums
'''),
    # tracks_indexed
    ('''
    CREATE TABLE tracks_indexed (
        id TEXT PRIMARY KEY,
        name BLOB NOT NULL,
        duration INTEGER,
        explicit INTEGER,
        audio_feature_id TEXT NOT NULL,
        popularity INTEGER NOT NULL
    )
''', '''
    INSERT INTO tracks_indexed (
        id,
        name,
        duration,
        explicit,
        audio_feature_id,
        popularity
    )
    SELECT
        tracks.id,
        tracks.name,
        tracks.duration,
        tracks.explicit,
        tracks.audio_feature_id,
        tracks.popularity
    FROM
        tracks
'''),
    # audio_features_indexed
    ('''
    CREATE TABLE audio_features_indexed (
        id TEXT PRIMARY KEY,
        analysis_url TEXT,
        loudness REAL,
        tempo REAL,
        time_signature INTEGER,
        key INTEGER,
        mode INTEGER,
        acousticness REAL,
        danceability REAL,
        energy REAL,
        instrumentalness REAL,
        liveness REAL,
        speechiness REAL,
        valence REAL
    )
''', '''
    INSERT INTO audio_features_indexed (
        id,
        analysis_url,
        loudness,
        tempo,
        time_signature,
        key,
        mode,
        acousticness,
        danceability,
        energy,
        instrumentalness,
        liveness,
        speechiness,
        valence
    )
    SELECT
        id,
        analysis_url,
        loudness,
        tempo,
        time_signature,
        key,
        mode,
        acousticness,
        danceability,
        energy,
        instrumentalness,
        liveness,
        speechiness,
        valence
    FROM
        audio_features
'''),
)

con = sqlite3.connect(PATH_DB)
try:
    cur = con.cursor()

    # Reset database to initial state
    for reset_sql in RESET_STATEMENTS:
        cur.execute(reset_sql)
    con.commit()

    # Build each table and commit it before moving on to the next one.
    for create_sql, insert_sql in INDEXED_TABLE_STATEMENTS:
        cur.execute(create_sql)
        cur.execute(insert_sql)
        con.commit()

    # Clean up. VACUUM cannot run inside an open transaction, so it must come
    # after the last commit; it needs no commit of its own.
    cur.execute('VACUUM')
    cur.close()
finally:
    # Always release the connection. If a statement above raised (for example
    # a PRIMARY KEY or NOT NULL violation during an INSERT), closing discards
    # the uncommitted work and frees the file so the cell can be re-run.
    con.close()

In [5]:
# View database after optimization.
# The connection is opened read-only (URI `mode=ro`), so this inspection cell
# cannot modify the file. try/finally guarantees the connection is closed even
# if the query raises, so no handle is left open in the kernel.
con = sqlite3.connect(f'file:{PATH_DB}?mode=ro', uri=True)
try:
    # sqlite_master holds one row per schema object, including its CREATE statement.
    df_db = pd.read_sql_query('SELECT * FROM sqlite_master', con)
finally:
    con.close()
df_db

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,albums,albums,2,"CREATE TABLE albums ([id], [name], [album_group], [album_type], [release_date], [popularity])"
1,table,artists,artists,3,"CREATE TABLE artists ([name], [id], [popularity], [followers])"
2,table,audio_features,audio_features,4,"CREATE TABLE audio_features ([id], [acousticness], [analysis_url], [danceability], [duration], [energy], [instrumentalness], [key], [liveness], [loudness], [mode], [speechiness], [tempo], [time_signature], [valence])"
3,table,genres,genres,5,CREATE TABLE genres ([id])
4,table,r_albums_artists,r_albums_artists,6,"CREATE TABLE r_albums_artists ([album_id], [artist_id])"
5,table,r_albums_tracks,r_albums_tracks,7,"CREATE TABLE r_albums_tracks ([album_id], [track_id])"
6,table,r_artist_genre,r_artist_genre,8,"CREATE TABLE r_artist_genre ([genre_id], [artist_id])"
7,table,r_track_artist,r_track_artist,9,"CREATE TABLE r_track_artist ([track_id], [artist_id])"
8,table,tracks,tracks,11,"CREATE TABLE tracks ([id], [disc_number], [duration], [explicit], [audio_feature_id], [name], [preview_url], [track_number], [popularity], [is_playable])"
9,table,artists_indexed,artists_indexed,13,"CREATE TABLE artists_indexed (\n id TEXT PRIMARY KEY,\n name BLOB NOT NULL,\n followers INTEGER,\n popularity INTEGER NOT NULL\n )"
