In [1]:
import configparser
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy
import sqlite3

%load_ext sql

In [2]:
# Make the sibling 'src' directory importable, then load the local helper module
src_dir = Path.cwd().parent / 'src'
sys.path.append(str(src_dir))

import sqlite_utils

In [3]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.style.use('raph-base')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

pd.options.display.float_format = '{:,.2f}'.format
pd.set_option('display.max_columns', 30)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 800)

np.random.seed(666)

In [4]:
# Report interpreter path, Python version and pandas version, one per line
print(sys.executable, sys.version, f'Pandas {pd.__version__}', sep='\n')

C:\Users\r2d4\miniconda3\envs\py3\python.exe
3.8.3 (default, May 19 2020, 06:50:17) [MSC v.1916 64 bit (AMD64)]
Pandas 1.1.3


## Connect to DB

- With sqlite3 you can connect to your DB in a very lightweight way (see the first cell below).
- To use sqlalchemy or ipython-sql, however, you need a 'proper' connection string (see the second cell).

In [5]:
# Open DiscoBase.db directly through the stdlib sqlite3 driver

db_path = Path.cwd().parent / "db_aka_discobase" / "DiscoBase.db"
# isolation_level=None switches the connection to auto commit mode
conn = sqlite3.connect(db_path, isolation_level=None)
cur = conn.cursor()

(conn, cur)

(<sqlite3.Connection at 0x217167dc8a0>, <sqlite3.Cursor at 0x2171686b260>)

In [6]:
# Build the URL-style connection string that sqlalchemy and ipython-sql expect

sqlite_conn_str = "sqlite:///" + str(db_path)

sqlite_conn_str

'sqlite:///C:\\Users\\r2d4\\OneDrive\\code\\projects\\20-02_disco\\db_aka_discobase\\DiscoBase.db'

In [7]:
# "Connect" to sqlalchemy 

# Build the sqlalchemy Engine from the connection string created above;
# the bare name at the end of the cell echoes the Engine's repr as output.
engine = sqlalchemy.create_engine(sqlite_conn_str)

engine

Engine(sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db)

In [8]:
# Connect to ipython-sql: hand the connection string to the %sql line magic
# ($name substitutes the value of the Python variable into the magic line)

%sql $sqlite_conn_str

'Connected: @C:\\Users\\r2d4\\OneDrive\\code\\projects\\20-02_disco\\db_aka_discobase\\DiscoBase.db'

## Query DB

Below I show the first 5 of the following ways to query the DB (ways 6 and 7, in parentheses, are listed for completeness but not demonstrated):
1. using ipython-sql
2. using sqlite functionality 'raw'
3. using my query_read wrapper from sqlite_utils
4. using pandas with sqlite connection
5. using pandas with sqlalchemy engine (sqlalchemy core, but not SQL Expression Language)
6. (using sqlalchemy SQL Expression Language)
7. (using sqlalchemy ORM)


In [9]:
# Sample SELECT that is reused by each of the query approaches below
query = """
    SELECT * 
      FROM records 
      -- WHERE title = 'Only Self'
      LIMIT 3
    ;"""

In [10]:
# 1. Read using ipython-sql (%sql line magic)
# The result set is rendered as a table below the cell

%sql $query

 * sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db
Done.


record_id,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active
1,1,Ultimate Aggression,1,,2020,1,red,200,,blah blah,5,20.0,1,9,1


In [11]:
# 2. Plain sqlite3: execute the query on the connection itself
# conn.execute() hands back a cursor over the result tuples, printed one per line

with conn:
    data = conn.execute(query)
    for row in data.fetchall():
        print(row)

(1, 1, 'Ultimate Aggression', 1, None, 2020, 1, 'red', '200', None, 'blah blah', '5', 20.0, 1, 9, 1)


In [12]:
# 3. Read using cursor and my utils function
# Returns a list of tuples
# NOTE: the call below is commented out, so this cell currently produces no output.

# print(sqlite_utils.query_read(query, cur))

In [13]:
# 4. pandas on top of the plain sqlite3 connection
# Gives back a dataframe indexed by record_id

pd.read_sql(sql=query, con=conn, index_col="record_id")

Unnamed: 0_level_0,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,1,Ultimate Aggression,1,,2020,1,red,200,,blah blah,5,20.0,1,9,1


In [14]:
# 5. Read with pandas, using sqlalchemy engine
# Returns a dataframe
# Pass the sqlalchemy `engine` here, not the sqlite3 `conn`; with `conn`
# this cell would only repeat way 4 and never exercise the engine.

pd.read_sql(query, engine, index_col="record_id")

Unnamed: 0_level_0,artist_id,title,genre_id,label,year,format_id,vinyl_color,lim_edition,number,remarks,purchase_date,price,digitized,rating,active
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,1,Ultimate Aggression,1,,2020,1,red,200,,blah blah,5,20.0,1,9,1


## Inspect sqlite Metadata

In [16]:
# Metadata query: joins sqlite_master with the pragma_table_info() function
# to list name, type, not-null flag, default and primary-key position of each
# column, ordered by table name and column id.
query = """
SELECT 
  m.name AS table_name, 
  p.name AS column_name,
  p.type AS data_type,
  p.'notnull' AS null_constraint,
  p.dflt_value AS default_value,
  p.pk AS primary_key
FROM 
  sqlite_master AS m
JOIN 
  pragma_table_info(m.name) AS p
ORDER BY 
  m.name, 
  p.cid
;"""

In [17]:
# Run the metadata query through the ipython-sql %sql magic
%sql $query

 * sqlite:///C:\Users\r2d4\OneDrive\code\projects\20-02_disco\db_aka_discobase\DiscoBase.db
Done.


table_name,column_name,data_type,null_constraint,default_value,primary_key
artist_genre_link,artist_id,INTEGER,1,,1
artist_genre_link,genre_id,INTEGER,1,,2
artist_label_link,artist_id,INTEGER,1,,1
artist_label_link,label_id,INTEGER,1,,2
artists,artist_id,INTEGER,1,,1
artists,artist_name,TEXT,1,,0
artists,artist_country,TEXT,0,,0
formats,format_id,INTEGER,1,,1
formats,format_name,TEXT,1,,0
genre_label_link,genre_id,INTEGER,1,,1


In [18]:
# Release every database handle this notebook opened, not only the sqlite3
# connection: the cursor first, then its connection, then the sqlalchemy engine.
cur.close()
conn.close()
engine.dispose()  # closes any connections held in the engine's pool