In [None]:
# default_exp core

# 01 Prodb Benchmarks

> Which database is faster? The highly optimised SQLAlchemy library, or the library that literally saves and loads the entire csv every time you modify it?

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
import pandas as pd
import arrow
import os
from time import gmtime, strftime
import sys; sys.path.append('../')
from prodb.core import generate_db, insert_row, insert_rows, utc_now, readable_df

# 1. Prodb API Benchmarks

In [None]:
# ---------------------------------------------------------------- #
# Two sample rows (column name -> list of values); the Prodb benchmark
# inserts this same pair of rows on every iteration.
data = dict(
    name=['Sam', 'Grant'],
    mood=['😊', '😵'],
    message=['hello from London, UK', 'hello from Christchurch, NZ'],
    time_utc=[utc_now() for _ in range(2)],  # one timestamp per row
)
# ---------------------------------------------------------------- #

Duration to insert, append, save, and reload the dataframe 100 times.

In [None]:
%%time
df = generate_db(dbpath='benchmark_db.csv', cols='name mood message'.split())
for i in range(100):
    df = insert_rows(df, data)
print(df.shape)
display(df.tail())

✓💾 benchmark_db.csv (19 kb)
(200, 4)


Unnamed: 0,name,mood,message,time_utc
195,Grant,😵,"hello from Christchurch, NZ",2021-11-06 20:53:58
196,Sam,😊,"hello from London, UK",2021-11-06 20:53:58
197,Grant,😵,"hello from Christchurch, NZ",2021-11-06 20:53:58
198,Sam,😊,"hello from London, UK",2021-11-06 20:53:58
199,Grant,😵,"hello from Christchurch, NZ",2021-11-06 20:53:58


Wall time: 168 ms


In [None]:
# Write the benchmark dataframe to a scratch CSV so the read timing has a file to load.
df.to_csv('tmp.csv')
print(df.shape)

(200, 4)


In [None]:
%%timeit
# Time one full read of the scratch CSV from disk.
df = pd.read_csv('tmp.csv')

1.24 ms ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
%%timeit
# Time a positional lookup of a single row (row 50, all columns).
df.iloc[50, :]

78.2 µs ± 778 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
# Preview the first rows of the benchmark dataframe.
df.head()

Unnamed: 0,name,mood,message,time_utc
0,Sam,😊,"hello from London, UK",2021-11-06 20:53:58
1,Grant,😵,"hello from Christchurch, NZ",2021-11-06 20:53:58
2,Sam,😊,"hello from London, UK",2021-11-06 20:53:58
3,Grant,😵,"hello from Christchurch, NZ",2021-11-06 20:53:58
4,Sam,😊,"hello from London, UK",2021-11-06 20:53:58


# 2. SQLAlchemy / SQLModel Benchmarks
`SQLModel` code and functions from [lukexyz/sqlmodel-streamlit/app.py](https://github.com/lukexyz/sqlmodel-streamlit/blob/main/app.py)

In [None]:
from sqlmodel import Field, Session, SQLModel, create_engine, select
from typing import Optional


class Hero(SQLModel, table=True):
    """Table model for a hero row: ``id``, ``name``, ``secret_name`` and an optional ``age``."""
    __table_args__ = {'extend_existing': True}  # required for streamlit refreshing
    # Primary key; left as None on new objects (the recorded INSERT statements omit it).
    id: Optional[int] = Field(default=None, primary_key=True)
    name: str
    secret_name: str
    age: Optional[int] = None  # optional; None when not supplied


import logging

# Set up the root logger with logging's defaults, then raise the threshold of
# the 'sqlalchemy' logger so that only ERROR and above pass through it.
# NOTE(review): the recorded cell outputs still contain INFO lines from
# sqlalchemy.engine.Engine, so this does not appear to silence the engine's
# statement logging — presumably the engine enables its own echo; confirm.
logging.basicConfig()
sqlalchemy_logger = logging.getLogger('sqlalchemy')
sqlalchemy_logger.setLevel(logging.ERROR)


def create_db_and_tables():
    """Create the tables registered on ``SQLModel.metadata`` using the global ``engine``.

    NOTE(review): ``engine`` is not defined in any of the cells reviewed here
    (``create_engine`` is imported but never called), so a fresh kernel would
    raise ``NameError`` — confirm where it is created.
    """
    metadata = SQLModel.metadata
    metadata.create_all(engine)


def commit_heroes():
    """Insert two fixed sample heroes and commit them in a single session."""
    sample_heroes = (
        Hero(name="Rusty-Man", secret_name="Tommy Sharp", age=36),
        Hero(name="Dr. Weird", secret_name="Steve Weird", age=33),
    )
    with Session(engine) as session:
        for hero in sample_heroes:
            session.add(hero)
        session.commit()


def get_db_size():
    """Return the number of hero rows, found by fetching every row and counting."""
    with Session(engine) as session:
        all_rows = session.exec(select(Hero)).all()
        return len(all_rows)


def select_heros():
    """Print every hero whose age is 35 or under."""
    query = select(Hero).where(Hero.age <= 35)
    with Session(engine) as session:
        for hero in session.exec(query):
            print(hero)


def show_table():
    """Display the last five rows returned by the hero query as a DataFrame."""
    with Session(engine) as session:
        heroes = session.exec(select(Hero)).all()
        last_five = heroes[-5:]
        records = [hero.dict() for hero in last_five]
        display(pd.DataFrame(records))

def get_table():
    """Return the whole hero table as a DataFrame, one row per hero."""
    with Session(engine) as session:
        # Convert to plain dicts while the session is still open.
        records = [hero.dict() for hero in session.exec(select(Hero)).all()]
    return pd.DataFrame(records)

def delete_db():
    """Delete every hero row one at a time, then commit once.

    Despite the name, only the rows are removed; nothing here drops the
    table or the database itself.
    """
    with Session(engine) as session:
        for hero in session.exec(select(Hero)).all():
            session.delete(hero)
        session.commit()

def commit_new_row():
    """Insert two fixed heroes ('Luke' and 'Rusty-Man') and commit them together."""
    new_heroes = [
        Hero(name='Luke', secret_name='Luke Woods', age=23),
        Hero(name="Rusty-Man", secret_name="Tommy Sharp", age=36),
    ]
    with Session(engine) as session:
        for hero in new_heroes:
            session.add(hero)
        session.commit()

  DeclarativeMeta.__init__(cls, classname, bases, dict_used, **kw)


In [None]:
# Smoke run: create the tables, add the two sample heroes, then show the tail of the table and the row count.
create_db_and_tables()
commit_heroes()
show_table()
print(f'db length {get_db_size()}')

2021-11-06 20:54:40,005 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:40,005 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("hero")
2021-11-06 20:54:40,006 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-11-06 20:54:40,007 INFO sqlalchemy.engine.Engine COMMIT
2021-11-06 20:54:40,010 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:40,011 INFO sqlalchemy.engine.Engine INSERT INTO hero (name, secret_name, age) VALUES (?, ?, ?)
2021-11-06 20:54:40,011 INFO sqlalchemy.engine.Engine [generated in 0.00044s] ('Rusty-Man', 'Tommy Sharp', 36)
2021-11-06 20:54:40,013 INFO sqlalchemy.engine.Engine INSERT INTO hero (name, secret_name, age) VALUES (?, ?, ?)
2021-11-06 20:54:40,014 INFO sqlalchemy.engine.Engine [cached since 0.002923s ago] ('Dr. Weird', 'Steve Weird', 33)
2021-11-06 20:54:40,015 INFO sqlalchemy.engine.Engine COMMIT
2021-11-06 20:54:40,152 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:40,153 INFO sqlalchemy.engine.Engine SELECT

Unnamed: 0,age,id,name,secret_name
0,33,18,Dr. Weird,Steve Weird
1,36,19,Rusty-Man,Tommy Sharp
2,33,20,Dr. Weird,Steve Weird
3,36,21,Rusty-Man,Tommy Sharp
4,33,22,Dr. Weird,Steve Weird


2021-11-06 20:54:40,159 INFO sqlalchemy.engine.Engine ROLLBACK
2021-11-06 20:54:40,160 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:40,161 INFO sqlalchemy.engine.Engine SELECT hero.id, hero.name, hero.secret_name, hero.age 
FROM hero
2021-11-06 20:54:40,162 INFO sqlalchemy.engine.Engine [no key 0.00067s] ()
2021-11-06 20:54:40,164 INFO sqlalchemy.engine.Engine ROLLBACK
db length 22


### Run 100 iteration benchmark

In [None]:
%%time
delete_db()
create_db_and_tables()
for k in range(10):
    commit_heroes()
    #write_new_row()  # add two rows to db
df = get_table()
print(df.shape)
df.tail()

2021-11-06 20:54:51,549 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:51,550 INFO sqlalchemy.engine.Engine SELECT hero.id, hero.name, hero.secret_name, hero.age 
FROM hero
2021-11-06 20:54:51,551 INFO sqlalchemy.engine.Engine [no key 0.00044s] ()
2021-11-06 20:54:51,553 INFO sqlalchemy.engine.Engine DELETE FROM hero WHERE hero.id = ?
2021-11-06 20:54:51,554 INFO sqlalchemy.engine.Engine [generated in 0.00063s] ((1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,)  ... displaying 10 of 22 total bound parameter sets ...  (21,), (22,))
2021-11-06 20:54:51,555 INFO sqlalchemy.engine.Engine COMMIT
2021-11-06 20:54:51,684 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:51,685 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("hero")
2021-11-06 20:54:51,685 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-11-06 20:54:51,686 INFO sqlalchemy.engine.Engine COMMIT
2021-11-06 20:54:51,687 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-11-06 20:54:51,688 INFO sqla

Unnamed: 0,age,id,name,secret_name
15,33,16,Dr. Weird,Steve Weird
16,36,17,Rusty-Man,Tommy Sharp
17,33,18,Dr. Weird,Steve Weird
18,36,19,Rusty-Man,Tommy Sharp
19,33,20,Dr. Weird,Steve Weird


# 3. SQLite (`sqlite3`) Benchmarks
* Code from the good man Jcharis on [github](https://github.com/Jcharis/streamlit_todo_crud_app/blob/main/db_fxns.py)

In [None]:
#!pip install sqlalchemy

In [None]:
import pandas as pd
import json
import sqlite3

In [None]:
# One shared connection and cursor, used by the helper functions that follow.
# check_same_thread=False lifts sqlite3's default restriction that a
# connection may only be used from the thread that created it.
conn = sqlite3.connect(
    'sqlite.db',
    check_same_thread=False,
)
c = conn.cursor()

def create_table():
    """Create ``taskstable`` (text columns name, mood, message) if it does not already exist."""
    ddl = 'CREATE TABLE IF NOT EXISTS taskstable(name TEXT,mood TEXT,message TEXT)'
    c.execute(ddl)

def add_data(name, mood, message):
    """Insert one (name, mood, message) row into ``taskstable`` and commit immediately."""
    row = (name, mood, message)
    c.execute('INSERT INTO taskstable(name,mood,message) VALUES (?,?,?)', row)
    conn.commit()
    
def view_all_data():
    """Return every row of ``taskstable`` as a list of tuples."""
    # Cursor.execute returns the cursor itself, so the fetch can be chained.
    return c.execute('SELECT * FROM taskstable').fetchall()


In [None]:
# Make sure the table exists before any rows are inserted.
create_table()

In [None]:
# Sample row, reused by the timed insert loop later in the notebook.
name, mood, message = "luke", "wow", "a real database"
add_data(name, mood, message)

In [None]:
# Read the table back to confirm the insert landed.
view_all_data()

[('luke', 'wow', 'a real database')]

In [None]:
%%time
for i in range(100):
    add_data(name, mood, message)
res = view_all_data()

Wall time: 11.1 s
