In [1]:
import os
import sys
import subprocess as sp
import json
import random
import time
import hashlib
import base64
from contextlib import closing

import mariadb
import mariadb.constants.CLIENT
import pymysql
import pymysql.constants.CLIENT

### Get a quick MariaDB instance:
---

```bash

$ docker run --rm -it --name m105 --cpus 8 --network host -e MYSQL_ROOT_PASSWORD=bobz1234 \
--tmpfs /mem_vol/ mariadb:10.5 --datadir='/mem_vol' --port=10306 --innodb-buffer-pool-size=8GB

# in-memory db + sync off, i.e. a reporting db:
$ docker run --rm -it --name m105 --cpus 8 --network host -e MYSQL_ROOT_PASSWORD=bobz1234 \
--tmpfs /mem_vol/ mariadb:10.5 --datadir='/mem_vol' --port=10306 --innodb-buffer-pool-size=8GB \
--innodb-flush-log-at-trx-commit=2 --sync-binlog=0

```

In [2]:
# Setting autocommit differs between psycopg2, pymysql, and the old mysql connector.
# The first form below is psycopg2 (an attribute); the second is for pymysql (a method call):
# dbconn.autocommit = True
# dbconn.autocommit(True)

In [3]:
# Connection settings for the local benchmark server.
# Every value can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook; the defaults are
# the values used by the `docker run` commands shown earlier.
_CONN_INFO = {
    "host": os.environ.get("MARIADB_HOST", "127.0.0.1"),
    "port": int(os.environ.get("MARIADB_PORT", "10306")),  # drivers expect an int

    # "unix_socket": "/var/run/mysqld/mysqld.sock",
    # "unix_socket": "/run/mysqld/mysqld.sock",

    "user": os.environ.get("MARIADB_USER", "root"),
    "password": os.environ.get("MARIADB_PASSWORD", "bobz1234"),

    "database": "mysql",
}

# Independent copies so driver-specific options do not leak between drivers.
_PYMYSQL_CONN_INFO = dict(_CONN_INFO)
_MARIADB_CONN_INFO = dict(_CONN_INFO)

# allow cur.execute to execute multiple statements
_PYMYSQL_CONN_INFO["client_flag"] = pymysql.constants.CLIENT.MULTI_STATEMENTS
_MARIADB_CONN_INFO["client_flag"] = mariadb.constants.CLIENT.MULTI_STATEMENTS

In [4]:
# PyMySQL connection; exec_q and the "pymy" benchmark cells run on it.
pymy_conn = pymysql.connect(**_PYMYSQL_CONN_INFO)

In [5]:
# NOTE(review): this calls pymysql.connect, not mariadb.connect, so `mdb_conn`
# is a second PyMySQL connection and every "mdb" cell in this notebook
# exercises PyMySQL as well. Switching to mariadb.connect would presumably
# also require changing how autocommit is read/set on this connection —
# TODO confirm intent before relying on the pymy-vs-mdb comparison.
mdb_conn = pymysql.connect(**_MARIADB_CONN_INFO)

In [6]:
# autocommit info
# On a PyMySQL connection `autocommit` is a method (printing it shows the
# bound method); the current setting is read from `autocommit_mode`.
print(pymy_conn.autocommit)  # it's a method, not a field.
print(f"pymy autocommit status: {pymy_conn.autocommit_mode}")
pymy_conn.autocommit(True)
print(f"pymy autocommit status: {pymy_conn.autocommit_mode}")

# mdb
# mdb_conn is also created through pymysql.connect, so the same calls apply.
print(f"mdb autocommit status: {mdb_conn.autocommit_mode}")
mdb_conn.autocommit(True)
print(f"mdb autocommit status: {mdb_conn.autocommit_mode}")

<bound method Connection.autocommit of <pymysql.connections.Connection object at 0x7f43181c88d0>>
pymy autocommit status: False
pymy autocommit status: True
mdb autocommit status: False
mdb autocommit status: True


In [7]:
def exec_q(q, conn=None, args=None):
    """Execute a query and print its result as plain text.

    When the statement produces a result set, prints a ``|``-joined header of
    the column names, a dashed underline of the same width, and one line per
    row. A blank separator line is always printed last.

    Args:
        q: SQL text to execute.
        conn: Connection to run the query on. Defaults to the notebook-level
            ``pymy_conn``, so existing ``exec_q(q)`` calls behave as before.
        args: Optional parameters forwarded to ``cursor.execute``.
    """
    if conn is None:
        # Looked up at call time so the function can be defined before (or
        # re-used after) the connection cell is re-run.
        conn = pymy_conn

    # closing() releases the cursor even if execute/fetch raises.
    with closing(conn.cursor()) as tmp_cur:
        if args is None:
            tmp_cur.execute(q)
        else:
            tmp_cur.execute(q, args)

        # `description` is falsy when the statement returned no result set.
        col_names = [desc[0] for desc in (tmp_cur.description or ())]
        if col_names:
            hdr = "|".join(col_names)
            print(hdr)
            print('-' * len(hdr))

            # rows — only fetched when there is a result set to read from
            for row in tmp_cur.fetchall():
                print(row)

        # sep
        print("")


# Run some test queries

In [8]:
# Sanity check that the connection works: list the server's databases.
exec_q("SHOW DATABASES;")

Database
--------
('information_schema',)
('mysql',)
('performance_schema',)



In [9]:
# Report the InnoDB buffer pool size in MB; the docker command above requests 8GB.
exec_q("SELECT @@innodb_buffer_pool_size / (1024*1024) AS innodb_buffer_MB;")
# exec_q("SELECT @@innodb_buffer_pool_size / (1024*1024*1024) AS innodb_buffer_GB;")

innodb_buffer_MB
----------------
(Decimal('8192.0000'),)



In [10]:
# Show the session-level and the global SQL mode for comparison.
exec_q("SELECT @@SQL_MODE;")
exec_q("SELECT @@GLOBAL.SQL_MODE")

@@SQL_MODE
----------
('STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION',)

@@GLOBAL.SQL_MODE
-----------------
('STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION',)



# Generate sample data

In [11]:
# MariaDB's INT type spans -2 147 483 648 .. 2 147 483 647; the values
# generated here stay inside the non-negative part of that range.
def get_random_int32():
    """Return a random integer in the inclusive range [0, 2 000 000 000]."""
    upper_bound = 2_000_000_000
    return random.randint(0, upper_bound)

def get_random_int64():
    """Return a random integer in the inclusive range [0, 80 000 000 000 000]."""
    upper_bound = 80_000_000_000_000
    return random.randint(0, upper_bound)

def get_random_str(str_len=48):
    """Return random text built by base64-encoding ``str_len`` random bytes.

    ``str_len`` is the number of random *bytes* drawn, not the length of the
    returned string: base64 expands the input by 4/3, so the default of 48
    bytes yields 64 characters. The two non-alphanumeric base64 symbols are
    replaced by ``A`` and ``Z``.

    Raises:
        AssertionError: if ``str_len`` is not an int or is 8192 or larger.
    """
    assert isinstance(str_len, int)
    assert str_len < 8192

    random_bytes = os.urandom(str_len)
    encoded = base64.b64encode(random_bytes, altchars=b"AZ")
    return encoded.decode('ascii')

In [24]:
# Preview: build five (random string, random int) rows and print each one.
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(5)]

for row in tmp_table:
    print(row)

('G1MOZp2mIoB31ALG4AcRzin6c2Pm9EvMr8QeA1m85PpEAahpzRSucnIRfN1DZJp1', 1032680321)
('idQ4yb2lqQcrrEbYZrBOkO0cGHGRgeBjfZ7MZEEFbK84dQb0N9qA6d4QvFYdDpZl', 1368615492)
('djZSi0sAN06sy2CWG1TXtqxJkAiWqZ8GxAmkuw1oMjO6RKRCG809gTZmajSvJ37L', 1739498502)
('lWAii7R74gkXbDaStIcJfTq7hwhj71pYTaaIHfYaoopYdD4yCjvyBbWy9Sd5ccWv', 1692318112)
('fWlZts23keMDDgnKC9R3oD3ed1HqzFVlS6VZBcr1sE7HgN2tL2U7D3CeVYXukZec', 300110909)


In [16]:
# Length check on a sample string of the kind get_random_str(48) produces:
# 64 characters, which is the width of the val_1 CHAR(64) column below.
len('KCrOThHmQkH87ADoG0cpN5SyToCn3sA2un5I8JofDa4IEZL8pAHgmRgPzZoeArfn')

64

# Create BENCHMARK tables

In [29]:
# Multi-statement script that drops and recreates both benchmark tables.
# Sending it through a single execute() relies on the MULTI_STATEMENTS client
# flag set in the connection info. xb_bench1 stores val_1 as CHAR(64),
# xb_bench2 as VARCHAR(128); both start their auto-increment key at 1000.
refresh_schema_q = """
DROP TABLE IF EXISTS xb_bench1;
DROP TABLE IF EXISTS xb_bench2;

CREATE TABLE IF NOT EXISTS xb_bench1(
     brid INT NOT NULL AUTO_INCREMENT,
     val_1 CHAR(64) NOT NULL,
     num_1 INT NOT NULL,   
     PRIMARY KEY (brid)
) AUTO_INCREMENT = 1000;

CREATE TABLE IF NOT EXISTS xb_bench2(
     brid INT NOT NULL AUTO_INCREMENT,
     val_1 VARCHAR(128) NOT NULL,
     num_1 INT NOT NULL,
     PRIMARY KEY (brid)
) AUTO_INCREMENT = 1000;
"""
exec_q(refresh_schema_q)




In [30]:
# Test: insert a few records

In [31]:
# Cursor shared by the test-insert cells that follow.
# NOTE(review): no visible cell closes this cursor.
pymy_cur = pymy_conn.cursor()

In [32]:
# Insert two rows with literal values in a single statement. The num_1 values
# are written as quoted strings; the SELECT output that follows shows them
# stored as integers. execute() returns the affected-row count (2 here).
q = """ INSERT INTO xb_bench1(val_1, num_1) VALUES 
('Q8M5rzhQveJJClZAGhXtNmWCgRm2ETnSeZb2KdXy1vDVlnj3tYOlOZulh2Fc7T1p', '2120724'),
('q0kAiKIuAhhJKWlHdLZxhwh0CnF5vawN5YowLZx3AmbwIZID8lzzGkT2vfWTDAAo', '2020921');
"""

pymy_cur.execute(q)

2

In [33]:
# Confirm the two literal rows landed and received ids starting at 1000.
exec_q("select * from xb_bench1;")

brid|val_1|num_1
----------------
(1000, 'Q8M5rzhQveJJClZAGhXtNmWCgRm2ETnSeZb2KdXy1vDVlnj3tYOlOZulh2Fc7T1p', 2120724)
(1001, 'q0kAiKIuAhhJKWlHdLZxhwh0CnF5vawN5YowLZx3AmbwIZID8lzzGkT2vfWTDAAo', 2020921)



In [34]:
# Parameterized insert: the values in `record` are passed separately from the
# SQL text and bound to the two %s placeholders by the driver.
q = "INSERT INTO xb_bench1(val_1, num_1) VALUES (%s, %s);"
record = ('onuqwddqobqubffqubf', 13400111)
pymy_cur.execute(q, record)

1

In [35]:
# Confirm the parameterized row was appended after the two literal rows.
exec_q("select * from xb_bench1;")

brid|val_1|num_1
----------------
(1000, 'Q8M5rzhQveJJClZAGhXtNmWCgRm2ETnSeZb2KdXy1vDVlnj3tYOlOZulh2Fc7T1p', 2120724)
(1001, 'q0kAiKIuAhhJKWlHdLZxhwh0CnF5vawN5YowLZx3AmbwIZID8lzzGkT2vfWTDAAo', 2020921)
(1002, 'onuqwddqobqubffqubf', 13400111)



# BENCHMARK

In [22]:
# sample data: 2000 (random string, random int) rows
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(2000)]

# show only the first three rows rather than dumping the whole list
print('sample data:')
preview_rows = tmp_table[:3]
for row in preview_rows:
    print(row)

sample data:
('XRfeKamwZgkeM8cuq7zWNEIGqnmHdGhQz9CZC2lXlmQqXgRbnXyybLW3S0mlWb4u', 1090368357)
('CPVJpN6u6AggwN5qQN2C5Tmnp8ZvMRkUKsOLjOmcrRBtShZpwjyJdJAjhNZZmhAn', 975817519)
('2Z7z6WgnBiBtBjdeVCa8cTmWgueZNhi6Bo1kqxRDGrUyQRwyymHFxat6UyIkTpZN', 1715380455)


In [32]:
# ---------------------------------------- pymy execute
# Benchmark: one cursor.execute() call per row on the PyMySQL connection.

# sample data (built before the timer starts so only the inserts are timed)
num_records = 20 * 1000
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(num_records)]

# query
q = "INSERT xb_bench1(val_1, num_1) VALUES (%s, %s);"

# closing() releases the cursor even if one of the statements raises.
with closing(pymy_conn.cursor()) as tmp_cur:
    # drop tables and start fresh
    tmp_cur.execute(refresh_schema_q)

    # *** start timer
    start_time = time.perf_counter()
    for record in tmp_table:
        tmp_cur.execute(q, record)

    # stop timer
    elapsed_time = time.perf_counter() - start_time

    print(f"time: {elapsed_time}")
    print(f"ips: {num_records/elapsed_time}")

    # verify that every row actually reached the table
    tmp_cur.execute("select count(*) from xb_bench1;")
    print(f"\nActual count(*): {tmp_cur.fetchone()}")

time: 2.626749580958858
ips: 7613.9728526003155

Actual count(*): (20000,)


In [33]:
# ---------------------------------------- mdb execute
# Benchmark: one cursor.execute() call per row on `mdb_conn`.
# NOTE(review): `mdb_conn` is created with pymysql.connect, so this cell
# measures PyMySQL again rather than the mariadb connector.

# sample data (built before the timer starts so only the inserts are timed)
num_records = 20 * 1000
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(num_records)]

# query
q = "INSERT xb_bench1(val_1, num_1) VALUES (%s, %s);"

# closing() releases the cursor even if one of the statements raises.
with closing(mdb_conn.cursor()) as tmp_cur:
    # drop tables and start fresh
    tmp_cur.execute(refresh_schema_q)

    # *** start timer
    start_time = time.perf_counter()
    for record in tmp_table:
        tmp_cur.execute(q, record)
    # stop timer
    elapsed_time = time.perf_counter() - start_time

    print(f"time: {elapsed_time}")
    print(f"ips: {num_records/elapsed_time}")

    # verify that every row actually reached the table
    tmp_cur.execute("select count(*) from xb_bench1;")
    print(f"actual count(*): {tmp_cur.fetchone()}")

time: 2.6188691640272737
ips: 7636.88399356468
actual count(*): (20000,)


In [35]:
# ---------------------------------------- pymy executemany
# Benchmark: a single cursor.executemany() call for all rows on the PyMySQL
# connection.

# sample data (built before the timer starts so only the insert is timed)
num_records = 1 * 1000 * 1000
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(num_records)]

print("sample data rdy ...")
# query
q = "INSERT xb_bench1(val_1, num_1) VALUES (%s, %s);"

# closing() releases the cursor even if one of the statements raises.
with closing(pymy_conn.cursor()) as tmp_cur:
    # drop tables and start fresh
    tmp_cur.execute(refresh_schema_q)

    # *** start timer
    start_time = time.perf_counter()
    tmp_cur.executemany(q, tmp_table)

    # stop timer
    elapsed_time = time.perf_counter() - start_time

    print(f"time: {elapsed_time}")
    print(f"ips: {num_records/elapsed_time}")

    # verify that every row actually reached the table
    tmp_cur.execute("select count(*) from xb_bench1;")
    print(f"\nActual count(*): {tmp_cur.fetchone()}")

sample data rdy ...
time: 9.793820724007674
ips: 102105.19757102474

Actual count(*): (1000000,)


In [36]:
# ---------------------------------------- mdb executemany
# Benchmark: a single cursor.executemany() call for all rows on `mdb_conn`.
# NOTE(review): `mdb_conn` is created with pymysql.connect, so this cell
# measures PyMySQL again rather than the mariadb connector.

# sample data (built before the timer starts so only the insert is timed)
num_records = 1 * 1000 * 1000
tmp_table = [(get_random_str(48), get_random_int32()) for _ in range(num_records)]

print("sample data rdy ...")
# query
q = "INSERT xb_bench1(val_1, num_1) VALUES (%s, %s);"

# closing() releases the cursor even if one of the statements raises.
with closing(mdb_conn.cursor()) as tmp_cur:
    # drop tables and start fresh
    tmp_cur.execute(refresh_schema_q)

    # *** start timer
    start_time = time.perf_counter()
    tmp_cur.executemany(q, tmp_table)

    # stop timer
    elapsed_time = time.perf_counter() - start_time

    print(f"time: {elapsed_time}")
    print(f"ips: {num_records/elapsed_time}")

    # verify that every row actually reached the table
    tmp_cur.execute("select count(*) from xb_bench1;")
    print(f"\nActual count(*): {tmp_cur.fetchone()}")

sample data rdy ...
time: 9.574791940045543
ips: 104440.91174635419

Actual count(*): (1000000,)


# Conclusions
---