# Install Arcion, YCSB and SQL Server

In [3]:
%pip install ipywidgets
from libpython.arcion_control import *    
from libpython.ycsb_control import *    
show_arcion_config()
show_ycsb_config()

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [16]:
import subprocess
print (subprocess.run("bin/download-jars.sh",stdout=subprocess.PIPE).stdout.decode('utf-8'))
print (subprocess.run("bin/install-arcion.sh",stdout=subprocess.PIPE).stdout.decode('utf-8'))
print (subprocess.run("bin/install-ycsb.sh",stdout=subprocess.PIPE).stdout.decode('utf-8'))
print (subprocess.run("bin/install-sqlserver.sh",stdout=subprocess.PIPE).stdout.decode('utf-8'))

deltalake /opt/stage/libs/SparkJDBC42.jar found
lakehouse  /opt/stage/libs/DatabricksJDBC42.jar found
postgres  /opt/stage/libs/postgresql-42.7.1.jar found
mariadb  /opt/stage/libs/mariadb-java-client-3.3.2.jar found
oracle /opt/stage/libs/ojdbc8.jar found
log4j /opt/stage/libs/log4j-1.2.17.jar found

arcion  /opt/stage/arcion/replicant-cli/bin/replicant found
checking jar(s) in /opt/stage/arcion/24.01.25.1/lib for updates
checking jar(s) in /opt/stage/arcion/replicant-cli/lib for updates
checking jar(s) in /opt/stage/arcion/replicate-cli-23.05.31.29/lib for updates
checking jar(s) in /opt/stage/arcion/23.05.31.31/lib for updates
checking jar(s) in /opt/stage/arcion/23.09.29.11/lib for updates

YCSB  /opt/stage/ycsb/ycsb-jdbc-binding-0.18.0-SNAPSHOT  found
checking jar(s) in /opt/stage/ycsb/ycsb-jdbc-binding-0.18.0-SNAPSHOT/lib for updates

sqlserver found



# Customize YCSB workload characteristics

In [15]:
from libpython.ycsb_control import *    
VBox([HBox([Label('Sparse'), sparse_cnt, sparse_fields, sparse_field_len]),
    HBox([Label('Dense'),  dense_cnt,  dense_fields, dense_field_len])])

VBox(children=(HBox(children=(Label(value='Sparse'), BoundedIntText(value=1, description='Instances:', min=1),…

## Create SQL Server user, create and load YCSB data sets

In [6]:
import subprocess
print (sparse_cnt.value)

print (subprocess.run(""". ./demo/sqlserver/run-ycsb-sqlserver-source.sh; 
    create_user;
    load_dense_data;
    load_sparse_data""",
    shell=True,executable="/usr/bin/bash",stdout=subprocess.PIPE).stdout.decode('utf-8'))


3



real	0m0.119s
user	0m0.082s
sys	0m0.020s


replicant
24.01.25.1 24.01
PATH=/opt/stage/bin/jsqsh-dist-3.0-SNAPSHOT/bin added
Msg 15025, Level 16, State 1, Server ron, Line 2
The server principal 'arcsrc' already exists.
Msg 1801, Level 16, State 3, Server ron, Line 1
Database 'arcsrc' already exists. Choose a different database name.
Changed database context to 'arcsrc'.
Msg 15023, Level 16, State 5, Server ron, Line 1
User, group, or role 'arcsrc' already exists in the current database.
Starting dense table 1
CREATE TABLE YCSBDENSE (
	YCSB_KEY INT,
	FIELD0 TEXT, FIELD1 TEXT,
	FIELD2 TEXT, FIELD3 TEXT,
	FIELD4 TEXT, FIELD5 TEXT,
	FIELD6 TEXT, FIELD7 TEXT,
	FIELD8 TEXT, FIELD9 TEXT,
	PRIMARY KEY (YCSB_KEY)
)
go
Msg 2714, Level 16, State 6, Server ron, Line 2
There is already an object named 'YCSBDENSE' in the database.
14.0
11
1       SQLCHAR             0       12      ","    1     YCSB_KEY                             ""
2       SQLCHAR             0       0       ","    2     FIELD0                               SQL_Latin1_Gene


real	0m0.196s
user	0m0.143s
sys	0m0.004s


# Run YCSB and Arcion in the background

## Start/Restart YCSB workload at 1 TPS
1. Adjust the TPS (throughput per second) via the UI
   1. 0=fast as possible
   2. 1=1 TPS
   3. 10=10 TPS
2. Run YCSB for sparse and dense tables

In [14]:
from libpython.ycsb_control import *    
VBox([HBox([Label('Sparse'), sparse_tps, sparse_threads]), HBox([Label('Dense'),  dense_tps, dense_threads])])

VBox(children=(HBox(children=(Label(value='Sparse'), BoundedIntText(value=1, description='TPS:', max=1000), Bo…

In [21]:
import subprocess
from libpython.ycsb_control import *    

# print(f"y_target_sparse={sparse_tps.value} y_target_dense={dense_tps.value} y_threads_sparse={sparse_threads.value} y_threads_dense={dense_threads.value}")
print (subprocess.run(f""". ./demo/sqlserver/run-ycsb-sqlserver-source.sh; 
    kill_ycsb;
    y_target_sparse={sparse_tps.value} y_target_dense={dense_tps.value} y_threads_sparse={sparse_threads.value} y_threads_dense={dense_threads.value} start_ycsb;""",
    shell=True,executable="/usr/bin/bash",stdout=subprocess.PIPE).stdout.decode('utf-8'))

replicant
24.01.25.1 24.01
PATH=/opt/stage/bin/jsqsh-dist-3.0-SNAPSHOT/bin added
dense
ycsb dense pid 210365
ycsb dense log is at /home/rslee/github/dbx/ingestion/demo/sqlserver/logs/ycsb.dense.log
ycsb dense can be killed with . ./demo/sqlserver/run-ycsb-sqlserver-source.sh; kill_recurse $(cat $PROG_DIR/logs/ycsb.dense.pid)
sparse
ycsb sparse pid 210369
ycsb sparse log is at /home/rslee/github/dbx/ingestion/demo/sqlserver/logs/ycsb.sparse.log
ycsb sparse can be killed with . ./demo/sqlserver/run-ycsb-sqlserver-source.sh; kill_recurse $(cat $PROG_DIR/logs/ycsb.sparse.pid)



## Start Arcion

In [23]:
HBox([Label('Arcion'), repl_mode, cdc_mode])

HBox(children=(Label(value='Arcion'), Dropdown(description='Replication:', options=('snapshot', 'real-time', '…

In [17]:
import subprocess

print (f"""{cdc_mode.value} {repl_mode.value}""")

print (subprocess.run(f""". ./demo/sqlserver/run-ycsb-sqlserver-source.sh; 
    echo $PROG_DIR;
    cd $PROG_DIR;
    kill_arcion;
    a_repltype={repl_mode.value} start_{cdc_mode.value}_arcion;""",
    shell=True,executable="/usr/bin/bash",stdout=subprocess.PIPE).stdout.decode('utf-8'))


change snapshot
replicant
24.01.25.1 24.01
PATH=/opt/stage/bin/jsqsh-dist-3.0-SNAPSHOT/bin added
/home/rslee/github/dbx/ingestion/demo/sqlserver
replicant
arcion pid 181597
arcion log is at /home/rslee/github/dbx/ingestion/demo/sqlserver/logs/arcion.log
arcion can be killed with . ./demo/sqlserver/run-ycsb-sqlserver-source.sh; kill_recurse $(cat $PROG_DIR/logs/arcion.pid)



In [9]:

cluster_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterId")

workspace_id =spark.conf.get("spark.databricks.clusterUsageTags.clusterOwnerOrgId")

# clusterName = spark.conf.get("spark.databricks.clusterUsageTags.clusterName")

workspaceUrl = spark.conf.get("spark.databricks.workspaceUrl") # host name

http_path = f"sql/protocolv1/o/{workspace_id}/{cluster_id}"

spark_url=f"jdbc:spark://{workspaceUrl}:443/default;transportMode=http;ssl=1;httpPath={http_path};AuthMech=3"
databricks_url=f"jdbc:databricks://{workspaceUrl}:443/default;transportMode=http;ssl=1;httpPath={http_path};AuthMech=3"

NameError: name 'spark' is not defined