Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 89 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,94 @@ The structure of the repo is:
All the tests were run using [MongoDB Atlas](https://www.mongodb.com/cloud/atlas?jmp=VLDB2019).
Use code `VLDB2019` to get $150 credit to get started with MongoDB Atlas.

## Sharded MongoDB Driver

1. Create and activate a Python virtual environment.

```bash
mkdir ~/python_envs
cd ~/python_envs
python -m venv py-tpcc-env
source ~/python_envs/py-tpcc-env/bin/activate
```
2. Install pymongo

```bash
pip install pymongo
```

3. Print your config.

```bash
cd ~/py-tpcc/pytpcc
python ./tpcc.py --print-config mongodb > mongodb.config
```

4. Edit the configuration for MongoDB in `mongodb.config`.
* Change `shards` to the number of shards in your cluster
* Change the mongodb connection `uri` string
* Change the database `name`

```bash
# MongodbDriver Configuration File
# Created 2025-10-08 14:18:24.378446
[mongodb]

# The mongodb connection string or URI
uri = mongodb://user:pass@10.2.1.119:27017/admin?ssl=true&tlsAllowInvalidHostnames=true&tlsAllowInvalidCertificates=true

# Database name
name = tpcc

# If true, data will be denormalized using MongoDB schema design best practices
denormalize = True

# If true, transactions will not be used (benchmarking only)
notransactions =

# If true, all things to update will be fetched via findAndModify
findandmodify = True

# If true, aggregation queries will be used
agg =

# If true, we will allow secondary reads
secondary_reads = True

# If true, we will enable retryable writes
retry_writes = True

# If true, we will perform causal reads
causal_consistency = True

# If true, we will have use only one 'unsharded' items collection
no_global_items =

# If > 0 then sharded
shards = 3
```

5. Run pytpcc using --warehouses=XXX

* Reset the database and load the data
```bash
python ./tpcc.py --reset --no-execute --clients=100 --duration=10 --warehouses=21 --config=mongodb.config mongodb --stop-on-error
```

* Only load the data
```bash
python ./tpcc.py --no-execute --clients=100 --duration=10 --warehouses=21 --config=mongodb.config mongodb --stop-on-error
```

* Execute the tests without loading data.
```bash
python ./tpcc.py --no-load --clients=100 --duration=10 --warehouses=21 --config=mongodb.config mongodb --stop-on-error
```

* Execute the tests with loading
```bash
python ./tpcc.py --clients=100 --duration=10 --warehouses=21 --config=mongodb.config mongodb --stop-on-error
```

## Postgres JSONB Driver

Expand Down Expand Up @@ -88,4 +176,4 @@ postgres=# \l+

# For any SQL command first use the database
\c tpcc;
```
```
24 changes: 14 additions & 10 deletions pytpcc/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import execnet
import worker
import message
from ConfigParser import SafeConfigParser
from configparser import ConfigParser
from pprint import pprint, pformat

from util import *
Expand Down Expand Up @@ -80,7 +80,7 @@ def startLoading(scalParameters,args,config,channels):
for w_id in range(scaleParameters.starting_warehouse, scaleParameters.ending_warehouse+1):
idx = w_id % procs
w_ids[idx].append(w_id)
print w_ids
print(w_ids)

load_start=time.time()
for i in range(len(channels)):
Expand Down Expand Up @@ -116,7 +116,7 @@ def startExecution(scaleParameters, args, config,channels):
aparser = argparse.ArgumentParser(description='Python implementation of the TPC-C Benchmark')
aparser.add_argument('system', choices=getDrivers(),
help='Target system driver')
aparser.add_argument('--config', type=file,
aparser.add_argument('--config', type=str,
help='Path to driver configuration file')
aparser.add_argument('--reset', action='store_true',
help='Instruct the driver to reset the contents of the database')
Expand All @@ -132,6 +132,8 @@ def startExecution(scaleParameters, args, config,channels):
aparser.add_argument('--clientprocs', default=1, type=int, metavar='N',
help='Number of processes on each client node.')

aparser.add_argument('--samewh', default=85, type=float, metavar='PP',
help='Percent paying same warehouse')
aparser.add_argument('--stop-on-error', action='store_true',
help='Stop the transaction execution when the driver throws an exception.')
aparser.add_argument('--no-load', action='store_true',
Expand All @@ -153,15 +155,16 @@ def startExecution(scaleParameters, args, config,channels):
assert driver != None, "Failed to create '%s' driver" % args['system']
if args['print_config']:
config = driver.makeDefaultConfig()
print driver.formatConfig(config)
print
print(driver.formatConfig(config))
print()
sys.exit(0)

## Load Configuration file
if args['config']:
logging.debug("Loading configuration file '%s'" % args['config'])
configFilePath = args['config']
if configFilePath:
logging.debug("Loading configuration file '%s'" % configFilePath)
cparser = ConfigParser()
cparser.read(os.path.realpath(args['config'].name))
cparser.read(os.path.realpath(configFilePath))
config = dict(cparser.items(args['system']))
else:
logging.debug("Using default configuration for %s" % args['system'])
Expand All @@ -171,6 +174,7 @@ def startExecution(scaleParameters, args, config,channels):
config['load'] = False
config['execute'] = False
if config['reset']: logging.info("Reseting database")
config['warehouses'] = args['warehouses']
driver.loadConfig(config)
logging.info("Initializing TPC-C benchmark using %s" % driver)

Expand Down Expand Up @@ -208,8 +212,8 @@ def startExecution(scaleParameters, args, config,channels):
if not args['no_execute']:
results = startExecution(scaleParameters, args, config,channels)
assert results
logging.info(results.show(load_time, driver, len(channels)))
print results.show(load_time, driver, len(channels))
logging.info(results.show(load_time, driver, len(channels), args['samewh']))
print(results.show(load_time, driver, len(channels), args['samewh']))
## IF

## MAIN
Loading