Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1eb568c
commit f051677
Showing
203 changed files
with
7,193 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash
#
# Collect benchmark artifacts (results, timeout logs, raw log files) into
# the COLLECTED/ folder. Every artifact is tagged with the current epoch
# so successive runs never overwrite each other.

# "Strict bash mode": abort on error, unset variable, or pipeline failure.
set -eu -o pipefail
IFS=$'\n\t'

# Compute paths: resolve the absolute directory containing this script.
pushd "$(dirname "$0")" > /dev/null
SCRIPTPATH=$(pwd)
popd > /dev/null


# VARS
REPO="$SCRIPTPATH/../"   # repository root (script lives one level below it)
EPOCH=$(date +%s)        # run identifier, embedded in every collected filename

#-------------------------------------------------------------------------------
# Collect results
echo "Collect results"
mkdir -p "$REPO/COLLECTED/RESULTS"
# For each database found in the results file, extract its rows into a
# dedicated per-database CSV.
RESULTS="$REPO/runtime/results"
if [[ -f "$RESULTS" ]]; then
    echo "$RESULTS"
    # -q: only the exit status matters here; -c would leak the count to stdout.
    if grep -q '^gremlin' "$RESULTS"; then
        grep '^gremlin' "$RESULTS" | awk -F',' '{print $1}' |\
            sort -u | while read -r DB
        do
            echo "Collecting results for $DB"
            grep "^$DB," "$RESULTS" > "$REPO/COLLECTED/RESULTS/${EPOCH}_${DB}_results.csv"
        done
    fi
fi

echo "Collect timeouts"
# For each database named in the timeout log, append its lines to a
# per-database CSV (>> keeps earlier partial collections of the same epoch).
TIMEOUTS="$REPO/timeout.log"
if [[ -f "$TIMEOUTS" ]]; then
    # Field 2 of each CSV row holds the command; its last word is the DB name.
    # NOTE(review): assumes that format — confirm against the timeout writer.
    awk -F',' '{print $2}' "$TIMEOUTS" | awk -F' ' '{print $NF}' |\
        sort -u | while read -r DB
    do
        echo "Collecting timeouts for $DB"
        TO_RES="$REPO/COLLECTED/RESULTS/${EPOCH}_${DB}_timeouts.csv"
        grep "$DB," "$TIMEOUTS" >> "$TO_RES"
    done
fi


#-------------------------------------------------------------------------------
# Collect raw files

echo "Collect raw"

RAW="$REPO/COLLECTED/RAW/${EPOCH}/"
mkdir -p "$RAW"

# Move top-level log files, if present.
for fname in timeout docker test; do
    [[ -f "$REPO/${fname}.log" ]] && mv -f "$REPO/${fname}.log" "$RAW"
done

# Move runtime output files, if present.
for fname in results errors; do
    [[ -f "$REPO/runtime/${fname}" ]] && mv -f "$REPO/runtime/${fname}" "$RAW"
done


# This is needed to have the script end with a success status
# (the last `[[ -f ... ]] && mv` evaluates false when the file is absent).
echo "Done!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
run.sh | ||
|
||
# editors | ||
*.sw? | ||
~* | ||
*~ | ||
|
||
# macOS
*.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
# Icon must end with two \r | ||
Icon | ||
|
||
|
||
# Thumbnails | ||
._* | ||
|
||
# Files that might appear in the root of a volume | ||
.DocumentRevisions-V100 | ||
.fseventsd | ||
.Spotlight-V100 | ||
.TemporaryItems | ||
.Trashes | ||
.VolumeIcon.icns | ||
.com.apple.timemachine.donotpresent | ||
|
||
# Python
*~ | ||
*.pyc | ||
|
||
# Python libs | ||
.venv | ||
|
||
# Datasets (TB of data) | ||
#runtime/data/ | ||
|
||
# Logs (commit only collected) | ||
timeout.log | ||
docker.log | ||
test.log | ||
runtime/results | ||
runtime/errors | ||
runtime/data | ||
runtime/logs/* | ||
|
||
# Ignoring presampled, use `git add -f` when you really want to add them.
runtime/presampled | ||
|
||
# Ignoring COLLECTED, use `git add -f` when you really want to add them.
COLLECTED |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Directory Structure and File Tree | ||
|
||
## Overview | ||
. | ||
├── README.md # General INFOS | ||
├── FILES.md # This file, contents of repo | ||
├── RUN.md # Instructions for running exps | ||
├── Makefile # MAKE things Work | ||
├── requirements.txt # Python Packages needed | ||
│ | ||
├── images # Docker Images | ||
│ ├── Makefile # Build Docker Images | ||
│ ├── init/ # Some GDB need initializations | ||
│ ├── extra/ # files needed by Docker Images | ||
│ ├── gremlin-2to3.dockerfile # Uses Neo4J to convert Tp2 Data | ||
│ ├── gremlin-arangodb.dockerfile | ||
│ ├── gremlin-blazegraph.dockerfile | ||
│ ├── gremlin-neo4j-tp3.dockerfile | ||
│ ├── gremlin-neo4j.dockerfile | ||
│ ├── gremlin-orientdb.dockerfile | ||
│ ├── gremlin-sparksee.dockerfile | ||
│ ├── gremlin-titan-tp3.dockerfile | ||
│ └── gremlin-titan.dockerfile | ||
│ | ||
├── runtime # This dir is mounted inside each | ||
│ │ # image and scripts are called
│ ├── converter.groovy # Used for Tp2 to Tp3 conversion | ||
│ ├── data/ # The datasets to be imported | ||
│ ├── confs/ # Conf files for GDBs | ||
│ ├── meta/ # Parameters for the queries | ||
│ ├── presampled/ # Sampled nodes/edges/labels | ||
│ ├── tp2/ # Queries compatible with Tp2 | ||
│ └── tp3/ # Queries compatible with Tp3 | ||
│ | ||
├── settings.json # Which datasets and queries to run | ||
│ | ||
└── test.py # The test runner: | ||
# Spawns the Docker Virtual Image
# Runs the queries | ||
|
||
## Details | ||
|
||
- `Makefile` Usually used to clean up before running an experiment, also has command to collect results | ||
- `test.py` It's the main script. | ||
It manages the Docker container, parses the metadata, and supervises the queries.
Usage: `python test.py -i [image name] [options]` Es: | ||
|
||
```bash | ||
python test.py -d -i dbtrento/gremlin-neo4j -v /dfs/vol3/ -e JAVA_OPTS="-Xms1G -Xmn128M -Xmx120G" | ||
``` | ||
- `settings.json` This file is read by `test.py`. | ||
Contains the name of the dataset and queries we are testing. | ||
It is used to actually point to the dataset and also to just infer the list of datasets. | ||
Same for the queries. Here queries do not specify if Tp2 or Tp3, only the names. | ||
|
||
|
||
- `images/` | ||
Contains the dockerfiles for each database we are going to test. | ||
* `gremlin-*.dockerfile` are the images which are going to be used, they can have the GDB with gremlin embedded mode (same VM) or they start a server internally and pass queries through a client. | ||
* `Makefile` build the images, naming them with the conventions used through the project. | ||
* `init/` contains init `.sh` scripts for a specific image, some databases need to start services before a query can be processed | ||
* `extra/` some images require extra files during installation, e.g., the `arangodb_converter.go` file for graph format conversion for ArangoDB.
|
||
- `runtime/` this folder is mounted inside every docker container. Contains configurations, queries and execution scripts | ||
* `converter.groovy` this is a groovy scripts with gremlin commands for Tp2 to load and export a dataset in a format readable by Tp3. | ||
* `confs/` if any configuration is needed and could be changed without rebuilding the docker image, it should stay here | ||
* `meta/` queries run based on some parameters, those are stored here | ||
* `presampled/` queries require some node id, edge id, or label from the dataset, those are stored here. A file `.json` for every dataset, and a file for every graph db to store **Local IDs** (LIDs), both generated by `sampler.groovy`. | ||
The files are `json` serializations of the arrays of randomly selected nodes. This is done in order to provide consistency between runs and to have comparable tests when running against different databases.
The file named `samples_[DatasetName]` contains the set of nodes, edges and labels chosen from the specific dataset. | ||
There also exists a file named `lids_[DatasetName]_[DBName]_[MD5]` which contains the internal reference (Internal IDs) of each node/edge chosen; these are unique internal identifiers for the same nodes/edges but assigned by each database.
|
||
|
||
* `data/` loaders expect datasets to be here in `GraphSONMode.EXTENDED` format which is readable both by Tp2 and Tp3 systems. | ||
* `tp2/` Tinkerpop 2 queries, almost all databases support this | ||
* `tp3/` Tinkerpop 3 queries, a different version of groovy requires functions implemented differently, and some methods have changed in this version | ||
|
||
|
||
|
||
## About Query implementations | ||
Once the container is created, the database engines are started manually (if required) by an init script in the docker image (see `images/init`); finally the main script (`execute.sh`) is invoked.
|
||
It is responsible for the query creation and execution:
first it creates an empty file at `/tmp/query`, then executes `header.groovy.sh`
which, according to the current environment variables, injects the proper headers (imports, functions, etc.).
|
||
Then the content of the query file, located at `$QUERY` is appended to the file. | ||
With the only exception of the loading query: when working in native loading mode the `loader.groovy` file is not appended; | ||
furthermore the content of the `sampler.groovy` file is always appended to allow the `ID -> LID` mapping. | ||
|
||
Finally `gremlin.sh` is invoked providing as argument the `/tmp/query` file, and the command output is filtered according to the `$DEBUG` variable.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Experiment housekeeping: collect results, clean logs, and manage the
# Docker containers/images between benchmark runs.

# printf is used instead of `echo "\t..."` because echo's handling of
# backslash escapes is shell-dependent (dash expands them, bash does not).
help:
	@printf 'Results\n'
	@printf '\t follow: \t follows all logs stream\n'
	@printf '\t follow_short: \t follows result and error logs stream\n'
	@printf '\t collect: \t move results and log into COLLECTED folder\n'
	@printf '\t rm_log: \t remove logs, but do not touch containers\n'
	@printf '\t clean: \t clean up environment for next experiment\n'
	@printf '\t purge: \t like clean but remove (not collect) the results\n'
	@printf '\n'
	@printf 'Containers management command\n'
	@printf '\t rm_dead: \t Remove dead containers\n'
	@printf '\t stop: \t Stop all running containers\n'
	@printf '\t kill: \t Kill all running containers\n'
	@printf '\n'
	@printf 'Image management command\n'
	@printf '\t rm_notag:\t Remove images without a tag\n'
	@printf '\t rm_noname:\t Remove images without a name\n'
	@printf '\n'
	@printf 'Dangerous management command\n'
	@printf '\t destroy:\t Reset the docker installation.\n'
	@printf '\t \t rm -rf images and containers\n'


# ------------------------------------------------------------------------------
# Images

# Remove images whose repository name is <none> (dangling layers).
rm_noname:
	docker images | grep -e "^<none" | awk -F' ' '{print $$3}' | xargs docker rmi || echo "no image to remove"

# Remove images that have a repository name but no tag.
rm_notag:
	docker images | grep -e "^<none>\s\+<none>" | awk -F' ' '{print $$3}' | xargs docker rmi || echo "no image to remove"


# ------------------------------------------------------------------------------
# Container

# Remove every container that is not currently running
# (tail -n+2 skips the `docker ps` header line).
rm_dead:
	docker ps -a | grep -v ' Up ' | tail -n+2 | awk -F' ' '{print $$1}' | xargs docker rm || echo "no container to remove"

# The header line never matches ' Up ', so grep alone filters it out.
stop:
	docker ps -a | grep ' Up ' | awk -F' ' '{print $$1}' | xargs docker stop || echo "no container to stop"

kill:
	docker ps -a | grep ' Up ' | awk -F' ' '{print $$1}' | xargs docker kill || echo "no container to kill"


# Destroy the world: stop and remove all containers, then force-remove all images.
destroy: stop rm_dead
	docker images | tail -n+2 | awk -F' ' '{print $$3}' | xargs docker rmi -f || echo "no image to remove"


# ------------------------------------------------------------------------------
# Results management

collect:
	@echo "Collecting results"
	.bin/collect.sh

clean: stop kill rm_dead rm_notag collect
	#

rm_log:
	rm -fv timeout.log docker.log test.log runtime/results runtime/errors runtime/logs/*

purge: stop kill rm_dead rm_notag rm_log
	#

# ------------------------------------------------------------------------------
# logs
follow:
	tail -f *.log runtime/errors runtime/results

follow_short:
	tail -f runtime/errors runtime/results

.PHONY: help rm_noname rm_notag rm_dead stop kill destroy collect clean rm_log purge follow follow_short
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
# graph-databases-testsuite | ||
# Graph databases Test Suite | ||
Docker Images, installation scripts, and testing & benchmarking suite for Graph Databases | ||
|
||
* How to run the test suite: [RUN.md](RUN.md)
* Insights on the folder structure and scripts role: [FILES.md](FILES.md) |
Oops, something went wrong.