This readme consolidates a number of different processes for deploying ontologies to SciGraph into a single workflow that avoids most of the hidden pitfalls we have encountered over the past 4 years.
NOTE: if you are using a non-standard location for ~/git
then the best
way to fix this file at the moment is to do a global find and replace.
Before you can use this file you will need to complete the Setup section.
You can either tangle
this file in emacs using C-c C-v t
or you can tangle
the whole file from the command line with the following.
# Tangle this README from the command line (the non-emacs-UI alternative
# to C-c C-v t); aborts if the installed org-mode is older than 9.3.
: ${THIS_FILE:="README.org"} # set to the local path to this file
emacs --batch \
--load org \
--load ob-shell \
--eval "(unless ((lambda (a b) (or (string> a b) (string= a b))) (org-version) \"9.3\") (message (format \"your org is too old! %s < 9.3 try loading ~/git/orgstrap/init.el\" (org-version))) (kill-emacs 1))" \
--eval "(org-babel-tangle-file \"${THIS_FILE}\")"
The core functionality is tangled to ./bin/scigraph-functions.sh.
It can be sourced in a shell or from a script by running \
source ~/git/pyontutils/nifstd/scigraph/bin/scigraph-functions.sh
\
to make the functions defined in this file available for use.
If you are using emacs, then flycheck-mode
will run shellcheck
for you!
You don’t even have to run it independently.
You should also run shellcheck
on the tangled file if you make any changes.
You don’t usually have to worry about green issues, and there are a number of
false positives because we are dealing with some crazy local vs remote expansion.
shellcheck "<<path-scigraph-functions()>>"
# Gentoo: full set of dependencies from the tgbugs overlay.
layman -a tgbugs-overlay
emerge \
pyontutils \
rpmdevtools \
scigraph-bin \
yq
# Debian/Ubuntu: system packages, then python tooling via pip --user.
apt install \
rpm \
python3-dev \
python3-pip
pip install --user \
pyontutils \
yq
# Gentoo (minimal variant): only scigraph-bin and yq.
layman -a tgbugs-overlay
emerge \
scigraph-bin \
yq
# RHEL/CentOS: python3 via yum, then yq via pip3.
yum install -y \
python3 \
python3-pip
pip3 install \
yq # yq is usually only needed on config build systems
# however if something goes wrong and an in place change
# needs to be made then editing the raw and regenerating
# is the preferred course of action
There are currently still a couple of repos that are needed for these workflows. Most are needed for their configuration files, but pyontutils is needed for this README as well.
# Clone the repos required by these workflows into ~/git
# (see the NOTE at the top if you use a non-standard location).
pushd ~/git
git clone https://github.com/tgbugs/orgstrap.git
git clone https://github.com/tgbugs/pyontutils.git
git clone https://github.com/tgbugs/tgbugs-overlay.git
git clone https://github.com/SciCrunch/sparc-curation.git
popd
~/git/orgstrap/orgstrap --user # needed to obtain newer version of org-mode
When viewing this file in emacs tangle this block after setting the requisite values below in the variables section (not exported). Alternately, copy the block as is and make the changes manually. Then open ~/.ssh/config.scigraph.example and copy the sections into your ssh config file.
If you are deploying to an aws server (e.g. aws-scigraph or aws-scigraph-data),
the IdentityFile
entry must point to a key that has admin access via the aws console.
# enable connection multiplexing for all hosts
Host *
ServerAliveInterval 60
ControlMaster auto
ControlPath ~/.ssh_tmp/master-%r@%h:%p
# jump host should be whitelisted in the target's firewall
Host <<jump-host()>>
HostName <<ip-jump-host()>>
User <<user-jump-host()>>
IdentityFile <<path-jump-identity-file()>>
PreferredAuthentications publickey
# admin access (ec2-user) to the scigraph host, via the jump host;
# the IdentityFile must have admin access via the aws console
Host aws-scigraph
HostName <<ip-aws-scigraph()>>
User ec2-user
IdentityFile <<path-target-identity-file()>>
PreferredAuthentications publickey
ProxyCommand ssh <<jump-host()>> -W %h:%p
# same host, but as the scigraph service user (key created by ssh-keygen-simple)
Host aws-scigraph-scigraph
HostName <<ip-aws-scigraph()>>
User scigraph
IdentityFile ~/.ssh/id_ed25519.aws.scigraph
PreferredAuthentications publickey
ProxyCommand ssh <<jump-host()>> -W %h:%p
# admin access to the scigraph-data host
Host aws-scigraph-data
HostName <<ip-aws-scigraph-data()>>
User ec2-user
IdentityFile <<path-target-identity-file()>>
PreferredAuthentications publickey
ProxyCommand ssh <<jump-host()>> -W %h:%p
# scigraph-data host as the scigraph service user
Host aws-scigraph-data-scigraph
HostName <<ip-aws-scigraph-data()>>
User scigraph
IdentityFile ~/.ssh/id_ed25519.aws.scigraph
PreferredAuthentications publickey
ProxyCommand ssh <<jump-host()>> -W %h:%p
Make ${HOME}/.ssh_tmp folder to hold multiplexed sockets.
# Create the folder holding ssh multiplexing sockets (referenced by
# ControlPath in the ssh config). -p makes re-running this idempotent;
# chmod keeps the sockets private to the current user.
mkdir -p ~/.ssh_tmp
chmod 0700 ~/.ssh_tmp
Once you have everything above configured you can run the following to create and deploy ssh key for the scigraph deploy user.
<<&source-scigraph-functions>>
# Generate a key for the scigraph service user and deploy it to both
# servers; the second ssh-copy-id-simple call reuses the environment
# variables intentionally left set by ssh-keygen-simple.
ssh-keygen-simple \
--server aws-scigraph \
--svc-user scigraph \
--keyname aws.scigraph && \
ssh-copy-id-simple
ssh-copy-id-simple \
--server aws-scigraph-data \
--svc-user scigraph \
--keyname aws.scigraph
For the implementation of the *-simple
commands see ssh setup.
Set ip-aws-scigraph
here
127.0.0.1
Set ip-aws-scigraph-data
here
127.0.0.1
Set path-target-identity-file
here
/dev/null
Set path-jump-identity-file
here
/dev/null
Set jump-host
aka bastion server name here
localhost
Set ip-jump-host
here
127.0.0.1
Set user-jump-host
here
nobody
Cut out the graphload.yaml
middle man.
Note this currently can’t use shell redirection to do things like
load-graphs <(git show branch:path/to/file.ttl)
though it would
be quite useful to be able to do so.
Load one or more RDF/OWL graphs into a SciGraph database
Usage:
scigraph-load-graphs [options] <path-or-iri>...
Examples:
scigraph-load-graphs --path-output loaded-graph.zip path/to/my/ontology.ttl
scigraph-load-graphs ttl/nif.ttl ~/git/NIF-Ontology/extra.ttl http://www.w3.org/2002/07/owl#
Options:
-h --help print this message
--path-output=PATH full path to the output zip file [default: ./]
--folder-name-graph=NAME name of the folder holding the neo4j database [default: test]
--path-graphload=PATH full path to the graphload template [default: <<path-graphload-template()>>]
<<&source-scigraph-functions>>
# Forward all CLI arguments to load-graphs; "$@" is quoted so arguments
# containing spaces survive word splitting.
load-graphs "$@"
# Build a SciGraph database from one or more ontology paths/IRIs:
# write an ontologies yaml section from the positional arguments,
# run load-graph, then move the resulting zip to PATH_OUTPUT.
function load-graphs () {
local HELP="\
<<&help-load-graphs>>"
<<&vars-load-graphs>>
# one ontologies entry (with ELK reasoner config) per input
echo ontologies: > "${PATH_ONTOLOGIES}"
for PATH_INPUT in "${POSITIONAL[@]}"; do
echo " - url: ${PATH_INPUT}" >> "${PATH_ONTOLOGIES}"
echo " reasonerConfiguration:" >> "${PATH_ONTOLOGIES}"
echo " factory: org.semanticweb.elk.owlapi.ElkReasonerFactory" >> "${PATH_ONTOLOGIES}"
echo " addDirectInferredEdges: true" >> "${PATH_ONTOLOGIES}"
echo " removeUnsatisfiableClasses: true" >> "${PATH_ONTOLOGIES}"
done
load-graph
CODE=$?
if [ "${CODE}" -ne 0 ]; then
echo "Loading failed with error ${CODE}"
else
# quote the command substitution so paths with spaces survive
mv "$(realpath "${PATH_BUILD}/LATEST")" "${PATH_OUTPUT}"
fi
# clean up the build folder regardless of success
rm -r "${PATH_BUILD}"
return "${CODE}"
}
<<&source-scigraph-functions>>
# Load the NIFSTD graph via ontload; extra CLI arguments are passed
# through quoted so they survive word splitting.
load-graph-ontload \
--path-build <<path-build-nifstd()>> \
--path-ontologies <<path-ontologies-nifstd()>> \
--git-ref <<git-ref-nifstd()>> \
"$@" \
<<&source-scigraph-functions>>
# Build the services.yaml config for the nifstd deployment.
build-services \
--path-build <<path-build-nifstd()>> \
--path-cypher-resources <<path-cypher-resources-nifstd()>> \
path-build-nifstd
here
/tmp/scigraph-build/nifstd
Set path-ontologies-nifstd
here
~/git/pyontutils/nifstd/scigraph/ontologies-nifstd.yaml
Set git-ref-nifstd
here
master
Set path-cypher-resources-nifstd
here
~/git/pyontutils/nifstd/scigraph/cypher-resources.yaml
# Materialize includedForSPARCUnder annotations: for every pair (s, o)
# related by the ILX:0738400 predicate in sparc-community-terms.ttl,
# add the inverse triple (o, ilxtr:includedForSPARCUnder, s), then
# write the graph back in place.
import pathlib as pl
from pyontutils.core import OntGraph
from pyontutils.namespaces import ilxtr, ILX
g = OntGraph(path=pl.Path('~/git/NIF-Ontology/ttl/sparc-community-terms.ttl').expanduser()).parse()
# g[:ILX['0738400']:] iterates (subject, object) pairs for that predicate
[g.add((o, ilxtr.includedForSPARCUnder, s)) for s, o in g[:ILX['0738400']:]]
g.write()
Run this block
# Fetch the latest sparc-community-terms from InterLex, materialize the
# includedForSPARCUnder annotations, and commit the result on the sparc
# branch, keeping a patch for review.
pushd ~/git/NIF-Ontology/ttl
git checkout sparc || exit $? # can fail if your working copy has stuff saved
git pull || exit $?
# NOTE(review): curl without -f will save an HTML error page on HTTP
# failure rather than exiting non-zero -- confirm before relying on it
curl https://uri.interlex.org/sparc/ontologies/community-terms.ttl -o sparc-community-terms.ttl || exit $?
# run the materialize-ifsu python block via a heredoc
{ python || exit $?; } <<EOF
<<&materialize-ifsu>>
EOF
git add sparc-community-terms.ttl
git commit -m "update to latest version of sparc-community-terms"
# keep a record of what changed for review
git diff HEAD~1 HEAD | tee sct-diff.patch
popd
Review the output of this block or ${HOME}/git/NIF-Ontology/ttl/sct-diff.patch Make any manual changes, commit, and push. You will also need to merge sparc into dev for the changes to be included in the SciGraph load. i.e., the following:
git checkout dev
git merge sparc
<<&source-scigraph-functions>>
# Load the sparc graph via ontload; extra CLI arguments are passed
# through quoted so they survive word splitting.
load-graph-ontload \
--path-build <<path-build-sparc()>> \
--path-ontologies <<&path-ontologies-sparc()>> \
--git-ref <<git-ref-sparc()>> \
"$@" \
<<&source-scigraph-functions>>
# Deploy the freshly loaded sparc graph to the aws-scigraph host.
deploy-graph \
--server aws-scigraph \
--path-build <<path-build-sparc()>>
run-load-deploy-graph-sparc
tramp:/ssh:aws-scigraph-scigraph:services.yaml
# Build services.yaml for the sparc deployment, then deploy it to
# aws-scigraph (two blocks, tangled/run independently).
<<&source-scigraph-functions>>
build-services \
--path-build <<path-build-sparc()>> \
--svc-host <<host-services-sparc()>> \
--path-cypher-resources <<path-cypher-resources-sparc()>> \
<<&source-scigraph-functions>>
deploy-services \
--config <<path-build-sparc()>>/<<file-stem-services-output()>>.yaml \
--server aws-scigraph
<<run-build-services-sparc>>
|| exit $?
<<run-deploy-services-sparc>>
Set path-build-sparc
here
/tmp/scigraph-build/sparc
Set path-ontologies-sparc
here
~/git/sparc-curation/resources/scigraph/ontologies-sparc.yaml
Set git-ref-sparc
here
dev
Set path-cypher-resources-sparc
here
~/git/sparc-curation/resources/scigraph/cypher-resources.yaml
Set host-services-sparc
here
scigraph.scicrunch.io

See https://github.com/SciCrunch/sparc-curation/blob/master/docs/apinatomy.org for instructions about converting, exporting, and deploying ApiNATOMY graphs for loading into SciGraph.
<<&source-scigraph-functions>>
# XXX temporary fix for scigraph brokeness
mkdir -p <<&path-build-sparc-data()>>
cp "$(dirname <<&path-ontologies-sparc-data()>>)/sparc-data.ttl" <<&path-build-sparc-data()>>
# promote the dev imports to real owl:imports for this load
sed -i 's/ilxtr:imports-dev/owl:imports/g' <<&path-build-sparc-data()>>/sparc-data.ttl
# convert the curation export to rdfxml for the scigraph loader
curl https://cassava.ucsd.edu/sparc/preview/exports/curation-export.ttl | \
pypy3 -m ttlser.ttlfmt -f ttl -t xml -o <<&path-build-sparc-data()>>/curation-export.owl
load-graph \
--path-build <<&path-build-sparc-data()>> \
--folder-name-graph <<folder-name-graph-sparc-data()>> \
--path-ontologies <<&path-ontologies-sparc-data()>> \
"$@" \
<<&source-scigraph-functions>>
# Deploy the sparc-data graph to the aws-scigraph-data host.
deploy-graph \
--server aws-scigraph-data \
--path-build <<&path-build-sparc-data()>>
<<run-load-graph-sparc-data>>
|| exit $?
<<run-deploy-graph-sparc-data>>
tramp:/ssh:aws-scigraph-data-scigraph:services.yaml
# Build services.yaml for the sparc-data deployment, then deploy it to
# aws-scigraph-data (two blocks, tangled/run independently).
<<&source-scigraph-functions>>
build-services \
--path-build <<&path-build-sparc-data()>> \
--svc-host <<host-services-sparc-data()>> \
--path-cypher-resources <<path-cypher-resources-sparc-data()>> \
<<&source-scigraph-functions>>
deploy-services \
--config <<&path-build-sparc-data()>>/<<file-stem-services-output()>>.yaml \
--server aws-scigraph-data
<<run-build-services-sparc-data>>
|| exit $?
<<run-deploy-services-sparc-data>>
Set path-build-sparc-data
here
/tmp/scigraph-build/sparc-data
Set folder-name-graph-sparc-data
here
sparc-data
Set path-ontologies-sparc-data
here
~/git/sparc-curation/resources/scigraph/ontologies-sparc-data.yaml
Set path-cypher-resources-sparc-data
here
~/git/sparc-curation/resources/scigraph/cypher-resources.yaml
Set host-services-sparc-data
here
sparc-data.scicrunch.io
<<&source-scigraph-functions>>
# XXX temporary fix for scigraph brokeness and dev graph
# prefer pypy3 when available, fall back to python
PY_X=$(command -v pypy3 || command -v python)
mkdir -p <<&path-build-sparc-data-dev()>>
# rewrite the ontologies config for the -dev graph name
cat <<&path-ontologies-sparc-data()>> | \
sed 's/sparc-data/sparc-data-dev/g' > \
<<&path-build-sparc-data-dev()>>/ontologies-sparc-data-dev.yaml
# rewrite sparc-data.ttl: rename to -dev, promote dev imports, and
# point remote ApiNATOMY iris at files in the local build folder
cat "$(dirname <<&path-ontologies-sparc-data()>>)/sparc-data.ttl" | \
sed 's/sparc-data/sparc-data-dev/g' | \
sed 's/ilxtr:imports-dev/owl:imports/g' | \
sed 's,https://cassava.ucsd.edu/ApiNATOMY/ontologies/,file://<<&path-build-sparc-data-dev()>>/,g' > \
<<&path-build-sparc-data-dev()>>/sparc-data-dev.ttl
# TODO consider an option to exclude npo, curation-export, and protcur
curl https://cassava.ucsd.edu/sparc/preview/exports/curation-export.ttl | \
${PY_X} -m ttlser.ttlfmt -f ttl -t xml -o <<&path-build-sparc-data-dev()>>/curation-export.owl
pushd <<&path-build-sparc-data-dev()>>
if [ ! -f urinary-omega-tree-test.ttl ]; then
curl https://cassava.ucsd.edu/ApiNATOMY/ontologies/urinary-omega-tree-test.ttl -O
fi
popd
pushd ~/git/apinatomy-models/models/
# iterate the glob directly instead of parsing ls output
for model_id in *; do
cp "${model_id}/derived/${model_id}.ttl" <<&path-build-sparc-data-dev()>>/;
done
popd
load-graph \
--path-build <<&path-build-sparc-data-dev()>> \
--folder-name-graph <<folder-name-graph-sparc-data-dev()>> \
--path-ontologies <<&path-ontologies-sparc-data-dev()>> \
"$@" \
Set path-build-sparc-data-dev
here
/tmp/scigraph-build/sparc-data-dev
Set folder-name-graph-sparc-data-dev
here
sparc-data-dev
Set path-ontologies-sparc-data-dev
here
/tmp/scigraph-build/sparc-data-dev/ontologies-sparc-data-dev.yaml
@prefix ilxtr: <http://uri.interlex.org/tgbugs/uris/readable/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
ilxtr:build-prov rdfs:label "build provenance record" ;
ilxtr:build-id ilxtr:build-id/??? ;
ilxtr:build-epoch 0 ;
ilxtr:build-datetime "0000-00-00T00:00:00.000000000Z" ;
ilxtr:build-date "0000-00-00" ;
ilxtr:build-time "00:00:00.000000000Z" ;
ilxtr:build-system "???" ;
ilxtr:build-type "SciGraph|Blazegraph" .
<<&source-scigraph-functions>>
# prefer pypy3 when available, fall back to python
PY_X=$(command -v pypy3 || command -v python)
# XXX temporary fix for scigraph brokeness
mkdir -p <<&path-build-sparc-sckan()>>
# FIXME need a sane way to share the ontologies here with the blazegraph load
cat "$(dirname <<&path-ontologies-sparc-sckan()>>)/sparc-data.ttl" | \
sed 's,<https://raw.githubusercontent.com/SciCrunch/NIF.\+ttl/,<http://ontology.neuinfo.org/NIF/ttl/,' | \
sed 's/ilxtr:imports-\(big\|rel\)/owl:imports/g' | \
sed 's,https://cassava.ucsd.edu/ApiNATOMY/ontologies/,file://<<&path-build-sparc-sckan()>>/,g' | \
sed 's,https://cassava.ucsd.edu/sparc/ontologies/,file://<<&path-build-sparc-sckan()>>/,g' | \
sed 's,https://cassava.ucsd.edu/sparc/preview/exports/,file://<<&path-build-sparc-sckan()>>/,g' | \
sed 's,protcur.ttl,protcur.owl,g' | \
sed 's,sparc-data,sparc-sckan,g' > \
<<&path-build-sparc-sckan()>>/sparc-sckan.ttl
# ontload handles this part recursively
#sed 's,http://ontology.neuinfo.org/NIF/,file://<<&path-build-sparc-sckan()>>/NIF-Ontology/,g' > \
pushd <<&path-build-sparc-sckan()>>
# clone from the local working copy when present, otherwise from github
if [ -d ~/git/NIF-Ontology ]; then
git clone ~/git/NIF-Ontology # FIXME this fails if repo exists
else
git clone https://github.com/SciCrunch/NIF-Ontology.git
fi
# TODO need to add logic to stash and pull
# flow replicated from prrequaestor
pushd NIF-Ontology
popd; popd
# FIXME this fails if a previous load was run and not cleaned up
# XXX this should probably be run prior to calling release.org build --sckan --no-blaze
${PY_X} -m pyontutils.ontload graph \
--fix-imports-only \
--org SciCrunch \
NIF-Ontology \
NIF \
--git-local <<&path-build-sparc-sckan()>> \
--branch dev \
--graphload-ontologies <<&path-ontologies-sparc-sckan()>>
# XXX release.org --sckan --no-load # FIXME circularity still a bit of an issue
# pick the most recent sckan release build folder
_sckanl="$(ls -d /tmp/build/release-*-sckan | sort -u | tail -n 1)"
pushd "${_sckanl}"
cp -a data/*.ttl <<&path-build-sparc-sckan()>>/
popd
pushd <<&path-build-sparc-sckan()>>
# convert the ttl exports to rdfxml for the scigraph loader
cat curation-export-published.ttl | sed 's,\x00,^@,g' | \
${PY_X} -m ttlser.ttlfmt -f ttl -t xml -o <<&path-build-sparc-sckan()>>/curation-export-published.owl || exit 1
cat protcur.ttl | \
${PY_X} -m ttlser.ttlfmt -f ttl -t xml -o <<&path-build-sparc-sckan()>>/protcur.owl || exit 2
${PY_X} -m pyontutils.ontload prov scigraph prov-record.ttl prov-record.ttl || exit 3 # overwrite to change type
popd
#curl https://cassava.ucsd.edu/sparc/preview/exports/curation-export-published.ttl | \
# ${PY_X} -m ttlser.ttlfmt -f ttl -t xml -o <<&path-build-sparc-sckan()>>/curation-export-published.owl
load-graph \
--path-build <<&path-build-sparc-sckan()>> \
--folder-name-graph <<folder-name-graph-sparc-sckan()>> \
--path-ontologies <<&path-ontologies-sparc-sckan()>> \
--git-ref <<git-ref-sparc-sckan()>> \
"$@" \
<<&source-scigraph-functions>>
# XXXXXXXXXXXXXXXXXXXXXXXXXXX NOTE aws-scigraph NOT aws-scigraph-sckan
# the sckan graph deploys to the production aws-scigraph host
deploy-graph \
--server aws-scigraph \
--path-build <<&path-build-sparc-sckan()>>
<<run-load-graph-sparc-sckan>>
|| exit $?
<<run-deploy-graph-sparc-sckan>>
Set path-build-sparc-sckan
here
/tmp/scigraph-build/sparc-sckan
Set folder-name-graph-sparc-sckan
here
sparc-sckan
Set path-ontologies-sparc-sckan
here
~/git/sparc-curation/resources/scigraph/ontologies-sparc-sckan.yaml
Set git-ref-sparc-sckan
here
dev
<<&source-scigraph-functions>>
# Local (no --server means localhost) deploy of the sparc graph.
deploy-graph \
--path-build <<path-build-sparc()>>
<<&source-scigraph-functions>>
# Local deploy of the sparc services config.
deploy-services --config <<path-build-sparc()>>/<<file-stem-services-output()>>.yaml
<<&source-scigraph-functions>>
# Local deploy of the sparc-data graph under its runtime folder name.
deploy-graph \
--path-build <<&path-build-sparc-data()>> \
--folder-name-runtime <<folder-name-runtime-sparc-data-local()>>
<<&source-scigraph-functions>>
# Local build of the sparc-data services config with a local file stem.
./build-services \
--path-build <<&path-build-sparc-data()>> \
--path-cypher-resources <<path-cypher-resources-sparc-data()>> \
--file-stem <<file-stem-sparc-data-local()>> \
--folder-name-runtime <<folder-name-runtime-sparc-data-local()>> \
<<&source-scigraph-functions>>
deploy-services --config <<&path-build-sparc-data()>>/<<file-stem-sparc-data-local()>>.yaml
folder-name-runtime-sparc-data-local
here
sparc-data
Set file-stem-sparc-data-local
here
services-sparc

There are three main components for SciGraph infrastructure: code, graph, and services config. Each has a creation step (compile, load, and build respectively) and a deploy section.
Unless you need to fix some of the code here, this heading is probably not relevant.
Code is compiled once for all deployments.
The easiest way to deploy SciGraph to RHEL-like systems is to build an RPM using ./scigraph.spec. If you are building on a system that does not have an RPM-based package manager then you will need the second bit to prevent builds from failing due to missing dependencies.
# Build the SciGraph RPM from the SciCrunch fork using SPECS/scigraph.spec.
# Assumes an rpmbuild tree exists in ./rpmbuild with the spec in place.
function build-scigraph-rpm () {
pushd rpmbuild
# pull changes if the local repo already exists
if [ -d BUILD/SciGraph/.git ]; then
git -C BUILD/SciGraph stash
git -C BUILD/SciGraph pull
# NOTE(review): remote add fails (harmlessly, no set -e) on repeat runs
git -C BUILD/SciGraph remote add scicrunch https://github.com/SciCrunch/SciGraph.git
git -C BUILD/SciGraph fetch scicrunch
git -C BUILD/SciGraph checkout cypher-execute-fix
# NOTE(review): a second plain stash looks odd -- was `stash pop`
# intended here? confirm before changing
git -C BUILD/SciGraph stash
fi
# build the rpm
spectool -g -R SPECS/scigraph.spec
rpmbuild --nodeps --define "_topdir $(pwd)" -ba SPECS/scigraph.spec
# inspect the preinstall script
rpm -qp --scripts RPMS/noarch/scigraph-9999-0.noarch.rpm
popd
}
/tmp/scigraph-build/scigraph
Set up a virtualenv if required.
# Optional: create a pipenv virtualenv holding the scigraph build tooling.
pushd venvs
mkdir scigraph-build
pushd scigraph-build
pipenv --python 3.7
pipenv shell
pip install pyontutils
mkdir build
Compile SciGraph.
<<&source-scigraph-functions>>
compile-scigraph-git
By default this will install scigraph-load in
/tmp/scigraph-build/scigraph/bin/. You should be able to link that
file in your personal ${HOME}/bin folder or add /tmp/scigraph-build/scigraph/bin/
to your path. You can move the jar file that it points to, if you do just make sure
to update the scigraph-load
script to point to the new location.
# Make scigraph-load callable from your PATH; pick ONE of the options.
mkdir ~/bin # copy and paste insurance
export PATH=~/bin:${PATH} # should be set in ~/.bashrc or similar
cp <<path-build-scigraph()>>/bin/scigraph-load ~/bin/
# OR
ln -s <<path-build-scigraph()>>/bin/scigraph-load ~/bin/
# OR
export PATH=<<path-build-scigraph()>>/bin:${PATH}
The definition of compile-scigraph-git
for reference.
# Compile SciGraph from git via ontload.
# The ${IFS# text} expansions are inline "comments": parameter expansion
# on IFS whose pattern never matches, so each expands to the (whitespace)
# value of IFS, which then word-splits away to nothing when unquoted.
function compile-scigraph-git () {
<<&vars-compile-scigraph-git>>
ontload scigraph \
--zip-location ${PATH_ZIP} ${IFS# build artifacts will be deposited here} \
--git-local ${PATH_GIT} ${IFS# remote repos will be cloned here} \
--scigraph-branch ${GIT_REF} \
${POSITIONAL[*]}
}
A gentoo based image https://hub.docker.com/r/tgbugs/musl/tags?name=scigraph and its underlying file https://github.com/tgbugs/dockerfiles/blob/master/source.org#scigraph are also available.
SERVER
is the name in the .ssh/config
# Placeholder: deploying the scigraph rpm is not implemented yet;
# prints TODO and succeeds.
function run-deploy-scigraph-rpm () {
printf '%s\n' TODO
}
run-deploy-scigraph-rpm --server aws-scigraph-data
--path-rpm ~/git/pyontutils/nifstd/scigraph/rpmbuild/RPMS/noarch/scigraph-9999-0.noarch.rpm
# TODO backup the old rpm?
# Copy the built rpm to ${SERVER}, install it, and restart the service.
rsync --rsh ssh ~/git/pyontutils/nifstd/scigraph/rpmbuild/RPMS/noarch/scigraph-9999-0.noarch.rpm ${SERVER}:
ssh ${SERVER} '
sudo <<rpm-install>>'
ssh ${SERVER} '
sudo systemctl restart scigraph'
If you want to have more than one service or have a different name for services.yaml
then take a look at /lib/systemd/system/scigraph.service
and take what you want to
customize and put it in /etc/systemd/system/scigraph.service.d/scigraph.conf
(retaining the section headers).
Beware when updating, if you have made any changes to /lib/systemd/system/scigraph.service
in order to tweak the JVM those changes will be clobbered when running yum reinstall -y scigraph*.rpm
.
Until I can figure out how to get systemd to accept the JVM config options as something that can,
you know, be configured instead of something that has to be static, this is going to be a problem.
# Run scigraph-load using a config assembled from the graphload template
# plus a generated ontologies section, then zip the resulting database
# folder and point the LATEST symlink at the zip.
function load-graph () {
<<&vars-load-graph>>
if [[ -z "${PATH_ONTOLOGIES}" ]]; then
echo no ontologies section specified
exit 1
fi
# start from a clean graph build location
if [[ -d "${LOAD_GRAPH_PATH}" ]]; then
rm "${LOAD_GRAPH_PATH}" -r
fi
mkdir -p "${LOAD_GRAPH_PATH}" || return $?
cat "${PATH_GRAPHLOAD}" "${PATH_ONTOLOGIES}" > "${PATH_CONFIG_YAML_RAW}" || return $?
yq -Y ".graphConfiguration.location = \"${LOAD_GRAPH_PATH}\"" \
"${PATH_CONFIG_YAML_RAW}" > "${PATH_CONFIG_YAML}" || return $?
# FIXME I think this line is bugged YES BUT ALSO BY THE VERY COMMENT ITSELF
scigraph-load -c "${PATH_CONFIG_YAML}" || return $?
pushd "${PATH_BUILD}" || return $?
# mv prevents accidentally deploying the same graph twice
# but use cp -r for development to avoid continual rebuild
cp -r "${FOLDER_NAME_GRAPH}" "${STAMPED}" || return $?
zip -r "${FILE_NAME_ZIP}" "${STAMPED}" || return $?
# unlink may fail harmlessly when LATEST does not exist yet
unlink "${LATEST}" > /dev/null 2>&1
ln -sT "${FILE_NAME_ZIP}" "${LATEST}" || return $?
popd > /dev/null || return $?
}
# Load a graph via ontload, which clones/updates the NIF-Ontology repo,
# localizes remote imports, and runs the load. The ${IFS# text}
# expansions are inline comments that word-split away to nothing.
function load-graph-ontload () {
<<&vars-load-graph-ontload>>
ontload graph \
--org SciCrunch ${IFS# github organization} \
NIF-Ontology ${IFS# repo name} \
NIF ${IFS# pattern for remote base (e.g. http://) to swap for local file://,
NIF automatically expands to http://ontology.neuinfo.org/NIF} \
--zip-location ${PATH_ZIP} ${IFS# output folder where the loaded graph zip will be exported} \
--git-local ${PATH_GIT} ${IFS# location where git repo will be cloned} \
--branch ${GIT_REF} ${IFS# git ref (branch, commit, etc.) from which to build} \
--graphload-config ${PATH_GRAPHLOAD} ${IFS# path to graphload-base-template.yaml} \
--graphload-ontologies ${PATH_ONTOLOGIES} ${IFS# path to ontologies-graph-name.yaml} \
${POSITIONAL[*]} ${IFS# pass any other unhandled arguments along}
}
If loading fails, then you probably need to patch something in which case you will
need the following commands. See an example setup in nifstd/patches.
If --patch
is enabled and the patch config cannot be found you will get an error.
<<&source-scigraph-functions>>
# Load with patches applied (see nifstd/patches for an example setup);
# "$@" is quoted so arguments with spaces survive word splitting.
load-graph-ontload \
--patch ${IFS# do apply patches} \
--patch-config patches.yaml ${IFS# path to patch files} \
"$@"
Any of the run-load-graph-*
commands defined below can be run with one additional argument
to use scigraph-load
built from git. If you have moved scigraph-load
to ~/bin
you do not
need to do this unless you are doing this to test a specific version of SciGraph or similar.
run-load-graph-sparc --path-build-scigraph <<path-build-scigraph()>>
# Local deploy: copy the zip into the runtime folder, unzip it, fix
# ownership, and swap the runtime symlink around a service stop/start.
cp ${PATH_ZIP} ${PATH_RUNTIME} || return $?
pushd ${PATH_RUNTIME} || return $?
unzip -n ${FILE_NAME_ZIP} || return $?
chown -R ${SVC_USER}:${SVC_USER} ${STAMPED} || return $?
service-manager scigraph stop
# unlink may fail harmlessly when the link does not exist yet
unlink ${FOLDER_NAME_RUNTIME} > /dev/null 2>&1
ln -sT ${STAMPED} ${FOLDER_NAME_RUNTIME} || echo relink failed
service-manager scigraph start
popd
# Deploy a loaded graph zip: rsync it to the server's runtime folder,
# unzip, and swap the runtime symlink around a service stop/start.
# localhost deploys run the same steps in a local root shell instead.
function deploy-graph () {
<<&vars-deploy-graph>>
echo Will deploy graph to ${SERVER}:${PATH_RUNTIME}
echo $PATH_ZIP
<<&are-you-sure>>
if [ "${SERVER}" = "localhost" ]; then
# typeset -f serializes the function so the root shell can call it
su - root -c "$(typeset -f service-manager);
<<&local-deploy-command>>" || return $?
else
rsync --rsh ssh "${PATH_ZIP}" "${SERVER_AS_SCIGRAPH}:${PATH_RUNTIME}" || return $?
ssh ${SERVER_AS_SCIGRAPH} "pushd \"${PATH_RUNTIME}\"; unzip -n \"${FILE_NAME_ZIP}\"" || return $?
# FIXME if this fails it is extremely difficult/annoying to restart
ssh -t ${SERVER} "$(typeset -f service-manager); service-manager scigraph stop" || return $?
ssh ${SERVER_AS_SCIGRAPH} "
unlink \"${PATH_RUNTIME}/${FOLDER_NAME_RUNTIME}\"
ln -sT \"${PATH_RUNTIME}/${STAMPED}\" \"${PATH_RUNTIME}/${FOLDER_NAME_RUNTIME}\"" || echo relink failed
ssh -t ${SERVER} "$(typeset -f service-manager); service-manager scigraph start"
fi
}
path-build-graph
here
/tmp/scigraph-build
Set path-graphload-template
here
~/git/pyontutils/nifstd/scigraph/graphload-base-template.yaml
Set folder-name-graph
here
graph
Set path-runtime
here
/var/lib/scigraph
Set folder-name-runtime
here
graph
# Assemble services.yaml from the base template + curies map + cypher
# resources, then substitute the runtime graph location and view urls.
function build-services () {
<<&vars-build-services>>
# make build location
mkdir -p "${PATH_BUILD}"
# TODO allow urls?
# make raw
cat "${PATH_SERVICES}" > "${PATH_YAML_RAW}" || return $?
# reformat the curies map and indent it to nest under the services config
yq -Sy '' "${PATH_CURIES}" | sed 's/^/ /' >> "${PATH_YAML_RAW}" || return $?
cat "${PATH_CYPHER_RESOURCES}" >> "${PATH_YAML_RAW}" || return $?
# make services header
# the first line records the raw file name; backup-services-config reads it
echo "# ${YAML_RAW}" > "${PATH_YAML}"
# make services
yq -Y ".graphConfiguration.location = \"${PATH_RUNTIME_FOLDER_GRAPH}\" |
.serviceMetadata.view.url = \"${URL_VIEW}\" |
.serviceMetadata.preview.url = \"${URL_PREVIEW}\"
" "${PATH_YAML_RAW}" >> "${PATH_YAML}" || return $?
}
Once services.yaml
has been created rsync the raw and expanded configs
to the target host.
# Deploy the raw and expanded services configs to the target host,
# backing up the existing config first, then restart scigraph.
function deploy-services () {
# NOTE it is on YOU to make sure you are deploying the right config
# to the right server, under normal circumstances you should
# be running a run-build-deploy-services-* command
<<&vars-deploy-services>>
echo Will deploy services config to $TARGET
echo $CONFIG_RAW
echo $CONFIG
<<&are-you-sure>>
local CMD='
<<&deploy-services-backup-config>>'
echo $CONFIG_RAW $TARGET
# NOTE(review): CMD is assigned above but only CMD_RESTART is executed
# in the localhost branch -- confirm which is intended
if [ ${SERVER} = localhost ]; then
backup-services-config "${EXISTING}" "${PATH_RUNTIME}" "${YAML}" || return $?
cp "${CONFIG_RAW}" "${TARGET}" || return $?
cp "${CONFIG}" "${TARGET}" || return $?
${CMD_RESTART} || return $?
else
# if only we could rsync
# back up the remote config before overwriting it
ssh ${SERVER_AS_SCIGRAPH} "$(typeset -f backup-services-config); backup-services-config ${EXISTING} ${PATH_RUNTIME} ${YAML}" || return $?
rsync --rsh ssh "${CONFIG_RAW}" "${TARGET}" || return $?
rsync --rsh ssh "${CONFIG}" "${TARGET}" || return $?
ssh -t ${SERVER} "$(typeset -f service-manager); service-manager scigraph restart" || return $?
fi
}
# Back up an existing services config before it is overwritten.
# $1 existing config path, $2 runtime folder, $3 services yaml file name.
# If the first line of the existing file is a "# <raw name>" header
# (written by build-services) the backup name is derived from it with
# any ".raw" removed; otherwise the file mtime is appended to the name.
function backup-services-config () {
local EXISTING=${1}
local PATH_RUNTIME=${2}
local YAML=${3}
# declare and assign separately so the command's exit status is not
# masked by local (the empty-result case selects the mtime branch)
local E_YAML_RAW
E_YAML_RAW=$(head -n 1 "${EXISTING}" | grep "^#" | cut -b3-)
if [ -n "${E_YAML_RAW}" ]; then
local YAML_BACKUP="${PATH_RUNTIME}/${E_YAML_RAW//.raw/}"
else
# fall back to a timestamped name like services.yaml-20240101T120000
local mod_dt
mod_dt=$(stat -c %y "${EXISTING}" | cut -d "." -f1 | sed "s/[-\:]//g" | sed "s/\ /T/")
local YAML_BACKUP="${PATH_RUNTIME}/${YAML}-${mod_dt}"
fi
cp -a "${EXISTING}" "${YAML_BACKUP}"
}
path-services
here
~/git/pyontutils/nifstd/scigraph/services-base-template.yaml
Set path-curies
here
~/git/pyontutils/nifstd/scigraph/curie_map.yaml
Set path-build-services
here
./
Set file-stem-services-output
here
services
# Generate an ssh key for a service user after verifying root access on
# the target server; refuses to clobber an existing key. Variables are
# intentionally left set for a following ssh-copy-id-simple call.
function ssh-keygen-simple () {
<<&vars-ssh-keygen-deploy>>
# test if the key already exists
if [ -f "${KEYPATH}" ]; then
echo "${KEYPATH}" already exists not running keygen and not deploying
unset KEYPATH SERVER SVC_USER SUDO_OR_SU NOSUDO
return 1
fi
# test that root access is available
# FIXME requires double password entry in the su case
ssh -t ${SERVER} "${SUDO_OR_SU} \"echo we have root access\"" && export HAVE_ROOT=1 || return $?
# generate the key
ssh-keygen -t ${KEYTYPE} -N "" -C "${USER}@${HOSTNAME}" -f "${KEYPATH}"
# all the relevant environment variables are left in the environment
# for the next step
}
# Copy a public key into a service user's authorized_keys on a remote
# server, escalating to root on the remote side to write it.
function ssh-copy-id-simple () {
# note that &vars-ssh-keygen-deploy is written in such a way that
# the environment variables will persist if they are specified on
# ssh-keygen-simple and thus do not have to be set again in here
# they are included so that ssh-keygen-deploy can be used by itself
<<&vars-ssh-keygen-deploy>>
<<&command-ssh-copy-id-remote>>
# test that root access is available if HAVE_ROOT is not set
if [ -z $HAVE_ROOT ]; then
ssh -t ${SERVER} "${SUDO_OR_SU} \"echo we have root access\"" && \
export HAVE_ROOT=1 || return $?
fi
# copy the key to the host
TEMP_PATH=$(ssh ${SERVER} 'mktemp')
rsync --rsh ssh "${KEYPATH}.pub" ${SERVER}:${TEMP_PATH} || return $?
# TODO option to deploy private key if the key is generated for a service user
# and today we learn about typeset, sigh
ssh -t ${SERVER} "${SUDO_OR_SU} '$(typeset -f copy-id-to-user-authorized-keys); copy-id-to-user-authorized-keys ${TEMP_PATH} ${SVC_USER}'"
local oops=$?
if [ $oops -ne 0 ]; then
echo FAILED so ${SERVER}:${TEMP_PATH} is still present
return $oops
fi
# clear the state shared with ssh-keygen-simple
unset KEYPATH SERVER SVC_USER SUDO_OR_SU TEMP_PATH NOSUDO
}
# Append a public key to a service user's authorized_keys.
# $1 path to the public key file (removed on success), $2 service user.
# Runs on the remote host as root (it chowns the user's ~/.ssh).
function copy-id-to-user-authorized-keys () {
PUB_KEY=${1}
SVC_USER=${2}
# eval is required to tilde-expand ~user from a variable; SVC_USER is
# operator supplied, not untrusted input
UHOME=$(eval echo ~"${SVC_USER}")
if [ ! -d "${UHOME}/.ssh" ]; then
mkdir "${UHOME}/.ssh"
# keep ~/.ssh private (sshd StrictModes rejects lax permissions)
chmod 700 "${UHOME}/.ssh"
fi
if [ ! -f "${UHOME}/.ssh/authorized_keys" ]; then
touch "${UHOME}/.ssh/authorized_keys"
chown -R "${SVC_USER}:${SVC_USER}" "${UHOME}/.ssh"
fi
cat "${PUB_KEY}" >> \
"${UHOME}/.ssh/authorized_keys" && \
chown -R "${SVC_USER}:${SVC_USER}" "${UHOME}/.ssh" && \
rm "${PUB_KEY}";
}
# Interactive confirmation guard used before any deploy; entering '?'
# dumps the current environment for inspection instead of deploying.
printf "Are you sure you want to deploy? y/N "
# zsh and bash spell single-character read differently
if [ $ZSH_VERSION ]; then
read -r -k 1 choice
else
read -r -n 1 choice
fi
case "${choice}" in
y|Y) echo ;;
n|N) echo; echo "Not deploying."; return 1;;
'?') echo; echo "$(set -o posix; set | grep -v '^_')"; return 1;;
*) echo; echo "Not deploying."; return 1;;
esac
echo "Deploying ..."
A bash function that implements a portable service-manager
command.
# Portable service manager wrapper: run an action (start/stop/restart/
# status) for a service via systemctl or rc-service, whichever exists,
# escalating with sudo or su when not already root.
# $1 service name, $2 action.
function service-manager () {
SVCNAME=${1}
ACTION=${2}
# use command -v's exit status directly instead of [ $(command -v ...) ]
if command -v systemctl > /dev/null; then
local CMD="systemctl ${ACTION} ${SVCNAME}"
if [ "${ACTION}" = "start" ]; then
local CMD_POST="systemctl status ${SVCNAME}"
else
# if we are stopping then status will return a non-zero value
# which confuses the signal we want, which would be if
# the program failed to stop, not is stopped
local CMD_POST=""
fi
elif command -v rc-service > /dev/null; then
local CMD="rc-service ${SVCNAME} ${ACTION}"
local CMD_POST=""
else
echo unknown service-manager
return 1
fi
echo ${CMD}
if [[ $EUID = 0 ]]; then
${CMD} || return $?
elif command -v sudo > /dev/null; then
sudo ${CMD} || return $?
else
echo For su on ${HOSTNAME}
su -c "${CMD}" || return $?
fi
# give the service a moment to settle before reporting status
if [ -n "${CMD_POST}" ]; then
sleep 5
$CMD_POST
fi
}
# Smoke test: restart the service, then hammer the API with requests.
systemctl restart scigraph
ontutils scigraph-stress --scigraph-api http://${SERVER}:${PORT}/scigraph
Set path-scigraph-functions
here
~/git/pyontutils/nifstd/scigraph/bin/scigraph-functions.sh