Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gRPC improvements, mTLS, and more #66

Merged
merged 22 commits into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@

**/.vscode
**/.DS_Store

**/*.crt
**/*.crl
**/*.csr
**/*.key
7 changes: 7 additions & 0 deletions docker/base/Dockerfile.worker
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ RUN apt install -y \
build-essential curl git gnupg \
python3 python3-pip

# Install watchexec for development
RUN curl -s \
-L https://github.com/watchexec/watchexec/releases/download/1.14.1/watchexec-1.14.1-x86_64-unknown-linux-gnu.deb \
-o watchexec.deb \
&& apt install -y ./watchexec.deb \
&& rm watchexec.deb

# Install Moose
COPY Makefile README.md requirements-dev.txt setup.py ./
COPY moose /moose
Expand Down
23 changes: 11 additions & 12 deletions examples/keras/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
import asyncio
import logging

from grpc.experimental import aio

from moose.cluster.cluster_spec import load_cluster_spec
from moose.logger import get_logger
from moose.server import Server
from moose.worker import Worker

parser = argparse.ArgumentParser(description="Launch worker")
parser.add_argument("--name", type=str, default="Worker")
Expand All @@ -20,14 +17,16 @@
get_logger().setLevel(level=logging.DEBUG)

if __name__ == "__main__":
aio.init_grpc_aio()

get_logger().info(f"Starting on {args.host}:{args.port}")
cluster_spec = load_cluster_spec(args.cluster_spec)
server = Server(args.host, args.port, cluster_spec)

asyncio.get_event_loop().run_until_complete(server.start())
worker = Worker(
name=args.name,
host=args.host,
part=args.port,
cluster_spec_filename=args.cluster_spec,
allow_insecure_networking=True, # TODO
)

asyncio.get_event_loop().run_until_complete(worker.start())
get_logger().info("Started")

asyncio.get_event_loop().run_until_complete(server.wait())
asyncio.get_event_loop().run_until_complete(worker.wait())
get_logger().info("Stopped")
3 changes: 0 additions & 3 deletions examples/mp-spdz/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import argparse
import logging

from grpc.experimental import aio

from moose.compiler.edsl import HostPlacement
from moose.compiler.edsl import add
from moose.compiler.edsl import computation
Expand Down Expand Up @@ -79,7 +77,6 @@ def my_comp():
concrete_comp = my_comp.trace_func()

if __name__ == "__main__":
aio.init_grpc_aio()
if args.runtime == "test":
runtime = TestRuntime(workers=concrete_comp.devices())
elif args.runtime == "remote":
Expand Down
23 changes: 11 additions & 12 deletions examples/mp-spdz/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
import asyncio
import logging

from grpc.experimental import aio

from moose.cluster.cluster_spec import load_cluster_spec
from moose.logger import get_logger
from moose.server import Server
from moose.worker import Worker

parser = argparse.ArgumentParser(description="Launch worker")
parser.add_argument("--name", type=str, default="Worker")
Expand All @@ -20,14 +17,16 @@
get_logger().setLevel(level=logging.DEBUG)

if __name__ == "__main__":
aio.init_grpc_aio()

get_logger().info(f"Starting on {args.host}:{args.port}")
cluster_spec = load_cluster_spec(args.cluster_spec)
server = Server(args.host, args.port, cluster_spec)

asyncio.get_event_loop().run_until_complete(server.start())
worker = Worker(
name=args.name,
host=args.host,
port=args.port,
cluster_spec_filename=args.cluster_spec,
allow_insecure_networking=True, # TODO
)

asyncio.get_event_loop().run_until_complete(worker.start())
get_logger().info("Started")

asyncio.get_event_loop().run_until_complete(server.wait())
asyncio.get_event_loop().run_until_complete(worker.wait())
get_logger().info("Stopped")
6 changes: 6 additions & 0 deletions examples/python-functions/Dockerfile.coordinator
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
FROM tfencrypted/runtime-base-worker:latest

WORKDIR /example

# this is just a hack to keep the container runnning
CMD echo "Ready"; tail -f /dev/null
5 changes: 2 additions & 3 deletions examples/python-functions/Dockerfile.worker
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
FROM tfencrypted/runtime-base-worker:latest

# Copy over example files
COPY cluster-spec.yaml worker.py ./
WORKDIR /example

CMD python3 worker.py --verbose --port $PORT
CMD watchexec -e py -r "python3 worker.py --verbose --name $NAME"
39 changes: 20 additions & 19 deletions examples/python-functions/Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
certs:
rm -rf certs/
certstrap --depot-path certs init --common-name ca --passphrase ""
certstrap --depot-path certs request-cert --domain inputter0 --passphrase ""
certstrap --depot-path certs sign --CA ca inputter0
certstrap --depot-path certs request-cert --domain inputter1 --passphrase ""
certstrap --depot-path certs sign --CA ca inputter1
certstrap --depot-path certs request-cert --domain aggregator --passphrase ""
certstrap --depot-path certs sign --CA ca aggregator
certstrap --depot-path certs request-cert --domain outputter --passphrase ""
certstrap --depot-path certs sign --CA ca outputter
certstrap --depot-path certs request-cert --domain coordinator --passphrase ""
certstrap --depot-path certs sign --CA ca coordinator

MAIN_IMAGE?=tfencrypted/runtime-base-worker:latest

run:
docker run \
--tty \
--network python-functions_default \
--volume $(realpath ../../moose):/moose \
--volume $(realpath .):/examples/python-functions \
${MAIN_IMAGE} \
sh -c "python3 /examples/python-functions/main.py --runtime remote --verbose --cluster-spec /examples/python-functions/cluster-spec.yaml"

test-localhost:
docker run \
--tty \
--volume $(realpath ../../moose):/moose \
--volume $(realpath .):/examples/python-functions \
${MAIN_IMAGE} \
sh -c "python3 /examples/python-functions/main.py --runtime test --verbose "
up:
docker-compose up --build

down:
docker-compose down --remove-orphans

run:
docker-compose exec coordinator sh -c "python3 main.py --verbose --runtime remote"

.PHONY: run test-localhost
.PHONY: certs up down run
28 changes: 21 additions & 7 deletions examples/python-functions/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
# Calling Python functions from Moose
# Calling Python Functions from Moose

This example shows you how you can call python functions on cleartext within Moose.
This example shows how you can use Moose to call Python functions on workers.

We recommend running the example using the included docker-compose.yaml file to start up the needed workers, and docker for running `main.py`. To do so, first run the following from this directory to start up the required workers:
We recommend running the example using Docker Compose by executing the following commands from this directory.

```
docker-compose up --build
First we must generate certificates for all participants (note that you must have [certstrap](https://github.com/square/certstrap) installed):

```sh
make certs
```

Then run the following to execute `main.py` against there:
Then we can spin up a local cluster of containers using:

```sh
make up
```

This will block the current terminal, so launch a new one to execute the remaining commands.

To execute a computation on the cluster we can finally run the following, which may be done repeatedly without re-running the steps above:

```sh
make run
```

To stop the workers again run `docker-compose down`.
Once we a done we can shut down the cluster again:

```
make down
```
6 changes: 3 additions & 3 deletions examples/python-functions/cluster-spec.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
workers:
- inputter0: inputter0:50000
- inputter1: inputter1:50001
- aggregator: aggregator:50002
- outputter: outputter:50003
- inputter1: inputter1:50000
- aggregator: aggregator:50000
- outputter: outputter:50000
49 changes: 37 additions & 12 deletions examples/python-functions/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,62 @@ services:
context: .
dockerfile: Dockerfile.worker
volumes:
- "../../moose:/moose"
- ".:/examples/python-functions"
- "../../moose:/moose"
- ".:/example"
environment:
PORT: 50000
NAME: inputter0
CA_CERT: ./certs/ca.crt
IDENT_CERT: ./certs/inputter0.crt
IDENT_KEY: ./certs/inputter0.key

inputter1:
build:
context: .
dockerfile: Dockerfile.worker
volumes:
- "../../moose:/moose"
- ".:/examples/python-functions"
- "../../moose:/moose"
- ".:/example"
environment:
PORT: 50001
NAME: inputter1
CA_CERT: ./certs/ca.crt
IDENT_CERT: ./certs/inputter1.crt
IDENT_KEY: ./certs/inputter1.key

aggregator:
build:
context: .
dockerfile: Dockerfile.worker
volumes:
- "../../moose:/moose"
- ".:/examples/python-functions"
- "../../moose:/moose"
- ".:/example"
environment:
PORT: 50002
NAME: aggregator
CA_CERT: ./certs/ca.crt
IDENT_CERT: ./certs/aggregator.crt
IDENT_KEY: ./certs/aggregator.key

outputter:
build:
context: .
dockerfile: Dockerfile.worker
volumes:
- "../../moose:/moose"
- ".:/examples/python-functions"
- "../../moose:/moose"
- ".:/example"
environment:
PORT: 50003
NAME: outputter
CA_CERT: ./certs/ca.crt
IDENT_CERT: ./certs/outputter.crt
IDENT_KEY: ./certs/outputter.key

coordinator:
build:
context: .
dockerfile: Dockerfile.coordinator
volumes:
- "../../moose:/moose"
- ".:/example"
environment:
NAME: coordinator
CA_CERT: ./certs/ca.crt
IDENT_CERT: ./certs/coordinator.crt
IDENT_KEY: ./certs/coordinator.key
16 changes: 11 additions & 5 deletions examples/python-functions/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import argparse
import logging

from grpc.experimental import aio
import os

from moose.compiler.edsl import HostPlacement
from moose.compiler.edsl import add
Expand All @@ -16,7 +15,10 @@
parser = argparse.ArgumentParser(description="Run example")
parser.add_argument("--runtime", type=str, default="test")
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--cluster-spec", default="cluster-spec-main.yaml")
parser.add_argument("--cluster-spec", default="cluster-spec.yaml")
parser.add_argument("--ca-cert", default=os.environ.get("CA_CERT", None))
parser.add_argument("--ident-cert", default=os.environ.get("IDENT_CERT", None))
parser.add_argument("--ident-key", default=os.environ.get("IDENT_KEY", None))
args = parser.parse_args()

if args.verbose:
Expand Down Expand Up @@ -59,11 +61,15 @@ def my_comp():
concrete_comp = my_comp.trace_func()

if __name__ == "__main__":
aio.init_grpc_aio()
if args.runtime == "test":
runtime = TestRuntime(workers=concrete_comp.devices())
elif args.runtime == "remote":
runtime = RemoteRuntime(args.cluster_spec)
runtime = RemoteRuntime(
args.cluster_spec,
ca_cert_filename=args.ca_cert,
ident_cert_filename=args.ident_cert,
ident_key_filename=args.ident_key,
)
assert set(concrete_comp.devices()).issubset(runtime.executors.keys())
else:
raise ValueError(f"Unknown runtime '{args.runtime}'")
Expand Down
29 changes: 18 additions & 11 deletions examples/python-functions/worker.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,40 @@
import argparse
import asyncio
import logging
import os

from grpc.experimental import aio

from moose.cluster.cluster_spec import load_cluster_spec
from moose.logger import get_logger
from moose.server import Server
from moose.worker import Worker

parser = argparse.ArgumentParser(description="Launch worker")
parser.add_argument("--name", type=str, default="Worker")
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=str, default="50000")
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--cluster-spec", default="cluster-spec.yaml")
parser.add_argument("--ca-cert", default=os.environ.get("CA_CERT", None))
parser.add_argument("--ident-cert", default=os.environ.get("IDENT_CERT", None))
parser.add_argument("--ident-key", default=os.environ.get("IDENT_KEY", None))
args = parser.parse_args()

if args.verbose:
get_logger().setLevel(level=logging.DEBUG)

if __name__ == "__main__":
aio.init_grpc_aio()

get_logger().info(f"Starting on {args.host}:{args.port}")
cluster_spec = load_cluster_spec(args.cluster_spec)
server = Server(args.host, args.port, cluster_spec)

asyncio.get_event_loop().run_until_complete(server.start())
worker = Worker(
name=args.name,
host=args.host,
port=args.port,
cluster_spec_filename=args.cluster_spec,
ca_cert_filename=args.ca_cert,
ident_cert_filename=args.ident_cert,
ident_key_filename=args.ident_key,
allow_insecure_networking=True,
)

asyncio.get_event_loop().run_until_complete(worker.start())
get_logger().info("Started")

asyncio.get_event_loop().run_until_complete(server.wait())
asyncio.get_event_loop().run_until_complete(worker.wait())
get_logger().info("Stopped")
Loading