Skip to content

Commit

Permalink
GPU MPC (#214)
Browse files Browse the repository at this point in the history
* Adding GPU-MPC

* Adding gitignore

* Added weights as a submodule

* Added mnist as a submodule

* Added cutlass as a submodule

* Added SEAL as a submodule

---------

Co-authored-by: Neha J <b-nejawalkar@microsoft.com>
  • Loading branch information
neha-jawalkar and Neha J committed May 17, 2024
1 parent 133464a commit 473eb34
Show file tree
Hide file tree
Showing 498 changed files with 115,885 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,15 @@
[submodule "CrypTFlow2/extern/eigen"]
path = SCI/extern/eigen
url = https://gitlab.com/libeigen/eigen
[submodule "GPU-MPC/experiments/orca/weights"]
path = GPU-MPC/experiments/orca/weights
url = https://github.com/neha-jawalkar/weights.git
[submodule "GPU-MPC/experiments/orca/datasets/mnist"]
path = GPU-MPC/experiments/orca/datasets/mnist
url = https://github.com/neha-jawalkar/mnist.git
[submodule "GPU-MPC/ext/cutlass"]
path = GPU-MPC/ext/cutlass
url = https://github.com/NVIDIA/cutlass.git
[submodule "GPU-MPC/ext/sytorch/ext/sci/extern/SEAL"]
path = GPU-MPC/ext/sytorch/ext/sci/extern/SEAL
url = https://github.com/microsoft/SEAL.git
34 changes: 34 additions & 0 deletions GPU-MPC/Dockerfile_Gen
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Author: Tanmay Rajore,Neha Jawalkar
#
# Copyright:
# Copyright (c) 2024 Microsoft Research
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

WORKDIR /home
# The build scripts assume bash semantics from /bin/sh.
RUN ln -sf /bin/bash /bin/sh

# Install the toolchain in one layer:
#  - apt-get (not apt) is the stable CLI for non-interactive scripts
#  - docker build already runs as root, so no sudo prefix is needed
#    (the sudo package itself is still installed for interactive use)
#  - steps chained with && (not ;) so the image build aborts on the
#    first failure instead of silently producing a broken image
#  - the apt package lists are purged at the end to keep the layer small
RUN apt-get update && apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git apt-utils sudo \
        gcc-9 g++-9 \
        libssl-dev cmake make python3-pip \
        libgmp-dev libmpfr-dev libeigen3-dev && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    rm -rf /var/lib/apt/lists/*
# (`update-alternatives --config gcc` was dropped: gcc-9 is the only
#  registered alternative, so the interactive chooser was a no-op.)

# Quote '*' so the shell cannot glob it against files in the working
# directory; git itself interprets a literal * as "trust every directory".
RUN git config --global --add safe.directory '*'
#RUN git submodule update --init --recursive
132 changes: 132 additions & 0 deletions GPU-MPC/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# ---- Build configuration ----------------------------------------------------
# CUDA_VERSION and GPU_ARCH come from the environment (see README:
# `export CUDA_VERSION=11.7; export GPU_ARCH=86`).
# `?=` already means "assign only if unset", so the default can be stated
# directly; the original `?= $(value CUDA_VERSION)` + ifeq dance was a no-op.
CUDA_VERSION ?= 11.7

# Fail loudly instead of silently: an empty GPU_ARCH would otherwise produce
# a malformed `-gencode arch=compute_,code=[sm_,compute_]` flag below.
ifeq ($(strip $(GPU_ARCH)),)
$(warning GPU_ARCH is not set; nvcc -gencode flags will be malformed. Run `export GPU_ARCH=<arch>` (e.g. 86).)
endif

# Simple `:=` assignments: expanded once at parse time, no late-binding surprises.
CUTLASS_PATH := ./ext/cutlass
SYTORCH_PATH := ./ext/sytorch
SYTORCH_BUILD_PATH := $(SYTORCH_PATH)/build
LLAMA_PATH := $(SYTORCH_PATH)/ext/llama
CUDA_ARCH := $(GPU_ARCH)

# nvcc from the requested CUDA toolkit acts as the compiler driver.
CXX := /usr/local/cuda-$(CUDA_VERSION)/bin/nvcc
FLAGS := -O3 -gencode arch=compute_$(CUDA_ARCH),code=[sm_$(CUDA_ARCH),compute_$(CUDA_ARCH)] -std=c++17 -m64 -Xcompiler="-O3,-w,-std=c++17,-fpermissive,-fpic,-pthread,-fopenmp,-march=native"
LIBS := -lsytorch -lcryptoTools -lLLAMA -lbitpack -lcuda -lcudart -lcurand
SECFLOAT_LIBS := -lSCI-FloatML -lSCI-FloatingPoint -lSCI-BuildingBlocks -lSCI-LinearOT -lSCI-GC -lcrypto

# Helper sources compiled into every binary (GPU memory, file I/O, comms).
UTIL_FILES := ./utils/gpu_mem.cu ./utils/gpu_file_utils.cpp ./utils/sigma_comms.cpp
OBJ_INCLUDES := -I '$(CUTLASS_PATH)/include' -I '$(CUTLASS_PATH)/tools/util/include' -I '$(SYTORCH_PATH)/include' -I '$(LLAMA_PATH)/include' -I '$(SYTORCH_PATH)/ext/cryptoTools' -I '.'
INCLUDES := $(OBJ_INCLUDES) -L$(CUTLASS_PATH)/build/tools/library -L$(SYTORCH_BUILD_PATH) -L$(SYTORCH_BUILD_PATH)/ext/cryptoTools -L$(SYTORCH_BUILD_PATH)/ext/llama -L$(SYTORCH_BUILD_PATH)/ext/bitpack -L$(SYTORCH_BUILD_PATH)/lib

# -----------------------------------------------------------------------------
# Every target below is a *command name*, not the file its recipe produces
# (e.g. `make dpf` writes tests/fss/dpf, not ./dpf).  They are declared
# .PHONY so a stray file named after a target can never mask its rule and
# make the build report "up to date" without building anything.
# `$^` passes all listed sources to nvcc in one invocation.
.PHONY: dpf dpf_eval_all dpf_drelu dpf_lut gelu relu rmsnorm softmax fc \
        layernorm silu truncate mha secfloat_softmax piranha_softmax \
        orca_dealer orca_evaluator dcf aes dcf_relu_extend \
        dcf_stochastic_truncate dcf_relu orca_conv2d orca_maxpool \
        orca_relu_extend orca_fc orca_relu orca_inference \
        orca_inference_u32 sigma piranha share_data model_accuracy orca

# ---- DPF-based FSS tests ----
dpf: tests/fss/dpf.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dpf

dpf_eval_all: tests/fss/dpf_eval_all.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dpf_eval_all

dpf_drelu: tests/fss/dpf_drelu.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dpf_drelu

dpf_lut: tests/fss/dpf_lut.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dpf_lut

# ---- FSS protocol tests ----
gelu: tests/fss/gelu.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/gelu

relu: tests/fss/relu.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/relu

rmsnorm: tests/fss/rmsnorm.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/rmsnorm

softmax: tests/fss/softmax.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/softmax

fc: tests/fss/fc.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/fc

layernorm: tests/fss/layernorm.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/layernorm

silu: tests/fss/silu.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/silu

truncate: tests/fss/truncate.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/truncate

mha: tests/fss/mha.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/mha

# secfloat_* targets additionally link the SCI floating-point libraries.
secfloat_softmax: tests/fss/secfloat_softmax.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) $(SECFLOAT_LIBS) -o tests/fss/secfloat_softmax

piranha_softmax: tests/fss/piranha_softmax.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/piranha_softmax

# ---- Orca experiment binaries ----
orca_dealer: experiments/orca/orca_dealer.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) $(SECFLOAT_LIBS) -o experiments/orca/orca_dealer

orca_evaluator: experiments/orca/orca_evaluator.cu experiments/orca/datasets/mnist.cpp
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) $(SECFLOAT_LIBS) -o experiments/orca/orca_evaluator

# ---- DCF-based FSS tests ----
dcf: tests/fss/dcf/dcf.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dcf/dcf

aes: tests/fss/dcf/aes.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dcf/aes

dcf_relu_extend: tests/fss/dcf/relu_extend.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dcf/relu_extend

dcf_stochastic_truncate: tests/fss/dcf/stochastic_truncate.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dcf/stochastic_truncate

dcf_relu: tests/fss/dcf/relu.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/fss/dcf/relu

# ---- Orca NN-layer tests ----
orca_conv2d: tests/nn/orca/conv2d_test.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/nn/orca/conv2d

orca_maxpool: tests/nn/orca/maxpool_test.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/nn/orca/maxpool

orca_relu_extend: tests/nn/orca/relu_extend_test.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/nn/orca/relu_extend

orca_fc: tests/nn/orca/fc_test.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/nn/orca/fc

orca_relu: tests/nn/orca/relu_test.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o tests/nn/orca/relu

# ---- End-to-end experiment drivers ----
orca_inference: experiments/orca/orca_inference.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/orca/orca_inference

# Same source as orca_inference, built with 32-bit inference types.
orca_inference_u32: experiments/orca/orca_inference.cu
	$(CXX) $(FLAGS) -DInfType=u32 $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/orca/orca_inference_u32

sigma: experiments/sigma/sigma.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/sigma/sigma

piranha: experiments/orca/piranha.cu
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/orca/piranha

share_data: experiments/orca/share_data.cpp experiments/orca/datasets/mnist.cpp
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/orca/share_data

model_accuracy: experiments/orca/model_accuracy.cu experiments/orca/datasets/mnist.cpp
	$(CXX) $(FLAGS) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) -o experiments/orca/model_accuracy

# Aggregate target: everything needed to run the Orca experiments.
orca: orca_dealer orca_evaluator orca_inference orca_inference_u32 piranha

.PHONY: clean
# Remove build trees, experiment output and the built binaries.
# `rm -f` / `rm -rf` so that clean succeeds even when a target was never
# built (the original bare `rm` aborted on the first missing file).
clean:
	rm -rf ext/cutlass/build
	rm -rf ext/sytorch/build
	# NOTE(review): the original removed orca/experiments/output and
	# sigma/experiments/output, but every other path in this repo is
	# experiments/<name>/... — both spellings are removed here; confirm
	# which one run_experiment.py actually writes.
	rm -rf orca/experiments/output
	rm -rf sigma/experiments/output
	rm -rf experiments/orca/output
	rm -rf experiments/sigma/output
	rm -f experiments/orca/orca_dealer
	rm -f experiments/orca/orca_evaluator
	rm -f experiments/orca/orca_inference
	rm -f experiments/orca/orca_inference_u32
	rm -f experiments/orca/piranha
	rm -f experiments/sigma/sigma

123 changes: 123 additions & 0 deletions GPU-MPC/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@

# Orca: FSS-based Secure Training and Inference with GPUs

Implementation of protocols from the paper [Orca](https://eprint.iacr.org/2023/206).

**Warning**: This is an academic proof-of-concept prototype and has not received careful code review. This implementation is NOT ready for production use.

## Build

This project requires NVIDIA GPUs, and assumes that GPU drivers and the [NVIDIA CUDA Toolkit](https://docs.nvidia.com/cuda/) are already installed. The following has been tested on Ubuntu 20.04 with CUDA 11.7, CMake 3.27.2 and g++-9.

Please note that Sytorch requires CMake version >= 3.17 and the build will fail if this dependency is not met.

The code uses CUTLASS version 2.11 by default, so if you change the CUDA version, please make sure that the CUTLASS version being built is compatible with the new CUDA version. To change the version of CUTLASS being built, add `git checkout <branch>;` after line 31 (`cd ext/cutlass;`) of setup.sh.

The last line of `setup.sh` tries to install `matplotlib`, which is needed for generating Figures 5a and 5b. In our experience, the installation fails if the versions of Python and `pip` do not match. In case the installation fails, please install `matplotlib` manually before running `run_experiment.py`.

1. Export environment variables

```
export CUDA_VERSION=11.7
export GPU_ARCH=86
```

2. Set up the environment

```
sh setup.sh
```

3. Make Orca

```
make orca
```

## Run

1. Each party runs two processes: a dealer and an evaluator. The configuration needs to define the GPU on which the dealer will run, and the directory in which it will store FSS keys. This is done in `config.json` as:

```javascript
"dealer" :
{ "gpu": <The ID of the GPU to use>,
"key_dir": <The directory in which the dealer will store keys>
}
```

FSS keys tend to be quite large so please make sure that the key directory has at least 500GB of free space. Please also ensure that it is writeable.

Similarly, the configuration also needs to define the GPU on which the evaluator will run, and the IP address of its peer, i.e., the address of the remote party the evaluator will communicate with for secure training or inference. This is done in `config.json` as:

```javascript
"evaluator" :
{ "gpu": <The ID of the GPU to use>,
"peer": <The address of the remote peer>
}
```

You can run Orca to generate Figures 5a and 5b, as well as Tables 3, 4, 6, 7, 8 and 9. Table 5 can be generated by throttling the network bandwidth (with `tc`, for example) and regenerating Table 4. The script reports numbers for Tables 4, 6, 7 and 9 as the average of 10 iterations.

Figure 5b and Table 3 run end-to-end training and so can take a couple of days to finish.

Evaluation runs through `experiments/orca/run_experiment.py`. Here are the relevant options:

```
usage: run_experiment.py [-h] [--figure FIGURE] [--table TABLE] --party 0/1
optional arguments:
--figure FIGURE Figure # to run.
--table TABLE Table # to run.
--all true Run all the experiments.
```

Results are stored in the `output/P<party-number>/Table<table-number>` or `output/P<party-number>/Fig<figure-number>` folders.

Log files (which might help with debugging) are stored in the corresponding experiment folders, i.e., in `output/P<party-number>/Table<table-number>/logs` and `output/P<party-number>/Fig<figure-number>/logs`.

## Docker Build

You can also build the Docker image using the provided `Dockerfile_Gen`, which sets up the build environment.

### Install Nvidia Container Toolkit
- Configure the repository:
```
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey |sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
&& sudo apt-get update
```

- Install the NVIDIA Container Toolkit packages:
```
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
```
### Build the Docker Image / pull the image from Docker Hub
```
# Local Build
docker build -t gpu_mpc -f Dockerfile_Gen .
# Pull from Docker Hub (Cuda 11.8)
docker pull trajore/gpu_mpc
```
### Run the Docker Container
```
sudo docker run --gpus all --network host -v /home/$USER/path_to_GPU-MPC/:/home -it container_name /bin/bash
```
Then run `setup.sh` to configure the build for your GPU architecture, and run `make orca` as described above.

## Citation

You can cite the paper using the following BibTeX entry:

```
@INPROCEEDINGS {,
author = {N. Jawalkar and K. Gupta and A. Basu and N. Chandran and D. Gupta and R. Sharma},
booktitle = {2024 IEEE Symposium on Security and Privacy (SP)},
title = {Orca: FSS-based Secure Training and Inference with GPUs},
year = {2024}
}
```

0 comments on commit 473eb34

Please sign in to comment.