Skip to content

Commit

Permalink
Separate requester and compute nodes (#513)
Browse files Browse the repository at this point in the history
  • Loading branch information
hevans66 committed Jul 17, 2023
1 parent ca5fa3c commit c001733
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 0 deletions.
21 changes: 21 additions & 0 deletions infrastructure/ansible/files/compute.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# systemd unit for a Bacalhau compute node.
# This file is an Ansible/Jinja2 template. It expects the variables
# ipfs_connect, owner and requester_peer; gpu is optional and defaults to off.
[Unit]
Description=Bacalhau Compute
# The node connects to the requester peer given by --peer, so do not start
# until the network is reported as online.
Wants=network-online.target
After=network-online.target

[Service]
User=ubuntu
Group=ubuntu
ExecStart=bacalhau serve \
  --node-type compute \
  --ipfs-connect {{ ipfs_connect }} \
  --private-internal-ipfs=false \
{% if gpu | default(false) | bool %}
  --limit-total-gpu 1 \
{% endif %}
  --limit-job-memory 12gb \
  --job-selection-accept-networked \
  --job-selection-data-locality anywhere \
  --labels owner={{ owner }} \
  --peer {{ requester_peer }}
# Bring the node back automatically if the process exits abnormally.
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
17 changes: 17 additions & 0 deletions infrastructure/ansible/files/requester.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# systemd unit for a Bacalhau requester node.
# This file is an Ansible/Jinja2 template. It expects the variables
# ipfs_connect and owner; receptor_url is optional and, when defined, is
# passed as the HTTP job-selection probe.
[Unit]
Description=Bacalhau Requester
# Do not start until the network is reported as online.
Wants=network-online.target
After=network-online.target

[Service]
User=ubuntu
Group=ubuntu
ExecStart=bacalhau serve \
  --node-type requester \
  --ipfs-connect {{ ipfs_connect }} \
  --private-internal-ipfs=false \
{% if receptor_url is defined %}
  --job-selection-probe-http {{ receptor_url }} \
{% endif %}
  --labels owner={{ owner }}
# Bring the node back automatically if the process exits abnormally.
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
59 changes: 59 additions & 0 deletions infrastructure/ansible/install_ipfs_tasks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
# Installs Kubo (IPFS) v0.18.0, initialises a repository in the directory
# named by the ipfs_path variable, and runs it as the "ipfs" systemd service.
# Meant to be pulled in with import_tasks; the importing play must define
# ipfs_path.

- name: Install IPFS
  ansible.builtin.get_url:
    url: https://dist.ipfs.tech/kubo/v0.18.0/kubo_v0.18.0_linux-amd64.tar.gz
    dest: /tmp/ipfs.tar.gz

- name: Make a folder to put IPFS files in
  ansible.builtin.file:
    path: /tmp/ipfs
    state: directory

- name: Unzip IPFS
  become: true
  ansible.builtin.unarchive:
    remote_src: true
    src: /tmp/ipfs.tar.gz
    dest: /tmp/ipfs

- name: Install Kubo
  become: true
  ansible.builtin.command: /tmp/ipfs/kubo/install.sh

# Use ipfs_path rather than a literal so this directory is always the one
# that "ipfs init" and /etc/environment point at.
- name: Create IPFS directory
  become: true
  ansible.builtin.file:
    owner: ubuntu
    group: ubuntu
    path: "{{ ipfs_path }}"
    state: directory

# The regexp makes a changed ipfs_path replace the existing entry instead of
# appending a second IPFS_PATH line.
- name: Put the IPFS directory in env for future shells
  become: true
  ansible.builtin.lineinfile:
    path: /etc/environment
    regexp: '^IPFS_PATH='
    line: "IPFS_PATH={{ ipfs_path }}"

# IPFS_PATH is set on the task itself so the repository lands in ipfs_path
# (and the "creates" guard matches) even when the importing play does not
# export it.
- name: Initialize IPFS
  ansible.builtin.command:
    cmd: ipfs init
    creates: "{{ ipfs_path }}/config"
  environment:
    IPFS_PATH: "{{ ipfs_path }}"

# NOTE(review): the API and gateway are bound to 0.0.0.0; this presumably
# relies on the instance security groups to restrict access - confirm.
- name: Configure IPFS
  ansible.builtin.shell: |
    set -e
    ipfs config Addresses.API /ip4/0.0.0.0/tcp/5001
    ipfs config Addresses.Gateway /ip4/0.0.0.0/tcp/8080
    ipfs config --json API.HTTPHeaders.Access-Control-Allow-Methods '["PUT", "POST"]'
    ipfs config Pinning.Recursive true
  environment:
    IPFS_PATH: "{{ ipfs_path }}"

- name: Install the IPFS systemd unit
  become: true
  ansible.builtin.copy:
    src: files/ipfs.service
    dest: /etc/systemd/system/ipfs.service
    mode: "0644"

# daemon_reload makes systemd re-read the unit file installed above before
# the service is started.
- name: Enable and start the IPFS Daemon
  become: true
  ansible.builtin.systemd:
    name: ipfs
    state: started
    enabled: true
    daemon_reload: true
162 changes: 162 additions & 0 deletions infrastructure/ansible/provision_compute_only.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
---
# Provisions a Bacalhau compute-only node: Docker, the Nvidia driver and
# container toolkit (when gpu is true), Go, IPFS, and the "compute" systemd
# service rendered from files/compute.service.
- name: Provision Bacalhau Compute Instance
  remote_user: ubuntu
  hosts: "tag_Type_compute_only:&tag_Env_prod"
  vars:
    # NOTE(review): selects the CUDA apt repository; it should presumably
    # match the Ubuntu release of the target AMI - confirm.
    nvidia_distribution: ubuntu2004
    nvidia_container_toolkit_key_path: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
    ipfs_path: /opt/local/ipfs
    requester_peer: /ip4/172.31.90.74/tcp/1235/p2p/QmbETsVtL1sQ97KKV1jPQA5ng8RSyzPWUiDgRBQp7AcjRt
    gpu: true
  # Exported to every task so the ipfs CLI operates on the repo in ipfs_path.
  environment:
    IPFS_PATH: "{{ ipfs_path }}"
  tasks:
    # Aptitude is preferred by ansible
    - name: Install aptitude
      become: true
      ansible.builtin.apt:
        name: aptitude
        state: latest
        update_cache: true

    # Docker
    - name: Add Docker GPG key
      become: true
      ansible.builtin.get_url:
        url: https://download.docker.com/linux/ubuntu/gpg
        dest: /etc/apt/trusted.gpg.d/docker.asc

    - name: Add Docker Repository
      become: true
      ansible.builtin.apt_repository:
        repo: "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
        state: present

    - name: Create the docker group
      become: true
      ansible.builtin.group:
        name: docker

    # append keeps the user's existing supplementary groups; without it the
    # user module replaces them with just "docker".
    - name: Add ubuntu user to docker group
      become: true
      ansible.builtin.user:
        name: ubuntu
        groups: docker
        append: true

    # Nvidia
    - name: Get Nvidia drivers apt key
      ansible.builtin.get_url:
        url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ nvidia_distribution }}/x86_64/cuda-keyring_1.0-1_all.deb"
        dest: /tmp/cuda-keyring.deb
      when: gpu

    - name: Add Nvidia Keyring
      become: true
      ansible.builtin.apt:
        deb: /tmp/cuda-keyring.deb
      when: gpu

    - name: Get Nvidia Container Toolkit GPG key
      become: true
      ansible.builtin.shell:
        cmd: "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --yes --dearmor -o {{ nvidia_container_toolkit_key_path }}"
        creates: "{{ nvidia_container_toolkit_key_path }}"
      when: gpu

    - name: Add Nvidia Container Toolkit Repository
      become: true
      ansible.builtin.apt_repository:
        repo: "deb [signed-by={{ nvidia_container_toolkit_key_path }}] https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/$(ARCH) /"
        state: present
      when: gpu

    - name: Install required system packages for gpu build
      become: true
      ansible.builtin.apt:
        pkg:
          - cuda-drivers
        state: latest
        update_cache: true
      when: gpu

    - name: Install required system packages
      become: true
      ansible.builtin.apt:
        pkg:
          - ca-certificates
          - curl
          - gnupg
          - lsb-release
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-compose-plugin
        state: latest
        update_cache: true

    - name: Install Nvidia Container Toolkit
      become: true
      ansible.builtin.apt:
        pkg:
          - nvidia-docker2
      notify:
        - Restart docker
      when: gpu

    # An explicit state is required for this task to start the unit; with
    # only a name the module has nothing to act on.
    # NOTE(review): starting may need the freshly installed driver to be
    # loaded (possibly a reboot) - confirm on a clean instance.
    - name: Ensure Nvidia persistence daemon is started
      become: true
      ansible.builtin.systemd:
        name: nvidia-persistenced
        state: started
      when: gpu

    - name: Install Golang
      become: true
      vars:
        go_version: "1.20.3"
      block:
        - name: Download Go binary
          ansible.builtin.get_url:
            url: "https://go.dev/dl/go{{ go_version }}.linux-amd64.tar.gz"
            dest: /tmp/go-binary.tar.gz
        - name: Unzip Go binary
          ansible.builtin.command:
            cmd: tar -C /usr/local -xzf /tmp/go-binary.tar.gz

    - name: Install IPFS
      ansible.builtin.import_tasks: install_ipfs_tasks.yaml

    - name: Install Bacalhau
      ansible.builtin.shell:
        cmd: curl -sL https://get.bacalhau.org/install.sh | bash

    # Applied to the running kernel only; "sysctl -w" does not persist
    # across reboots.
    - name: Bump System Resources
      become: true
      ansible.builtin.command: sysctl -w net.core.rmem_max=2500000

    - name: Install the Bacalhau systemd unit
      become: true
      ansible.builtin.template:
        src: files/compute.service
        dest: /etc/systemd/system
      vars:
        owner: labdao
        ipfs_connect: /ip4/127.0.0.1/tcp/5001
      notify:
        - Restart Bacalhau

    # Runs before the notified handlers, so systemd has re-read the unit
    # file by the time "Restart Bacalhau" fires.
    - name: Systemd Daemon Reload
      become: true
      ansible.builtin.systemd:
        daemon_reload: true

  handlers:
    - name: Restart docker
      become: true
      ansible.builtin.service:
        name: docker
        state: restarted

    - name: Restart Bacalhau
      become: true
      ansible.builtin.service:
        name: compute
        state: restarted
        enabled: true
49 changes: 49 additions & 0 deletions infrastructure/ansible/provision_requester.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# Provisions a Bacalhau requester node: IPFS plus the "requester" systemd
# service rendered from files/requester.service.
- name: Provision Bacalhau Requester
  remote_user: ubuntu
  hosts: tag_Type_requester
  vars:
    ipfs_path: /opt/local/ipfs
  # Exported to every task so the imported IPFS tasks ("ipfs init",
  # "ipfs config") operate on the repo in ipfs_path, matching the
  # "creates" guard on the init task.
  environment:
    IPFS_PATH: "{{ ipfs_path }}"
  tasks:
    # Aptitude is preferred by ansible
    - name: Install aptitude
      become: true
      ansible.builtin.apt:
        name: aptitude
        state: latest
        update_cache: true

    - name: Install IPFS
      ansible.builtin.import_tasks: install_ipfs_tasks.yaml

    - name: Install Bacalhau
      ansible.builtin.shell:
        cmd: curl -sL https://get.bacalhau.org/install.sh | bash

    # Applied to the running kernel only; "sysctl -w" does not persist
    # across reboots.
    - name: Bump System Resources
      become: true
      ansible.builtin.command: sysctl -w net.core.rmem_max=2500000

    - name: Install the Bacalhau systemd unit
      become: true
      ansible.builtin.template:
        src: files/requester.service
        dest: /etc/systemd/system
      vars:
        owner: labdao
        ipfs_connect: /ip4/127.0.0.1/tcp/5001
        receptor_url: "http://ip-172-31-82-127.ec2.internal:8080/judge"
      notify:
        - Restart Bacalhau

    # Runs before the notified handlers, so systemd has re-read the unit
    # file by the time "Restart Bacalhau" fires.
    - name: Systemd Daemon Reload
      become: true
      ansible.builtin.systemd:
        daemon_reload: true

  handlers:
    - name: Restart Bacalhau
      become: true
      ansible.builtin.service:
        name: requester
        state: restarted
        enabled: true
44 changes: 44 additions & 0 deletions infrastructure/terraform/plex.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,50 @@ resource "aws_instance" "plex_compute_prod" {
}
}

# Compute-only Bacalhau instances, one per key in the for_each set.
resource "aws_instance" "plex_compute_only" {
  for_each = toset(["compute_only_1"])

  ami               = "ami-053b0d53c279acc90"
  instance_type     = "g5.2xlarge"
  key_name          = var.key_main
  availability_zone = var.availability_zones[0]

  vpc_security_group_ids = [
    aws_security_group.plex.id,
    aws_security_group.internal.id,
  ]

  # Type and Env are presumably what the Ansible host pattern
  # tag_Type_compute_only:&tag_Env_prod selects on - verify against the
  # inventory configuration.
  tags = {
    Name        = "plex-compute-only-${each.key}"
    Env         = "prod"
    InstanceKey = each.key
    Type        = "compute_only"
  }

  # Root volume size is given in GiB.
  root_block_device {
    volume_size = 1000

    tags = {
      Name = "plex-prod-${each.key}"
    }
  }
}

# Single Bacalhau requester instance.
resource "aws_instance" "plex_requester" {
  ami               = "ami-053b0d53c279acc90"
  instance_type     = "m5.large"
  key_name          = var.key_main
  availability_zone = var.availability_zones[0]

  vpc_security_group_ids = [
    aws_security_group.plex.id,
    aws_security_group.internal.id,
  ]

  # Type is presumably what the Ansible host pattern tag_Type_requester
  # selects on - verify against the inventory configuration.
  tags = {
    Name = "plex-requester-prod"
    Env  = "prod"
    Type = "requester"
  }

  # Root volume size is given in GiB.
  root_block_device {
    volume_size = 10
  }
}

resource "aws_eip" "plex_prod" {
instance = aws_instance.plex_compute_prod["compute1"].id
Expand Down

0 comments on commit c001733

Please sign in to comment.