Skip to content

Commit

Permalink
GH-535 ansible idempotency (#540)
Browse files Browse the repository at this point in the history
  • Loading branch information
alabdao committed Jul 27, 2023
1 parent e233d68 commit e7e8368
Show file tree
Hide file tree
Showing 20 changed files with 301 additions and 175 deletions.
2 changes: 1 addition & 1 deletion infrastructure/ansible/files/ipfs.service
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Description=IPFS daemon
User=ubuntu
Group=ubuntu
ExecStart=ipfs daemon --routing=dhtclient
Environment="IPFS_PATH=/opt/local/ipfs"
Environment="IPFS_PATH={{ ipfs_path }}"

[Install]
WantedBy=multi-user.target
57 changes: 0 additions & 57 deletions infrastructure/ansible/install_ipfs_tasks.yaml

This file was deleted.

3 changes: 3 additions & 0 deletions infrastructure/ansible/inventory.aws_ec2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ plugin: amazon.aws.aws_ec2
regions:
- us-east-1

filters:
instance-state-name: running

keyed_groups:
- prefix: tag
key: tags
2 changes: 1 addition & 1 deletion infrastructure/ansible/jupyter_deploy_plex.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

- name: Install Plex
ansible.builtin.include_tasks:
file: install_plex_tasks.yaml
file: tasks/install_plex_tasks.yaml

- name: Add plex python wrapper to the conda path
become: yes
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ansible/jupyter_set_users.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
loop: "{{ admins | default([])}}"

- name: Create teams
include_tasks: jupyter_team_setup_tasks.yaml
include_tasks: tasks/jupyter_team_setup_tasks.yaml
loop: "{{ teams | default([])}}"

- name: Reload the tljh configuration
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ansible/provision_canary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

- name: Install Plex
ansible.builtin.include_tasks:
file: install_plex_tasks.yaml
file: tasks/install_plex_tasks.yaml
vars:
plex_dir: "{{ binary_dir }}"

Expand Down
6 changes: 2 additions & 4 deletions infrastructure/ansible/provision_compute_instance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
environment:
IPFS_PATH: "{{ ipfs_path }}"
tasks:
# Must provide limit flag to ensure running against current environment
# Must provide limit flag to ensure running against correct environment
- fail:
msg: "you must use -l or --limit"
when: ansible_limit is not defined
Expand Down Expand Up @@ -159,7 +159,7 @@
- name: Install the IPFS systemd unit
become: yes
ansible.builtin.copy:
ansible.builtin.template:
src: files/ipfs.service
dest: /etc/systemd/system
notify:
Expand Down Expand Up @@ -209,5 +209,3 @@
name: bacalhau
state: restarted
enabled: true


108 changes: 27 additions & 81 deletions infrastructure/ansible/provision_compute_only.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
hosts: tag_Type_compute_only
vars:
nvidia_distribution: ubuntu2004
ipfs_path: /opt/local/ipfs
ipfs_version: "0.18.0"
ipfs_path: "/opt/ipfs"
gpu: true
environment:
IPFS_PATH: "{{ ipfs_path }}"
go_version: 1.20.3
bacalhau_version: "v1.0.3"
bacalhau_node_type: "compute"
ipfs_connect: /ip4/127.0.0.1/tcp/5001
owner: labdao
tasks:
# Must provide limit flag to ensure running against current environment
# Must provide limit flag to ensure running against correct environment
- fail:
msg: "you must use -l or --limit"
when: ansible_limit is not defined
Expand All @@ -25,91 +29,33 @@
- ca-certificates
- gnupg
- lsb-release
- tar
- gzip
- unzip
- git

# Docker
- name: Install Docker
ansible.builtin.import_tasks: install_docker_tasks.yaml
ansible.builtin.include_tasks: tasks/install_docker_tasks.yaml

# Nvidia
- name: Add Nvidia Keyring
become: yes
ansible.builtin.apt:
deb: https://developer.download.nvidia.com/compute/cuda/repos/{{ nvidia_distribution }}/x86_64/cuda-keyring_1.1-1_all.deb

- name: Get Nvidia Container Tookit GPG key
become: yes
ansible.builtin.apt_key:
url: https://nvidia.github.io/libnvidia-container/gpgkey

- name: Add Nvidia Container Tookit Repository
become: yes
ansible.builtin.apt_repository:
repo: deb https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/$(ARCH) /
state: present

- name: Install required system packages for gpu build
become: yes
ansible.builtin.apt:
pkg:
- cuda-drivers
state: latest
update_cache: true

- name: Install Nvidia Container Tookit
become: yes
ansible.builtin.apt:
pkg:
- nvidia-docker2
notify:
- Restart docker
# GPU
- name: Install tools and binaries for GPU support
ansible.builtin.include_tasks: tasks/install_gpu_tasks.yaml
when: gpu

- name: Ensure Nvidia persitence daemon is started
ansible.builtin.systemd:
name: nvidia-persistenced

- name: Install Golag
become: yes
vars:
go_version: 1.20.3
block:
- name: Download Go binary
ansible.builtin.get_url:
url: https://go.dev/dl/go{{ go_version }}.linux-amd64.tar.gz
dest: /tmp/go-binary.tar.gz
- name: Unzip Go binary
ansible.builtin.command:
cmd: tar -C /usr/local -xzf /tmp/go-binary.tar.gz
# Ensure handlers are restarted before continuing
- name: flush handlers
ansible.builtin.meta: flush_handlers

# Required docker since IPFS runs in container
- name: Install IPFS
ansible.builtin.import_tasks: install_ipfs_tasks.yaml

- name: Add the IPFS node to the swarm
ansible.builtin.command:
cmd: ipfs swarm connect {{ requester_ipfs_peer }}
ansible.builtin.include_tasks: tasks/install_ipfs_tasks.yaml
tags: ipfs_install

- name: Install Bacalhau
ansible.builtin.shell:
cmd: curl -sL https://get.bacalhau.org/install.sh | bash

- name: Bump System Resources
become: yes
ansible.builtin.command: sysctl -w net.core.rmem_max=2500000

- name: Install the Bacalhau systemd unit
become: yes
ansible.builtin.template:
src: files/compute.service
dest: /etc/systemd/system
vars:
owner: labdao
ipfs_connect: /ip4/127.0.0.1/tcp/5001
notify:
- Restart Bacalhau

- name: Systemd Daemon Reload
become: yes
ansible.builtin.systemd:
daemon_reload: true
# Run Bacalhau agent
# A tag placed on a dynamic include_tasks statement selects only the include
# itself; `apply` hands the same tag to every included task so that a run
# limited with `--tags bacalhau` still executes them.
- name: Run Bacalhau container
  ansible.builtin.include_tasks:
    file: tasks/install_bacalhau_tasks.yaml
    apply:
      tags: bacalhau
  tags: bacalhau

handlers:
- name: Restart docker
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ansible/provision_jupyter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
letsencrypt_email: "josh@labdao.xyz"
letsencrypt_domain: "jupyter.labdao.xyz"
tasks:
# Must provide limit flag to ensure running against current environment
# Must provide limit flag to ensure running against correct environment
- fail:
msg: "you must use -l or --limit"
when: ansible_limit is not defined
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ansible/provision_receptor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
plex_dir: /opt/local/plex
receptor_dir: /opt/local/receptor
tasks:
# Must provide limit flag to ensure running against current environment
# Must provide limit flag to ensure running against correct environment
- fail:
msg: "you must use -l or --limit"
when: ansible_limit is not defined
Expand Down
29 changes: 10 additions & 19 deletions infrastructure/ansible/provision_requester.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
remote_user: ubuntu
hosts: tag_Type_requester
vars:
ipfs_path: /opt/local/ipfs
ipfs_version: "0.18.0"
ipfs_path: "/opt/ipfs"
bacalhau_version: "v1.0.3"
bacalhau_node_type: "requester"
ipfs_connect: /ip4/127.0.0.1/tcp/5001
owner: labdao
tasks:
# Must provide limit flag to ensure running against correct environment
- fail:
Expand All @@ -19,26 +24,12 @@
update_cache: true

- name: Install IPFS
ansible.builtin.import_tasks: install_ipfs_tasks.yaml
ansible.builtin.include_tasks: tasks/install_ipfs_tasks.yaml

# Run Bacalhau agent
- name: Install Bacalhau
ansible.builtin.shell:
cmd: curl -sL https://get.bacalhau.org/install.sh | bash

- name: Bump System Resources
become: yes
ansible.builtin.command: sysctl -w net.core.rmem_max=2500000

- name: Install the Bacalhau systemd unit
become: yes
ansible.builtin.template:
src: files/requester.service
dest: /etc/systemd/system
vars:
owner: labdao
ipfs_connect: /ip4/127.0.0.1/tcp/5001
notify:
- Restart Bacalhau
ansible.builtin.include_tasks: tasks/install_bacalhau_tasks.yaml
tags: bacalhau

- name: Systemd Daemon Reload
become: yes
Expand Down
46 changes: 46 additions & 0 deletions infrastructure/ansible/tasks/install_bacalhau_tasks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Probe any already-installed Bacalhau binary for its version. The result is
# only registered for later use: errors are ignored, the task never reports a
# change, its output is kept out of the logs, and it also runs in check mode.
- name: Check bacalhau version
  ansible.builtin.command:
    cmd: /usr/local/bin/bacalhau version
  register: existing_bacalhau_version
  changed_when: false
  ignore_errors: true
  check_mode: false
  no_log: true

# Show the installed version next to the requested one. The guard reads the
# registered result of the version check (existing_bacalhau_version) and only
# lets the task run when a "Server Version: " line was reported, so the split
# in the message always has a second element.
- name: Print installed bacalhau version
  ansible.builtin.debug:
    msg: "Installed bacalhau version: {{ existing_bacalhau_version.stdout.split('Server Version: ')[1] }} vs {{ bacalhau_version }}"
  when: "'Server Version: ' in (existing_bacalhau_version.stdout | default(''))"

# Compare the requested version of bacalhau with the version that is already
# installed, if any. Install when no "Server Version: " line was reported
# (empty or unexpected output) or when the reported version differs from
# bacalhau_version.
- name: Install or update bacalhau
  when: >-
    ('Server Version: ' not in (existing_bacalhau_version.stdout | default('')))
    or (existing_bacalhau_version.stdout.split('Server Version: ')[1] != bacalhau_version)
  block:
    - name: Download Bacalhau binary
      become: true
      ansible.builtin.unarchive:
        src: "https://github.com/bacalhau-project/bacalhau/releases/download/{{ bacalhau_version }}/bacalhau_{{ bacalhau_version }}_linux_amd64.tar.gz"
        dest: /usr/local/bin
        # Letting the `when` condition control whether installing/upgrading is
        # needed, so `creates` stays disabled:
        # creates: /usr/local/bin/bacalhau
        # The archive is fetched by the managed host itself.
        remote_src: true
      register: bacalhau_unarchive
      # Retry the download/extract a few times before giving up.
      retries: 5
      delay: 3
      until: bacalhau_unarchive is succeeded

# Render the unit template that matches bacalhau_node_type into the systemd
# unit directory and notify the "Restart Bacalhau" handler.
- name: Install the Bacalhau systemd unit
  become: true
  notify: Restart Bacalhau
  ansible.builtin.template:
    dest: /etc/systemd/system
    src: "files/{{ bacalhau_node_type }}.service"

# Keep the service named after bacalhau_node_type started and enabled.
- name: Ensure Bacalhau is running
  become: true
  ansible.builtin.service:
    enabled: true
    state: started
    name: "{{ bacalhau_node_type }}"
enabled: true

0 comments on commit e7e8368

Please sign in to comment.