Skip to content

Commit

Permalink
Merge pull request #3242 from ingvagabund/create-node-upgrade-role
Browse files Browse the repository at this point in the history
Move current node upgrade tasks under openshift_node_upgrade role
  • Loading branch information
sdodson committed Feb 2, 2017
2 parents c948081 + 43ecd7b commit 4dfe8c7
Show file tree
Hide file tree
Showing 16 changed files with 531 additions and 80 deletions.
87 changes: 7 additions & 80 deletions playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@
# and is evaluated early. Values such as "20%" can also be used.
serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
any_errors_fatal: true
roles:
- openshift_facts
- docker
handlers:
- include: ../../../../roles/openshift_node/handlers/main.yml
static: yes

pre_tasks:
# TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
# or docker actually needs an upgrade before proceeding. Perhaps best to save this until
Expand All @@ -21,17 +16,14 @@
register: node_output
delegate_to: "{{ groups.oo_first_master.0 }}"
changed_when: false
when: inventory_hostname in groups.oo_nodes_to_upgrade

- set_fact:
was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade

- name: Mark unschedulable if host is a node
command: >
{{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false
delegate_to: "{{ groups.oo_first_master.0 }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade
# NOTE: There is a transient "object has been modified" error here, allow a couple
# retries for a more reliable upgrade.
register: node_unsched
Expand All @@ -43,83 +35,18 @@
command: >
{{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data
delegate_to: "{{ groups.oo_first_master.0 }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade

tasks:

- include: docker/upgrade.yml
vars:
# We will restart Docker ourselves after everything is ready:
skip_docker_restart: True
when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool

- include: "{{ node_config_hook }}"
when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_upgrade

- include: rpm_upgrade.yml
vars:
component: "node"
openshift_version: "{{ openshift_pkg_version | default('') }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool

- name: Remove obsolete docker-sdn-ovs.conf
file: path=/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf state=absent
when: (deployment_type == 'openshift-enterprise' and openshift_release | version_compare('3.4', '>=')) or (deployment_type == 'origin' and openshift_release | version_compare('1.4', '>='))

- include: containerized_node_upgrade.yml
when: inventory_hostname in groups.oo_nodes_to_upgrade and openshift.common.is_containerized | bool

- name: Ensure containerized services stopped before Docker restart
service: name={{ item }} state=stopped
with_items:
- etcd_container
- openvswitch
- "{{ openshift.common.service_type }}-master"
- "{{ openshift.common.service_type }}-master-api"
- "{{ openshift.common.service_type }}-master-controllers"
- "{{ openshift.common.service_type }}-node"
failed_when: false
when: openshift.common.is_containerized | bool

- name: Upgrade openvswitch
package:
name: openvswitch
state: latest
register: ovs_pkg
when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool

- name: Restart openvswitch
systemd:
name: openvswitch
state: restarted
when:
- inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
- ovs_pkg | changed

# Mandatory Docker restart, ensure all containerized services are running:
- include: docker/restart.yml

- name: Restart rpm node service
service: name="{{ openshift.common.service_type }}-node" state=restarted
when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool

- name: Wait for node to be ready
command: >
{{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.common.hostname | lower }} --no-headers
register: node_output
delegate_to: "{{ groups.oo_first_master.0 }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade
until: "{{ node_output.stdout.split()[1].startswith('Ready')}}"
# Give the node two minutes to come back online. Note that we pre-pull images now
# so containerized services should restart quickly as well.
retries: 24
delay: 5
roles:
- openshift_facts
- docker
- openshift_node_upgrade

post_tasks:
- name: Set node schedulability
command: >
{{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true
delegate_to: "{{ groups.oo_first_master.0 }}"
when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool
when: was_schedulable | bool
register: node_sched
until: node_sched.rc == 0
retries: 3
Expand Down
108 changes: 108 additions & 0 deletions roles/openshift_node_upgrade/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
OpenShift/Atomic Enterprise Node upgrade
=========

Role responsible for upgrading a single node.
It is expected that the node is functional and already part of an OpenShift cluster.

Requirements
------------

TODO

Role Variables
--------------
From this role:

| Name | Default value | |
|--------------------------------|-----------------------|--------------------------------------------------------|
| deployment_type | | Inventory var |
| docker_upgrade_nuke_images | | Optional inventory var |
| docker_version | | Optional inventory var |
| l_docker_upgrade | | |
| node_config_hook | | |
| openshift.docker.gte_1_10 | | |
| openshift_image_tag | | Set by openshift_version role |
| openshift_pkg_version | | Set by openshift_version role |
| openshift_release | | Set by openshift_version role |
| skip_docker_restart | | |
| openshift_cloudprovider_kind | | |

From openshift.common:

| Name                                | Default Value | |
|-------------------------------------|---------------|---|
| openshift.common.config_base        |               |   |
| openshift.common.data_dir           |               |   |
| openshift.common.hostname           |               |   |
| openshift.common.http_proxy         |               |   |
| openshift.common.is_atomic          |               |   |
| openshift.common.is_containerized   |               |   |
| openshift.common.portal_net         |               |   |
| openshift.common.service_type       |               |   |
| openshift.common.use_openshift_sdn  |               |   |

From openshift.master:

| Name                      | Default Value | |
|---------------------------|---------------|---|
| openshift.master.api_port |               |   |

From openshift.node:

| Name                       | Default Value | |
|----------------------------|---------------|---|
| openshift.node.debug_level |               |   |
| openshift.node.node_image  |               |   |
| openshift.node.ovs_image   |               |   |


Dependencies
------------
openshift_common

TODO

Example Playbook
----------------

An example of how to use this role from a playbook:

```yaml
---
- name: Upgrade nodes
  hosts: oo_nodes_to_upgrade
  serial: 1
  any_errors_fatal: true

  pre_tasks:
  - name: Mark unschedulable
    command: >
      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false
    delegate_to: "{{ groups.oo_first_master.0 }}"

  - name: Drain Node for Kubelet upgrade
    command: >
      {{ hostvars[groups.oo_first_master.0].openshift.common.admin_binary }} drain {{ openshift.node.nodename | lower }} --force --delete-local-data
    delegate_to: "{{ groups.oo_first_master.0 }}"

  roles:
  - openshift_facts
  - docker
  - openshift_node_upgrade

  post_tasks:
  - name: Set node schedulability
    command: >
      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true
    delegate_to: "{{ groups.oo_first_master.0 }}"
```

License
-------

Apache License, Version 2.0

Author Information
------------------

TODO
25 changes: 25 additions & 0 deletions roles/openshift_node_upgrade/files/nuke_images.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Wipe the local Docker state: stop all running containers, then remove
# every container (with volumes) and every image on this host.

# Stop any running containers
running=$(docker ps -q)
if [ -n "$running" ]
then
    docker stop $running
fi

# Delete all containers
all_containers=$(docker ps -a -q)
if [ -n "$all_containers" ]
then
    docker rm -f -v $all_containers
fi

# Delete all images (forcefully)
image_ids=$(docker images -aq)
if [ -n "$image_ids" ]
then
    # Some layers are deleted recursively and are no longer present
    # when docker goes to remove them:
    docker rmi -f $(docker images -aq) || true
fi

14 changes: 14 additions & 0 deletions roles/openshift_node_upgrade/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Handlers for the openshift_node_upgrade role.
# Setting skip_node_svc_handlers turns every handler here into a no-op
# (used when the caller restarts the services itself, e.g. after a
# mandatory Docker restart).

- name: restart openvswitch
  systemd:
    name: openvswitch
    state: restarted
  when:
    - not (skip_node_svc_handlers | default(False) | bool)
    - not (ovs_service_status_changed | default(false) | bool)
    - openshift.common.use_openshift_sdn | bool
  notify:
    - restart openvswitch pause

# Give containerized openvswitch a moment to settle before anything
# depending on it is restarted.
- name: restart openvswitch pause
  pause:
    seconds: 15
  when:
    - not (skip_node_svc_handlers | default(False) | bool)
    - openshift.common.is_containerized | bool

- name: restart node
  systemd:
    name: "{{ openshift.common.service_type }}-node"
    state: restarted
  when:
    - not (skip_node_svc_handlers | default(False) | bool)
    - not (node_service_status_changed | default(false) | bool)
13 changes: 13 additions & 0 deletions roles/openshift_node_upgrade/meta/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Ansible Galaxy metadata for the openshift_node_upgrade role.
galaxy_info:
  # TODO(review): "your name" is the unedited ansible-galaxy init placeholder —
  # replace with the actual maintainer.
  author: your name
  description: OpenShift Node upgrade
  company: Red Hat, Inc.
  license: Apache License, Version 2.0
  min_ansible_version: 2.1
  # Supported platform: Enterprise Linux 7.
  platforms:
  - name: EL
    versions:
    - 7
# Pulled in automatically before this role runs.
dependencies:
- role: openshift_common
14 changes: 14 additions & 0 deletions roles/openshift_node_upgrade/tasks/containerized_node_upgrade.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# This is a hack to allow us to use systemd_units.yml, but skip the handlers which
# restart services. We will unconditionally restart all containerized services
# because we have to unconditionally restart Docker:
- name: Disable node service restart handlers
  set_fact:
    skip_node_svc_handlers: true

- name: Update systemd units
  include: systemd_units.yml

# This is a no-op because of skip_node_svc_handlers, but lets us trigger it before end of
# play when the node has already been marked schedulable again. (this would look strange
# in logs otherwise)
- meta: flush_handlers
33 changes: 33 additions & 0 deletions roles/openshift_node_upgrade/tasks/docker/restart.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# Restart Docker and bring the containerized OpenShift services back up.
# input variables:
# - openshift.common.service_type
# - openshift.common.is_containerized
# - openshift.common.hostname
# - openshift.master.api_port

- name: Restart docker
  service:
    name: docker
    state: restarted

- name: Update docker facts
  openshift_facts:
    role: docker

# failed_when: false — only a subset of these units exists on any given
# host, so missing-unit failures are expected and ignored.
- name: Restart containerized services
  service:
    name: "{{ item }}"
    state: started
  with_items:
    - etcd_container
    - openvswitch
    - "{{ openshift.common.service_type }}-master"
    - "{{ openshift.common.service_type }}-master-api"
    - "{{ openshift.common.service_type }}-master-controllers"
    - "{{ openshift.common.service_type }}-node"
  failed_when: false
  when: openshift.common.is_containerized | bool

- name: Wait for master API to come back online
  wait_for:
    host: "{{ openshift.common.hostname }}"
    state: started
    delay: 10
    port: "{{ openshift.master.api_port }}"
  when: inventory_hostname in groups.oo_masters_to_config
49 changes: 49 additions & 0 deletions roles/openshift_node_upgrade/tasks/docker/upgrade.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# Upgrade the docker package on this host, optionally nuking all local
# containers and images first so the new daemon starts from a clean slate.
# input variables:
# - openshift.common.service_type
# - openshift.common.is_containerized
# - docker_upgrade_nuke_images
# - docker_version
# - skip_docker_restart

# We need docker service up to remove all the images, but these services will keep
# trying to re-start and thus re-pull the images we're trying to delete.
- name: Stop containerized services
  service:
    name: "{{ item }}"
    state: stopped
  with_items:
    - "{{ openshift.common.service_type }}-master"
    - "{{ openshift.common.service_type }}-master-api"
    - "{{ openshift.common.service_type }}-master-controllers"
    - "{{ openshift.common.service_type }}-node"
    - etcd_container
    - openvswitch
  failed_when: false
  when: openshift.common.is_containerized | bool

- name: Check Docker image count
  shell: "docker images -aq | wc -l"
  register: docker_image_count

- debug: var=docker_image_count.stdout

# TODO(jchaloup): put all docker_upgrade_nuke_images into a block with only one condition
- name: Remove all containers and images
  script: nuke_images.sh
  register: nuke_images_result
  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool

# Re-count after the nuke so the effect is visible in the logs.
- name: Check Docker image count
  shell: "docker images -aq | wc -l"
  register: docker_image_count
  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool

- debug: var=docker_image_count.stdout
  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool

- name: Stop docker
  service:
    name: docker
    state: stopped

- name: Upgrade Docker
  package:
    name: "docker{{ '-' + docker_version }}"
    state: present

# Restart docker and dependent containerized services unless the caller
# opts out (skip_docker_restart) to handle the restart itself.
- include: restart.yml
  when: not skip_docker_restart | default(False) | bool

0 comments on commit 4dfe8c7

Please sign in to comment.