Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions .github/workflows/health_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ on:
# Every 5 minutes
- cron: '*/5 * * * *'

env:
health_check_file: health_check.json
health_check_blocks_file: health_check_blocks.json

jobs:
health_check:
runs-on: ubuntu-latest
Expand All @@ -20,11 +24,47 @@ jobs:
steps:
- uses: actions/checkout@v4

- uses: ./.github/actions/run-docker
- name: Run health check
id: health_check
continue-on-error: true
uses: ./.github/actions/run-docker
with:
target: development
version: local
run: ./scripts/health_check.py --env ${{ matrix.environment }} --verbose
run: |
./scripts/health_check.py \
--env ${{ matrix.environment }} \
--verbose \
--output ${{ env.health_check_file }}

- name: Set message blocks
  id: blocks
  # Only build the Slack blocks when the health check step itself failed.
  if: steps.health_check.outcome == 'failure'
  shell: bash
  run: |
    # Create the message blocks file
    ./scripts/health_check_blocks.py \
      --input "${{ env.health_check_file }}" \
      --output "${{ env.health_check_blocks_file }}"
    # Multiline output needs to use a delimiter to be passed to
    # the GITHUB_OUTPUT file.
    blocks=$(cat "${{ env.health_check_blocks_file }}")
    # Quote the value: an unquoted expansion is word-split and glob-expanded
    # by the shell, which would alter whitespace (and any `*`) inside the JSON.
    printf 'blocks<<EOF\n%s\nEOF\n' "$blocks" >> "$GITHUB_OUTPUT"
    cat "$GITHUB_OUTPUT"

- uses: mozilla/addons/.github/actions/slack@main
  # Notify only for cron-triggered runs whose health check step failed.
  # GitHub reports cron triggers with the event name `schedule`; the previous
  # value `scheduled` never matched, so this step could never run.
  if: |
    github.event_name == 'schedule' &&
    steps.health_check.outcome == 'failure'
  with:
    slack_token: ${{ secrets.SLACK_TOKEN }}
    payload: |
      {
        "channel": "${{ secrets.SLACK_ADDONS_PRODUCTION_CHANNEL }}",
        "blocks": ${{ toJson(steps.blocks.outputs.blocks) }},
        "text": "Health check failed",
        # Don't unfurl links or media
        "unfurl_links": false,
        "unfurl_media": false,
      }

45 changes: 45 additions & 0 deletions .github/workflows/health_check_completed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Sends a Slack workflow notification when a cron-triggered run of the
# "Health Check" workflow completes with a failure.
name: Health Check Completed

on:
  workflow_run:
    workflows: Health Check
    types: [completed]

jobs:
  context:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Context
        id: context
        uses: ./.github/actions/context

  health_check_failure_notification:
    # `workflow_run.event` holds the name of the event that triggered the
    # upstream run. For cron triggers that name is `schedule`; the previous
    # value `scheduled` never matched, so this job was always skipped.
    if: |
      github.event.workflow_run.event == 'schedule' &&
      github.event.workflow_run.conclusion == 'failure'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Context
        id: context
        uses: ./.github/actions/context

      - uses: mozilla/addons/.github/actions/slack-workflow-notification@main
        with:
          slack_token: ${{ secrets.SLACK_TOKEN }}
          slack_channel: ${{ secrets.SLACK_ADDONS_PRODUCTION_CHANNEL }}
          emoji: ':x:'
          actor: ${{ vars.slack_actor }}
          conclusion: ${{ github.event.workflow_run.conclusion }}
          workflow_id: ${{ github.event.workflow_run.id }}
          workflow_url: ${{ github.event.workflow_run.url }}
          event: ${{ github.event.workflow_run.event }}
          env: ci
          ref: ''
          ref_link: ''


66 changes: 39 additions & 27 deletions scripts/health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, env: ENV_ENUM, verbose: bool = False):
self.environment = ENV_ENUM[env]
self.verbose = verbose

def _fetch(self, path: str) -> dict[str, str] | None:
def _fetch(self, path: str):
url = f'{self.environment.value}/{path}'
if self.verbose:
print(f'Requesting {url} for {self.environment.name}')
Expand All @@ -47,10 +47,13 @@ def _fetch(self, path: str) -> dict[str, str] | None:
}
)

if self.verbose and data is not None:
if data is None:
return {}

if self.verbose:
print(json.dumps(data, indent=2))

return data
return {'url': url, 'data': data}

def version(self):
return self._fetch('__version__')
Expand All @@ -62,49 +65,58 @@ def monitors(self):
return self._fetch('services/__heartbeat__')


def main(env: ENV_ENUM, verbose: bool = False):
def main(env: ENV_ENUM, verbose: bool, retries: int = 0, attempt: int = 0):
fetcher = Fetcher(env, verbose)

version_data = fetcher.version()
heartbeat_data = fetcher.heartbeat()
monitors_data = fetcher.monitors()

if version_data is None:
raise ValueError('Error fetching version data')
combined_data = {
'heartbeat': heartbeat_data,
'monitors': monitors_data,
}

if heartbeat_data is None:
raise ValueError('Error fetching heartbeat data')
has_failures = any(
monitor['state'] is False
for data in combined_data.values()
for monitor in data.get('data', {}).values()
)

if monitors_data is None:
raise ValueError('Error fetching monitors data')
if has_failures and attempt < retries:
wait_for = 2**attempt
if verbose:
print(f'waiting for {wait_for} seconds')
time.sleep(wait_for)
return main(env, verbose, retries, attempt + 1)

combined_data = {**heartbeat_data, **monitors_data}
failing_monitors = [
name for name, monitor in combined_data.items() if monitor['state'] is False
]
results = {
'version': version_data,
'heartbeat': heartbeat_data,
'monitors': monitors_data,
}

if len(failing_monitors) > 0:
raise ValueError(f'Some monitors are failing {failing_monitors}')
return results, has_failures


if __name__ == '__main__':
args = argparse.ArgumentParser()
args.add_argument(
'--env', type=str, choices=list(ENV_ENUM.__members__.keys()), required=True
)
args.add_argument('--output', type=str)
args.add_argument('--verbose', action='store_true')
args.add_argument('--retries', type=int, default=3)
args = args.parse_args()

attempt = 1
data, has_failures = main(args.env, args.verbose, args.retries)

while attempt <= args.retries:
try:
main(args.env, args.verbose)
break
except Exception as e:
print(f'Error: {e}')
if attempt == args.retries:
raise
time.sleep(2**attempt)
attempt += 1
if args.output:
with open(args.output, 'w') as f:
json_data = json.dumps(data, indent=2)
f.write(json_data)
if args.verbose:
print(f'Health check data saved to {args.output}')

if has_failures:
raise ValueError(f'Health check failed: {data}')
141 changes: 141 additions & 0 deletions scripts/health_check_blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3

import argparse
import json


def format_monitors(data: dict, source: str):
    """Build a Slack ``rich_text`` block listing the failing monitors of a source.

    Args:
        data: Result for a single endpoint. The monitors are read from its
            ``'data'`` entry, a mapping of monitor name to a dict holding at
            least ``'state'`` and ``'status'``. A missing or empty ``'data'``
            entry is treated as "no monitors" instead of raising.
        source: Label of the endpoint; it is capitalized and used as the
            heading above the list of failures.

    Returns:
        A ``rich_text`` block (heading section plus bullet list with one item
        per failing monitor), or ``None`` when no monitor is failing.
    """
    # An endpoint whose fetch produced nothing has no 'data' key at all.
    monitors = data.get('data') or {}

    failures = [
        {
            'type': 'rich_text_section',
            'elements': [
                {
                    'type': 'text',
                    'text': f'{name}: ',
                    'style': {
                        'bold': True,
                    },
                },
                {
                    'type': 'text',
                    'text': f'{monitor["status"]}',
                },
            ],
        }
        for name, monitor in monitors.items()
        if not monitor['state']
    ]

    if not failures:
        return None

    return {
        'type': 'rich_text',
        'elements': [
            {
                'type': 'rich_text_section',
                'elements': [
                    {
                        'type': 'text',
                        'text': f'{source.capitalize()}:',
                        'style': {
                            'bold': True,
                        },
                    }
                ],
            },
            {
                'type': 'rich_text_list',
                'elements': failures,
                'style': 'bullet',
                'indent': 0,
                'border': 1,
            },
        ],
    }


def format_context(data: dict):
    """Build a Slack ``context`` block with version info and endpoint links.

    Args:
        data: Mapping of endpoint name to its result dict. Each result may
            carry a ``'url'`` and a ``'data'`` entry; the ``'version'``
            result's ``'data'`` supplies the version details.

    Returns:
        A ``context`` block whose elements are, in order: one ``mrkdwn``
        element per non-empty ``version``/``commit``/``build`` value, then one
        link per endpoint that has a URL.
    """
    version_data = (data.get('version') or {}).get('data') or {}
    version_elements = [
        {'type': 'mrkdwn', 'text': f'{key.capitalize()}: {value} |'}
        for key, value in version_data.items()
        if value and key in ('version', 'commit', 'build')
    ]
    # Results of failed fetches are empty dicts with no 'url'; skip them
    # rather than raising KeyError while building the alert.
    url_elements = [
        {'type': 'mrkdwn', 'text': f'<{result["url"]}|{name.capitalize()}> |'}
        for name, result in data.items()
        if result.get('url')
    ]
    return {'type': 'context', 'elements': version_elements + url_elements}


def format_header(emoji: str, text: str) -> dict:
    """Build the Slack ``rich_text`` header block for a health check alert.

    Args:
        emoji: Name of the emoji placed at the start of the header.
        text: Message shown after the bold "Health Check Alert: " prefix.

    Returns:
        A ``rich_text`` block with a single section holding the emoji, the
        bold prefix and ``text``, in that order.
    """
    return {
        'type': 'rich_text',
        'elements': [
            {
                'type': 'rich_text_section',
                'elements': [
                    {
                        'type': 'emoji',
                        'name': emoji,
                    },
                    {
                        'type': 'text',
                        'text': 'Health Check Alert: ',
                        'style': {'bold': True},
                    },
                    {
                        'type': 'text',
                        'text': text,
                    },
                ],
            }
        ],
    }


def create_blocks(health_data: dict):
    """Turn health check results into the list of Slack blocks for an alert.

    Only the ``'monitors'`` and ``'heartbeat'`` entries of ``health_data`` are
    inspected for failures, in the order they appear. When none of them
    reports a failing monitor an empty list is returned; otherwise the result
    is a header block, one block per failing source, and a context block.
    """
    monitored_sources = ('monitors', 'heartbeat')
    failure_blocks = []

    for source, result in health_data.items():
        if source not in monitored_sources:
            continue
        block = format_monitors(result, source)
        if block:
            failure_blocks.append(block)

    if failure_blocks:
        header = format_header('x', 'Issues Detected')
        footer = format_context(health_data)
        return [header, *failure_blocks, footer]

    return []


def main():
    """Command-line entry point: convert a health check JSON file to Slack blocks.

    Reads the JSON document given by ``--input``, builds the blocks with
    ``create_blocks`` and writes them as JSON to ``--output``. With
    ``--verbose`` a progress line is printed after each file operation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, required=True)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--verbose', action='store_true')
    options = parser.parse_args()

    def log(message):
        # Progress output is opt-in via --verbose.
        if options.verbose:
            print(message)

    with open(options.input) as source:
        health_data = json.load(source)
    log(f'Health data loaded from {options.input}')

    serialized = json.dumps(create_blocks(health_data))
    with open(options.output, 'w') as sink:
        sink.write(serialized)
    log(f'Blocks saved to {options.output}')


if __name__ == '__main__':
    main()
Loading
Loading