Skip to content

Commit

Permalink
Monitor: Restart WSGI if the Scheduler is stalled & then restart all
Browse files Browse the repository at this point in the history
monitoring tasks
  • Loading branch information
flavour committed Feb 14, 2020
1 parent 40af104 commit 7333c7c
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 13 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
b'7a63bf3c4' (2020-02-14 10:23:51)
b'40af104dd' (2020-02-14 11:43:58)
6 changes: 5 additions & 1 deletion modules/s3/s3task.py
Expand Up @@ -73,7 +73,11 @@ def __init__(self):
# Warning should already have been given by eden_update_check.py
self.scheduler = None
else:
self.scheduler = Scheduler(current.db, tasks, migrate=migrate)
self.scheduler = Scheduler(current.db,
tasks,
migrate = migrate,
#use_spawn = True # Possible subprocess method with Py3
)

# -------------------------------------------------------------------------
def configure_tasktable_crud(self,
Expand Down
43 changes: 42 additions & 1 deletion modules/s3db/setup.py
Expand Up @@ -43,6 +43,7 @@
#"setup_DeploymentRepresent",
#"setup_MonitorTaskRepresent",
"setup_monitor_run_task",
"setup_monitor_task_restart",
"setup_monitor_check_email_reply",
"setup_instance_settings_read",
#"setup_write_playbook",
Expand Down Expand Up @@ -2877,6 +2878,46 @@ def setup_monitor_task_disable_interactive(r, **attr):
current.session.confirmation = result
redirect(URL(f = "monitor_task"))

# =============================================================================
def setup_monitor_task_restart():
    """
        Re-create the Scheduler entries for all enabled Monitor Tasks

        Deletes every scheduler_task record whose function is
        "setup_monitor_run_task", then schedules each enabled, non-deleted
        Monitor Task that belongs to an enabled, non-deleted Monitor Server.

        CLI API for shell scripts & to be called by S3Method

        @returns: a confirmation message (string)
    """

    db = current.db
    s3db = current.s3db
    function_name = "setup_monitor_run_task"

    # Remove whatever is currently queued in the Scheduler for this function
    scheduler_tasks = s3db.scheduler_task
    db(scheduler_tasks.function_name == function_name).delete()

    # Collect the Servers which have monitoring enabled
    mstable = s3db.setup_monitor_server
    enabled_servers = (mstable.enabled == True) & \
                      (mstable.deleted == False)
    server_ids = [row.server_id
                  for row in db(enabled_servers).select(mstable.server_id)]

    # Find the enabled Tasks on those Servers
    mttable = s3db.setup_monitor_task
    enabled_tasks = (mttable.server_id.belongs(server_ids)) & \
                    (mttable.enabled == True) & \
                    (mttable.deleted == False)
    rows = db(enabled_tasks).select(mttable.id,
                                    mttable.period,
                                    )

    # Queue each Task again, using its own period
    schedule = current.s3task.schedule_task
    for row in rows:
        schedule(function_name,
                 args = [row.id],
                 period = row.period, # seconds
                 timeout = 300, # seconds
                 repeats = 0 # unlimited
                 )

    return "Monitor Tasks restarted"

# =============================================================================
def setup_monitor_task_run(r, **attr):
"""
Expand Down Expand Up @@ -3691,7 +3732,7 @@ def setup_setting_apply(setting_id):
#"become_user": "web2py",
},
{"name": "Restart WebServer",
# We don't want to restart the UWSGI process running the Task until after the Task has completed
# We don't want to restart the WSGI process running the Task until after the Task has completed
#"service": {"name": service_name,
# "state": "restarted",
# },
Expand Down
50 changes: 42 additions & 8 deletions modules/templates/default/monitor.py
Expand Up @@ -488,22 +488,44 @@ def scheduler(task_id, run_id):
earliest = current.request.utcnow - datetime.timedelta(seconds = 900) # 15 minutes

if server.host_ip == "127.0.0.1":
# This doesn't make much sense as a check, since this won't run if the scheduler has died!
# This shouldn't make much sense as a check, since this won't run if the scheduler has died
# - however in practice, it can actually provide a useful warning!

wtable = s3db.scheduler_worker
worker = db(wtable.status == "ACTIVE").select(wtable.last_heartbeat,
limitby = (0, 1)
).first()

error = None
if worker is None:
return {"result": "Warning: Scheduler not ACTIVE",
error = "Warning: Scheduler not ACTIVE"

elif worker.last_heartbeat < earliest:
error = "Warning: Scheduler stalled since %s" % worker.last_heartbeat.strftime("%H:%M %a %d %b")

if error:
appname = options_get("appname", "eden")
instance = options_get("instance", "prod")

# Restart uwsgi
error += "\n\nAttempting to restart:\n"
# Note this needs to actually run after last task as it kills us ;)
command = 'echo "sudo service uwsgi-%s restart" | at now + 1 minutes' % instance
output = subprocess.check_output(command,
stderr = subprocess.STDOUT,
shell = True)
error += output
# Restart Monitoring Scripts
command = 'echo "cd /home/%s;python web2py.py --no-banner -S %s -M -R applications/%s/static/scripts/tools/restart_monitor_tasks.py" | at now + 5 minutes' % \
(instance, appname, appname)
output = subprocess.check_output(command,
stderr = subprocess.STDOUT,
shell = True)
error += output
return {"result": error,
"status": 3,
}

if worker.last_heartbeat < earliest:
return {"result": "Warning: Scheduler stalled since:\n\n%s" % worker.last_heartbeat,
"status": 3,
}
return {"result": "OK",
"status": 1,
}
Expand All @@ -521,12 +543,24 @@ def scheduler(task_id, run_id):
(instance, appname, appname, earliest)
stdin, stdout, stderr = ssh.exec_command(command)
outlines = stdout.readlines()
ssh.close()

if outlines:
return {"result": outlines[0],
error = outlines[0]
# Restart uwsgi
error += "\n\nAttempting to restart:\n"
command = "sudo service uwsgi-%s restart" % instance
stdin, stdout, stderr = ssh.exec_command(command)
outlines = stdout.readlines()
if outlines:
error += "\n".join(outlines)
else:
# Doesn't usually give any output
error += "OK"
ssh.close()
return {"result": error,
"status": 3,
}
ssh.close()

return {"result": "OK",
"status": 1,
Expand Down
4 changes: 2 additions & 2 deletions static/scripts/tools/check_scheduler.py
Expand Up @@ -4,7 +4,7 @@
# Script to check the status of the Scheduler Worker
#
# Run as:
# python web2py.py -S eden -M -R applications/eden/static/scripts/tools/check_scheduler.py -A earliest
# python web2py.py --no-banner -S eden -M -R applications/eden/static/scripts/tools/check_scheduler.py -A earliest

import datetime
import sys
Expand All @@ -26,4 +26,4 @@
print("Warning: Scheduler not ACTIVE")

elif worker.last_heartbeat < datetime.datetime.fromisoformat(earliest):
print("Warning: Scheduler stalled since:\n\n%s" % worker.last_heartbeat)
print("Warning: Scheduler stalled since %s" % worker.last_heartbeat.strftime("%H:%M %a %d %b"))
10 changes: 10 additions & 0 deletions static/scripts/tools/restart_monitor_tasks.py
@@ -0,0 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Script to reset all Monitor Tasks on all Enabled Servers
#
# Run as:
# python web2py.py --no-banner -S eden -M -R applications/eden/static/scripts/tools/restart_monitor_tasks.py

# Re-schedule the Monitor Tasks and report the outcome on stdout
# (s3db is presumably placed in scope by web2py's -M option, as in the usage line above - TODO confirm)
result = s3db.setup_monitor_task_restart()
print(result)

0 comments on commit 7333c7c

Please sign in to comment.