Skip to content

Commit

Permalink
Rough draft of status, fetchkill, and reconfigure-n. Many subsystems …
Browse files Browse the repository at this point in the history
…like logfetch and termination are changed because of status.
  • Loading branch information
timf committed Apr 5, 2011
1 parent b2ab08d commit 81a1c21
Show file tree
Hide file tree
Showing 23 changed files with 980 additions and 193 deletions.
4 changes: 2 additions & 2 deletions etc/epumgmt/events.conf
Expand Up @@ -4,8 +4,8 @@

# 1. A common place on every VM instance where the logs with events exist
# (this is the "vmlogdir" configuration here).
# 2. A common user login for every VM instance (configured in iaas.conf)
# 3. The localsshkeypath configuration in iaas.conf allows access.
# 2. A common user login for every VM instance (configured in launch plan)
# 3. The localsshkeypath configuration in launch plan allows access.

vmlogdir: /home/cc/app/logs

Expand Down
1 change: 1 addition & 0 deletions etc/epumgmt/internal.conf
Expand Up @@ -35,3 +35,4 @@ Common: epumgmt.defaults.DefaultCommon
Parameters: epumgmt.defaults.DefaultParameters
Runlogs: epumgmt.defaults.DefaultRunlogs
EventGather: epumgmt.defaults.DefaultEventGather
RemoteSvcAdapter: epumgmt.defaults.DefaultRemoteSvcAdapter
1 change: 1 addition & 0 deletions etc/epumgmt/main.conf
Expand Up @@ -15,3 +15,4 @@ dirs: dirs.conf
events: events.conf
internal: internal.conf
logging: logging.conf
svcadapter: svcadapter.conf
13 changes: 13 additions & 0 deletions etc/epumgmt/svcadapter.conf
@@ -0,0 +1,13 @@
[svcadapter]

# Name of the dep variable in the launch plan that contains the sysname of the services
exchange_scope_key: exchange_scope

# Prefix of the dep variables in the launch plan that contain the actual service names of EPU controllers
controller_prefix: epu_controller

# Path to the home directory of the remote epu software (this tight coupling will go away in time)
homedir: /home/cc/app

# Path to the environment file of the remote epu software (this tight coupling will go away in time)
envfile: /home/cc/app-venv/bin/activate
47 changes: 38 additions & 9 deletions src/python/epumgmt/api/__init__.py
Expand Up @@ -11,18 +11,18 @@ def get_default_ac():
return ac

def get_parameters(opts, ac=None):
    """Instantiate the configured "Parameters" class for the given options.

    opts -- handed unchanged to the Parameters class constructor
    ac -- the "allconfigs" object; when None, get_default_ac() supplies one

    Returns a (parameters, ac) tuple so that a caller who relied on the
    default can reuse the same ac object afterwards.
    """
    # Identity test: None is a singleton, and "==" could be hijacked by a
    # custom __eq__ on the config object.
    if ac is None:
        ac = get_default_ac()
    p_cls = get_class_by_keyword("Parameters", allconfigs=ac)
    p = p_cls(ac, opts)
    return (p, ac)

def get_common(opts=None, p=None, ac=None):
if p == None and opts == None:
if p is None and opts is None:
raise Exception("either opts of p must be specified")
if ac == None:
if ac is None:
ac = get_default_ac()
if p == None:
if p is None:
(p, ac) = get_parameters(opts, ac)
c_cls = get_class_by_keyword("Common", allconfigs=ac)
c = c_cls(p)
Expand All @@ -48,13 +48,12 @@ def __init__(self):
# Assumed that harness can ssh to this node
self.hostname = None

# The haservice that caused this VM to be started.
# If this VM was heard about from intaking an EPU controller's log
# files (i.e., a worker VM for that EPU controller), then the value
# will be that haservice name plus the constant WORKER_SUFFIX (see
# above).
# The svc that caused this VM to be started.
self.service_type = None

# If this node was launched by an EPU Controller, the controller service endpoint name
self.parent = None

# Absolute path to the localhost directory of log files to look
# for events that happened on this vm
self.runlogdir = None
Expand All @@ -65,3 +64,33 @@ def __init__(self):

# List of events that have been parsed and recorded so far.
self.events = []

class WorkerInstanceState:
    """Object to store worker instance information from an EPU Controller
    state query.

    A new object holds only "unknown" placeholders: None for the string
    fields and -1 for the timestamps.
    """

    def __init__(self):
        self.nodeid = None

        # NOTE(review): presumably the controller whose state query reported
        # this instance -- confirm against the code that fills it in.
        self.parent_controller = None

        # string, epu.states.*
        self.iaas_state = None

        # seconds since epoch or -1
        self.iaas_state_time = -1

        # string, epu.epucontroller.health.NodeHealthState.*
        self.heartbeat_state = None

        # seconds since epoch or -1
        self.heartbeat_time = -1

class EPUControllerState:
    """Object to store one capture of an EPU Controller's information.

    A new object holds only "unknown" placeholders (None / -1) and an empty
    list of instances.
    """

    def __init__(self):
        # Time when this set of data was fetched
        # (seconds since epoch or -1)
        self.capture_time = -1

        # Actual controller service endpoint name
        self.controller_name = None

        # Stable engine or not
        # (a decision engine is not required to implement this)
        self.de_state = None

        # Configuration report
        # (a decision engine is not required to implement this)
        self.de_conf_report = None

        # 0-N integer, or -1 if no report
        self.last_queuelen_size = -1

        # seconds since epoch or -1
        self.last_queuelen_time = -1

        # List of WorkerInstanceState; created per object so that captures
        # never share one list
        self.instances = []
7 changes: 3 additions & 4 deletions src/python/epumgmt/api/actions.py
@@ -1,17 +1,16 @@
class ACTIONS:

EXECUTE_WORKLOAD_TEST = "execute-workload-test"
#FETCH_KILL = "fetchkill"
FETCH_KILL = "fetchkill"
FIND_WORKERS_ONCE = "find-workers"
GENERATE_GRAPH = "generate-graph"
KILLRUN = "killrun"
LOAD = "load"
LOGFETCH = "logfetch"
RECONFIGURE_N = "reconfigure-n"
STATUS = "status"
UPDATE_EVENTS = "update-events"

# For later:
#STATUS = "worker-status"

def all_actions(self):
"""Return the values of all Python members of this class whose
identifiers are capitalized. So if you add an action, make sure
Expand Down
1 change: 1 addition & 0 deletions src/python/epumgmt/defaults/__init__.py
Expand Up @@ -2,3 +2,4 @@
from parameters import DefaultParameters
from runlogs import DefaultRunlogs
from event_gather import DefaultEventGather
from svc_adapter import DefaultRemoteSvcAdapter
4 changes: 3 additions & 1 deletion src/python/epumgmt/defaults/cloudinitd_load.py
Expand Up @@ -30,12 +30,14 @@ def load_for_destruction(p, c, m, run_name, cloudinitd_dbdir):
continue_on_error=True,
service_callback=service_callback) #, log=c.log)

def load(p, c, m, run_name, cloudinitd_dbdir, silent=False, terminate=False, wholerun=False):
def load(p, c, m, run_name, cloudinitd_dbdir, silent=False, terminate=False, wholerun=True):
"""Load any EPU related instances from a local cloudinit.d launch with the same run name.
"""

try:
cb = CloudInitD(cloudinitd_dbdir, db_name=run_name, terminate=terminate, boot=False, ready=False)
cb.start()
cb.block_until_complete()
except APIUsageException, e:
raise IncompatibleEnvironment("Problem loading records from cloudinit.d: %s" % str(e))
svc_list = cb.get_all_services()
Expand Down
29 changes: 29 additions & 0 deletions src/python/epumgmt/defaults/epustates.py
@@ -0,0 +1,29 @@
"""Copy of instance states
"""

# The numeric prefix on every value makes plain string comparison follow the
# order in which the states are listed here.

# Request has been made but not acknowledged through SA
REQUESTING = "100-REQUESTING"

# Request has been acknowledged by provisioner
REQUESTED = "200-REQUESTED"

# Request encountered an error but is still being attempted
ERROR_RETRYING = "300-ERROR_RETRYING"

# Request is pending in IaaS layer
PENDING = "400-PENDING"

# Instance has been started in IaaS layer
STARTED = "500-STARTED"

# Instance has been contextualized and is operational
RUNNING = "600-RUNNING"

# Termination of the instance has been requested
TERMINATING = "700-TERMINATING"

# Instance has been terminated in IaaS layer
TERMINATED = "800-TERMINATED"

# Instance has failed and will not be retried
FAILED = "900-FAILED"
2 changes: 1 addition & 1 deletion src/python/epumgmt/defaults/event_gather.py
Expand Up @@ -66,9 +66,9 @@ def _fill_one(self, vm):
vm.events.append(event)

def _all_events_in_dir(self, logdir):
    """Gather the events from every path that self.dirwalk() yields for
    logdir.

    Returns one flat list: the results of cyvents.events_from_file() for
    each path, concatenated in the order the paths are yielded.
    """
    self.c.log.debug("Getting events from '%s'" % logdir)
    events = []
    for fullpath in self.dirwalk(logdir):
        self.c.log.debug("Looking in '%s'" % fullpath)
        events.extend(cyvents.events_from_file(fullpath))
    return events

Expand Down
27 changes: 22 additions & 5 deletions src/python/epumgmt/defaults/runlogs.py
Expand Up @@ -3,7 +3,6 @@

from epumgmt.api.exceptions import *
import epumgmt.main.em_args as em_args
from epumgmt.api.actions import ACTIONS

import child

Expand Down Expand Up @@ -75,7 +74,7 @@ def new_vm(self, newvm):

newvm.vmlogdir = self.allvmslogdir

def get_scp_command_str(self, c, vm, cloudinitd):
def _scp_command_common(self, c, vm, cloudinitd):
if not vm.hostname:
c.log.warn("Cannot retrieve logs for '%s', hostname is unknown" % vm.instanceid)
return None
Expand All @@ -87,8 +86,6 @@ def get_scp_command_str(self, c, vm, cloudinitd):
raise IncompatibleEnvironment("Problem finding the provisioner node in cloudinit.d, "
"cannot fetch any worker logs without it: %s" % str(e))

source = vm.vmlogdir
dest = vm.runlogdir
forcehost = None
try:
svc = get_cloudinitd_service(cloudinitd, vm.service_type)
Expand All @@ -97,7 +94,27 @@ def get_scp_command_str(self, c, vm, cloudinitd):
c.log.warn("cloudinit.d is unaware of '%s' but it is not an EPU worker?" % vm.service_type)
svc = provisioner
forcehost = vm.hostname
return svc, forcehost

def get_scp_command_str(self, c, vm, cloudinitd):
    """Build the scp command that copies a VM's whole log directory.

    c, vm and cloudinitd are handed to self._scp_command_common(), which
    picks the service object to go through and, optionally, a host to force.

    Returns None when _scp_command_common() yields nothing (for example when
    the VM's hostname is unknown). Otherwise returns whatever the service's
    get_scp_command() produces for a recursive copy of vm.vmlogdir into
    vm.runlogdir.
    """
    common = self._scp_command_common(c, vm, cloudinitd)
    if common is None:
        return None
    svc, forcehost = common
    return svc.get_scp_command(vm.vmlogdir, vm.runlogdir,
                               recursive=True, forcehost=forcehost)

def get_onefile_scp_command_str(self, c, vm, cloudinitd, logfilename):
    """Build the scp command that copies a single log file from a VM.

    c, vm and cloudinitd are handed to self._scp_command_common(), which
    picks the service object to go through and, optionally, a host to force.
    logfilename is joined onto vm.vmlogdir to form the remote source path.

    Returns None when _scp_command_common() yields nothing (for example when
    the VM's hostname is unknown). Otherwise returns whatever the service's
    get_scp_command() produces for a non-recursive copy of that one file
    into vm.runlogdir.
    """
    common = self._scp_command_common(c, vm, cloudinitd)
    if common is None:
        return None
    svc, forcehost = common
    source = os.path.join(vm.vmlogdir, logfilename)
    return svc.get_scp_command(source, vm.runlogdir,
                               recursive=False, forcehost=forcehost)

def fetch_logs(self, scpcmd):
if not self.validated:
Expand All @@ -113,7 +130,7 @@ def _run_one_cmd(self, cmd):
self.c.log.error("TIMED OUT: '%s'" % cmd)
return False

if retcode == 0:
if not retcode:
self.c.log.debug("command succeeded: '%s'" % cmd)
return True
else:
Expand Down

0 comments on commit 81a1c21

Please sign in to comment.