Skip to content

Commit

Permalink
Implement 'pods' endpoint for info/restart/replace (#388)
Browse files Browse the repository at this point in the history
Also makes several improvements to:
- logging in offer evaluation code, which was done while debugging an issue during 'pods replace' development.
- hooking up all the config options in the default hello-world config
  • Loading branch information
nickbp committed Dec 23, 2016
1 parent 4c10b70 commit 800c67d
Show file tree
Hide file tree
Showing 42 changed files with 1,432 additions and 313 deletions.
52 changes: 52 additions & 0 deletions cli/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,57 @@ func HandlePlanSection(app *kingpin.Application) {
restart.Arg("step", "UUID of Step to be restarted").Required().StringVar(&cmd.StepId)
}

// Pods section

// PodsHandler holds the arguments shared by the 'pods' subcommands and
// provides the kingpin actions that execute them.
type PodsHandler struct {
	// PodName is the pod instance named on the command line. It is bound to
	// the "pod" argument of status/info/restart/replace and is left empty
	// when 'pods status' is invoked without an argument.
	PodName string
}

// RunList is the kingpin action for 'pods list'. It issues a GET against
// v1/pods and prints the response as JSON. It always returns nil.
func (cmd *PodsHandler) RunList(c *kingpin.ParseContext) error {
	response := HTTPGet("v1/pods")
	PrintJSON(response)
	return nil
}
// RunStatus is the kingpin action for 'pods status [pod]'. When no pod name
// was given it queries v1/pods/status; otherwise it queries the status
// endpoint of the named pod. The response is printed as JSON and the action
// always returns nil.
func (cmd *PodsHandler) RunStatus(c *kingpin.ParseContext) error {
	// Default to the all-pods endpoint; narrow to one pod only when a name was supplied.
	path := "v1/pods/status"
	if len(cmd.PodName) != 0 {
		path = fmt.Sprintf("v1/pods/%s/status", cmd.PodName)
	}
	PrintJSON(HTTPGet(path))
	return nil
}
// RunInfo is the kingpin action for 'pods info <pod>'. It issues a GET
// against the info endpoint of the named pod and prints the response as
// JSON. It always returns nil.
func (cmd *PodsHandler) RunInfo(c *kingpin.ParseContext) error {
	path := fmt.Sprintf("v1/pods/%s/info", cmd.PodName)
	PrintJSON(HTTPGet(path))
	return nil
}
// RunRestart is the kingpin action for 'pods restart <pod>'. It issues a
// POST against the restart endpoint of the named pod and prints the response
// as text. It always returns nil.
func (cmd *PodsHandler) RunRestart(c *kingpin.ParseContext) error {
	path := fmt.Sprintf("v1/pods/%s/restart", cmd.PodName)
	PrintText(HTTPPost(path))
	return nil
}
// RunReplace is the kingpin action for 'pods replace <pod>'. It issues a
// POST against the replace endpoint of the named pod and prints the response
// as text. It always returns nil.
func (cmd *PodsHandler) RunReplace(c *kingpin.ParseContext) error {
	path := fmt.Sprintf("v1/pods/%s/replace", cmd.PodName)
	PrintText(HTTPPost(path))
	return nil
}

// HandlePodsSection registers the 'pods' command and its subcommands on app:
//
//	pods [list, status [pod], info <pod>, restart <pod>, replace <pod>]
//
// All subcommands share a single PodsHandler, so the "pod" argument of
// whichever subcommand was selected is stored in the same PodName field.
func HandlePodsSection(app *kingpin.Application) {
	handler := &PodsHandler{}
	podsCmd := app.Command("pods", "View Pod/Task state")

	podsCmd.Command("list", "Display the list of known pod instances").
		Action(handler.RunList)

	// The pod name is optional here: omitting it reports on all pods.
	podsCmd.Command("status", "Display the status for tasks in one pod or all pods").
		Action(handler.RunStatus).
		Arg("pod", "Name of a specific pod instance to display").
		StringVar(&handler.PodName)

	podsCmd.Command("info", "Display the full state information for tasks in a pod").
		Action(handler.RunInfo).
		Arg("pod", "Name of the pod instance to display").
		Required().
		StringVar(&handler.PodName)

	podsCmd.Command("restart", "Restarts a given pod without moving it to a new agent").
		Action(handler.RunRestart).
		Arg("pod", "Name of the pod instance to restart").
		Required().
		StringVar(&handler.PodName)

	podsCmd.Command("replace", "Destroys a given pod and moves it to a new agent").
		Action(handler.RunReplace).
		Arg("pod", "Name of the pod instance to replace").
		Required().
		StringVar(&handler.PodName)
}

// State section

type StateHandler struct {
Expand All @@ -283,6 +334,7 @@ func (cmd *StateHandler) RunTasks(c *kingpin.ParseContext) error {
return nil
}

// TODO remove this command once callers have migrated to HandlePodsSection().
func HandleStateSection(app *kingpin.Application) {
// state <framework_id, status, task, tasks>
cmd := &StateHandler{}
Expand Down
2 changes: 1 addition & 1 deletion frameworks/hdfs/cli/dcos-hdfs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func main() {
cli.HandleConfigSection(app)
cli.HandleEndpointsSection(app)
cli.HandlePlanSection(app)
cli.HandleStateSection(app)
cli.HandlePodsSection(app)
handleNodeSection(app, modName)

// Omit modname:
Expand Down
20 changes: 10 additions & 10 deletions frameworks/helloworld/README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
Refer to the [top-level tutorial document](../../docs/tutorial.md).
Refer to the [top-level tutorial document](../../docs/pages/tutorial.md).

# Sample configuration files

Four sample configuration files are provided here. To use a different one, change the name of the input yml file in "universe/marathon.json.mustache":

- `svc.yml` (only one pod)
- `svc_simple.yml` (two pods, with disk)
- `svc_plan.yml` (two pods, with healthcheck, with disk, with port, and with a plan structure)
- `svc_uri.yml` (sample pod and task specific uri's that are downloaded before task launch)
- [svc.yml](src/main/dist/svc.yml): Two pods, with volumes. Used in the 'official' `hello-world` package.
- [svc_simple.yml](src/main/dist/svc_simple.yml): Bare minimum example. Just one pod, with no extra features.
- [svc_plan.yml](src/main/dist/svc_plan.yml): Two pods, with healthcheck, volumes, ports, and with a plan structure.
- [svc_uri.yml](src/main/dist/svc_uri.yml): Sample pod and task-specific URIs that are downloaded before task launch.

# Framework API Port

Please note that Marathon dynamically selects a port number, and passes this information to the framework (`PORT0` in our examples). We start the API service on that given port number. You can start the framework by giving a specific port number, if you are sure that the port is available to you. This is very unlikely, so api-port should be set to a variable as shown below:
Please note that Marathon dynamically selects a port number, and passes this information to the framework (`PORT0` in our examples). We start the API service on that given port number. You can start the framework by giving a specific port number, if you are sure that the port is available to you. This is very unlikely, so the api-port should be set to a variable as shown below:
```
api-port : {{PORT0}}
```

See [universe/marathon.json.mustache](universe/marathon.json.mustache) for more information:
See [marathon.json.mustache](universe/marathon.json.mustache) for more information:
```
[...]
"DCOS_MIGRATION_API_PATH": "/v1/plan",
Expand All @@ -28,9 +30,7 @@ See [universe/marathon.json.mustache](universe/marathon.json.mustache) for more
"port": 0,
"protocol": "tcp",
"name": "api",
}
[...]
```




2 changes: 1 addition & 1 deletion frameworks/helloworld/cli/dcos-hello-world/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func main() {
cli.HandleConfigSection(app)
cli.HandleEndpointsSection(app)
cli.HandlePlanSection(app)
cli.HandleStateSection(app)
cli.HandlePodsSection(app)
handleExampleSection(app)

// Omit modname:
Expand Down
120 changes: 113 additions & 7 deletions frameworks/helloworld/integration/tests/test_sanity.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
import dcos.http
import json
import pytest
import re
import shakedown

from tests.test_utils import (
DEFAULT_TASK_COUNT,
PACKAGE_NAME,
check_health,
get_marathon_config,
get_deployment_plan,
get_task_count,
install,
marathon_api_url,
request,
run_dcos_cli_cmd,
uninstall,
spin
)


def setup_module(module):
    """Module-level pytest setup: run uninstall(), then install(), then check_health()."""
    # The 'module' argument is supplied by pytest and is not used here.
    uninstall()
    install()
    check_health()


Expand Down Expand Up @@ -55,13 +56,12 @@ def test_bump_hello_cpus():
config = get_marathon_config()
cpus = float(config['env']['HELLO_CPUS'])
config['env']['HELLO_CPUS'] = str(cpus + 0.1)
r = request(
request(
dcos.http.put,
marathon_api_url('apps/' + PACKAGE_NAME),
json=config)

tasks_updated('hello', hello_ids)

check_health()


Expand All @@ -75,15 +75,121 @@ def test_bump_hello_nodes():
config = get_marathon_config()
nodeCount = int(config['env']['HELLO_COUNT']) + 1
config['env']['HELLO_COUNT'] = str(nodeCount)
r = request(
request(
dcos.http.put,
marathon_api_url('apps/' + PACKAGE_NAME),
json=config)

check_health(DEFAULT_TASK_COUNT + 1)
check_health()
tasks_not_updated('hello', hello_ids)


@pytest.mark.sanity
def test_pods_list():
    """Check that 'pods list' returns one correctly named entry per expected task."""
    pod_names = json.loads(run_dcos_cli_cmd('hello-world pods list'))
    assert len(pod_names) == get_task_count()
    # expect: X instances of 'hello-#' followed by Y instances of 'world-#',
    # in alphanumerical order
    world_start = -1  # index of the first 'world-' entry, or -1 until one is seen
    for index, pod_name in enumerate(pod_names):
        if world_start < 0 and pod_name.startswith('world-'):
            world_start = index
        if world_start == -1:
            expected_name = 'hello-{}'.format(index)
        else:
            expected_name = 'world-{}'.format(index - world_start)
        assert pod_name == expected_name


@pytest.mark.sanity
def test_pods_status_all():
    """Check that 'pods status' reports a single running task for every pod."""
    pod_statuses = json.loads(run_dcos_cli_cmd('hello-world pods status'))
    assert len(pod_statuses) == get_task_count()
    for pod_name, pod_tasks in pod_statuses.items():
        assert re.match('(hello|world)-[0-9]+', pod_name)
        # each pod is expected to hold exactly one task with three fields
        assert len(pod_tasks) == 1
        task = pod_tasks[0]
        assert len(task) == 3
        assert re.match('(hello|world)-[0-9]+-server__[0-9a-f-]+', task['id'])
        assert re.match('(hello|world)-[0-9]+-server', task['name'])
        assert task['state'] == 'TASK_RUNNING'


@pytest.mark.sanity
def test_pods_status_one():
    """Check that 'pods status hello-0' reports exactly one running task for that pod."""
    pod_tasks = json.loads(run_dcos_cli_cmd('hello-world pods status hello-0'))
    assert len(pod_tasks) == 1
    task = pod_tasks[0]
    assert len(task) == 3
    assert re.match('hello-0-server__[0-9a-f-]+', task['id'])
    assert task['name'] == 'hello-0-server'
    assert task['state'] == 'TASK_RUNNING'


@pytest.mark.sanity
def test_pods_info():
    """Check that 'pods info world-1' returns matching info/status for one running task."""
    pod_tasks = json.loads(run_dcos_cli_cmd('hello-world pods info world-1'))
    assert len(pod_tasks) == 1
    task = pod_tasks[0]
    # each entry is expected to carry exactly two fields: 'info' and 'status'
    assert len(task) == 2
    info = task['info']
    assert info['name'] == 'world-1-server'
    status = task['status']
    assert info['taskId']['value'] == status['taskId']['value']
    assert status['state'] == 'TASK_RUNNING'


@pytest.mark.sanity
def test_pods_restart():
    """Check that 'pods restart hello-0' relaunches the task on the same agent."""

    def current_agent():
        # look up the agent id recorded in the pod's task info
        pod_info = json.loads(run_dcos_cli_cmd('hello-world pods info hello-0'))
        return pod_info[0]['info']['slaveId']['value']

    hello_ids = get_task_ids('hello-0')

    # get current agent id:
    agent_before = current_agent()

    response = json.loads(run_dcos_cli_cmd('hello-world pods restart hello-0'))
    assert len(response) == 2
    assert response['pod'] == 'hello-0'
    restarted_tasks = response['tasks']
    assert len(restarted_tasks) == 1
    assert restarted_tasks[0] == 'hello-0-server'

    tasks_updated('hello', hello_ids)
    check_health()

    # check agent didn't move:
    agent_after = current_agent()
    assert agent_before == agent_after


@pytest.mark.sanity
def test_pods_replace():
    """Check that 'pods replace world-0' relaunches the task and the service stays healthy."""

    def current_agent():
        # look up the agent id recorded in the pod's task info
        pod_info = json.loads(run_dcos_cli_cmd('hello-world pods info world-0'))
        return pod_info[0]['info']['slaveId']['value']

    world_ids = get_task_ids('world-0')

    # get current agent id:
    old_agent = current_agent()

    response = json.loads(run_dcos_cli_cmd('hello-world pods replace world-0'))
    assert len(response) == 2
    assert response['pod'] == 'world-0'
    replaced_tasks = response['tasks']
    assert len(replaced_tasks) == 1
    assert replaced_tasks[0] == 'world-0-server'

    tasks_updated('world-0', world_ids)
    check_health()

    # check agent moved:
    new_agent = current_agent()
    # TODO: enable assert if/when agent is guaranteed to change (may randomly move back to old agent)
    #assert old_agent != new_agent


def get_task_ids(prefix):
tasks = shakedown.get_service_tasks(PACKAGE_NAME)
prefixed_tasks = [t for t in tasks if t['name'].startswith(prefix)]
Expand Down
3 changes: 1 addition & 2 deletions frameworks/helloworld/integration/tests/test_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def test_upgrade():


def get_pkg_version():
    """Return the version string reported by 'dcos package describe' for PACKAGE_NAME.

    The command is passed without the leading 'dcos': run_dcos_cli_cmd's own
    error message formats the command as 'dcos {cmd}', so a second prefix
    would be redundant. The package is described once; a stale earlier
    invocation that ran the same query with a 'dcos ' prefix has been removed.

    Raises AttributeError if the output contains no '"version": "..."' entry.
    """
    pkg_description = run_dcos_cli_cmd('package describe {}'.format(PACKAGE_NAME))
    regex = r'"version": "(\S+)"'
    match = re.search(regex, pkg_description)
    return match.group(1)
Expand Down
20 changes: 14 additions & 6 deletions frameworks/helloworld/integration/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
WAIT_TIME_IN_SECONDS = 15 * 60

TASK_RUNNING_STATE = 'TASK_RUNNING'
DEFAULT_TASK_COUNT = 1


# expected SECURITY values: 'permissive', 'strict', 'disabled'
Expand All @@ -31,7 +30,14 @@
DEFAULT_OPTIONS_DICT = {}


def check_health(expected_tasks = DEFAULT_TASK_COUNT):
def get_task_count():
    """Return HELLO_COUNT + WORLD_COUNT as read from the 'env' section of the marathon config."""
    env = get_marathon_config()['env']
    hello_count = int(env['HELLO_COUNT'])
    world_count = int(env['WORLD_COUNT'])
    return hello_count + world_count


def check_health():
expected_tasks = get_task_count()

def fn():
try:
return shakedown.get_service_tasks(PACKAGE_NAME)
Expand Down Expand Up @@ -170,9 +176,11 @@ def success_predicate(response):

return spin(request_fn, success_predicate, *args, **kwargs)


def run_dcos_cli_cmd(cmd):
    """Run a DC/OS CLI command through shakedown and return its stdout.

    cmd is the command line WITHOUT the leading 'dcos' (for example
    'hello-world pods list').

    The command is executed exactly once. A leftover
    subprocess.check_output(cmd, shell=True) call used to run the raw string
    in a shell first; it has been removed because it ran the command a second
    time and without the 'dcos' prefix that callers now omit.

    Raises Exception, with the exit code, stdout and stderr in the message,
    when the command exits non-zero.
    """
    print('Running {}'.format(cmd))
    (stdout, stderr, ret) = shakedown.run_dcos_command(cmd)
    if ret != 0:
        err = "Got error code {} when running command 'dcos {}':\nstdout: {}\nstderr: {}".format(
            ret, cmd, stdout, stderr)
        print(err)
        raise Exception(err)
    print(stdout)
    return stdout
24 changes: 21 additions & 3 deletions frameworks/helloworld/src/main/dist/svc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,31 @@ api-port: {{PORT0}}
pods:
hello:
count: {{HELLO_COUNT}}
placement: {{HELLO_PLACEMENT}}
tasks:
server:
goal: RUNNING
cmd: "echo hello >> hello-container-path/output && sleep 1000"
cmd: "echo hello >> hello-container-path/output && sleep $SLEEP_DURATION"
cpus: {{HELLO_CPUS}}
memory: 256
memory: {{HELLO_MEM}}
volumes:
- path: "hello-container-path"
type: ROOT
size: 50
size: {{HELLO_DISK}}
env:
SLEEP_DURATION: {{SLEEP_DURATION}}
world:
count: {{WORLD_COUNT}}
placement: {{WORLD_PLACEMENT}}
tasks:
server:
goal: RUNNING
cmd: "echo world >> world-container-path/output && sleep $SLEEP_DURATION"
cpus: {{WORLD_CPUS}}
memory: {{WORLD_MEM}}
volumes:
- path: "world-container-path"
type: ROOT
size: {{WORLD_DISK}}
env:
SLEEP_DURATION: {{SLEEP_DURATION}}
Loading

0 comments on commit 800c67d

Please sign in to comment.