Skip to content
This repository has been archived by the owner on Aug 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #65 from microsoft/zhiyuhe/event_api
Browse files Browse the repository at this point in the history
add event api
  • Loading branch information
hzy46 committed Oct 14, 2020
2 parents 84e9ffb + 1e88f89 commit 6903b18
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 2 deletions.
16 changes: 15 additions & 1 deletion src/api/v2/clients/jobClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
import { Util } from '@pai/commom/util';
import * as yaml from 'js-yaml';

import { IJobListQeury } from '../models/job';
import { IEventListQuery, IJobListQeury } from '../models/job';

import { OpenPAIBaseClient } from './baseClient';

Expand Down Expand Up @@ -122,4 +122,18 @@ export class JobClient extends OpenPAIBaseClient {
);
return await this.httpClient.delete(url, undefined, { data: { value: tag } });
}

/**
 * List the events of a job.
 * @param userName The user name; forms the `{userName}~{jobName}` segment of the request URL.
 * @param jobName The job name; forms the `{userName}~{jobName}` segment of the request URL.
 * @param query Optional query parameters; `type` filters the events by event type.
 * @returns Whatever the underlying `httpClient.get` call resolves with.
 */
public async listEvents(userName: string, jobName: string, query?: IEventListQuery): Promise<any> {
    // Build the endpoint path first, then let Util.fixUrl combine it with the cluster settings.
    const eventsPath: string = `/api/v2/jobs/${userName}~${jobName}/events`;
    const url: string = Util.fixUrl(`${this.cluster.rest_server_uri}${eventsPath}`, this.cluster.https);

    return await this.httpClient.get(url, undefined, undefined, query);
}
}
4 changes: 4 additions & 0 deletions src/api/v2/models/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ export interface IJobListQeury {
withTotalCount?: boolean;
}

/**
 * Query parameters accepted when listing the events of a job.
 */
export interface IEventListQuery {
    /** Optional event type to filter the listed events by. */
    type?: 'Normal' | 'Warning';
}

export interface IAppExitSpec {
code: number;
phrase: string;
Expand Down
124 changes: 123 additions & 1 deletion src/api/v2/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ info:
Version 2.0.4: add default field in get storage list
Version 2.0.5: add more parameters to job list; add submissionTime
Version 2.1.0: add add/delete tag api; add tags field in get job detail and get job list; add tags filter in get job list
Version 2.1.1: add get event api
license:
name: MIT License
url: "https://github.com/microsoft/pai/blob/master/LICENSE"
version: 2.1.0
version: 2.1.1
externalDocs:
description: Find out more about OpenPAI
url: "https://github.com/microsoft/pai"
Expand Down Expand Up @@ -1397,6 +1398,49 @@ paths:
$ref: "#/components/responses/NoTagError/content/application~1json/examples/NoTagError"
"500":
$ref: "#/components/responses/UnknownError"
"/api/v2/jobs/{user}~{job}/events":
get:
tags:
- job
summary: Get events of a job.
description: Get events of a job. The events are sorted by "lastTimestamp,DESC".
operationId: getEvents
security:
- bearerAuth: []
parameters:
# Optional filter on the event type. The allowed values mirror the `type`
# enum of the JobEvents schema, so invalid values are visible to spec consumers.
- name: type
  in: query
  description: filter events with type. Could be "Warning" or "Normal".
  schema:
    type: string
    enum:
      - Normal
      - Warning
responses:
"200":
description: Succeeded
content:
application/json:
schema:
$ref: "#/components/schemas/JobEventsWithTotalCount"
example:
totalCount: 1
data:
- uid: "b8a87b12-29de-4702-818a-442d5a97623e"
frameworkName: "399af21acb5befd015d4c45f6c7c1eb9"
podUid: "94920b63-f4d6-4c63-b72b-b926b8bc1334"
taskroleName: "taskrole"
taskName: "399af21acb5befd015d4c45f6c7c1eb9-taskrole-11"
taskIndex: 11
type: "Warning"
reason: "FailedScheduling"
message: "0/17 nodes are available: 1 Insufficient memory, 1 node(s) didn't match node selector, 1 node(s) were unschedulable, 14 Insufficient cpu, 16 Insufficient nvidia.com/gpu."
firstTimestamp: "2020-09-29T09:46:42.000Z"
lastTimestamp: "2020-09-29T09:55:53.000Z"
count: 9
sourceComponent: "default-scheduler"
sourceHost: null
"404":
$ref: "#/components/responses/NoJobError"
"500":
$ref: "#/components/responses/UnknownError"
"/api/v2/jobs/{user}~{job}/job-attempts/healthz":
get:
tags:
Expand Down Expand Up @@ -1831,6 +1875,84 @@ components:
- createdTime
- completedTime
- appExitCode
JobEventsWithTotalCount:
type: object
properties:
totalCount:
type: number
description: total count of events with given filters
data:
$ref: "#/components/schemas/JobEvents"
required:
- totalCount
- data
JobEvents:
type: array
description: job event list
items:
type: object
properties:
uid:
type: string
description: event uid
frameworkName:
type: string
description: related framework name
podUid:
type: string
description: pod uid of the event
taskroleName:
type: string
description: taskrole name of the event
taskName:
type: string
description: task name of the event
taskIndex:
type: integer
description: task index of the event
type:
type: string
description: type of the event
enum:
- Normal
- Warning
reason:
type: string
description: event reason
message:
type: string
description: event message
firstTimestamp:
type: string
description: firstTimestamp of the event
lastTimestamp:
type: string
description: lastTimestamp of the event
count:
type: integer
description: event count between firstTimestamp and lastTimestamp
sourceComponent:
type: string
description: source component of the event
sourceHost:
type: string
nullable: true
description: source host of the event
required:
- uid
- frameworkName
- podUid
- taskroleName
- taskName
- taskIndex
- type
- reason
- message
- firstTimestamp
- lastTimestamp
- count
- sourceComponent
- sourceHost
JobDetail:
type: object
description: job details
Expand Down
14 changes: 14 additions & 0 deletions tests/common/apiTestCases.ts
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,16 @@ export const ApiDefaultTestCases: {[key: string]: IApiTestCase} = {
}],
after: [ updateTestJobExecutionType('STOP') ]
},
'get /api/v2/jobs/{user}~{job}/events': {
tests: [
{
// Cluster events usually cannot be predicted ahead of time,
// so this endpoint is mapped to the no-op `skipTest` customized test.
description: 'Skip',
customizedTest: 'skipTest'
}
]
},
'get /api/v2/jobs/{user}~{job}/job-attempts': {
before: [ createTestJob() ],
tests: [{
Expand Down Expand Up @@ -998,6 +1008,10 @@ export const ApiDefaultTestCases: {[key: string]: IApiTestCase} = {
class CustomizedTestsClass {
private readonly ajvInstance: Ajv = new ajv({ nullable: true });

/**
 * Customized test that does nothing and always resolves.
 * Test cases select it by name (`customizedTest: 'skipTest'`) to skip an endpoint.
 */
public async skipTest(): Promise<void> {
    // Intentionally empty: there is nothing to verify for a skipped endpoint.
}

public async getTokensWithUnauthorizedUser(
test: IApiTestItem, operationResults?: IOperationResults
): Promise<void> {
Expand Down
27 changes: 27 additions & 0 deletions tests/common/test_data/testEventList.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

/**
 * A single "FailedScheduling" warning event used as the only entry of the test event list.
 */
const failedSchedulingEvent: any = {
    uid: 'b8a87b12-29de-4702-818a-442d5a97623e',
    frameworkName: '399af21acb5befd015d4c45f6c7c1eb9',
    podUid: '94920b63-f4d6-4c63-b72b-b926b8bc1334',
    taskroleName: 'taskrole',
    taskName: '399af21acb5befd015d4c45f6c7c1eb9-taskrole-11',
    taskIndex: 11,
    type: 'Warning',
    reason: 'FailedScheduling',
    message: '0/17 nodes are available: 1 Insufficient memory, 1 node(s) didn\'t match node selector, 1 node(s) were unschedulable, 14 Insufficient cpu, 16 Insufficient nvidia.com/gpu.',
    firstTimestamp: '2020-09-29T09:46:42.000Z',
    lastTimestamp: '2020-09-29T09:55:53.000Z',
    count: 9,
    sourceComponent: 'default-scheduler',
    sourceHost: null
};

/**
 * Event list test data: a list response holding one event together with its total count.
 */
export const testEventList: any = {
    totalCount: 1,
    data: [failedSchedulingEvent]
};
15 changes: 15 additions & 0 deletions tests/unit_tests/jobClient.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import dirtyChai from 'dirty-chai';
import * as yaml from 'js-yaml';
import nock from 'nock';

import { testEventList } from '../common/test_data/testEventList';
import { testJobConfig, testJobConfigV1 } from '../common/test_data/testJobConfig';
import { testJobList } from '../common/test_data/testJobList';
import { testJobSshInfo } from '../common/test_data/testJobSshInfo';
Expand Down Expand Up @@ -186,3 +187,17 @@ describe('Delete a tag', () => {
expect(result).to.be.eql(response);
});
});

// Unit test for JobClient.listEvents against a mocked events endpoint.
describe('List events', () => {
    const userName: string = 'core';
    const jobName: string = 'tensorflow_serving_mnist_2019_6585ba19';
    const warningFilter: string = 'type=Warning';
    const eventsPath: string = `/api/v2/jobs/${userName}~${jobName}/events?${warningFilter}`;
    const expected: any = testEventList;

    // Intercept the GET request, query string included, and answer with the canned event list.
    before(() =>
        nock(`http://${testUri}`)
            .get(eventsPath)
            .reply(200, expected)
    );

    it('should return events', async () => {
        const client: JobClient = new JobClient(cluster);
        const events: any = await client.listEvents(userName, jobName, { type: 'Warning' });
        expect(events).to.be.eql(expected);
    });
});

0 comments on commit 6903b18

Please sign in to comment.