Skip to content

Commit

Permalink
[data] Ray Data jobs detail table (#40756)
Browse files Browse the repository at this point in the history
Creates a table under the jobs page to display dataset-level metrics.

The `_StatsActor` now stores dataset metadata such as `state`, `progress`, and `start/end_time` for each executed dataset, which is queried directly by the new `data_head` dashboard API. This API also makes requests to the Prometheus server to get the other metrics that are displayed.

Signed-off-by: Andrew Xue <andewzxue@gmail.com>
  • Loading branch information
Zandew committed Nov 3, 2023
1 parent d3c1878 commit f08498e
Show file tree
Hide file tree
Showing 16 changed files with 566 additions and 7 deletions.
12 changes: 12 additions & 0 deletions .buildkite/data.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,18 @@ steps:
depends_on: datamongobuild
job_env: forge

# CI step that runs the targets under python/ray/dashboard/... through the
# ci/ray_ci:test_in_docker runner, using the "datanbuild" build it depends on.
- label: ":database: data: dashboard tests"
tags:
- python
- dashboard
instance_type: small
commands:
- bazel run //ci/ray_ci:test_in_docker -- python/ray/dashboard/... data
--build-name datanbuild
--parallelism-per-worker 3
depends_on: datanbuild
job_env: forge

- label: ":database: data: flaky tests"
tags:
- python
Expand Down
7 changes: 7 additions & 0 deletions dashboard/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,10 @@ py_test(
srcs = ["modules/serve/tests/test_serve_dashboard.py"],
tags = ["team:serve"],
)

# Test target for modules/data/tests/test_data_head.py, tagged for the data team.
py_test(
name = "test_data_head",
size = "small",
srcs = ["modules/data/tests/test_data_head.py"],
tags = ["team:data"],
)
174 changes: 174 additions & 0 deletions dashboard/client/src/components/DataOverviewTable.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import {
Box,
Table,
TableBody,
TableCell,
TableHead,
TableRow,
TextField,
TextFieldProps,
Typography,
} from "@material-ui/core";
import Autocomplete from "@material-ui/lab/Autocomplete";
import Pagination from "@material-ui/lab/Pagination";
import React, { useState } from "react";
import { formatDateFromTimeMs } from "../common/formatUtils";
import rowStyles from "../common/RowStyles";
import { TaskProgressBar } from "../pages/job/TaskProgressBar";
import { DatasetMetrics } from "../type/data";
import { memoryConverter } from "../util/converter";
import { useFilter } from "../util/hook";
import StateCounter from "./StatesCounter";
import { StatusChip } from "./StatusChip";
import { HelpInfo } from "./Tooltip";

// Tooltip content for the "Progress" column header.
const progressHelp = (
  <Typography>Blocks outputted by output operator.</Typography>
);

// Tooltip content for the "Memory Usage (Current / Max)" column header.
const memoryUsageHelp = (
  <Typography>
    Amount of object store memory used by a dataset. Includes spilled
    objects.
  </Typography>
);

// Tooltip content for the "Bytes Spilled" column header; explains the
// DataContext flag that has to be enabled for spill stats to be collected.
const bytesSpilledHelp = (
  <Typography>
    Set
    "ray.data.context.DataContext.get_current().enable_get_object_locations_for_metrics
    = True" to collect spill stats.
  </Typography>
);

/**
 * Header definitions for the dataset table, in display order. `label` is the
 * header text (and the React key of the header cell); the optional `helpInfo`
 * node is shown inside a help tooltip next to the label.
 */
const columns = [
  { label: "Dataset" },
  { label: "Progress", helpInfo: progressHelp },
  { label: "State" },
  { label: "Bytes Outputted" },
  { label: "Memory Usage (Current / Max)", helpInfo: memoryUsageHelp },
  { label: "Bytes Spilled", helpInfo: bytesSpilledHelp },
  { label: "Start Time" },
  { label: "End Time" },
];

/**
 * Filterable, paginated table of per-dataset Ray Data metrics.
 *
 * Each row shows the dataset name, a progress bar, its state, output bytes,
 * current / max memory usage, spilled bytes, and start / end times.
 *
 * @param datasets Dataset metrics to display; defaults to an empty list.
 */
const DataOverviewTable = ({
  datasets = [],
}: {
  datasets: DatasetMetrics[];
}) => {
  const [pageNo, setPageNo] = useState(1);
  const { changeFilter, filterFunc } = useFilter();
  const pageSize = 10;
  const datasetList = datasets.filter(filterFunc);

  // The filtered list can shrink (new filter text, or refreshed data) while
  // `pageNo` still points past the last page. Clamp the page that is actually
  // rendered so the table never shows an empty slice while rows exist.
  const pageCount = Math.ceil(datasetList.length / pageSize);
  const currentPage = Math.min(pageNo, Math.max(pageCount, 1));

  const list = datasetList.slice(
    (currentPage - 1) * pageSize,
    currentPage * pageSize,
  );

  const classes = rowStyles();

  return (
    <div>
      <div style={{ flex: 1, display: "flex", alignItems: "center" }}>
        <Autocomplete
          style={{ margin: 8, width: 120 }}
          options={Array.from(new Set(datasets.map((e) => e.dataset)))}
          onInputChange={(_: unknown, value: string) => {
            changeFilter("dataset", value.trim());
            // A new filter produces a new result set; start from its first page.
            setPageNo(1);
          }}
          renderInput={(params: TextFieldProps) => (
            <TextField {...params} label="Dataset" />
          )}
        />
      </div>
      <div style={{ display: "flex", alignItems: "center" }}>
        <div>
          <Pagination
            page={currentPage}
            onChange={(e, num) => setPageNo(num)}
            count={pageCount}
          />
        </div>
        <div>
          <StateCounter type="task" list={datasetList} />
        </div>
      </div>
      <div className={classes.tableContainer}>
        <Table>
          <TableHead>
            <TableRow>
              {columns.map(({ label, helpInfo }) => (
                <TableCell align="center" key={label}>
                  <Box
                    display="flex"
                    justifyContent="center"
                    alignItems="center"
                  >
                    {label}
                    {helpInfo && (
                      <HelpInfo className={classes.helpInfo}>
                        {helpInfo}
                      </HelpInfo>
                    )}
                  </Box>
                </TableCell>
              ))}
            </TableRow>
          </TableHead>
          <TableBody>
            {list.map(
              ({
                dataset,
                state,
                ray_data_current_bytes,
                ray_data_output_bytes,
                ray_data_spilled_bytes,
                progress,
                total,
                start_time,
                end_time,
              }) => (
                <TableRow key={dataset}>
                  <TableCell align="center">{dataset}</TableCell>
                  <TableCell align="center">
                    {/* The unfinished remainder is shown as running or
                        cancelled depending on the dataset state. */}
                    <TaskProgressBar
                      numFinished={progress}
                      numRunning={
                        state === "RUNNING" ? total - progress : undefined
                      }
                      numCancelled={
                        state === "FAILED" ? total - progress : undefined
                      }
                      total={total}
                    />
                  </TableCell>
                  <TableCell align="center">
                    <StatusChip type="task" status={state} />
                  </TableCell>
                  <TableCell align="center">
                    {memoryConverter(Number(ray_data_output_bytes.max))}
                  </TableCell>
                  <TableCell align="center">
                    {memoryConverter(Number(ray_data_current_bytes.value))}/
                    {memoryConverter(Number(ray_data_current_bytes.max))}
                  </TableCell>
                  <TableCell align="center">
                    {memoryConverter(Number(ray_data_spilled_bytes.max))}
                  </TableCell>
                  <TableCell align="center">
                    {/* Timestamps are scaled by 1000 for the ms formatter. */}
                    {formatDateFromTimeMs(start_time * 1000)}
                  </TableCell>
                  <TableCell align="center">
                    {/* Check the type rather than truthiness: `0 && ...`
                        would make React render a literal "0". */}
                    {typeof end_time === "number"
                      ? formatDateFromTimeMs(end_time * 1000)
                      : null}
                  </TableCell>
                </TableRow>
              ),
            )}
          </TableBody>
        </Table>
      </div>
    </div>
  );
};

export default DataOverviewTable;
69 changes: 69 additions & 0 deletions dashboard/client/src/pages/data/DataOverview.component.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { render, screen } from "@testing-library/react";
import React from "react";
import { TEST_APP_WRAPPER } from "../../util/test-utils";
import DataOverview from "./DataOverview";

describe("DataOverview", () => {
  it("renders table with dataset metrics", async () => {
    // A dataset that is still running: half of its blocks are done and it has
    // no end time yet.
    const runningDataset = {
      dataset: "test_ds1",
      state: "RUNNING",
      progress: 50,
      total: 100,
      start_time: 0,
      end_time: undefined,
      ray_data_output_bytes: { max: 10 },
      ray_data_spilled_bytes: { max: 20 },
      ray_data_current_bytes: { value: 30, max: 40 },
    };

    // A dataset that has finished all of its blocks.
    const finishedDataset = {
      dataset: "test_ds2",
      state: "FINISHED",
      progress: 200,
      total: 200,
      start_time: 1,
      end_time: 2,
      ray_data_output_bytes: { max: 50 },
      ray_data_spilled_bytes: { max: 60 },
      ray_data_current_bytes: { value: 70, max: 80 },
    };

    render(<DataOverview datasets={[runningDataset, finishedDataset]} />, {
      wrapper: TEST_APP_WRAPPER,
    });

    // NOTE(review): the expected timestamps are epoch seconds rendered in a
    // UTC-8 local zone (0 -> "1969/12/31 16:00:00") — confirm the test runner
    // pins the timezone.
    const expectedTexts = [
      // First Dataset
      "test_ds1",
      "Total: 100",
      "Finished: 50",
      "Running: 50",
      "1969/12/31 16:00:00",
      "10.0000B",
      "20.0000B",
      "30.0000B/40.0000B",
      // Second Dataset
      "test_ds2",
      "Total: 200",
      "Finished: 200",
      "1969/12/31 16:00:01",
      "1969/12/31 16:00:02",
      "50.0000B",
      "60.0000B",
      "70.0000B/80.0000B",
    ];

    for (const text of expectedTexts) {
      expect(screen.getByText(text)).toBeVisible();
    }
  });
});
13 changes: 13 additions & 0 deletions dashboard/client/src/pages/data/DataOverview.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import React from "react";
import DataOverviewTable from "../../components/DataOverviewTable";
import { DatasetMetrics } from "../../type/data";

/**
 * Wrapper that renders the dataset metrics table for the given datasets.
 *
 * @param props.datasets Dataset metrics to display; defaults to an empty list.
 */
const DataOverview = (props: { datasets: DatasetMetrics[] }) => {
  const { datasets = [] } = props;

  return (
    <div>
      <DataOverviewTable datasets={datasets} />
    </div>
  );
};

export default DataOverview;
28 changes: 27 additions & 1 deletion dashboard/client/src/pages/job/JobDetail.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Box, makeStyles } from "@material-ui/core";
import React, { useRef, useState } from "react";
import useSWR from "swr";
import { CollapsibleSection } from "../../common/CollapsibleSection";
import { Section } from "../../common/Section";
import {
Expand All @@ -9,8 +10,10 @@ import {
import Loading from "../../components/Loading";
import { StatusChip } from "../../components/StatusChip";
import TitleCard from "../../components/TitleCard";
import { getDataDatasets } from "../../service/data";
import { NestedJobProgressLink } from "../../type/job";
import ActorList from "../actor/ActorList";
import DataOverview from "../data/DataOverview";
import { NodeCountCard } from "../overview/cards/NodeCountCard";
import PlacementGroupList from "../state/PlacementGroup";
import TaskList from "../state/task";
Expand Down Expand Up @@ -53,6 +56,18 @@ export const JobDetailChartsPage = () => {
const actorTableRef = useRef<HTMLDivElement>(null);
const { cluster_status } = useRayStatus();

const { data } = useSWR(
"useDataDatasets",
async () => {
const rsp = await getDataDatasets();

if (rsp) {
return rsp.data;
}
},
{ refreshInterval: 5000 },
);

if (!job) {
return (
<div className={classes.root}>
Expand Down Expand Up @@ -104,8 +119,19 @@ export const JobDetailChartsPage = () => {
<div className={classes.root}>
<JobMetadataSection job={job} />

{data?.datasets && data.datasets.length > 0 && (
<CollapsibleSection
title="Ray Data Overview"
className={classes.section}
>
<Section>
<DataOverview datasets={data.datasets} />
</Section>
</CollapsibleSection>
)}

<CollapsibleSection
title="Tasks/actor overview (beta)"
title="Ray Core Overview"
startExpanded
className={classes.section}
>
Expand Down
6 changes: 6 additions & 0 deletions dashboard/client/src/pages/job/TaskProgressBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export const TaskProgressBar = ({
numPendingNodeAssignment = 0,
numSubmittedToWorker = 0,
numFailed = 0,
numCancelled = 0,
numUnknown = 0,
showAsComplete = false,
showTooltip = true,
Expand Down Expand Up @@ -55,6 +56,11 @@ export const TaskProgressBar = ({
value: numPendingArgsAvail,
color: "#f79e02",
},
{
label: "Cancelled",
value: numCancelled,
color: theme.palette.grey.A100,
},
{
label: "Unknown",
value: numUnknown,
Expand Down
6 changes: 6 additions & 0 deletions dashboard/client/src/service/data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import { DatasetResponse } from "../type/data";
import { get } from "./requestHandlers";

/**
 * Issues a GET request to the `api/data/datasets` endpoint, typed as a
 * `DatasetResponse`.
 */
export const getDataDatasets = () =>
  get<DatasetResponse>("api/data/datasets");
22 changes: 22 additions & 0 deletions dashboard/client/src/type/data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/** A metric for which only the maximum value is reported. */
type MaxOnlyMetric = {
  max: number;
};

/** A metric reported with both its current value and its maximum. */
type CurrentAndMaxMetric = {
  value: number;
  max: number;
};

/** Payload carrying the list of dataset metrics. */
export type DatasetResponse = {
  datasets: DatasetMetrics[];
};

/** Metrics and metadata for a single dataset. */
export type DatasetMetrics = {
  /** Dataset name. */
  dataset: string;
  /** Dataset state, e.g. "RUNNING", "FINISHED" or "FAILED". */
  state: string;
  progress: number;
  total: number;
  // NOTE(review): start/end times look like epoch seconds (consumers multiply
  // by 1000 before formatting as milliseconds) — confirm against the backend.
  start_time: number;
  /** Undefined while the dataset has no end time. */
  end_time: number | undefined;
  ray_data_current_bytes: CurrentAndMaxMetric;
  ray_data_output_bytes: MaxOnlyMetric;
  ray_data_spilled_bytes: MaxOnlyMetric;
};
Loading

0 comments on commit f08498e

Please sign in to comment.