Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Serve] [Dashboard] Add serve controller metrics to serve system dashboard page #43797

10 changes: 8 additions & 2 deletions dashboard/client/src/pages/serve/ServeDeploymentsListPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ import { HelpInfo } from "../../components/Tooltip";
import { useServeDeployments } from "./hook/useServeApplications";
import { ServeApplicationRows } from "./ServeApplicationRow";
import { ServeEntityLogViewer } from "./ServeEntityLogViewer";
import { ServeMetricsSection } from "./ServeMetricsSection";
import {
APPS_METRICS_CONFIG,
ServeMetricsSection,
} from "./ServeMetricsSection";
import { ServeSystemPreview } from "./ServeSystemDetails";

const useStyles = makeStyles((theme) =>
Expand Down Expand Up @@ -172,7 +175,10 @@ export const ServeDeploymentsListPage = () => {
</CollapsibleSection>
</React.Fragment>
)}
<ServeMetricsSection className={classes.section} />
<ServeMetricsSection
className={classes.section}
metricsConfig={APPS_METRICS_CONFIG}
/>
</div>
);
};
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { render, screen, waitFor } from "@testing-library/react";
import React, { PropsWithChildren } from "react";
import { GlobalContext } from "../../App";
import { ServeMetricsSection } from "./ServeMetricsSection";
import {
APPS_METRICS_CONFIG,
ServeMetricsSection,
SYSTEM_METRICS_CONFIG,
} from "./ServeMetricsSection";

const Wrapper = ({ children }: PropsWithChildren<{}>) => {
return (
Expand Down Expand Up @@ -54,10 +58,12 @@ const MetricsDisabledWrapper = ({ children }: PropsWithChildren<{}>) => {
};

describe("ServeMetricsSection", () => {
it("renders", async () => {
it("renders app metrics", async () => {
expect.assertions(4);

render(<ServeMetricsSection />, { wrapper: Wrapper });
render(<ServeMetricsSection metricsConfig={APPS_METRICS_CONFIG} />, {
wrapper: Wrapper,
});
await screen.findByText(/View in Grafana/);
expect(screen.getByText(/5 minutes/)).toBeVisible();
expect(screen.getByTitle("QPS per application")).toBeInTheDocument();
Expand All @@ -67,10 +73,30 @@ describe("ServeMetricsSection", () => {
).toBeInTheDocument();
});

it("renders system metrics", async () => {
  // One assertion is made per expected panel title listed below.
  expect.assertions(5);

  const systemPanelTitles = [
    "Ongoing HTTP Requests",
    "Controller Starts",
    "Scheduling Tasks",
    "Scheduling Tasks in Backoff",
    "Controller Control Loop Duration",
  ];

  render(<ServeMetricsSection metricsConfig={SYSTEM_METRICS_CONFIG} />, {
    wrapper: Wrapper,
  });
  // Wait until the "View in Grafana" text appears before querying the panels.
  await screen.findByText(/View in Grafana/);

  for (const panelTitle of systemPanelTitles) {
    expect(screen.getByTitle(panelTitle)).toBeInTheDocument();
  }
});

it("renders nothing when grafana is not available", async () => {
expect.assertions(5);

render(<ServeMetricsSection />, { wrapper: MetricsDisabledWrapper });
render(<ServeMetricsSection metricsConfig={APPS_METRICS_CONFIG} />, {
wrapper: MetricsDisabledWrapper,
});
// Wait .1 seconds for render to finish
await waitFor(() => new Promise((r) => setTimeout(r, 100)));

Expand Down
34 changes: 31 additions & 3 deletions dashboard/client/src/pages/serve/ServeMetricsSection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ const useStyles = makeStyles((theme) =>
);

// NOTE: please keep the titles here in sync with dashboard/modules/metrics/dashboards/serve_dashboard_panels.py
const METRICS_CONFIG: MetricConfig[] = [
export const APPS_METRICS_CONFIG: MetricConfig[] = [
{
title: "QPS per application",
pathParams: "orgId=1&theme=light&panelId=7",
Expand All @@ -75,11 +75,39 @@ const METRICS_CONFIG: MetricConfig[] = [
},
];

type ServeMetricsSectionProps = ClassNameProps;
/**
 * Grafana panels rendered when this config is passed to ServeMetricsSection
 * as `metricsConfig` (used for the Serve system-level metrics).
 *
 * Each entry pairs the panel title with the query string that is appended to
 * the Serve dashboard's `/d-solo/` embed path; `panelId` selects the panel.
 *
 * NOTE: please keep the titles here in sync with dashboard/modules/metrics/dashboards/serve_dashboard_panels.py
 */
export const SYSTEM_METRICS_CONFIG: MetricConfig[] = [
  {
    title: "Ongoing HTTP Requests",
    pathParams: "orgId=1&theme=light&panelId=20",
  },
  {
    title: "Controller Starts",
    pathParams: "orgId=1&theme=light&panelId=21",
  },
  {
    title: "Scheduling Tasks",
    pathParams: "orgId=1&theme=light&panelId=22",
  },
  {
    title: "Scheduling Tasks in Backoff",
    pathParams: "orgId=1&theme=light&panelId=23",
  },
  {
    title: "Controller Control Loop Duration",
    pathParams: "orgId=1&theme=light&panelId=24",
  },
];

/**
 * Props accepted by ServeMetricsSection: everything in ClassNameProps
 * (declared elsewhere) plus the list of Grafana panels to render.
 */
type ServeMetricsSectionProps = ClassNameProps & {
// Panels to embed; the component maps over this list and builds one
// Grafana embed path per entry from its `pathParams`.
metricsConfig: MetricConfig[];
};

export const ServeMetricsSection = ({
className,
metricsConfig,
}: ServeMetricsSectionProps) => {
console.log(metricsConfig);
architkulkarni marked this conversation as resolved.
Show resolved Hide resolved
const classes = useStyles();
const { grafanaHost, prometheusHealth, dashboardUids, dashboardDatasource } =
useContext(GlobalContext);
Expand Down Expand Up @@ -131,7 +159,7 @@ export const ServeMetricsSection = ({
</TextField>
</Paper>
<div className={classes.grafanaEmbedsContainer}>
{METRICS_CONFIG.map(({ title, pathParams }) => {
{metricsConfig.map(({ title, pathParams }) => {
const path =
`/d-solo/${grafanaServeDashboardUid}?${pathParams}` +
`&refresh${timeRangeParams}&var-datasource=${dashboardDatasource}`;
Expand Down
12 changes: 11 additions & 1 deletion dashboard/client/src/pages/serve/ServeSystemDetailPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import { Outlet } from "react-router-dom";
import Loading from "../../components/Loading";
import { MainNavPageInfo } from "../layout/mainNavContext";
import { useServeDeployments } from "./hook/useServeApplications";
import {
ServeMetricsSection,
SYSTEM_METRICS_CONFIG,
} from "./ServeMetricsSection";
import { ServeSystemDetails } from "./ServeSystemDetails";

const useStyles = makeStyles((theme) =>
createStyles({
root: {
Expand All @@ -15,6 +18,9 @@ const useStyles = makeStyles((theme) =>
serveInstanceWarning: {
marginBottom: theme.spacing(2),
},
section: {
marginTop: theme.spacing(4),
},
}),
);

Expand Down Expand Up @@ -53,6 +59,10 @@ export const ServeSystemDetailPage = () => {
setPage={setProxiesPage}
/>
)}
<ServeMetricsSection
className={classes.section}
metricsConfig={SYSTEM_METRICS_CONFIG}
/>
</div>
);
};
Expand Down
65 changes: 65 additions & 0 deletions dashboard/modules/metrics/dashboards/serve_dashboard_panels.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,71 @@
stack=False,
grid_pos=GridPos(16, 5, 8, 8),
),
Panel(
id=20,
title="Ongoing HTTP Requests",
GeneDer marked this conversation as resolved.
Show resolved Hide resolved
description="The number of ongoing HTTP requests in Ray Serve.",
unit="requests",
targets=[
Target(
expr="ray_serve_num_ongoing_http_requests{{{global_filters}}}",
legend="Ongoing HTTP Requests",
),
],
grid_pos=GridPos(0, 6, 8, 8),
),
Panel(
id=21,
title="Controller Starts",
description="The number of times the Ray Serve controller has started.",
unit="starts",
targets=[
Target(
expr="ray_serve_controller_num_starts{{{global_filters}}}",
legend="Controller Starts",
),
],
grid_pos=GridPos(8, 6, 8, 8),
),
Panel(
id=22,
title="Scheduling Tasks",
description="The number of tasks currently being scheduled in Ray Serve.",
unit="tasks",
targets=[
Target(
expr="ray_serve_num_scheduling_tasks{{{global_filters}}}",
legend="Scheduling Tasks",
),
],
grid_pos=GridPos(16, 6, 8, 8),
),
Panel(
id=23,
title="Scheduling Tasks in Backoff",
description="The number of scheduling tasks in backoff state in Ray Serve.",
unit="tasks",
targets=[
Target(
expr="ray_serve_num_scheduling_tasks_in_backoff{{{global_filters}}}",
legend="Scheduling Tasks in Backoff",
),
],
grid_pos=GridPos(0, 7, 8, 8),
),
Panel(
id=24,
title="Controller Control Loop Duration",
description="The duration of the control loop within the Ray Serve controller.",
unit="seconds",
targets=[
Target(
expr="ray_serve_controller_control_loop_duration_s{{{global_filters}}}",
legend="Control Loop Duration",
),
],
grid_pos=GridPos(8, 7, 8, 8),
),
]

ids = []
Expand Down
Loading