Skip to content

Commit

Permalink
refactor: add time to first token to observations_view (#2092)
Browse files Browse the repository at this point in the history
  • Loading branch information
marcklingen committed May 21, 2024
1 parent 6435ce1 commit fe75e4c
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 23 deletions.
1 change: 1 addition & 0 deletions packages/shared/prisma/generated/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ export type ObservationView = {
calculated_output_cost: string | null;
calculated_total_cost: string | null;
latency: number | null;
time_to_first_token: number | null;
};
export type PosthogIntegration = {
project_id: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
-- observations_view: every column of "observations", plus the pricing columns of
-- the single best-matching row from "models" (if any) and derived columns for
-- calculated costs, latency and time to first token.
CREATE OR REPLACE VIEW "observations_view" AS
SELECT
o.*,
m.id AS "model_id",
m.start_date AS "model_start_date",
m.input_price,
m.output_price,
m.total_price,
m.tokenizer_config AS "tokenizer_config",
-- Costs are derived from token counts and model prices only when the observation
-- carries none of input_cost / output_cost / total_cost; otherwise the stored
-- per-column value is passed through unchanged (and may itself be NULL).
CASE
WHEN o.input_cost IS NULL AND o.output_cost IS NULL AND o.total_cost IS NULL THEN
o.prompt_tokens::decimal * m.input_price
ELSE
o.input_cost
END AS "calculated_input_cost",
CASE
WHEN o.input_cost IS NULL AND o.output_cost IS NULL AND o.total_cost IS NULL THEN
o.completion_tokens::decimal * m.output_price
ELSE
o.output_cost
END AS "calculated_output_cost",
CASE
WHEN o.input_cost IS NULL AND o.output_cost IS NULL AND o.total_cost IS NULL THEN
-- Prefer the model's total_price when both it and total_tokens are available;
-- otherwise fall back to the sum of the input and output components.
CASE
WHEN m.total_price IS NOT NULL AND o.total_tokens IS NOT NULL THEN
m.total_price * o.total_tokens
ELSE
o.prompt_tokens::decimal * m.input_price +
o.completion_tokens::decimal * m.output_price
END
ELSE
o.total_cost
END AS "calculated_total_cost",
-- Latency: end_time minus start_time in seconds (EXTRACT(EPOCH ...) yields seconds);
-- NULL while the observation has no end_time.
CASE WHEN o.end_time IS NULL THEN NULL ELSE (EXTRACT(EPOCH FROM o."end_time") - EXTRACT(EPOCH FROM o."start_time"))::double precision END AS "latency",
-- Time to first token: completion_start_time minus start_time in seconds;
-- NULL when either timestamp is missing.
-- NOTE(review): completion_start_time and start_time inside EXTRACT are unqualified,
-- unlike the rest of this view; presumably they resolve to o's columns -- confirm
-- that "models" has no columns with these names.
CASE WHEN o.completion_start_time IS NOT NULL AND o.start_time IS NOT NULL THEN EXTRACT(EPOCH FROM (completion_start_time - start_time))::double precision ELSE NULL END as "time_to_first_token"

FROM
observations o
-- LEFT JOIN LATERAL ... ON TRUE: the subquery is evaluated per observation and
-- returns at most one model (LIMIT 1); observations without a matching model are
-- kept, with all m.* columns NULL.
LEFT JOIN LATERAL (
SELECT
models.*
FROM
models
-- Candidates: models of the same project or without a project, with the same
-- name and unit, whose start_date is before the observation start or unset.
WHERE (models.project_id = o.project_id OR models.project_id IS NULL)
AND models.model_name = o.internal_model
AND (models.start_date < o.start_time OR models.start_date IS NULL)
AND o.unit::TEXT = models.unit
-- Ranking: models with a project_id sort before those without, then the most
-- recent start_date first, with models lacking a start_date last.
ORDER BY
models.project_id ASC, -- in postgres, NULLs are sorted last when ordering ASC
models.start_date DESC NULLS LAST -- now, NULLs are sorted last when ordering DESC as well
LIMIT 1
) m ON TRUE


-- requirements:
-- 1. The view should return all columns from the observations table
-- 2. The view should match at most one model for each observation. A model matches if:
-- a. It has the same project_id as the observation; otherwise a model without project_id is used.
-- b. Its model_name equals the observation's internal_model.
-- c. Its start_date is earlier than the observation start_time; otherwise a model without start_date is used. Among dated matches, the most recent start_date wins.
-- d. It has the same unit as the observation.
1 change: 1 addition & 0 deletions packages/shared/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ view ObservationView {
calculatedOutputCost Decimal? @map("calculated_output_cost")
calculatedTotalCost Decimal? @map("calculated_total_cost")
latency Float? @map("latency")
timeToFirstToken Float? @map("time_to_first_token")
@@map("observations_view")
}
Expand Down
13 changes: 11 additions & 2 deletions packages/shared/prisma/seed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -528,9 +528,9 @@ function createObjects(
const spanTsStart = new Date(
traceTs.getTime() + Math.floor(Math.random() * 30)
);
// random duration of up to 30ms
// random duration of up to 5000ms
const spanTsEnd = new Date(
spanTsStart.getTime() + Math.floor(Math.random() * 30)
spanTsStart.getTime() + Math.floor(Math.random() * 5000)
);

const span = {
Expand Down Expand Up @@ -574,6 +574,13 @@ function createObjects(
(spanTsEnd.getTime() - generationTsStart.getTime())
)
);
// roughly one third of the way from generation start to generation end
const generationTsCompletionStart = new Date(
generationTsStart.getTime() +
Math.floor(
(generationTsEnd.getTime() - generationTsStart.getTime()) / 3
)
);

const promptTokens = Math.floor(Math.random() * 1000) + 300;
const completionTokens = Math.floor(Math.random() * 500) + 100;
Expand Down Expand Up @@ -602,6 +609,8 @@ function createObjects(
id: `generation-${v4()}`,
startTime: generationTsStart,
endTime: generationTsEnd,
completionStartTime:
Math.random() > 0.5 ? generationTsCompletionStart : undefined,
name: `generation-${i}-${j}-${k}`,
projectId: trace.projectId,
promptId: promptId,
Expand Down
16 changes: 4 additions & 12 deletions web/src/components/table/use-cases/generations.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export type GenerationsTableRow = {
endTime?: string;
completionStartTime?: Date;
latency?: number;
timeToFirstToken?: number;
name?: string;
model?: string;
// i/o not set explicitly, but fetched from the server from the cell
Expand Down Expand Up @@ -310,21 +311,12 @@ export default function GenerationsTable({
enableHiding: true,
enableSorting: true,
cell: ({ row }) => {
const startTime: Date = row.getValue("startTime");
const completionStartTime: Date | undefined =
const timeToFirstToken: number | undefined =
row.getValue("timeToFirstToken");

if (!completionStartTime) {
return undefined;
}

const latencyInSeconds =
intervalInSeconds(startTime, completionStartTime) || "-";
return (
<span>
{typeof latencyInSeconds === "number"
? formatIntervalSeconds(latencyInSeconds)
: latencyInSeconds}
{timeToFirstToken ? formatIntervalSeconds(timeToFirstToken) : "-"}
</span>
);
},
Expand Down Expand Up @@ -623,7 +615,7 @@ export default function GenerationsTable({
traceName: generation.traceName ?? "",
startTime: generation.startTime,
endTime: generation.endTime?.toLocaleString() ?? undefined,
timeToFirstToken: generation.completionStartTime ?? undefined,
timeToFirstToken: generation.timeToFirstToken ?? undefined,
latency: generation.latency ?? undefined,
totalCost: generation.calculatedTotalCost ?? undefined,
inputCost: generation.calculatedInputCost ?? undefined,
Expand Down
8 changes: 2 additions & 6 deletions web/src/components/trace/ObservationPreview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,10 @@ export const ObservationPreview = (props: {
projectId={preloadedObservation.projectId}
/>
) : undefined}
{preloadedObservation.completionStartTime ? (
{preloadedObservation.timeToFirstToken ? (
<Badge variant="outline">
Time to first token:{" "}
{formatIntervalSeconds(
(preloadedObservation.completionStartTime.getTime() -
preloadedObservation.startTime.getTime()) /
1000,
)}
{formatIntervalSeconds(preloadedObservation.timeToFirstToken)}
</Badge>
) : null}
{preloadedObservation.endTime ? (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ export async function getAllGenerations({
o.trace_id as "traceId",
t.name as "traceName",
o.completion_start_time as "completionStartTime",
o.time_to_first_token as "timeToFirstToken",
o.prompt_tokens as "promptTokens",
o.completion_tokens as "completionTokens",
o.total_tokens as "totalTokens",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ export function transformStreamToCsv(): Transform {
row.endTime?.toISOString() ?? "",
row.completionStartTime?.toISOString() ?? "",
// time to first token
row.completionStartTime
? intervalInSeconds(row.startTime, row.completionStartTime).toFixed(2)
: "",
row.timeToFirstToken?.toFixed(2) ?? "",
row.scores ? JSON.stringify(row.scores) : "",
row.latency ? formatIntervalSeconds(row.latency).slice(0, -1) : "",
// latency per token
Expand Down
1 change: 1 addition & 0 deletions web/src/server/api/routers/traces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ export const traceRouter = createTRPCRouter({
totalTokens: true,
unit: true,
completionStartTime: true,
timeToFirstToken: true,
promptId: true,
modelId: true,
inputPrice: true,
Expand Down
1 change: 1 addition & 0 deletions worker/generated/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ export type ObservationView = {
calculated_output_cost: string | null;
calculated_total_cost: string | null;
latency: number | null;
time_to_first_token: number | null;
};
export type PosthogIntegration = {
project_id: string;
Expand Down

0 comments on commit fe75e4c

Please sign in to comment.