diff --git a/src/packages/frontend/project/trial-banner.tsx b/src/packages/frontend/project/trial-banner.tsx index 10528b3ec9..d9f30f4b56 100644 --- a/src/packages/frontend/project/trial-banner.tsx +++ b/src/packages/frontend/project/trial-banner.tsx @@ -380,9 +380,10 @@ interface CountdownProjectProps { } function CountdownProject({ fontSize }: CountdownProjectProps) { - const { status, project, project_id } = useProjectContext(); + const { status, project, project_id, actions } = useProjectContext(); const limit_min = useTypedRedux("customize", "limit_free_project_uptime"); const [showInfo, setShowInfo] = useState(false); + const openFiles = useTypedRedux({ project_id }, "open_files_order"); const triggered = useRef(false); const update = useForceUpdate(); @@ -410,6 +411,9 @@ function CountdownProject({ fontSize }: CountdownProjectProps) { if (countdown < 0 && !triggered.current) { triggered.current = true; + + // This closes all tabs and then stops the project. + openFiles.map((path) => actions?.close_tab(path)); redux.getActions("projects").stop_project(project_id); } diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts index aa5a91847c..e75192547b 100644 --- a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -1,28 +1,17 @@ /* -We initially just implement some very simple rate limitations to prevent very -blatant abuse. - -- at most $10^5$ tokens per signed in user per hour \(that's \$0.20\); that allows for major usage... - but if somebody tried to do something really abusive, it would stop it. Nobody - would hit this in practice unless they are really trying to abuse cocalc... - WRONG: it's very easy to hit this due to large inputs, e.g., analyzing a paper. -- at most $10^6$ tokens per hour across all users \-\- that's \$2/hour. That would - come out to a bit more if sustained than my budget, but allows for bursts. 
-
-See https://help.openai.com/en/articles/7039783-chatgpt-api-faq for the upstream rate limits,
-where they limit per minute, not per hour (like below):
-
-    What's the rate limits for the ChatGPT API?
-
-    Free trial users: 20 RPM 40000 TPM
-    Pay-as-you-go users (first 48 hours): 60 RPM 60000 TPM
-    Pay-as-you-go users (after 48 hours): 3500 RPM 90000 TPM
-
-    RPM = requests per minute
-    TPM = tokens per minute
+This is a basic rate limitation for free and metered usage of LLMs.
+- any call must be identified by an account (we previously identified calls by just a cookie ID, but that got abused, hence noAccount=0)
+- There is a distinction between "cocalc.com" and "on-prem":
+  - cocalc.com has some models (the more expensive ones) which are metered per token and some which are free
+  - on-prem: there is only rate limiting, no metered usage
+- quotas are adjustable
+- at its core, this should keep individual users from too much free usage, and cap the overall usage
+- monitoring as necessary, to give feedback for tweaking the parameters
 */
 
-import { newCounter, newHistogram } from "@cocalc/backend/metrics";
+import { isObject } from "lodash";
+
+import { newCounter, newGauge, newHistogram } from "@cocalc/backend/metrics";
 import { process_env_int } from "@cocalc/backend/misc";
 import getPool, { CacheTime } from "@cocalc/database/pool";
 import { getServerSettings } from "@cocalc/database/settings";
@@ -41,7 +30,7 @@ import {
 } from "@cocalc/util/db-schema/llm-utils";
 import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults";
 import { isValidUUID } from "@cocalc/util/misc";
-import { isObject } from "lodash";
+import isValidAccount from "../accounts/is-valid-account";
 
 // These are tokens over a given period of time – summed by account/analytics_cookie or global.
const QUOTAS = { @@ -50,18 +39,18 @@ const QUOTAS = { global: process_env_int("COCALC_LLM_QUOTA_GLOBAL", 10 ** 6), } as const; -const prom_quotas = newHistogram( +const prom_quota_global = newGauge( + "llm", + "abuse_usage_global_pct", + "Language model abuse limit, global, 0 to 100 percent of limit, rounded", + ["quota"], +); + +const prom_quota_per_account = newHistogram( "llm", - "abuse_usage", - "Language model abuse usage", - { - buckets: - // 10 buckets evenly spaced from 0 to QUOTAS.global - Array.from({ length: 10 }, (_, i) => - Math.floor((i * QUOTAS.global) / 10), - ), - labels: ["usage"], - }, + "abuse_usage_account_pct", + "Language model usage per account, to see if users reach certain thresholds for their account usage.", + { buckets: [25, 50, 75, 100, 110] }, ); const prom_rejected = newCounter( @@ -104,7 +93,6 @@ export async function checkForAbuse({ (await getServerSettings()).kucalc === KUCALC_COCALC_COM; if (!isFreeModel(model, is_cocalc_com)) { - // we exclude Ollama (string), because it is free. const service = model2service(model) as LanguageServiceCore; // This is a for-pay product, so let's make sure user can purchase it. await assertPurchaseAllowed({ account_id, service }); @@ -122,7 +110,9 @@ export async function checkForAbuse({ analytics_cookie, }); - prom_quotas.labels("recent").observe(usage); + // this fluctuates for each account, we'll tally up how often users end up in certain usage buckets + // that's more explicit than a histogram + prom_quota_per_account.observe(100 * (usage / QUOTAS.account)); // console.log("usage = ", usage); if (account_id) { @@ -146,8 +136,9 @@ export async function checkForAbuse({ // Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently, // or just a general huge surge in usage. 
const overallUsage = await recentUsage({ cache: "long", period: "1 hour" }); - prom_quotas.labels("global").observe(overallUsage); - // console.log("overallUsage = ", usage); + prom_quota_global + .labels("global") + .set(Math.round(100 * (overallUsage / QUOTAS.global))); if (overallUsage > QUOTAS.global) { prom_rejected.labels("global").inc(); throw new Error( @@ -175,11 +166,7 @@ async function recentUsage({ const pool = getPool(cache); let query, args; if (account_id) { - const { rows } = await pool.query( - "SELECT COUNT(*) FROM accounts WHERE account_id=$1", - [account_id], - ); - if (rows.length == 0) { + if (!(await isValidAccount(account_id))) { throw Error(`invalid account_id ${account_id}`); } query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${period}'`;