Skip to content

Commit 72e2372

Browse files
feat(pricing): add Z.AI GLM model pricing to hardcoded fallback (#1225)
* feat(pricing): add Z.AI GLM model pricing to hardcoded fallback

  Add per-token pricing for glm-4.5, glm-4.6, glm-4.7, glm-5, glm-5-turbo, and glm-5.1, sourced from https://docs.z.ai/guides/overview/pricing, so offline and fallback cost calculations work for Z.AI's GLM models.

* chore: add .npmrc to .gitignore

---------

Co-authored-by: pullfrog[bot] <226033991+pullfrog[bot]@users.noreply.github.com>
1 parent c868875 commit 72e2372

2 files changed

Lines changed: 73 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
2525
.env.test.local
2626
.env.production.local
2727
.env.local
28+
.npmrc
2829

2930
# caches
3031
.eslintcache

rust/crates/ccusage/src/pricing.rs

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -718,6 +718,52 @@ impl PricingMap {
718718
fast_multiplier: 1.0,
719719
},
720720
);
721+
// Source: https://docs.z.ai/guides/overview/pricing
722+
let glm_base = Pricing {
723+
input: 0.6e-6,
724+
output: 2.2e-6,
725+
cache_create: 0.75e-6,
726+
cache_read: 0.11e-6,
727+
cache_read_explicit: true,
728+
input_above_200k: None,
729+
output_above_200k: None,
730+
cache_create_above_200k: None,
731+
cache_read_above_200k: None,
732+
fast_multiplier: 1.0,
733+
};
734+
self.entries.insert("glm-4.5".to_string(), glm_base);
735+
self.entries.insert("glm-4.6".to_string(), glm_base);
736+
self.entries.insert("glm-4.7".to_string(), glm_base);
737+
self.entries.insert(
738+
"glm-5".to_string(),
739+
Pricing {
740+
input: 1.0e-6,
741+
output: 3.2e-6,
742+
cache_create: 1.25e-6,
743+
cache_read: 0.2e-6,
744+
..glm_base
745+
},
746+
);
747+
self.entries.insert(
748+
"glm-5-turbo".to_string(),
749+
Pricing {
750+
input: 1.2e-6,
751+
output: 4.0e-6,
752+
cache_create: 1.5e-6,
753+
cache_read: 0.24e-6,
754+
..glm_base
755+
},
756+
);
757+
self.entries.insert(
758+
"glm-5.1".to_string(),
759+
Pricing {
760+
input: 1.4e-6,
761+
output: 4.4e-6,
762+
cache_create: 1.75e-6,
763+
cache_read: 0.26e-6,
764+
..glm_base
765+
},
766+
);
721767
self.context_limits.insert("gpt-5.5".to_string(), 1_050_000);
722768
self.context_limits
723769
.insert("grok-4.3".to_string(), 1_000_000);
@@ -941,6 +987,32 @@ mod tests {
941987
assert_eq!(pricing.context_limit("moonshot/kimi-k2.6"), Some(262_144));
942988
}
943989

990+
#[test]
991+
fn embedded_pricing_includes_z_ai_glm_models_for_offline_reports() {
992+
let pricing = PricingMap::load_embedded();
993+
994+
let glm_51 = pricing.find("glm-5.1").unwrap();
995+
assert_eq!(glm_51.input, 1.4e-6);
996+
assert_eq!(glm_51.output, 4.4e-6);
997+
assert_eq!(glm_51.cache_read, 0.26e-6);
998+
assert!(glm_51.cache_read_explicit);
999+
1000+
let glm_5 = pricing.find("glm-5").unwrap();
1001+
assert_eq!(glm_5.input, 1.0e-6);
1002+
assert_eq!(glm_5.output, 3.2e-6);
1003+
assert_eq!(glm_5.cache_read, 0.2e-6);
1004+
1005+
let glm_5_turbo = pricing.find("glm-5-turbo").unwrap();
1006+
assert_eq!(glm_5_turbo.input, 1.2e-6);
1007+
assert_eq!(glm_5_turbo.output, 4.0e-6);
1008+
assert_eq!(glm_5_turbo.cache_read, 0.24e-6);
1009+
1010+
let glm_47 = pricing.find("glm-4.7").unwrap();
1011+
assert_eq!(glm_47.input, 0.6e-6);
1012+
assert_eq!(glm_47.output, 2.2e-6);
1013+
assert_eq!(glm_47.cache_read, 0.11e-6);
1014+
}
1015+
9441016
#[test]
9451017
fn records_whether_cache_read_rate_came_from_litellm_pricing() {
9461018
let mut pricing = PricingMap::default();

0 commit comments

Comments
 (0)