From 40ab5b18990a2e91ffd1090188494bd69ec2ddbc Mon Sep 17 00:00:00 2001 From: Christoph Knittel Date: Thu, 11 Sep 2025 18:08:49 +0200 Subject: [PATCH 1/2] Have `String.charCodeAt` return `option<int>`; add `charCodeAtUnsafe` --- packages/@rescript/runtime/Stdlib_String.res | 10 +++++++- packages/@rescript/runtime/Stdlib_String.resi | 25 ++++++++++++++++--- .../runtime/lib/es6/Stdlib_String.js | 10 ++++++++ .../@rescript/runtime/lib/js/Stdlib_String.js | 10 ++++++++ 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/packages/@rescript/runtime/Stdlib_String.res b/packages/@rescript/runtime/Stdlib_String.res index 5a76f80225..4cd3914f5b 100644 --- a/packages/@rescript/runtime/Stdlib_String.res +++ b/packages/@rescript/runtime/Stdlib_String.res @@ -17,7 +17,15 @@ external compare: (string, string) => Stdlib_Ordering.t = "%compare" @get_index external getUnsafe: (string, int) => string = "" @send external charAt: (string, int) => string = "charAt" -@send external charCodeAt: (string, int) => int = "charCodeAt" +@send external charCodeAtUnsafe: (string, int) => int = "charCodeAt" + +@val @scope("Number") external isNaN: float => bool = "isNaN" +@send external charCodeAt: (string, int) => float = "charCodeAt" +let charCodeAt = (s, i) => { + let c = charCodeAt(s, i) + isNaN(c) ? 
None : Some(c->Stdlib_Int.fromFloat) +} + @send external codePointAt: (string, int) => option<int> = "codePointAt" @send external concat: (string, string) => string = "concat" diff --git a/packages/@rescript/runtime/Stdlib_String.resi b/packages/@rescript/runtime/Stdlib_String.resi index 01825f55fa..b62d72fcb2 100644 --- a/packages/@rescript/runtime/Stdlib_String.resi +++ b/packages/@rescript/runtime/Stdlib_String.resi @@ -213,19 +213,36 @@ external charAt: (string, int) => string = "charAt" `charCodeAt(str, index)` returns the character code at position `index` in string `str` the result is in the range 0-65535, unlike `codePointAt`, so it will not work correctly for characters with code points greater than or equal -to 0x10000. The return type is `float` because this function returns NaN if -`index` is less than zero or greater than the length of the string. +to 0x10000. See [`String.charCodeAt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt) on MDN. ## Examples ```rescript -String.charCodeAt(`😺`, 0) == 0xd83d +String.charCodeAt(`😺`, 0) == Some(0xd83d) +String.charCodeAt("", 0) == None +String.codePointAt(`😺`, 0) == Some(0x1f63a) +``` +*/ +let charCodeAt: (string, int) => option<int> + +/** +`charCodeAtUnsafe(str, index)` returns the character code at position `index` in +string `str`. The result is in the range 0-65535, unlike `codePointAt`, so it +will not work correctly for characters with code points greater than or equal +to 0x10000. +Beware: If the index is out of range, it will return `NaN`, which is not actually a valid int. +See [`String.charCodeAt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt) on MDN. 
+ +## Examples + +```rescript +String.charCodeAtUnsafe(`😺`, 0) == 0xd83d String.codePointAt(`😺`, 0) == Some(0x1f63a) ``` */ @send -external charCodeAt: (string, int) => int = "charCodeAt" +external charCodeAtUnsafe: (string, int) => int = "charCodeAt" /** `codePointAt(str, index)` returns the code point at position `index` within diff --git a/packages/@rescript/runtime/lib/es6/Stdlib_String.js b/packages/@rescript/runtime/lib/es6/Stdlib_String.js index b6e5069cab..3225a94a82 100644 --- a/packages/@rescript/runtime/lib/es6/Stdlib_String.js +++ b/packages/@rescript/runtime/lib/es6/Stdlib_String.js @@ -1,6 +1,15 @@ +function charCodeAt(s, i) { + let c = s.charCodeAt(i); + if (Number.isNaN(c)) { + return; + } else { + return c | 0; + } +} + function indexOfOpt(s, search) { let index = s.indexOf(search); if (index !== -1) { @@ -38,6 +47,7 @@ function capitalize(s) { } export { + charCodeAt, indexOfOpt, lastIndexOfOpt, searchOpt, diff --git a/packages/@rescript/runtime/lib/js/Stdlib_String.js b/packages/@rescript/runtime/lib/js/Stdlib_String.js index 09ffc8b903..562c0942ab 100644 --- a/packages/@rescript/runtime/lib/js/Stdlib_String.js +++ b/packages/@rescript/runtime/lib/js/Stdlib_String.js @@ -1,6 +1,15 @@ 'use strict'; +function charCodeAt(s, i) { + let c = s.charCodeAt(i); + if (Number.isNaN(c)) { + return; + } else { + return c | 0; + } +} + function indexOfOpt(s, search) { let index = s.indexOf(search); if (index !== -1) { @@ -37,6 +46,7 @@ function capitalize(s) { } } +exports.charCodeAt = charCodeAt; exports.indexOfOpt = indexOfOpt; exports.lastIndexOfOpt = lastIndexOfOpt; exports.searchOpt = searchOpt; From b295b3f1d3e8feba7d378c6abcfb8793ed70be0b Mon Sep 17 00:00:00 2001 From: Christoph Knittel Date: Thu, 11 Sep 2025 18:23:45 +0200 Subject: [PATCH 2/2] CHANGELOG # Conflicts: # CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bed0e55169..ace15e0370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-16,6 +16,7 @@ - Fix return type of `String.charCodeAt`. https://github.com/rescript-lang/rescript/pull/7864 - Remove support of JSX children spread. https://github.com/rescript-lang/rescript/pull/7869 +- Have `String.charCodeAt` return `option<int>`; add `String.charCodeAtUnsafe`. https://github.com/rescript-lang/rescript/pull/7877 #### :eyeglasses: Spec Compliance