From 7f8b140ec51d41de3187d9a39a8e915a5878eece Mon Sep 17 00:00:00 2001 From: tan Date: Sat, 20 Dec 2014 16:05:00 +0530 Subject: [PATCH] maybefloat & maybeint: parse string to Nullable Introduces the following methods that parse a string as the indicated type and return a `Nullable` with the result instead of throwing an exception: - `maybeint{T<:Integer}(::Type{T},s::AbstractString)` - `maybefloat32(s::AbstractString)` and `maybefloat64(s::AbstractString)` Ref: discussions at #9316, #3631, #5704 --- base/exports.jl | 3 + base/gmp.jl | 17 ++++- base/string.jl | 76 ++++++++++++++++++++- src/builtins.c | 174 ++++++++++++++++++++++++++++++++++-------------- src/julia.h | 11 +++ test/strings.jl | 22 ++++++ 6 files changed, 249 insertions(+), 54 deletions(-) diff --git a/base/exports.jl b/base/exports.jl index 07fe6b9b3b4a2..e62c406d69414 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -355,6 +355,9 @@ export fldmod, flipsign, float, + maybefloat32, + maybefloat64, + maybeint, floor, fma, frexp, diff --git a/base/gmp.jl b/base/gmp.jl index f885cdc452cba..02861a6a1e8f6 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -76,17 +76,28 @@ signed(x::BigInt) = x BigInt(x::BigInt) = x BigInt(s::AbstractString) = parseint(BigInt,s) -function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int) +function parse_bigint(s::AbstractString, base::Int, nothrow::Bool) + _n = Nullable{BigInt}() s = bytestring(s) sgn, base, i = Base.parseint_preamble(true,s,base) + if i == 0 + nothrow && return _n + throw(ArgumentError("premature end of integer: $(repr(s))")) + end z = BigInt() err = ccall((:__gmpz_set_str, :libgmp), Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32), &z, SubString(s,i), base) - err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))")) - return sgn < 0 ? -z : z + if err != 0 + nothrow && return _n + throw(ArgumentError("invalid BigInt: $(repr(s))")) + end + Nullable(sgn < 0 ? 
-z : z) end +Base.maybeint_internal(::Type{BigInt}, s::AbstractString, base::Int) = parse_bigint(s, base, true) +Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int) = get(parse_bigint(s, base, false)) + function BigInt(x::Union(Clong,Int32)) z = BigInt() ccall((:__gmpz_set_si, :libgmp), Void, (Ptr{BigInt}, Clong), &z, x) diff --git a/base/string.jl b/base/string.jl index 4d70e7879b28e..aa53d63e14d4c 100644 --- a/base/string.jl +++ b/base/string.jl @@ -1487,7 +1487,7 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c)) function parseint_next(s::AbstractString, i::Int=start(s)) - done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))")) + done(s,i) && (return Char(0), 0, 0) j = i c, i = next(s,i) c, i, j @@ -1495,9 +1495,12 @@ end function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s) + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + sgn = 1 if signed if c == '-' || c == '+' @@ -1505,9 +1508,12 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s,i) end end + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + if base == 0 if c == '0' && !done(s,i) c, i = next(s,i) @@ -1522,9 +1528,71 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) return sgn, base, j end +safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2) +safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) : + (n2 < -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) : + ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2) + +#safe_sub{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 < (typemin(T) + n2)) : (n1 > (typemax(T) + n2))) ? 
Nullable{T}() : Nullable{T}(n1 - n2) +#safe_div{T<:Integer}(n1::T, n2::T) = ((n1 == typemin(T)) && (n2 == T(-1))) ? Nullable{T}() : Nullable{T}(div(n1, n2)) +#safe_abs{T<:Integer}(n::T) = (n == typemin(T)) ? Nullable{T}() : abs(n) + +function maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int) + _n = Nullable{T}() + sgn, base, i = parseint_preamble(T<:Signed,s,base) + (i == 0) && return _n + c, i = parseint_next(s,i) + (i == 0) && return _n + + base = convert(T,base) + m::T = div(typemax(T)-base+1,base) + n::T = 0 + while n <= m + d::T = '0' <= c <= '9' ? c-'0' : + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? c-'a'+a : base + d < base || return _n + n *= base + n += d + if done(s,i) + n *= sgn + return Nullable{T}(n) + end + c, i = next(s,i) + isspace(c) && break + end + (T <: Signed) && (n *= sgn) + while !isspace(c) + d::T = '0' <= c <= '9' ? c-'0' : + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? c-'a'+a : base + d < base || return _n + (T <: Signed) && (d *= sgn) + + safe_n = safe_mul(n, base) + isnull(safe_n) || (safe_n = safe_add(get(safe_n), d)) + isnull(safe_n) && return Nullable{T}() + n = get(safe_n) + done(s,i) && return Nullable{T}(n) + c, i = next(s,i) + end + while !done(s,i) + c, i = next(s,i) + isspace(c) || return _n + end + return Nullable{T}(n) +end +maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int) = + maybeint_internal(T, s, base, base <= 36 ? 10 : 36) +maybeint{T<:Integer}(::Type{T}, s::AbstractString) = maybeint_internal(T,s,0) + function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int) sgn, base, i = parseint_preamble(T<:Signed,s,base) + (i == 0) && throw(ArgumentError("premature end of integer: $(repr(s))")) + c, i = parseint_next(s,i) + (i == 0) && throw(ArgumentError("premature end of integer: $(repr(s))")) + base = convert(T,base) ## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt m::T = T===UInt128 || T===Int128 ? 
typemax(T) : div(typemax(T)-base+1,base) @@ -1608,6 +1676,12 @@ end float(x::AbstractString) = parsefloat(x) parsefloat(x::AbstractString) = parsefloat(Float64,x) +maybefloat64(s::AbstractString) = ccall(:jl_maybe_strtod, Nullable{Float64}, (Ptr{UInt8},), s) +maybefloat64(s::SubString) = ccall(:jl_maybe_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) + +maybefloat32(s::AbstractString) = ccall(:jl_maybe_strtof, Nullable{Float32}, (Ptr{UInt8},), s) +maybefloat32(s::SubString) = ccall(:jl_maybe_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) + float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a) # find the index of the first occurrence of a value in a byte array diff --git a/src/builtins.c b/src/builtins.c index 62e3a6bd43e91..b665b4fe0c8ec 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -737,13 +737,34 @@ DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a) // printing ------------------------------------------------------------------- -DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +int substr_isspace(char *p, char *pend) { + while (p != pend) { + if (!isspace((unsigned char)*p)) { + return 0; + } + p++; + } + return 1; +} + +int str_isspace(char *p) { + while (*p != '\0') { + if (!isspace((unsigned char)*p)) { + return 0; + } + p++; + } + return 1; +} + +DLLEXPORT jl_nullable_float64_t jl_maybe_substrtod(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. 
char *newstr = (char*)malloc(len+1); @@ -752,38 +773,65 @@ DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) bstr = newstr; pend = bstr+len; } - *out = strtod_c(bstr, &p); - if (p == bstr || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) + double out = strtod_c(bstr, &p); + + if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { err = 1; - // Deal with case where the substring might be something like "1 ", - // which is OK, and "1 X", which we don't allow. - while (p != pend) { - if (!isspace((unsigned char)*p)) { - err = 1; - break; - } - p++; } + else if (p == bstr) { + err = 1; + } + else { + // Deal with case where the substring might be something like "1 ", + // which is OK, and "1 X", which we don't allow. + err = substr_isspace(p, pend) ? 0 : 1; + } + if (bstr != str+offset) free(bstr); - return err; + + return (jl_nullable_float64_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtod(char *str, double *out) +DLLEXPORT jl_nullable_float64_t jl_maybe_strtod(char *str) { char *p; + int err = 0; + errno = 0; - *out = strtod_c(str, &p); - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + double out = strtod_c(str, &p); + + if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { + err = 1; + } + else if (p == str) { + err = 1; + } + else { + err = str_isspace(p) ? 
0 : 1; } - return 0; + + return (jl_nullable_float64_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +{ + jl_nullable_float64_t nd = jl_maybe_substrtod(str, offset, len); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtod(char *str, double *out) +{ + jl_nullable_float64_t nd = jl_maybe_strtod(str); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; } // MSVC pre-2013 did not define HUGE_VALF @@ -791,13 +839,14 @@ DLLEXPORT int jl_strtod(char *str, double *out) #define HUGE_VALF (1e25f * 1e25f) #endif -DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +DLLEXPORT jl_nullable_float32_t jl_maybe_substrtof(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. char *newstr = (char*)malloc(len+1); @@ -807,46 +856,71 @@ DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) pend = bstr+len; } #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(bstr, &p); + float out = (float)strtod_c(bstr, &p); #else - *out = strtof_c(bstr, &p); + float out = strtof_c(bstr, &p); #endif - if (p == bstr || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) + if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { err = 1; - // Deal with case where the substring might be something like "1 ", - // which is OK, and "1 X", which we don't allow. - while (p != pend) { - if (!isspace((unsigned char)*p)) { - err = 1; - break; - } - p++; } + else if (p == bstr) { + err = 1; + } + else { + // Deal with case where the substring might be something like "1 ", + // which is OK, and "1 X", which we don't allow. + err = substr_isspace(p, pend) ? 
0 : 1; + } + if (bstr != str+offset) free(bstr); - return err; + + return (jl_nullable_float32_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtof(char *str, float *out) +DLLEXPORT jl_nullable_float32_t jl_maybe_strtof(char *str) { char *p; + int err = 0; + errno = 0; #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(str, &p); + float out = (float)strtod_c(str, &p); #else - *out = strtof_c(str, &p); + float out = strtof_c(str, &p); #endif - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { + err = 1; + } + else if (p == str) { + err = 1; } - return 0; + else { + err = str_isspace(p) ? 0 : 1; + } + + return (jl_nullable_float32_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +{ + jl_nullable_float32_t nf = jl_maybe_substrtof(str, offset, len); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtof(char *str, float *out) +{ + jl_nullable_float32_t nf = jl_maybe_strtof(str); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; } // showing -------------------------------------------------------------------- diff --git a/src/julia.h b/src/julia.h index 618ba84b121d3..c2f42fe4cd404 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1494,6 +1494,17 @@ DLLEXPORT extern int jl_ver_patch(void); DLLEXPORT extern int jl_ver_is_release(void); DLLEXPORT extern const char* jl_ver_string(void); +// nullable struct representations +typedef struct { + uint8_t isnull; + double value; +} jl_nullable_float64_t; + +typedef struct { + uint8_t isnull; + float value; +} jl_nullable_float32_t; + #ifdef __cplusplus } #endif diff --git a/test/strings.jl b/test/strings.jl index af8031746269e..022335481bbcf 100644 --- a/test/strings.jl +++ b/test/strings.jl @@ 
-1405,3 +1405,25 @@ gstr = Base.GenericString("12"); # issue #10307 @test typeof(map(Int16,String[])) == Vector{Int16} + +for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128] + for i in [typemax(T), typemin(T)] + s = "$i" + @test get(maybeint(T, s)) == i + end +end + +for T in [Int8, Int16, Int32, Int64, Int128] + for i in [typemax(T), typemin(T)] + f = "$(i)0" + @test isnull(maybeint(T, f)) + end +end + +@test get(maybeint(BigInt, "1234567890")) == BigInt(1234567890) +@test isnull(maybeint(BigInt, "1234567890-")) + +@test get(maybefloat64("64")) == 64.0 +@test isnull(maybefloat64("64o")) +@test get(maybefloat32("32")) == 32.0f0 +@test isnull(maybefloat32("32o"))