Skip to content

Commit

Permalink
maybefloat & maybeint: parse string to Nullable
Browse files Browse the repository at this point in the history
Introduces the following methods, which parse a string as the indicated type and return a `Nullable` holding the result instead of throwing an exception:
- `maybeint{T<:Integer}(::Type{T},s::AbstractString)`
- `maybefloat32(s::AbstractString)` and `maybefloat64(s::AbstractString)`

Ref: discussions at JuliaLang#9316, JuliaLang#3631, JuliaLang#5704
  • Loading branch information
tanmaykm committed Mar 12, 2015
1 parent f54ba05 commit 7f8b140
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 54 deletions.
3 changes: 3 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ export
fldmod,
flipsign,
float,
maybefloat32,
maybefloat64,
maybeint,
floor,
fma,
frexp,
Expand Down
17 changes: 14 additions & 3 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,28 @@ signed(x::BigInt) = x
BigInt(x::BigInt) = x
BigInt(s::AbstractString) = parseint(BigInt,s)

# Parse `s` as a `BigInt` in the given `base`, returning a `Nullable{BigInt}`.
#
# `Base.parseint_preamble` is used to obtain the sign, the effective base and
# the index `i` at which the digits start; `i == 0` means the string ended
# before any digit was found.
#
# Failure handling is selected by `nothrow`:
#   * `nothrow == true`  -> a null `Nullable{BigInt}` is returned;
#   * `nothrow == false` -> an `ArgumentError` is thrown.
#
# Both the throwing (`parseint_nocheck`) and the non-throwing
# (`maybeint_internal`) BigInt entry points are thin wrappers around this
# function, so it must always return a `Nullable` on success.
function parse_bigint(s::AbstractString, base::Int, nothrow::Bool)
    _n = Nullable{BigInt}()
    s = bytestring(s)
    sgn, base, i = Base.parseint_preamble(true,s,base)
    if i == 0
        nothrow && return _n
        throw(ArgumentError("premature end of integer: $(repr(s))"))
    end
    z = BigInt()
    # GMP parses the digits (sign already stripped) and reports nonzero on error.
    err = ccall((:__gmpz_set_str, :libgmp),
                Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
                &z, SubString(s,i), base)
    if err != 0
        nothrow && return _n
        throw(ArgumentError("invalid BigInt: $(repr(s))"))
    end
    Nullable(sgn < 0 ? -z : z)
end

# Non-throwing BigInt entry point: forwards to `parse_bigint` with
# `nothrow = true` and hands back its `Nullable{BigInt}` unchanged.
function Base.maybeint_internal(::Type{BigInt}, s::AbstractString, base::Int)
    parse_bigint(s, base, true)
end

# Throwing BigInt entry point: forwards to `parse_bigint` with
# `nothrow = false` and unwraps the resulting `Nullable`.
function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
    get(parse_bigint(s, base, false))
end

function BigInt(x::Union(Clong,Int32))
z = BigInt()
ccall((:__gmpz_set_si, :libgmp), Void, (Ptr{BigInt}, Clong), &z, x)
Expand Down
76 changes: 75 additions & 1 deletion base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))

# Read the character of `s` located at index `i` (default: the start of `s`).
#
# Returns the triple `(c, i_next, i_cur)`: the character, the index that
# follows it, and the index it was read from.
#
# When `s` is already exhausted at `i`, the sentinel `(Char(0), 0, 0)` is
# returned instead of throwing. Callers test the returned index against 0 and
# decide for themselves whether running out of input is an error (throwing
# parsers) or simply yields a null result (`maybeint`).
function parseint_next(s::AbstractString, i::Int=start(s))
    done(s,i) && (return Char(0), 0, 0)
    j = i
    c, i = next(s,i)
    return c, i, j
end

function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
c, i, j = parseint_next(s)

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

sgn = 1
if signed
if c == '-' || c == '+'
(c == '-') && (sgn = -1)
c, i, j = parseint_next(s,i)
end
end

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

if base == 0
if c == '0' && !done(s,i)
c, i = next(s,i)
Expand All @@ -1522,9 +1528,71 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
return sgn, base, j
end

# Overflow-checked addition: returns `Nullable{T}(n1 + n2)`, or a null
# `Nullable{T}` when the sum would fall outside `typemin(T):typemax(T)`.
# The range test is done before the addition so the sum is only computed
# when it is known to fit.
function safe_add{T<:Integer}(n1::T, n2::T)
    overflows = if n2 > 0
        n1 > (typemax(T) - n2)
    else
        n1 < (typemin(T) - n2)
    end
    overflows && return Nullable{T}()
    Nullable{T}(n1 + n2)
end
# Overflow-checked multiplication: returns `Nullable{T}(n1 * n2)`, or a null
# `Nullable{T}` when the product would fall outside `typemin(T):typemax(T)`.
# The bounds on `n1` are derived by dividing the type limits by `n2`; the
# cases `n2 > 0`, `n2 < -1` and the remaining values (`-1`, `0`) are handled
# separately so that none of the divisions can itself overflow or divide by 0.
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = (n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))
    elseif n2 < -1
        overflows = (n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))
    else
        # n2 is 0 or -1 here; only typemin(T) * -1 cannot be represented.
        overflows = (n2 == -1) && n1 == typemin(T)
    end
    overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end

#safe_sub{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 < (typemin(T) + n2)) : (n1 > (typemax(T) + n2))) ? Nullable{T}() : Nullable{T}(n1 - n2)
#safe_div{T<:Integer}(n1::T, n2::T) = ((n1 == typemin(T)) && (n2 == T(-1))) ? Nullable{T}() : Nullable{T}(div(n1, n2))
#safe_abs{T<:Integer}(n::T) = (n == typemin(T)) ? Nullable{T}() : abs(n)

# Parse `s` as an integer of type `T` without throwing.
#
# Arguments:
#   T    - target integer type; a sign is only accepted when `T <: Signed`.
#   s    - the text to parse.
#   base - numeric base handed to `parseint_preamble`, which returns the base
#          actually used.
#   a    - value of the digit 'a'; lower-case letters map to `a`, `a+1`, ...
#          (upper-case letters always start at 10).
#
# Returns `Nullable{T}(value)` on success and a null `Nullable{T}` when the
# input is empty, contains a character that is not a digit of `base`, has
# non-whitespace text after the number, or does not fit in `T`.
function maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
    failure = Nullable{T}()

    sgn, base, i = parseint_preamble(T<:Signed,s,base)
    if i == 0
        return failure
    end
    c, i = parseint_next(s,i)
    if i == 0
        return failure
    end

    base = convert(T,base)
    # While the accumulator is <= limit, one more `acc*base + digit` step
    # cannot exceed typemax(T), so unchecked arithmetic is safe.
    limit::T = div(typemax(T)-base+1,base)
    acc::T = 0

    # Fast path: unchecked accumulation of the magnitude.
    while acc <= limit
        digit::T = if '0' <= c <= '9'
            c-'0'
        elseif 'A' <= c <= 'Z'
            c-'A'+10
        elseif 'a' <= c <= 'z'
            c-'a'+a
        else
            base   # not a digit character: forces the range check below to fail
        end
        if !(digit < base)
            return failure
        end
        acc *= base
        acc += digit
        if done(s,i)
            acc *= sgn
            return Nullable{T}(acc)
        end
        c, i = next(s,i)
        if isspace(c)
            break
        end
    end

    # From here on the sign is folded into the accumulator (and into each
    # digit) so that the checked arithmetic works on the final signed value.
    if T <: Signed
        acc *= sgn
    end

    # Slow path: every step is overflow-checked via safe_mul / safe_add.
    while !isspace(c)
        digit::T = if '0' <= c <= '9'
            c-'0'
        elseif 'A' <= c <= 'Z'
            c-'A'+10
        elseif 'a' <= c <= 'z'
            c-'a'+a
        else
            base
        end
        if !(digit < base)
            return failure
        end
        if T <: Signed
            digit *= sgn
        end

        checked = safe_mul(acc, base)
        if !isnull(checked)
            checked = safe_add(get(checked), digit)
        end
        if isnull(checked)
            return Nullable{T}()
        end
        acc = get(checked)
        if done(s,i)
            return Nullable{T}(acc)
        end
        c, i = next(s,i)
    end

    # Only whitespace may follow the number.
    while !done(s,i)
        c, i = next(s,i)
        if !isspace(c)
            return failure
        end
    end
    return Nullable{T}(acc)
end
# Three-argument form: chooses the value of the digit 'a' from the base
# (10 for bases up to 36, 36 for larger bases) and delegates to the
# four-argument method.
function maybeint_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int)
    lowercase_start = base <= 36 ? 10 : 36
    maybeint_internal(T, s, base, lowercase_start)
end

# Parse `s` as a `T`, returning a `Nullable{T}` instead of throwing.
# Passes base 0 to `maybeint_internal`.
# NOTE(review): base 0 presumably lets the preamble pick the base from a
# prefix in the string -- confirm against `parseint_preamble`.
function maybeint{T<:Integer}(::Type{T}, s::AbstractString)
    maybeint_internal(T,s,0)
end

function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
sgn, base, i = parseint_preamble(T<:Signed,s,base)
(i == 0) && throw(ArgumentError("premature end of integer: $(repr(s))"))

c, i = parseint_next(s,i)
(i == 0) && throw(ArgumentError("premature end of integer: $(repr(s))"))

base = convert(T,base)
## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)
Expand Down Expand Up @@ -1608,6 +1676,12 @@ end
float(x::AbstractString) = parsefloat(x)
parsefloat(x::AbstractString) = parsefloat(Float64,x)

# Parse `s` as a Float64 via the runtime's `jl_maybe_strtod`; the C function's
# return value is received directly as a `Nullable{Float64}`.
function maybefloat64(s::AbstractString)
    ccall(:jl_maybe_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
end

# SubString method: passes the parent string plus the substring's offset and
# `endof` fields to `jl_maybe_substrtod`.
function maybefloat64(s::SubString)
    ccall(:jl_maybe_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint),
          s.string, s.offset, s.endof)
end

# Parse `s` as a Float32 via the runtime's `jl_maybe_strtof`; the C function's
# return value is received directly as a `Nullable{Float32}`.
function maybefloat32(s::AbstractString)
    ccall(:jl_maybe_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
end

# SubString method: passes the parent string plus the substring's offset and
# `endof` fields to `jl_maybe_substrtof`.
function maybefloat32(s::SubString)
    ccall(:jl_maybe_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint),
          s.string, s.offset, s.endof)
end

float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)

# find the index of the first occurrence of a value in a byte array
Expand Down
174 changes: 124 additions & 50 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,13 +737,34 @@ DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a)

// printing -------------------------------------------------------------------

DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out)
// Returns 1 when every character in the half-open range [p, pend) is
// whitespace (an empty range counts as all-whitespace), 0 otherwise.
int substr_isspace(char *p, char *pend) {
    // Advance over whitespace; stop early at the first non-space character.
    while (p != pend && isspace((unsigned char)*p))
        p++;
    return p == pend;
}

// Returns 1 when the NUL-terminated string at p consists solely of
// whitespace (the empty string included), 0 otherwise.
int str_isspace(char *p) {
    // isspace('\0') is false, so this scan always stops at the terminator.
    while (isspace((unsigned char)*p))
        p++;
    return *p == '\0';
}

DLLEXPORT jl_nullable_float64_t jl_maybe_substrtod(char *str, size_t offset, int len)
{
char *p;
errno = 0;
char *bstr = str+offset;
char *pend = bstr+len;
int err = 0;

errno = 0;
if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) {
// confusing data outside substring. must copy.
char *newstr = (char*)malloc(len+1);
Expand All @@ -752,52 +773,80 @@ DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out)
bstr = newstr;
pend = bstr+len;
}
*out = strtod_c(bstr, &p);
if (p == bstr ||
(errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL)))
double out = strtod_c(bstr, &p);

if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) {
err = 1;
// Deal with case where the substring might be something like "1 ",
// which is OK, and "1 X", which we don't allow.
while (p != pend) {
if (!isspace((unsigned char)*p)) {
err = 1;
break;
}
p++;
}
else if (p == bstr) {
err = 1;
}
else {
// Deal with case where the substring might be something like "1 ",
// which is OK, and "1 X", which we don't allow.
err = substr_isspace(p, pend) ? 0 : 1;
}

if (bstr != str+offset)
free(bstr);
return err;

return (jl_nullable_float64_t){(uint8_t)err, out};
}

// Parse the NUL-terminated string `str` as a double.
//
// Returns a jl_nullable_float64_t whose first member is the "is null" flag
// and whose second member is the parsed value. The flag is set to 1 when:
//   - the conversion reported ERANGE and produced 0 or +/-HUGE_VAL,
//   - no characters were consumed (p == str), or
//   - anything other than whitespace follows the number.
// Otherwise the flag is 0 and the value is the parsed number.
DLLEXPORT jl_nullable_float64_t jl_maybe_strtod(char *str)
{
    char *p;
    int err = 0;

    // strtod only sets errno on failure, so clear it before the call.
    errno = 0;
    double out = strtod_c(str, &p);

    if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) {
        err = 1;
    }
    else if (p == str) {
        err = 1;
    }
    else {
        // "1 " is accepted, "1 X" is not.
        err = str_isspace(p) ? 0 : 1;
    }

    return (jl_nullable_float64_t){(uint8_t)err, out};
}

// Legacy out-parameter interface on top of jl_maybe_substrtod.
// Returns 0 and stores the parsed value in *out on success; returns 1 and
// leaves *out untouched when the nullable result is flagged as null.
DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out)
{
    jl_nullable_float64_t parsed = jl_maybe_substrtod(str, offset, len);
    if (parsed.isnull != 0)
        return 1;
    *out = parsed.value;
    return 0;
}

// Legacy out-parameter interface on top of jl_maybe_strtod.
// Returns 0 and stores the parsed value in *out on success; returns 1 and
// leaves *out untouched when the nullable result is flagged as null.
DLLEXPORT int jl_strtod(char *str, double *out)
{
    jl_nullable_float64_t parsed = jl_maybe_strtod(str);
    if (parsed.isnull != 0)
        return 1;
    *out = parsed.value;
    return 0;
}

// MSVC pre-2013 did not define HUGE_VALF
#ifndef HUGE_VALF
#define HUGE_VALF (1e25f * 1e25f)
#endif

DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out)
DLLEXPORT jl_nullable_float32_t jl_maybe_substrtof(char *str, size_t offset, int len)
{
char *p;
errno = 0;
char *bstr = str+offset;
char *pend = bstr+len;
int err = 0;

errno = 0;
if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) {
// confusing data outside substring. must copy.
char *newstr = (char*)malloc(len+1);
Expand All @@ -807,46 +856,71 @@ DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out)
pend = bstr+len;
}
#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
*out = (float)strtod_c(bstr, &p);
float out = (float)strtod_c(bstr, &p);
#else
*out = strtof_c(bstr, &p);
float out = strtof_c(bstr, &p);
#endif

if (p == bstr ||
(errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF)))
if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) {
err = 1;
// Deal with case where the substring might be something like "1 ",
// which is OK, and "1 X", which we don't allow.
while (p != pend) {
if (!isspace((unsigned char)*p)) {
err = 1;
break;
}
p++;
}
else if (p == bstr) {
err = 1;
}
else {
// Deal with case where the substring might be something like "1 ",
// which is OK, and "1 X", which we don't allow.
err = substr_isspace(p, pend) ? 0 : 1;
}

if (bstr != str+offset)
free(bstr);
return err;

return (jl_nullable_float32_t){(uint8_t)err, out};
}

// Parse the NUL-terminated string `str` as a float.
//
// Returns a jl_nullable_float32_t whose first member is the "is null" flag
// and whose second member is the parsed value. The flag is set to 1 when:
//   - the conversion reported ERANGE and produced 0 or +/-HUGE_VALF,
//   - no characters were consumed (p == str), or
//   - anything other than whitespace follows the number.
// Otherwise the flag is 0 and the value is the parsed number.
DLLEXPORT jl_nullable_float32_t jl_maybe_strtof(char *str)
{
    char *p;
    int err = 0;

    // The conversion only sets errno on failure, so clear it before the call.
    errno = 0;
#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
    // This configuration parses as double and narrows to float.
    float out = (float)strtod_c(str, &p);
#else
    float out = strtof_c(str, &p);
#endif
    if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) {
        err = 1;
    }
    else if (p == str) {
        err = 1;
    }
    else {
        // "1 " is accepted, "1 X" is not.
        err = str_isspace(p) ? 0 : 1;
    }

    return (jl_nullable_float32_t){(uint8_t)err, out};
}

// Legacy out-parameter interface on top of jl_maybe_substrtof.
// Returns 0 and stores the parsed value in *out on success; returns 1 and
// leaves *out untouched when the nullable result is flagged as null.
DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out)
{
    jl_nullable_float32_t parsed = jl_maybe_substrtof(str, offset, len);
    if (parsed.isnull != 0)
        return 1;
    *out = parsed.value;
    return 0;
}

// Legacy out-parameter interface on top of jl_maybe_strtof.
// Returns 0 and stores the parsed value in *out on success; returns 1 and
// leaves *out untouched when the nullable result is flagged as null.
DLLEXPORT int jl_strtof(char *str, float *out)
{
    jl_nullable_float32_t parsed = jl_maybe_strtof(str);
    if (parsed.isnull != 0)
        return 1;
    *out = parsed.value;
    return 0;
}

// showing --------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 7f8b140

Please sign in to comment.