Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String to int conversion #3937

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/reference/pysupported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ the user, of course.)
The following functions, attributes and methods are currently supported:

* ``len()``
* ``int()`` (conversion of strings to integers)
* ``+`` (concatenation of strings)
* ``in``, ``.contains()``
* ``==``, ``<``, ``<=``, ``>``, ``>=`` (comparison)
Expand Down
29 changes: 29 additions & 0 deletions numba/_helperlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
*/

#include "_pymodule.h"
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <math.h>
Expand Down Expand Up @@ -1124,6 +1126,33 @@ numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind,
}


/*
 * Convert the character buffer `str` to a 64-bit signed integer.
 *
 * `base` is handed to strtoll() as-is; the Python side is expected to have
 * checked beforehand that it is 0 or lies in the range [2, 36].
 *
 * Returns the value parsed by strtoll(), or 0 when `str` is NULL or when
 * strtoll() reports a failure through errno.
 */
NUMBA_EXPORT_FUNC(int64_t)
numba_str2int_unicode(const char* str, int64_t base)
{
    int64_t parsed;

    if (str == NULL) {
        return 0;
    }

    /* Clear errno first so a stale value is not mistaken for a failure. */
    errno = 0;

    /* strtoll() is used instead of strtol() because the Python side
     * always expects an int64_t result. */
    parsed = strtoll(str, NULL, base);

    return (errno == 0) ? parsed : 0;
}


/*
* defined break point for gdb
*/
Expand Down
1 change: 1 addition & 0 deletions numba/_helpermod.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ build_c_helpers_dict(void)

/* Unicode string support */
declmethod(extract_unicode);
declmethod(str2int_unicode);

/* for gdb breakpoint */
declmethod(gdb_breakpoint);
Expand Down
67 changes: 67 additions & 0 deletions numba/tests/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,14 @@ def literal_iter_stopiteration_usecase():
next(i)


def str2int_usecase(x):
    """Return ``int(x)``; use case for the one-argument form of ``int``."""
    converted = int(x)
    return converted


def str2int_usecase_base(x, base):
    """Return ``int(x, base)``; use case for the two-argument form of ``int``."""
    converted = int(x, base)
    return converted


class BaseTest(MemoryLeakMixin, TestCase):
def setUp(self):
super(BaseTest, self).setUp()
Expand Down Expand Up @@ -670,6 +678,65 @@ def pyfunc(x):
self.assertEqual(pyfunc(*args), cfunc(*args),
msg='failed on {}'.format(args))

def test_str2int(self):
    """Check that compiled ``int(str[, base])`` agrees with the interpreter.

    Each case pairs an input string with the list of bases to try in the
    two-argument form.  Only the strings of ``INT_CASES`` are also run
    through the one-argument form.
    """
    INT_CASES = [
        ('123', [10, 16]),
        (' 000168000', [10, 9]),
        (' 000111000', [2, 4, 8, 10, 16, 35]),
        ('2147483647', []),
        ('2147483648', []),
        ('-2147483648', []),
        ('-2147483649', []),
        (' 893 ', []),
        (' -567 ', [8, 16]),
        (' +567 ', [8, 16]),
        ('+567', [0]),
        ('-567', []),
    ]

    INT_EXTRA_CASES = [
        (' 0A00F200', [16, 32]),
        ('A00F200', [16, 32]),
        ('0xA00F200', []),
        (' 0x0A00F200', [16]),
        (' 0x7fffffffffffffff', [16]),
        ('-9223372036854775808', []),
    ]

    # One-argument form: the per-case base list is not needed here.
    plain_py = str2int_usecase
    plain_jit = njit(plain_py)

    for text, _ in INT_CASES:
        self.assertEqual(plain_py(text),
                         plain_jit(text),
                         "int('%s')?" % text)

    # Two-argument form: walk both tables, INT_CASES first.
    based_py = str2int_usecase_base
    based_jit = njit(based_py)

    for text, bases in INT_CASES + INT_EXTRA_CASES:
        for radix in bases:
            self.assertEqual(based_py(text, radix),
                             based_jit(text, radix),
                             "int('%s', %d)?" % (text, radix))

def test_str2int_err(self):
    """Check that the compiled function rejects an out-of-range base.

    Every base tried here lies outside ``{0} | [2, 36]`` and must make the
    compiled function raise ``ValueError`` with the expected message.
    """
    self.disable_leak_check()

    jitted = njit(str2int_usecase_base)
    expected_text = 'Base must be >= 2 and <= 36, or 0'

    for bad_base in [-40, -1, 1, 37, 123]:
        with self.assertRaises(ValueError) as caught:
            jitted('123', bad_base)
        self.assertIn(expected_text, str(caught.exception))


@unittest.skipUnless(_py34_or_later,
'unicode support requires Python 3.4 or later')
Expand Down
52 changes: 51 additions & 1 deletion numba/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import numpy as np
from llvmlite.ir import IntType, Constant
from llvmlite import ir

from numba.extending import (
models,
Expand All @@ -15,7 +16,8 @@
intrinsic,
)
from numba.targets.imputils import (lower_constant, lower_cast, lower_builtin,
iternext_impl, impl_ret_new_ref, RefType)
iternext_impl, impl_ret_new_ref,
impl_ret_untracked, RefType)
from numba.datamodel import register_default, StructModel
from numba import cgutils
from numba import types
Expand Down Expand Up @@ -810,3 +812,51 @@ def iternext_unicode(context, builder, sig, args, result):
# bump index for next cycle
nindex = cgutils.increment_index(builder, index)
builder.store(nindex, iterobj.index)


@intrinsic
def _str2int_unicode(typingctx, string_ty, base_ty):
    """Intrinsic that calls the C helper ``numba_str2int_unicode``.

    The typed signature is ``int64(string_ty, base_ty)``.  The generated
    code passes the ``data`` member of the unicode struct and the base
    value to the helper and returns the helper's 64-bit result.
    """
    signature = types.int64(string_ty, base_ty)

    def codegen(context, builder, sig, args):
        string_arg, base_arg = args

        # Wrap the raw LLVM value in a struct proxy so that its members
        # (in particular ``data``) can be accessed by name.
        proxy_cls = cgutils.create_struct_proxy(types.unicode_type)
        string_struct = proxy_cls(context, builder, value=string_arg)

        # Prototype of the helper as declared here: i64 (i8*, i64).
        i64 = ir.IntType(64)
        char_ptr = ir.IntType(8).as_pointer()
        fnty = ir.FunctionType(i64, [char_ptr, i64])

        # Declare the helper in the current module, or reuse an existing
        # declaration with the same name.
        fn = builder.module.get_or_insert_function(
            fnty, name='numba_str2int_unicode')

        converted = builder.call(fn, [string_struct.data, base_arg])

        # Return the call result as the signature's return type.
        return impl_ret_untracked(context, builder, sig.return_type,
                                  converted)

    return signature, codegen


@overload(int)
def int_overload(string, base=10):
    """Overload of ``int(string, base=10)`` for unicode strings.

    Typing-time checks:

    * ``string`` must be a ``types.UnicodeType``, otherwise ``TypingError``
      is raised;
    * ``base`` must be the default value ``10``, a ``types.Omitted`` or a
      ``types.Integer``, otherwise ``TypingError`` is raised.  An
      ``Optional`` base is unwrapped first, so its payload type is checked.

    The returned implementation raises ``ValueError`` at run time unless
    ``base`` is 0 or lies within ``[2, 36]``; otherwise it delegates to
    ``_str2int_unicode``.
    """
    # Look through Optional so an invalid non-None payload type is caught.
    base_ty = base.type if isinstance(base, types.Optional) else base

    if not isinstance(string, types.UnicodeType):
        raise TypingError('First parameter should be UnicodeType')

    base_accepted = (base_ty == 10 or
                     isinstance(base_ty, (types.Omitted, types.Integer)))
    if not base_accepted:
        raise TypingError('Base parameter should be Integer')

    def impl(string, base=10):
        if not (base >= 2 and base <= 36 or base == 0):
            raise ValueError('Base must be >= 2 and <= 36, or 0')
        return _str2int_unicode(string, base)

    return impl