diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8bf6c44b1d669..714120a79e39a 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index dffccbab9a8e9..f6bbb346d10e5 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index b4ab073ec912f..aa455e80ec5d3 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst index aeeaf17c09aa5..444d10dd08e9c 100644 --- a/libc/docs/dev/undefined_behavior.rst +++ b/libc/docs/dev/undefined_behavior.rst @@ -156,3 +156,10 @@ parsed as normal. For l64a it's unspecified what happens if the input value is negative. For LLVM-libc, all inputs to l64a are treated as unsigned 32 bit ints. Additionally, the return of l64a is in a thread-local buffer that's overwritten on each call. 
+ +`inet_aton` and Non-Standard Binary Integers +-------------------------------------------- +The current implementation of the `inet_aton` function utilizes +`internal::strtointeger` to parse IPv4 numbers-and-dots notations. This +approach may permit the use of binary integers (prefixed with 0b), which is not +supported by the standard. diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml index 10cd56d6ce786..6e0629072b6ef 100644 --- a/libc/include/arpa/inet.yaml +++ b/libc/include/arpa/inet.yaml @@ -1,7 +1,8 @@ header: arpa/inet.h header_template: inet.h.def macros: [] -types: [] +types: + - type_name: in_addr enums: [] objects: [] functions: @@ -17,6 +18,13 @@ functions: return_type: uint16_t arguments: - type: uint16_t + - name: inet_aton + standards: + - llvm_libc_ext + return_type: int + arguments: + - type: const char * + - type: in_addr * - name: ntohl standards: - POSIX diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt index 1f39a076fde91..bb43e24ec9d0b 100644 --- a/libc/src/arpa/inet/CMakeLists.txt +++ b/libc/src/arpa/inet/CMakeLists.txt @@ -22,6 +22,19 @@ add_entrypoint_object( libc.src.__support.common ) +add_entrypoint_object( + inet_aton + SRCS + inet_aton.cpp + HDRS + inet_aton.h + DEPENDS + libc.include.arpa_inet + libc.include.llvm-libc-types.in_addr + libc.src.__support.common + libc.src.__support.str_to_integer +) + add_entrypoint_object( ntohl SRCS diff --git a/libc/src/arpa/inet/inet_aton.cpp b/libc/src/arpa/inet/inet_aton.cpp new file mode 100644 index 0000000000000..9e36f13fd589b --- /dev/null +++ b/libc/src/arpa/inet/inet_aton.cpp @@ -0,0 +1,57 @@ +//===-- Implementation of inet_aton function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/inet_aton.h"
+#include "src/__support/common.h"
+#include "src/__support/endian_internal.h"
+#include "src/__support/str_to_integer.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Parses IPv4 numbers-and-dots text cp into *inp; returns 1 if valid, else 0.
+LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) {
+  constexpr int IPV4_MAX_DOT_NUM = 3;
+  unsigned long parts[IPV4_MAX_DOT_NUM + 1] = {0};
+  int dot_num = 0;
+
+  for (; dot_num <= IPV4_MAX_DOT_NUM; ++dot_num) {
+    auto result = internal::strtointeger<unsigned long>(cp, 0);
+    parts[dot_num] = result;
+    // Require a leading digit so whitespace or a sign cannot start a part.
+    if (*cp < '0' || *cp > '9' || result.has_error() || result.parsed_len == 0)
+      return 0;
+    char next_char = *(cp + result.parsed_len);
+    if (next_char != '.' && next_char != '\0')
+      return 0;
+    else if (next_char == '\0')
+      break;
+    else
+      cp += (result.parsed_len + 1);
+  }
+  // Falling out of the loop means a '.' followed the fourth part.
+  if (dot_num > IPV4_MAX_DOT_NUM)
+    return 0;
+
+  // Build the host-order address: every part but the last is one byte, and
+  // the last part fills all of the remaining low-order bytes.
+  unsigned long result = 0;
+  for (int i = 0; i <= dot_num; ++i) {
+    unsigned long max_part =
+        i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
+    if (parts[i] > max_part)
+      return 0;
+    int shift = i == dot_num ? 0 : 8 * (IPV4_MAX_DOT_NUM - i);
+    result |= parts[i] << shift;
+  }
+  // A null inp only validates cp; otherwise store in network byte order.
+  if (inp)
+    inp->s_addr = Endian::to_big_endian(static_cast<uint32_t>(result));
+
+  return 1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/arpa/inet/inet_aton.h b/libc/src/arpa/inet/inet_aton.h
new file mode 100644
index 0000000000000..ea387d1f6b2f6
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.h
@@ -0,0 +1,21 @@
+//===-- Implementation header of inet_aton ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+
+#include "include/llvm-libc-types/in_addr.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Parses IPv4 numbers-and-dots text cp into *inp; returns 1 if valid, else 0.
+int inet_aton(const char *cp, in_addr *inp);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt
index 6e78e3a50e612..d24cd4450d895 100644
--- a/libc/test/src/arpa/inet/CMakeLists.txt
+++ b/libc/test/src/arpa/inet/CMakeLists.txt
@@ -26,6 +26,17 @@ add_libc_unittest(
     libc.src.arpa.inet.ntohs
 )
 
+add_libc_unittest(
+  inet_aton
+  SUITE
+    libc_arpa_inet_unittests
+  SRCS
+    inet_aton_test.cpp
+  DEPENDS
+    libc.src.arpa.inet.htonl
+    libc.src.arpa.inet.inet_aton
+)
+
 add_libc_unittest(
   ntohl
   SUITE
diff --git a/libc/test/src/arpa/inet/inet_aton_test.cpp b/libc/test/src/arpa/inet/inet_aton_test.cpp
new file mode 100644
index 0000000000000..c9c97870e0dff
--- /dev/null
+++ b/libc/test/src/arpa/inet/inet_aton_test.cpp
@@ -0,0 +1,92 @@
+//===-- Unittests for inet_aton -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/htonl.h"
+#include "src/arpa/inet/inet_aton.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+TEST(LlvmLibcInetAton, ValidTest) {
+  in_addr addr;
+
+  // Four parts (a.b.c.d): one byte each.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1.2.4", &addr));
+  ASSERT_EQ(htonl(0x7f010204), addr.s_addr);
+
+  // Three parts (a.b.c): c covers the low 16 bits.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1.4", &addr));
+  ASSERT_EQ(htonl(0x7f010004), addr.s_addr);
+
+  // Two parts (a.b): b covers the low 24 bits.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1", &addr));
+  ASSERT_EQ(htonl(0x7f000001), addr.s_addr);
+
+  // One part (a): the whole 32-bit address.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0x7f000001", &addr));
+  ASSERT_EQ(htonl(0x7f000001), addr.s_addr);
+
+  // Lowercase 0x prefix with mixed-case hex digits.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0xFf.0.0.1", &addr));
+  ASSERT_EQ(htonl(0xff000001), addr.s_addr);
+
+  // Uppercase 0X prefix with mixed-case hex digits.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0XfF.0.0.1", &addr));
+  ASSERT_EQ(htonl(0xff000001), addr.s_addr);
+
+  // Leading 0 selects octal.
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0177.0.0.1", &addr));
+  ASSERT_EQ(htonl(0x7f000001), addr.s_addr);
+
+  addr.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("036", &addr));
+  ASSERT_EQ(htonl(036U), addr.s_addr);
+}
+
+TEST(LlvmLibcInetAton, InvalidTest) {
+  ASSERT_EQ(0, inet_aton("", nullptr));           // Empty string.
+  ASSERT_EQ(0, inet_aton("x", nullptr));          // Junk before any digit.
+  ASSERT_EQ(0, inet_aton("127.0.0.1x", nullptr)); // Junk after the address.
+  ASSERT_EQ(0, inet_aton("09.0.0.1", nullptr));   // 9 is not an octal digit.
+  ASSERT_EQ(0, inet_aton("0xg.0.0.1", nullptr));  // g is not a hex digit.
+  ASSERT_EQ(0, inet_aton("1.2.3.4.5", nullptr));  // More than four parts.
+  ASSERT_EQ(0, inet_aton("1.2.3.4.", nullptr));   // Dot after the last part.
+
+  // a.b.c.d form: each part must fit in 8 bits.
+  ASSERT_EQ(0, inet_aton("999.0.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.999.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.999.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0.999", nullptr));
+
+  // a.b.c form: a and b are 8 bits, c is at most 16 bits.
+  ASSERT_EQ(0, inet_aton("256.0.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0x10000", nullptr));
+
+  // a.b form: a is 8 bits, b is at most 24 bits.
+  ASSERT_EQ(0, inet_aton("256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0x1000000", nullptr));
+
+  // a form: the value must fit in 32 bits.
+  ASSERT_EQ(0, inet_aton("0x100000000", nullptr));
+
+  // Too large even for a 64-bit integer.
+  ASSERT_EQ(0, inet_aton("0x10000000000000000", nullptr));
+
+  // Octal 0400 is 256, which exceeds one byte.
+  ASSERT_EQ(0, inet_aton("0400.0.0.1", nullptr));
+}
+
+} // namespace LIBC_NAMESPACE_DECL