Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs

Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs

Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD)
# arpa/inet.h entrypoints
libc.src.arpa.inet.htonl
libc.src.arpa.inet.htons
libc.src.arpa.inet.inet_aton
libc.src.arpa.inet.ntohl
libc.src.arpa.inet.ntohs

Expand Down
7 changes: 7 additions & 0 deletions libc/docs/dev/undefined_behavior.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,10 @@ parsed as normal. For l64a it's unspecified what happens if the input value is
negative. For LLVM-libc, all inputs to l64a are treated as unsigned 32 bit ints.
Additionally, the return of l64a is in a thread-local buffer that's overwritten
on each call.

`inet_aton` and Non-Standard Binary Integers
--------------------------------------------
The current implementation of the `inet_aton` function uses the same code as
`strtol` to parse the numbers in IPv4 numbers-and-dots notation. This
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: replace internal::strtointeger with "the same code as strtol"

approach may permit the use of binary integers (prefixed with 0b), which are not
supported by the standard.
10 changes: 9 additions & 1 deletion libc/include/arpa/inet.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
header: arpa/inet.h
header_template: inet.h.def
macros: []
types: []
types:
- type_name: in_addr
enums: []
objects: []
functions:
Expand All @@ -17,6 +18,13 @@ functions:
return_type: uint16_t
arguments:
- type: uint16_t
- name: inet_aton
standards:
- llvm_libc_ext
return_type: int
arguments:
- type: const char *
- type: in_addr *
- name: ntohl
standards:
- POSIX
Expand Down
13 changes: 13 additions & 0 deletions libc/src/arpa/inet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ add_entrypoint_object(
libc.src.__support.common
)

add_entrypoint_object(
inet_aton
SRCS
inet_aton.cpp
HDRS
inet_aton.h
DEPENDS
libc.include.arpa_inet
libc.include.llvm-libc-types.in_addr
libc.src.__support.common
libc.src.__support.str_to_integer
)

add_entrypoint_object(
ntohl
SRCS
Expand Down
57 changes: 57 additions & 0 deletions libc/src/arpa/inet/inet_aton.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===-- Implementation of inet_aton function ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/arpa/inet/inet_aton.h"
#include "src/__support/common.h"
#include "src/__support/endian_internal.h"
#include "src/__support/str_to_integer.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) {
constexpr int IPV4_MAX_DOT_NUM = 3;
unsigned long parts[IPV4_MAX_DOT_NUM + 1] = {0};
int dot_num = 0;

for (; dot_num <= IPV4_MAX_DOT_NUM; ++dot_num) {
auto result = internal::strtointeger<unsigned long>(cp, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a good use of this function, but you should add note that this may allow binary integers (with a leading 0b) which isn't in the standard in the undefined behavior doc: https://github.com/llvm/llvm-project/blob/main/libc/docs/dev/undefined_behavior.rst

Copy link
Contributor Author

@c8ef c8ef Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't tested it yet, but a quick look at the code suggests strtointeger might not support binary integers when base = 0. The test file, libc/test/src/__support/str_to_integer_test.cpp, lacks binary integer coverage either.

LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
// sequence of the decimal digits and the letters a (or A) through f (or F)
// with values 10 through 15 respectively." (C standard 6.4.4.1)
if (is_hex_start(src, src_len))
return 16;
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
if (src_len > 0 && src[0] == '0')
return 8;
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
return 10;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test case for binary integers added in c86be34.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, in that case I forgot we haven't added that yet. I'd say don't worry about testing that it doesn't work because it's not a problem if it does and we'll have to change it when binary support is added to strtointeger.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if it does and we'll have to change it when binary support is added to strtointeger.

Yes, so I think that's why we want to keep the binary integers test cases. That way, once binary support is added, we can immediately know if anything needs to be handled on the inet_aton side.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would we need to add beyond a comment? Calling this function with an 0b prefix is undefined behavior so the result doesn't need to be consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Binary integers test removed, undefined_behavior.rst added(Not quite sure about the wording).

parts[dot_num] = result;

if (result.has_error() || result.parsed_len == 0)
return 0;
char next_char = *(cp + result.parsed_len);
if (next_char != '.' && next_char != '\0')
return 0;
else if (next_char == '\0')
break;
else
cp += (result.parsed_len + 1);
}

if (dot_num > IPV4_MAX_DOT_NUM)
return 0;

// converts the Internet host address cp from the IPv4 numbers-and-dots
// notation into binary form (in network byte order)
unsigned long result = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for readability it would be good to have a comment explaining what the format you're parsing is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you prefer the simple term or the comprehensive term?
For the simple term, we could just use the comment a[.b[.c[.d]]]. For the comprehensive term, we can quote something from the standard, like below:

       a.b.c.d
              Each of the four numeric parts specifies a byte of the
              address; the bytes are assigned in left-to-right order to
              produce the binary address.

       a.b.c  Parts a and b specify the first two bytes of the binary
              address.  Part c is interpreted as a 16-bit value that
              defines the rightmost two bytes of the binary address.
              This notation is suitable for specifying (outmoded) Class B
              network addresses.

       a.b    Part a specifies the first byte of the binary address.
              Part b is interpreted as a 24-bit value that defines the
              rightmost three bytes of the binary address.  This notation
              is suitable for specifying (outmoded) Class A network
              addresses.

       a      The value a is interpreted as a 32-bit value that is stored
              directly into the binary address without any byte
              rearrangement.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd go with something shorter than a quote from the standard, but a bit more explanation than just a[.b[.c[.d]]]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment about the format added.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good, I'd say add a[.b[.c[.d]]] in addition.

for (int i = 0; i <= dot_num; ++i) {
unsigned long max_part =
i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
if (parts[i] > max_part)
return 0;
int shift = i == dot_num ? 0 : 8 * (IPV4_MAX_DOT_NUM - i);
result |= parts[i] << shift;
}

if (inp)
inp->s_addr = Endian::to_big_endian(static_cast<uint32_t>(result));

return 1;
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/arpa/inet/inet_aton.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header of inet_aton ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H

#include "include/llvm-libc-types/in_addr.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Parses the IPv4 numbers-and-dots string `cp` (a[.b[.c[.d]]]). Returns 1 if
// the string is a valid address and 0 otherwise. On success, the address is
// written to `inp->s_addr` in network byte order when `inp` is non-null.
int inet_aton(const char *cp, in_addr *inp);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
11 changes: 11 additions & 0 deletions libc/test/src/arpa/inet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ add_libc_unittest(
libc.src.arpa.inet.ntohs
)

add_libc_unittest(
inet_aton
SUITE
libc_arpa_inet_unittests
SRCS
inet_aton_test.cpp
DEPENDS
libc.src.arpa.inet.htonl
libc.src.arpa.inet.inet_aton
)

add_libc_unittest(
ntohl
SUITE
Expand Down
92 changes: 92 additions & 0 deletions libc/test/src/arpa/inet/inet_aton_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
//===-- Unittests for inet_aton -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/arpa/inet/htonl.h"
#include "src/arpa/inet/inet_aton.h"
#include "test/UnitTest/Test.h"

namespace LIBC_NAMESPACE_DECL {

TEST(LlvmLibcInetAton, ValidTest) {
  // Each entry pairs an address string with the value inet_aton should
  // produce for it, written in host byte order.
  struct ValidCase {
    const char *text;
    unsigned int host_value;
  };

  const ValidCase cases[] = {
      {"127.1.2.4", 0x7f010204},  // a.b.c.d
      {"127.1.4", 0x7f010004},    // a.b.c
      {"127.1", 0x7f000001},      // a.b
      {"0x7f000001", 0x7f000001}, // a
      {"0xFf.0.0.1", 0xff000001}, // Hex (0x) and mixed-case hex digits.
      {"0XfF.0.0.1", 0xff000001}, // Hex (0X) and mixed-case hex digits.
      {"0177.0.0.1", 0x7f000001}, // Octal.
      {"036", 036U},              // Octal, single part.
  };

  for (const ValidCase &entry : cases) {
    // Start from a cleared address so a stale value cannot satisfy the check.
    in_addr parsed;
    parsed.s_addr = 0;
    ASSERT_EQ(1, inet_aton(entry.text, &parsed));
    ASSERT_EQ(htonl(entry.host_value), parsed.s_addr);
  }
}

TEST(LlvmLibcInetAton, InvalidTest) {
  // Every string below must be rejected; no output address is requested.
  const char *const rejected[] = {
      "",           // Empty.
      "x",          // Leading junk.
      "127.0.0.1x", // Trailing junk.
      "09.0.0.1",   // Invalid octal.
      "0xg.0.0.1",  // Invalid hex.
      "1.2.3.4.5",  // Too many dots.
      "1.2.3.4.",   // Trailing dot.

      // Out of range a.b.c.d form.
      "999.0.0.1",
      "0.999.0.1",
      "0.0.999.1",
      "0.0.0.999",

      // Out of range a.b.c form.
      "256.0.0",
      "0.256.0",
      "0.0.0x10000",

      // Out of range a.b form.
      "256.0",
      "0.0x1000000",

      // Out of range a form.
      "0x100000000",

      // 64-bit overflow.
      "0x10000000000000000",

      // Out of range octal.
      "0400.0.0.1",
  };

  for (const char *text : rejected) {
    ASSERT_EQ(0, inet_aton(text, nullptr));
  }
}

} // namespace LIBC_NAMESPACE_DECL
Loading