[libc] implement `inet_aton` #162651

c8ef · 2025-10-09T13:22:05Z

This patch adds an implementation of the inet_aton function. Since this function is not explicitly included in POSIX, I have marked it with llvm_libc_ext. It is widely available and commonly used, and it can also be used to implement inet_addr, which is included in POSIX.

llvmbot · 2025-10-09T13:22:42Z

@llvm/pr-subscribers-libc

@llvm/pr-subscribers-backend-risc-v

Author: Connector Switch (c8ef)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/162651.diff

9 Files Affected:

(modified) libc/config/linux/aarch64/entrypoints.txt (+1)
(modified) libc/config/linux/riscv/entrypoints.txt (+1)
(modified) libc/config/linux/x86_64/entrypoints.txt (+1)
(modified) libc/include/arpa/inet.yaml (+9-1)
(modified) libc/src/arpa/inet/CMakeLists.txt (+14)
(added) libc/src/arpa/inet/inet_aton.cpp (+63)
(added) libc/src/arpa/inet/inet_aton.h (+21)
(modified) libc/test/src/arpa/inet/CMakeLists.txt (+13)
(added) libc/test/src/arpa/inet/inet_aton_test.cpp (+93)

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 8bf6c44b1d669..714120a79e39a 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index dffccbab9a8e9..f6bbb346d10e5 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index b4ab073ec912f..aa455e80ec5d3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml
index 10cd56d6ce786..edc8b4e52763f 100644
--- a/libc/include/arpa/inet.yaml
+++ b/libc/include/arpa/inet.yaml
@@ -1,7 +1,8 @@
 header: arpa/inet.h
 header_template: inet.h.def
 macros: []
-types: []
+types:
+  - type_name: in_addr
 enums: []
 objects: []
 functions:
@@ -17,6 +18,13 @@ functions:
     return_type: uint16_t
     arguments:
       - type: uint16_t
+  - name: inet_aton
+    standards:
+      - llvm_libc_ext # inet_aton is not specified by POSIX, so it is an extension
+    return_type: int
+    arguments:
+      - type: const char *
+      - type: in_addr *
   - name: ntohl
     standards:
       - POSIX
diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt
index 1f39a076fde91..fc17c395a6a24 100644
--- a/libc/src/arpa/inet/CMakeLists.txt
+++ b/libc/src/arpa/inet/CMakeLists.txt
@@ -22,6 +22,20 @@ add_entrypoint_object(
     libc.src.__support.common
 )
 
+add_entrypoint_object(
+  inet_aton
+  SRCS
+    inet_aton.cpp
+  HDRS
+    inet_aton.h
+  DEPENDS
+    libc.include.arpa_inet
+    libc.include.llvm-libc-types.in_addr
+    libc.src.__support.common
+    libc.src.__support.str_to_integer
+    libc.src.arpa.inet.htonl
+)
+
 add_entrypoint_object(
   ntohl
   SRCS
diff --git a/libc/src/arpa/inet/inet_aton.cpp b/libc/src/arpa/inet/inet_aton.cpp
new file mode 100644
index 0000000000000..183b147cc0c10
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.cpp
@@ -0,0 +1,63 @@
+//===-- Implementation of inet_aton function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/inet_aton.h"
+#include "src/__support/common.h"
+#include "src/__support/str_to_integer.h"
+#include "src/arpa/inet/htonl.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Parses an IPv4 address written as a[.b[.c[.d]]]. Each part is decimal, octal
+// (leading 0) or hex (leading 0x); every part but the last is one byte and the
+// last fills the remaining low-order bytes. Returns 1 and, if inp is non-null,
+// stores the address in network byte order; returns 0 for malformed input.
+LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) {
+  constexpr int IPV4_MAX_DOT_NUM = 3;
+  unsigned long parts[IPV4_MAX_DOT_NUM + 1] = {0};
+  int dot_num = 0;
+
+  for (; dot_num <= IPV4_MAX_DOT_NUM; ++dot_num) {
+    // strtointeger skips leading whitespace and accepts a sign; neither is
+    // valid inside an address, so every part must start with a digit.
+    if (*cp < '0' || *cp > '9')
+      return 0;
+    auto result = internal::strtointeger<unsigned long>(cp, 0);
+    if (result.has_error() || result.parsed_len == 0)
+      return 0;
+    parts[dot_num] = result;
+
+    char next_char = *(cp + result.parsed_len);
+    if (next_char == '\0')
+      break;
+    if (next_char != '.')
+      return 0;
+    cp += (result.parsed_len + 1);
+  }
+  // The loop only runs to completion when a dot follows the fourth part.
+  if (dot_num > IPV4_MAX_DOT_NUM)
+    return 0;
+
+  unsigned long result = 0;
+  for (int i = 0; i <= dot_num; ++i) {
+    // The last part may use all bytes not claimed by the parts before it.
+    unsigned long max_part =
+        i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
+    if (parts[i] > max_part)
+      return 0;
+    int shift = i == dot_num ? 0 : 8 * (IPV4_MAX_DOT_NUM - i);
+    result |= parts[i] << shift;
+  }
+
+  if (inp)
+    inp->s_addr = LIBC_NAMESPACE::htonl(static_cast<uint32_t>(result));
+
+  return 1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/arpa/inet/inet_aton.h b/libc/src/arpa/inet/inet_aton.h
new file mode 100644
index 0000000000000..ea387d1f6b2f6
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.h
@@ -0,0 +1,21 @@
+//===-- Implementation header of inet_aton ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+
+#include "include/llvm-libc-types/in_addr.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int inet_aton(const char *cp, in_addr *inp);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt
index 6e78e3a50e612..42fa49fe07267 100644
--- a/libc/test/src/arpa/inet/CMakeLists.txt
+++ b/libc/test/src/arpa/inet/CMakeLists.txt
@@ -26,6 +26,19 @@ add_libc_unittest(
     libc.src.arpa.inet.ntohs
 )
 
+add_libc_unittest(
+  inet_aton
+  SUITE
+    libc_arpa_inet_unittests
+  SRCS
+    inet_aton_test.cpp
+  CXX_STANDARD
+    20
+  DEPENDS
+    libc.src.arpa.inet.htonl
+    libc.src.arpa.inet.inet_aton
+)
+
 add_libc_unittest(
   ntohl
   SUITE
diff --git a/libc/test/src/arpa/inet/inet_aton_test.cpp b/libc/test/src/arpa/inet/inet_aton_test.cpp
new file mode 100644
index 0000000000000..0b438740b99b0
--- /dev/null
+++ b/libc/test/src/arpa/inet/inet_aton_test.cpp
@@ -0,0 +1,93 @@
+//===-- Unittests for inet_aton -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/htonl.h"
+#include "src/arpa/inet/inet_aton.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+TEST(LlvmLibcInetAton, ValidTest) { // Inputs that must parse (return 1).
+  in_addr a; // Reset to 0 before every call below.
+
+  // a.b.c.d: each part supplies one byte. Expected values are written in host
+  a.s_addr = 0; // order and converted with htonl before comparing.
+  ASSERT_EQ(1, inet_aton("127.1.2.4", &a));
+  ASSERT_EQ(htonl(0x7f010204), a.s_addr);
+
+  // a.b.c: the last part supplies the low two bytes (4 -> 0x0004).
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1.4", &a));
+  ASSERT_EQ(htonl(0x7f010004), a.s_addr);
+
+  // a.b: the last part supplies the low three bytes (1 -> 0x000001).
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  // a: a single part supplies the whole 32-bit address.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0x7f000001", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  // Hex (0x) and mixed-case hex digits.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0xFf.0.0.1", &a));
+  ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+  // Hex (0X) and mixed-case hex digits.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0XfF.0.0.1", &a));
+  ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+  // Octal (leading 0): 0177 == 0x7f.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0177.0.0.1", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  a.s_addr = 0; // A single octal part used as the whole address.
+  ASSERT_EQ(1, inet_aton("036", &a));
+  ASSERT_EQ(htonl(036U), a.s_addr);
+}
+
+TEST(LlvmLibcInetAton, InvalidTest) { // Inputs that must be rejected (0).
+  ASSERT_EQ(0, inet_aton("", nullptr));           // Empty.
+  ASSERT_EQ(0, inet_aton("x", nullptr));          // Leading junk.
+  ASSERT_EQ(0, inet_aton("127.0.0.1x", nullptr)); // Trailing junk.
+  ASSERT_EQ(0, inet_aton("09.0.0.1", nullptr));   // 9 is not an octal digit.
+  ASSERT_EQ(0, inet_aton("0xg.0.0.1", nullptr));  // g is not a hex digit.
+
+  ASSERT_EQ(0, inet_aton("1.2.3.4.5", nullptr)); // Too many dots.
+  ASSERT_EQ(0, inet_aton("1.2.3.4.", nullptr));  // Trailing dot.
+
+  // Out of range a.b.c.d form: every part is limited to one byte.
+  ASSERT_EQ(0, inet_aton("999.0.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.999.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.999.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0.999", nullptr));
+
+  // Out of range a.b.c form: a and b are one byte, c is at most 0xffff.
+  ASSERT_EQ(0, inet_aton("256.0.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0x10000", nullptr));
+
+  // Out of range a.b form: a is one byte, b is at most 0xffffff.
+  ASSERT_EQ(0, inet_aton("256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0x1000000", nullptr));
+
+  // Out of range a form: a is at most 0xffffffff.
+  ASSERT_EQ(0, inet_aton("0x100000000", nullptr));
+
+  // 64-bit overflow: the value is 2^64.
+  ASSERT_EQ(0, inet_aton("0x10000000000000000", nullptr));
+
+  // Out of range octal (0400 == 256).
+  ASSERT_EQ(0, inet_aton("0400.0.0.1", nullptr));
+}
+
+} // namespace LIBC_NAMESPACE_DECL

libc/src/arpa/inet/inet_aton.cpp

libc/test/src/arpa/inet/CMakeLists.txt

libc/src/arpa/inet/inet_aton.cpp

michaelrj-google

Overall looks good, just a few comments

libc/src/arpa/inet/inet_aton.cpp

michaelrj-google · 2025-10-09T17:47:35Z

libc/src/arpa/inet/inet_aton.cpp

+  int dot_num = 0;
+
+  for (; dot_num < 4; ++dot_num) {
+    auto result = internal::strtointeger<unsigned long>(cp, 0);


This is a good use of this function, but you should add a note to the undefined behavior doc that this may allow binary integers (with a leading 0b), which isn't in the standard: https://github.com/llvm/llvm-project/blob/main/libc/docs/dev/undefined_behavior.rst

I haven't tested it yet, but a quick look at the code suggests strtointeger might not support binary integers when base = 0. The test file, libc/test/src/__support/str_to_integer_test.cpp, doesn't have any binary integer coverage either.

llvm-project/libc/src/__support/str_to_integer.h

Lines 57 to 71 in 88ba06d

LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {

// A hexadecimal number is defined as "the prefix 0x or 0X followed by a

// sequence of the decimal digits and the letters a (or A) through f (or F)

// with values 10 through 15 respectively." (C standard 6.4.4.1)

if (is_hex_start(src, src_len))

return 16;

// An octal number is defined as "the prefix 0 optionally followed by a

// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any

// number that starts with 0, including just 0, is an octal number.

if (src_len > 0 && src[0] == '0')

return 8;

// A decimal number is defined as beginning "with a nonzero digit and

// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)

return 10;

}

Test case for binary integers added in c86be34.

ah, in that case I forgot we haven't added that yet. I'd say don't worry about testing that it doesn't work because it's not a problem if it does and we'll have to change it when binary support is added to strtointeger.

if it does and we'll have to change it when binary support is added to strtointeger.

Yes, so I think that's why we want to keep the binary integers test cases. That way, once binary support is added, we can immediately know if anything needs to be handled on the inet_aton side.

What would we need to add beyond a comment? Calling this function with an 0b prefix is undefined behavior so the result doesn't need to be consistent.

Binary integer test removed; entry added to undefined_behavior.rst (not quite sure about the wording).

michaelrj-google · 2025-10-10T17:45:56Z

libc/src/arpa/inet/inet_aton.cpp

+  int dot_num = 0;
+
+  for (; dot_num < 4; ++dot_num) {
+    auto result = internal::strtointeger<unsigned long>(cp, 0);


ah, in that case I forgot we haven't added that yet. I'd say don't worry about testing that it doesn't work because it's not a problem if it does and we'll have to change it when binary support is added to strtointeger.

michaelrj-google · 2025-10-10T17:46:58Z

libc/src/arpa/inet/inet_aton.cpp

+        i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
+    if (parts[i] > max_part)
+      return 0;
+    int shift = i == dot_num ? 0 : 8 * (3 - i);


to avoid magic constants, could you explain what the numbers here are representing? Specifically, why is this 3 - i?

case 0:
  if (parts[0] > 0xffffffff)
    return 0;
  result = parts[0];
  break;
case 1:
  if (parts[0] > 0xff || parts[1] > 0xffffff)
    return 0;
  result = (parts[0] << 24) | parts[1];
  break;
case 2:
  if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xffff)
    return 0;
  result = (parts[0] << 24) | (parts[1] << 16) | parts[2];
  break;
case 3:
  if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xff ||
      parts[3] > 0xff)
    return 0;
  result = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3];
  break;

It's basically a direct translation of the switch code above. The idea is that the last part doesn't need to be shifted, while the left part needs to be shifted to the higher bits. Perhaps we should use a constant like MAX_DOT_NUM=3 throughout this function. I'll make this change tonight.

Magic number removed in e2b5b52.

michaelrj-google · 2025-10-10T17:47:16Z

libc/src/arpa/inet/inet_aton.cpp

+  if (dot_num > 3)
+    return 0;
+
+  unsigned long result = 0;


for readability it would be good to have a comment explaining what the format you're parsing is.

Would you prefer the simple term or the comprehensive term?
For the simple term, we could just use the comment a[.b[.c[.d]]]. For the comprehensive term, we can quote something from the standard, like below:

a.b.c.d: Each of the four numeric parts specifies a byte of the address; the bytes are assigned in left-to-right order to produce the binary address.

a.b.c: Parts a and b specify the first two bytes of the binary address. Part c is interpreted as a 16-bit value that defines the rightmost two bytes of the binary address. This notation is suitable for specifying (outmoded) Class B network addresses.

a.b: Part a specifies the first byte of the binary address. Part b is interpreted as a 24-bit value that defines the rightmost three bytes of the binary address. This notation is suitable for specifying (outmoded) Class A network addresses.

a: The value a is interpreted as a 32-bit value that is stored directly into the binary address without any byte rearrangement.

I'd go with something shorter than a quote from the standard, but a bit more explanation than just a[.b[.c[.d]]]

Comment about the format added.

Address #162651 (comment).

…163983) Address llvm/llvm-project#162651 (comment).

[libc] implement inet_aton

9774e2d

llvmbot added backend:RISC-V libc labels Oct 9, 2025

c8ef requested review from jhuber6, lntue and michaelrj-google October 9, 2025 13:33

[libc] implement inet_aton

67fbf2e

lntue reviewed Oct 9, 2025

View reviewed changes

libc/src/arpa/inet/inet_aton.cpp Outdated Show resolved Hide resolved

lntue reviewed Oct 9, 2025

View reviewed changes

libc/test/src/arpa/inet/CMakeLists.txt Outdated Show resolved Hide resolved

lntue reviewed Oct 9, 2025

View reviewed changes

libc/src/arpa/inet/inet_aton.cpp Outdated Show resolved Hide resolved

address review comments

247a96e

michaelrj-google reviewed Oct 9, 2025

View reviewed changes

c8ef added 3 commits October 10, 2025 22:34

test rejected binary integers

c86be34

rewrite as a loop

a8b5f06

add missing long

ec35143

c8ef requested review from lntue and michaelrj-google October 10, 2025 15:22

michaelrj-google reviewed Oct 10, 2025

View reviewed changes

use IPV4_MAX_DOT_NUM

e2b5b52

c8ef requested a review from michaelrj-google October 13, 2025 02:14

address review comments

87d19e8

c8ef mentioned this pull request Oct 17, 2025

[libc][NFC] Remove cpp20 requirement for arpa/inet test #163983

Merged

c8ef added a commit that referenced this pull request Oct 17, 2025

[libc][NFC] Remove cpp20 requirement for arpa/inet test (#163983)

404099d

Address #162651 (comment).

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Oct 17, 2025

Automerge: [libc][NFC] Remove cpp20 requirement for arpa/inet test (#…

73ef900

…163983) Address llvm/llvm-project#162651 (comment).

	LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
	// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
	// sequence of the decimal digits and the letters a (or A) through f (or F)
	// with values 10 through 15 respectively." (C standard 6.4.4.1)
	if (is_hex_start(src, src_len))
	return 16;
	// An octal number is defined as "the prefix 0 optionally followed by a
	// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
	// number that starts with 0, including just 0, is an octal number.
	if (src_len > 0 && src[0] == '0')
	return 8;
	// A decimal number is defined as beginning "with a nonzero digit and
	// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
	return 10;
	}

[libc] implement inet_aton #162651

Are you sure you want to change the base?

[libc] implement inet_aton #162651

Uh oh!

Conversation

c8ef commented Oct 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

c8ef Oct 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[libc] implement `inet_aton` #162651

[libc] implement `inet_aton` #162651

c8ef commented Oct 9, 2025 •

edited

Loading

llvmbot commented Oct 9, 2025 •

edited

Loading

c8ef Oct 10, 2025 •

edited

Loading