Skip to content

Conversation

c8ef
Copy link
Contributor

@c8ef c8ef commented Oct 9, 2025

This patch adds the implementation for inet_aton function. Since this function is not explicitly included in POSIX, I have marked it with llvm_libc_ext. It is widely available and commonly used, and can also be used to implement inet_addr, which is included in POSIX.

@llvmbot
Copy link
Member

llvmbot commented Oct 9, 2025

@llvm/pr-subscribers-libc

@llvm/pr-subscribers-backend-risc-v

Author: Connector Switch (c8ef)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/162651.diff

9 Files Affected:

  • (modified) libc/config/linux/aarch64/entrypoints.txt (+1)
  • (modified) libc/config/linux/riscv/entrypoints.txt (+1)
  • (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
  • (modified) libc/include/arpa/inet.yaml (+9-1)
  • (modified) libc/src/arpa/inet/CMakeLists.txt (+14)
  • (added) libc/src/arpa/inet/inet_aton.cpp (+63)
  • (added) libc/src/arpa/inet/inet_aton.h (+21)
  • (modified) libc/test/src/arpa/inet/CMakeLists.txt (+13)
  • (added) libc/test/src/arpa/inet/inet_aton_test.cpp (+93)
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 8bf6c44b1d669..714120a79e39a 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index dffccbab9a8e9..f6bbb346d10e5 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1077,6 +1077,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index b4ab073ec912f..aa455e80ec5d3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1113,6 +1113,7 @@ if(LLVM_LIBC_FULL_BUILD)
     # arpa/inet.h entrypoints
     libc.src.arpa.inet.htonl
     libc.src.arpa.inet.htons
+    libc.src.arpa.inet.inet_aton
     libc.src.arpa.inet.ntohl
     libc.src.arpa.inet.ntohs
 
diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml
index 10cd56d6ce786..edc8b4e52763f 100644
--- a/libc/include/arpa/inet.yaml
+++ b/libc/include/arpa/inet.yaml
@@ -1,7 +1,8 @@
 header: arpa/inet.h
 header_template: inet.h.def
 macros: []
-types: []
+types:
+  - type_name: in_addr
 enums: []
 objects: []
 functions:
@@ -17,6 +18,13 @@ functions:
     return_type: uint16_t
     arguments:
       - type: uint16_t
+  - name: inet_aton
+    standards:
+      - POSIX
+    return_type: int
+    arguments:
+      - type: const char *
+      - type: in_addr *
   - name: ntohl
     standards:
       - POSIX
diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt
index 1f39a076fde91..fc17c395a6a24 100644
--- a/libc/src/arpa/inet/CMakeLists.txt
+++ b/libc/src/arpa/inet/CMakeLists.txt
@@ -22,6 +22,20 @@ add_entrypoint_object(
     libc.src.__support.common
 )
 
+add_entrypoint_object(
+  inet_aton
+  SRCS
+    inet_aton.cpp
+  HDRS
+    inet_aton.h
+  DEPENDS
+    libc.include.arpa_inet
+    libc.include.llvm-libc-types.in_addr
+    libc.src.__support.common
+    libc.src.__support.str_to_integer
+    libc.src.arpa.inet.htonl
+)
+
 add_entrypoint_object(
   ntohl
   SRCS
diff --git a/libc/src/arpa/inet/inet_aton.cpp b/libc/src/arpa/inet/inet_aton.cpp
new file mode 100644
index 0000000000000..183b147cc0c10
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.cpp
@@ -0,0 +1,63 @@
+//===-- Implementation of inet_aton function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/inet_aton.h"
+#include "src/__support/common.h"
+#include "src/__support/str_to_integer.h"
+#include "src/arpa/inet/htonl.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, inet_aton, (const char *cp, in_addr *inp)) {
+  unsigned long parts[4] = {0};
+  int dot_num = 0;
+
+  for (; dot_num < 4; ++dot_num) {
+    auto result = internal::strtointeger<unsigned long>(cp, 0);
+    parts[dot_num] = result;
+
+    if (result.has_error() || result.parsed_len == 0)
+      return 0;
+    char next_char = *(cp + result.parsed_len);
+    if (next_char != '.' && next_char != '\0')
+      return 0;
+    else if (next_char == '\0')
+      break;
+    else
+      cp += (result.parsed_len + 1);
+  }
+
+  unsigned long result = 0;
+  if (dot_num == 0) {
+    if (parts[0] > 0xffffffff)
+      return 0;
+    result = parts[0];
+  } else if (dot_num == 1) {
+    if (parts[0] > 0xff || parts[1] > 0xffffff)
+      return 0;
+    result = (parts[0] << 24) | parts[1];
+  } else if (dot_num == 2) {
+    if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xffff)
+      return 0;
+    result = (parts[0] << 24) | (parts[1] << 16) | parts[2];
+  } else if (dot_num == 3) {
+    if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xff ||
+        parts[3] > 0xff)
+      return 0;
+    result = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3];
+  } else {
+    return 0;
+  }
+
+  if (inp)
+    inp->s_addr = LIBC_NAMESPACE::htonl(static_cast<uint32_t>(result));
+
+  return 1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/arpa/inet/inet_aton.h b/libc/src/arpa/inet/inet_aton.h
new file mode 100644
index 0000000000000..ea387d1f6b2f6
--- /dev/null
+++ b/libc/src/arpa/inet/inet_aton.h
@@ -0,0 +1,21 @@
+//===-- Implementation header of inet_aton ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+#define LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
+
+#include "include/llvm-libc-types/in_addr.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int inet_aton(const char *cp, in_addr *inp);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ATON_H
diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt
index 6e78e3a50e612..42fa49fe07267 100644
--- a/libc/test/src/arpa/inet/CMakeLists.txt
+++ b/libc/test/src/arpa/inet/CMakeLists.txt
@@ -26,6 +26,19 @@ add_libc_unittest(
     libc.src.arpa.inet.ntohs
 )
 
+add_libc_unittest(
+  inet_aton
+  SUITE
+    libc_arpa_inet_unittests
+  SRCS
+    inet_aton_test.cpp
+  CXX_STANDARD
+    20
+  DEPENDS
+    libc.src.arpa.inet.htonl
+    libc.src.arpa.inet.inet_aton
+)
+
 add_libc_unittest(
   ntohl
   SUITE
diff --git a/libc/test/src/arpa/inet/inet_aton_test.cpp b/libc/test/src/arpa/inet/inet_aton_test.cpp
new file mode 100644
index 0000000000000..0b438740b99b0
--- /dev/null
+++ b/libc/test/src/arpa/inet/inet_aton_test.cpp
@@ -0,0 +1,93 @@
+//===-- Unittests for inet_aton -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/arpa/inet/htonl.h"
+#include "src/arpa/inet/inet_aton.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+TEST(LlvmLibcInetAton, ValidTest) {
+  in_addr a;
+
+  // a.b.c.d
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1.2.4", &a));
+  ASSERT_EQ(htonl(0x7f010204), a.s_addr);
+
+  // a.b.c
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1.4", &a));
+  ASSERT_EQ(htonl(0x7f010004), a.s_addr);
+
+  // a.b
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("127.1", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  // a
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0x7f000001", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  // Hex (0x) and mixed-case hex digits.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0xFf.0.0.1", &a));
+  ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+  // Hex (0X) and mixed-case hex digits.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0XfF.0.0.1", &a));
+  ASSERT_EQ(htonl(0xff000001), a.s_addr);
+
+  // Octal.
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("0177.0.0.1", &a));
+  ASSERT_EQ(htonl(0x7f000001), a.s_addr);
+
+  a.s_addr = 0;
+  ASSERT_EQ(1, inet_aton("036", &a));
+  ASSERT_EQ(htonl(036U), a.s_addr);
+}
+
+TEST(LlvmLibcInetAton, InvalidTest) {
+  ASSERT_EQ(0, inet_aton("", nullptr));           // Empty.
+  ASSERT_EQ(0, inet_aton("x", nullptr));          // Leading junk.
+  ASSERT_EQ(0, inet_aton("127.0.0.1x", nullptr)); // Trailing junk.
+  ASSERT_EQ(0, inet_aton("09.0.0.1", nullptr));   // Invalid octal.
+  ASSERT_EQ(0, inet_aton("0xg.0.0.1", nullptr));  // Invalid hex.
+
+  ASSERT_EQ(0, inet_aton("1.2.3.4.5", nullptr)); // Too many dots.
+  ASSERT_EQ(0, inet_aton("1.2.3.4.", nullptr));  // Trailing dot.
+
+  // Out of range a.b.c.d form.
+  ASSERT_EQ(0, inet_aton("999.0.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.999.0.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.999.1", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0.999", nullptr));
+
+  // Out of range a.b.c form.
+  ASSERT_EQ(0, inet_aton("256.0.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0.0x10000", nullptr));
+
+  // Out of range a.b form.
+  ASSERT_EQ(0, inet_aton("256.0", nullptr));
+  ASSERT_EQ(0, inet_aton("0.0x1000000", nullptr));
+
+  // Out of range a form.
+  ASSERT_EQ(0, inet_aton("0x100000000", nullptr));
+
+  // 64-bit overflow.
+  ASSERT_EQ(0, inet_aton("0x10000000000000000", nullptr));
+
+  // Out of range octal.
+  ASSERT_EQ(0, inet_aton("0400.0.0.1", nullptr));
+}
+
+} // namespace LIBC_NAMESPACE_DECL

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overall looks good, just a few comments

int dot_num = 0;

for (; dot_num < 4; ++dot_num) {
auto result = internal::strtointeger<unsigned long>(cp, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a good use of this function, but you should add note that this may allow binary integers (with a leading 0b) which isn't in the standard in the undefined behavior doc: https://github.com/llvm/llvm-project/blob/main/libc/docs/dev/undefined_behavior.rst

Copy link
Contributor Author

@c8ef c8ef Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't tested it yet, but a quick look at the code suggests strtointeger might not support binary integers when base = 0. The test file, libc/test/src/__support/str_to_integer_test.cpp, lacks binary integer coverage either.

LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
// sequence of the decimal digits and the letters a (or A) through f (or F)
// with values 10 through 15 respectively." (C standard 6.4.4.1)
if (is_hex_start(src, src_len))
return 16;
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
if (src_len > 0 && src[0] == '0')
return 8;
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
return 10;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test case for binary integers added in c86be34.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, in that case I forgot we haven't added that yet. I'd say don't worry about testing that it doesn't work because it's not a problem if it does and we'll have to change it when binary support is added to strtointeger.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if it does and we'll have to change it when binary support is added to strtointeger.

Yes, so I think that's why we want to keep the binary integers test cases. That way, once binary support is added, we can immediately know if anything needs to be handled on the inet_aton side.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would we need to add beyond a comment? Calling this function with an 0b prefix is undefined behavior so the result doesn't need to be consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Binary integers test removed, undefined_behavior.rst added(Not quite sure about the wording).

int dot_num = 0;

for (; dot_num < 4; ++dot_num) {
auto result = internal::strtointeger<unsigned long>(cp, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, in that case I forgot we haven't added that yet. I'd say don't worry about testing that it doesn't work because it's not a problem if it does and we'll have to change it when binary support is added to strtointeger.

i == dot_num ? (0xffffffffUL >> (8 * dot_num)) : 0xffUL;
if (parts[i] > max_part)
return 0;
int shift = i == dot_num ? 0 : 8 * (3 - i);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to avoid magic constants, could you explain what the numbers here are representing? Specifically, why is this 3 - i?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  case 0:
    if (parts[0] > 0xffffffff)
      return 0;
    result = parts[0];
    break;
  case 1:
    if (parts[0] > 0xff || parts[1] > 0xffffff)
      return 0;
    result = (parts[0] << 24) | parts[1];
    break;
  case 2:
    if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xffff)
      return 0;
    result = (parts[0] << 24) | (parts[1] << 16) | parts[2];
    break;
  case 3:
    if (parts[0] > 0xff || parts[1] > 0xff || parts[2] > 0xff ||
        parts[3] > 0xff)
      return 0;
    result = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3];
    break;

It's basically a direct translation of the switch code above. The idea is that the last part doesn't need to be shifted, while the left part needs to be shifted to the higher bits. Perhaps we should use a constant like MAX_DOT_NUM=3 throughout this function. I'll make this change tonight.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic number removed in e2b5b52.

if (dot_num > 3)
return 0;

unsigned long result = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for readability it would be good to have a comment explaining what the format you're parsing is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you prefer the simple term or the comprehensive term?
For the simple term, we could just use the comment a[.b[.c[.d]]]. For the comprehensive term, we can quote something from the standard, like below:

       a.b.c.d
              Each of the four numeric parts specifies a byte of the
              address; the bytes are assigned in left-to-right order to
              produce the binary address.

       a.b.c  Parts a and b specify the first two bytes of the binary
              address.  Part c is interpreted as a 16-bit value that
              defines the rightmost two bytes of the binary address.
              This notation is suitable for specifying (outmoded) Class B
              network addresses.

       a.b    Part a specifies the first byte of the binary address.
              Part b is interpreted as a 24-bit value that defines the
              rightmost three bytes of the binary address.  This notation
              is suitable for specifying (outmoded) Class A network
              addresses.

       a      The value a is interpreted as a 32-bit value that is stored
              directly into the binary address without any byte
              rearrangement.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd go with something shorter than a quote from the standard, but a bit more explanation than just a[.b[.c[.d]]]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment about the format added.

@c8ef c8ef requested a review from michaelrj-google October 13, 2025 02:14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants