From 1f568e049d668340d2a11583da7aa118d0add44b Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada <nobu@ruby-lang.org>
Date: Mon, 27 Nov 2023 20:15:54 +0900
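Subject: [PATCH] Place a space between certain character class letters only

When the lines of a paragraph are joined, replace a line break with a
space only next to letters in the new SPACE_SEPARATED_LETTER_CLASS
(decimal digits, cased letters and connector punctuation) and simply
drop it everywhere else.  The parser looks at the letter before the
break; the HTML formatter requires such a letter on both sides.  Text
in scripts written without spaces between words (e.g. Japanese) is
thus no longer split by spurious spaces.  Note that other punctuation
is not in the class, so a break after "." or "," is dropped as well.
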
---
 lib/rdoc/markup/parser.rb             |  2 +-
 lib/rdoc/markup/to_html.rb            |  4 +++-
 lib/rdoc/text.rb                      |  6 ++++++
 test/rdoc/test_rdoc_markup_to_html.rb | 28 +++++++++++++++++++++++----
 4 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb
index 0029df7e65..2ad4a65808 100644
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@@ -218,7 +218,7 @@ def build_paragraph margin
 
         break if peek_token.first == :BREAK
 
-        data << ' ' if skip :NEWLINE
+        data << ' ' if skip :NEWLINE and /#{SPACE_SEPARATED_LETTER_CLASS}\z/o.match?(data)
       else
         unget
         break
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 6c9f5733a2..fb38924a04 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -202,7 +202,9 @@ def accept_block_quote block_quote
   def accept_paragraph paragraph
     @res << "\n<p>"
     text = paragraph.text @hard_break
-    text = text.gsub(/\r?\n/, ' ')
+    text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
+      defined?($2) && ' '
+    }
     @res << to_html(text)
     @res << "</p>\n"
   end
diff --git a/lib/rdoc/text.rb b/lib/rdoc/text.rb
index 0bc4aba428..6f1a2b8d15 100644
--- a/lib/rdoc/text.rb
+++ b/lib/rdoc/text.rb
@@ -309,4 +309,10 @@ def wrap(txt, line_len = 76)
     res.join.strip
   end
 
+  ##
+  # Letters (decimal digits, cased letters and connector punctuation) that
+  # get a space in place of an adjacent line break when lines are joined.
+
+  SPACE_SEPARATED_LETTER_CLASS = /[\p{Nd}\p{Lc}\p{Pc}]/
+
 end
diff --git a/test/rdoc/test_rdoc_markup_to_html.rb b/test/rdoc/test_rdoc_markup_to_html.rb
index 6897c8132e..2dd8cf922d 100644
--- a/test/rdoc/test_rdoc_markup_to_html.rb
+++ b/test/rdoc/test_rdoc_markup_to_html.rb
@@ -257,7 +257,7 @@ def accept_paragraph_br
   end
 
   def accept_paragraph_break
-    assert_equal "\n<p>hello<br> world</p>\n", @to.res.join
+    assert_equal "\n<p>hello<br>world</p>\n", @to.res.join
   end
 
   def accept_paragraph_i
@@ -391,11 +391,31 @@ def test_accept_heading_pipe
   end
 
   def test_accept_paragraph_newline
-    @to.start_accepting
+    hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
+    worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
+    ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
+
+    hellos.product(worlds) do |hello, world|
+      @to.start_accepting
+      @to.accept_paragraph para("#{hello}\n", "#{world}\n")
+      assert_equal "\n<p>#{hello} #{world}</p>\n", @to.res.join
+    end
+
+    hellos.each do |hello|
+      @to.start_accepting
+      @to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
+      assert_equal "\n<p>#{hello}#{sekai}</p>\n", @to.res.join
+    end
 
-    @to.accept_paragraph para("hello\n", "world\n")
+    worlds.each do |world|
+      @to.start_accepting
+      @to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
+      assert_equal "\n<p>#{ohayo}#{world}</p>\n", @to.res.join
+    end
 
-    assert_equal "\n<p>hello world </p>\n", @to.res.join
+    @to.start_accepting
+    @to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
+    assert_equal "\n<p>#{ohayo}#{sekai}</p>\n", @to.res.join
   end
 
   def test_accept_heading_output_decoration