diff --git a/Gemfile.lock b/Gemfile.lock index f5b7b63..9f80514 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - emoji_regex (3.2.3) + emoji_regex (14.0.0.pre.1) GEM remote: https://rubygems.org/ diff --git a/README.md b/README.md index 30fab98..9fb6b7e 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ [![Gem Version](https://badge.fury.io/rb/emoji_regex.svg)](https://rubygems.org/gems/emoji_regex) [![Node & Ruby CI](https://github.com/ticky/ruby-emoji-regex/workflows/Node%20&%20Ruby%20CI/badge.svg)](https://github.com/ticky/ruby-emoji-regex/actions?query=workflow%3A%22Node+%26+Ruby+CI%22) -A set of Ruby regular expressions for matching Unicode Emoji symbols. +A Ruby regular expression for matching Unicode Emoji symbols. ## Background -This is based upon the fantastic work from [Mathias Bynens'](https://mathiasbynens.be/) [`emoji-regex`](https://github.com/mathiasbynens/emoji-regex) Javascript package. `emoji-regex` is cleverly assembled based upon data from the Unicode Consortium. +This is based upon the fantastic work from [Mathias Bynens'](https://mathiasbynens.be/) [`emoji-test-regex-pattern`](https://github.com/mathiasbynens/emoji-test-regex-pattern) package. `emoji-test-regex-pattern` is cleverly assembled based upon data from the Unicode Consortium. -The regular expressions provided herein are derived from that pacakge. +The regular expression provided herein is derived from that package. ## Installation @@ -18,29 +18,7 @@ gem install emoji_regex ## Usage -`emoji_regex` provides these regular expressions: - -* `EmojiRegex::RGIEmoji` is the regex you most likely want. It matches all emoji recommended for general interchange, as defined by [the Unicode standard's `RGI_Emoji` property](https://unicode.org/reports/tr51/#def_rgi_set). In a future version, this regular expression will be renamed to `EmojiRegex::Regex` and all other regexes removed. 
- -* `EmojiRegex::Regex` is deprecated, and will be replaced with `RGIEmoji` in a future major version. It matches emoji which present as emoji by default, and those which present as emoji when combined with `U+FE0F VARIATION SELECTOR-16`. - -* `EmojiRegex::Text` is deprecated, and will be removed in a future major version. It matches emoji which present as text by default (regardless of variation selector), as well as those which present as emoji by default. - -### RGI vs Emoji vs Text Presentation - -`RGI_Emoji` is a property of emoji symbols, defined in [Unicode Technical Report #51](https://unicode.org/reports/tr51/#def_rgi_set) which marks emoji as being supported by major vendors and therefore expected to be usable generally. In most cases, this is the property you will want when seeking emoji characters. - -`Emoji_Presentation` is another such property, [defined in UTR#51](http://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files) which controls whether symbols are intended to be rendered as emoji by default. - -Generally, for emoji which re-use Unicode code points which existed before Emoji itself was introduced to Unicode, `Emoji_Presentation` is `false`. `Emoji_Presentation` may be `true` but `RGI_Emoji` false for characters with non-standard emoji-like representations in certain conditions. Notable cases are the Emoji Keycap Sequences (#️⃣, 1️⃣, 9️⃣, *️⃣, etc.) which are sequences composed of three characters; the base character, an `U+FE0F VARIATION SELECTOR-16`, and finally the `U+20E3 COMBINING ENCLOSING KEYCAP`. - -These characters, therefore, are matched to varying degrees of precision by each of the regular expressions included in this package; - -- `#` is matched only by `EmojiRegex::Text` as it is considered to be a text part of a possible emoji. -- `#️` is matched by `EmojiRegex::Regex` as well as `EmojiRegex::Text` as it has `Emoji_Presentation` despite not being a generally accepted Emoji or recommended for general interchange. 
-- `#️⃣` is matched by all three regular expressions, as it is recommended for general interchange. - -It's most likely that the regular expression you want is `EmojiRegex::RGIEmoji`! ☺️ +`emoji_regex` provides the `EmojiRegex::Regex` regular expression, which matches emoji, as defined by [the Unicode standard's `emoji-test` data file](https://unicode.org/Public/emoji/14.0/emoji-test.txt). ### Example @@ -49,78 +27,24 @@ require 'emoji_regex' text = < ( - // request the regex - regexFactory() - // convert regex to string so we can replace things - .toString() - // replace incompatible single-byte escapes - .replace(/\\x([a-zA-Z0-9]{2})/g, '\\u00$1') - // remove incompatible flags - .slice(0, -2) -); - -const emojiRegexRGI = replaceEscapes(require('emoji-regex/es2015/RGI_Emoji')); -const emojiRegex = replaceEscapes(require('emoji-regex/es2015')); -const emojiRegexText = replaceEscapes(require('emoji-regex/es2015/text')); +const emojiRegex = fs.readFileSync(path.join(__dirname, '../node_modules/emoji-test-regex-pattern/dist/latest/java.txt'), 'utf8') + .trim() + // replace incompatible escapes + .replace(/\\x\{([a-zA-Z0-9]{5})\}/g, '\\u{$1}') + .replace(/\\x\{([a-zA-Z0-9]{4})\}/g, '\\u$1') + .replace(/\\x([a-zA-Z0-9]{2})/g, '\\u00$1') const input = fs.readFileSync(path.join(__dirname, '../src/emoji_regex.rb'), 'utf8'); -const output = input - .replace('/% emojiRegexRGI %/', emojiRegexRGI) - .replace('/% emojiRegex %/', emojiRegex) - .replace('/% emojiRegexText %/', emojiRegexText); +const output = input.replace('/% emojiRegex %/', `/${emojiRegex}/`) fs.writeFileSync(path.join(__dirname, '../lib/emoji_regex.rb'), output); diff --git a/spec/emoji_regex/emoji_regex_spec.rb b/spec/emoji_regex/emoji_regex_spec.rb index a129a32..83c66a6 100644 --- a/spec/emoji_regex/emoji_regex_spec.rb +++ b/spec/emoji_regex/emoji_regex_spec.rb @@ -19,62 +19,6 @@ end end - describe '::RGIEmoji' do - let(:subject) { EmojiRegex::RGIEmoji } - - it "matches ⌚️ watch (default emoji presentation 
character (Emoji_Presentation))" do - expect("\u{231A}".scan(subject)).to eql(["\u{231A}"]) - end - - it "matches ↔️ left-right arrow (default text presentation character rendered as emoji)" do - expect("\u{2194}\u{FE0F}".scan(subject)).to eql(["\u{2194}\u{FE0F}"]) - end - - it "matches 👩 woman (emoji modifier base (Emoji_Modifier_Base))" do - expect("\u{1F469}".scan(subject)).to eql(["\u{1F469}"]) - end - - it "matches 👩🏿 woman: dark skin tone (emoji modifier base followed by a modifier)" do - expect("\u{1F469}\u{1F3FF}".scan(subject)).to eql(["\u{1F469}\u{1F3FF}"]) - end - - it "matches 👩🏾‍✈️🏿 woman pilot: medium-dark skin tone (emoji modifier base followed by a modifier, with ZWJ sequence and default text representation character)" do - expect("\u{1F469}\u{1F3FE}\u{200D}\u{2708}\u{FE0F}".scan(subject)).to eql(["\u{1F469}\u{1F3FE}\u{200D}\u{2708}\u{FE0F}"]) - end - - it "matches 🧏🏻‍♀️ deaf woman: light skin tone (emoji modifier bae followed by a modifier and gender ZWJ sequence)" do - expect("\u{1F9CF}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}".scan(subject)).to eql(["\u{1F9CF}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}"]) - end - - it "matches 🪴 potted plant" do - expect("\u{1FAB4}".scan(subject)).to eql(["\u{1FAB4}"]) - end - - it "matches 🤌🏼 pinched fingers: medium-light skin tone (emoji modifier base followed by a modifier)" do - expect("\u{1F90C}\u{1F3FC}".scan(subject)).to eql(["\u{1F90C}\u{1F3FC}"]) - end - - it "matches 💏🏿 couple kissing: dark skin tone (emoji 13.1 sequence)" do - expect("\u{1F48F}\u{1F3FF}".scan(subject)).to eql(["\u{1F48F}\u{1F3FF}"]) - end - - it "doesn't match # (default text presentation character)" do - expect("#".scan(subject)).to eql([]) - end - - it "doesn't match #️ (default text presentation character with emoji variation selector)" do - expect("#\u{FE0F}".scan(subject)).to eql([]) - end - - it "matches #️⃣ (default text presentation character with emoji variation selector and combining enclosing keycap)" do - 
expect("#\u{FE0F}\u{20E3}".scan(subject)).to eql(["#\u{FE0F}\u{20E3}"]) - end - - it "doesn't match non-emojis" do - expect("abc".scan(subject)).to eql([]) - end - end - describe '::Regex' do let(:subject) { EmojiRegex::Regex } @@ -118,68 +62,8 @@ expect("#".scan(subject)).to eql([]) end - it "matches #️ (default text presentation character with emoji variation selector)" do - expect("#\u{FE0F}".scan(subject)).to eql(["#\u{FE0F}"]) - end - - it "matches #️⃣ (default text presentation character with emoji variation selector and combining enclosing keycap)" do - expect("#\u{FE0F}\u{20E3}".scan(subject)).to eql(["#\u{FE0F}\u{20E3}"]) - end - - it "doesn't match non-emojis" do - expect("abc".scan(subject)).to eql([]) - end - end - - describe '::Text' do - let(:subject) { EmojiRegex::Text } - - it "matches ⌚️ watch (default emoji presentation character (Emoji_Presentation))" do - expect("\u{231A}".scan(subject)).to eql(["\u{231A}"]) - end - - it "matches ↔ left-right arrow (default text representation character)" do - expect("\u{2194}".scan(subject)).to eql(["\u{2194}"]) - end - - it "matches ↔️ left-right arrow (default text presentation character rendered as emoji)" do - expect("\u{2194}\u{FE0F}".scan(subject)).to eql(["\u{2194}\u{FE0F}"]) - end - - it "matches 👩 woman (emoji modifier base (Emoji_Modifier_Base))" do - expect("\u{1F469}".scan(subject)).to eql(["\u{1F469}"]) - end - - it "matches 👩🏿 woman: dark skin tone (emoji modifier base followed by a modifier)" do - expect("\u{1F469}\u{1F3FF}".scan(subject)).to eql(["\u{1F469}\u{1F3FF}"]) - end - - it "matches 👩🏾‍✈️🏿 woman pilot: medium-dark skin tone (emoji modifier base followed by a modifier, with ZWJ sequence and default text representation character)" do - expect("\u{1F469}\u{1F3FE}\u{200D}\u{2708}\u{FE0F}".scan(subject)).to eql(["\u{1F469}\u{1F3FE}\u{200D}\u{2708}\u{FE0F}"]) - end - - it "matches 🧏🏻‍♀️ deaf woman: light skin tone (emoji modifier bae followed by a modifier and gender ZWJ sequence)" do - 
expect("\u{1F9CF}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}".scan(subject)).to eql(["\u{1F9CF}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}"]) - end - - it "matches 🪴 potted plant" do - expect("\u{1FAB4}".scan(subject)).to eql(["\u{1FAB4}"]) - end - - it "matches 🤌🏼 pinched fingers: medium-light skin tone (emoji modifier base followed by a modifier)" do - expect("\u{1F90C}\u{1F3FC}".scan(subject)).to eql(["\u{1F90C}\u{1F3FC}"]) - end - - it "matches 💏🏿 couple kissing: dark skin tone (emoji 13.1 sequence)" do - expect("\u{1F48F}\u{1F3FF}".scan(subject)).to eql(["\u{1F48F}\u{1F3FF}"]) - end - - it "matches # (default text presentation character)" do - expect("#".scan(subject)).to eql(["#"]) - end - - it "matches #️ (default text presentation character with emoji variation selector)" do - expect("#\u{FE0F}".scan(subject)).to eql(["#\u{FE0F}"]) + it "doesn't match #️ (default text presentation character with emoji variation selector)" do + expect("#\u{FE0F}".scan(subject)).to eql([]) end it "matches #️⃣ (default text presentation character with emoji variation selector and combining enclosing keycap)" do diff --git a/src/emoji_regex.rb b/src/emoji_regex.rb index a0c5a15..294aad1 100644 --- a/src/emoji_regex.rb +++ b/src/emoji_regex.rb @@ -1,22 +1,6 @@ module EmojiRegex - # Matches emoji which are recommended for general interchange, as defined by the `RGI_Emoji` property in the Unicode standard. + # Matches characters which are emoji, as defined by the Unicode standard's `emoji-test` data file, https://unicode.org/Public/emoji/14.0/emoji-test.txt # # "#️⃣" (U+0023,U+FE0F,U+20E3) is matched, but not "#️" (U+0023,U+FE0F) or "#" (U+0023). - RGIEmoji = /% emojiRegexRGI %/ - - # Matches emoji which present as emoji by default, and those which present as emoji when combined with `U+FE0F VARIATION SELECTOR-16`. - # - # "#️⃣" (U+0023,U+FE0F,U+20E3) and "#️" (U+0023,U+FE0F) are matched, but not "#" (U+0023). - # - # @deprecated Please use {RGIEmoji} instead. 
RGIEmoji will become the only regex, and be renamed to Emoji in a future major release. Regex = /% emojiRegex %/ - deprecate_constant :Regex - - # Matches emoji which present as text by default (regardless of variation selector), as well as those which present as emoji by default. - # - # All of "#" (U+0023), "#️" (U+0023,U+FE0F) and "#️⃣" (U+0023,U+FE0F,U+20E3) are matched. - # - # @deprecated Please use {RGIEmoji} instead. RGIEmoji will become the only regex, and be renamed to Emoji in a future major release. - Text = /% emojiRegexText %/ - deprecate_constant :Text end diff --git a/yarn.lock b/yarn.lock index 92c6532..8233256 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2,7 +2,7 @@ # yarn lockfile v1 -emoji-regex@9.2.2: - version "9.2.2" - resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" - integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== +emoji-test-regex-pattern@1.7.1: + version "1.7.1" + resolved "https://registry.yarnpkg.com/emoji-test-regex-pattern/-/emoji-test-regex-pattern-1.7.1.tgz#bb47d2b636b0488ccb31288f3446c9fbba8b1d50" + integrity sha512-WsfAZPq/l8Z0eGNV2wILneYHwKLPybgu8s/JzMM5MPu1P4vy3FiB3/wUi4Gr6dfI7vdAkjewgTaTvy277lXc/w==