Skip to content
This repository has been archived by the owner on Nov 10, 2020. It is now read-only.

Commit

Permalink
Merge pull request #2 from nitely/feature/unicode_10
Browse files Browse the repository at this point in the history
feature/unicode_10
  • Loading branch information
nitely committed Oct 10, 2017
2 parents 3f58f5d + 02cbd11 commit aad3468
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 226 deletions.
5 changes: 4 additions & 1 deletion .editorconfig
Expand Up @@ -21,4 +21,7 @@ indent_size = 4
indent_style = space
indent_size = 2


# Override for Makefile
[{Makefile, makefile, GNUmakefile}]
indent_style = tab
indent_size = 4
5 changes: 5 additions & 0 deletions .travis.yml
Expand Up @@ -5,11 +5,16 @@ language: python
# Use container-based infrastructure
sudo: false

notifications:
email:
on_success: never

python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
- "3.6"

install:
- pip install coveralls
Expand Down
10 changes: 10 additions & 0 deletions CHANGES
@@ -1,3 +1,13 @@
0.4
---

* Support for Unicode 10

0.3
---

* Support for Unicode 9

0.2
---

Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTORS.md
@@ -0,0 +1,7 @@
# Authors:

* Esteban C Borsani @nitely

# Contributors:

* Henry Cooke @prehensile
22 changes: 22 additions & 0 deletions Makefile
@@ -0,0 +1,22 @@
clean:
rm -fr dist/ build/ *.egg-info/

docs:
cd docs && make clean && make html

test:
python runtests.py

gen:
python ./build.py

bench:
python ./benchmark.py

sdist: test clean
python setup.py sdist

release: test clean
python setup.py sdist upload

.PHONY: clean test sdist release docs gen bench
10 changes: 5 additions & 5 deletions README.md
Expand Up @@ -5,12 +5,12 @@
[![pypi](https://img.shields.io/pypi/v/emoji-unicode.svg?style=flat-square)](https://pypi.python.org/pypi/emoji-unicode)
[![licence](https://img.shields.io/pypi/l/emoji-unicode.svg?style=flat-square)](https://raw.githubusercontent.com/nitely/emoji-unicode/master/LICENSE)

Replace unicode emojis in a text. Supports *Unicode 9* standard.
Replace Unicode emojis in text. Supports the *Unicode 10* standard.

## Compatibility

* Python 2.7 ([wide-build](http://emoji-unicode.readthedocs.org/en/latest/python2.html)),
3.3, 3.4 and 3.5 (recommended)
3.3, 3.4, 3.5 and 3.6+ (recommended)

## Install

Expand Down Expand Up @@ -128,21 +128,21 @@ writing the result into `pattern.py`.
To generate the `pattern.py` file, run:

```
$ python ./build.py
make gen
```

## Tests

```
$ python ./runtests.py
make test
```

## Benchmark

This will run some silly benchmarks.

```
$ python ./benchmark.py
make bench
```

Here is the output on my machine:
Expand Down
2 changes: 1 addition & 1 deletion emoji_unicode/__init__.py
Expand Up @@ -6,7 +6,7 @@
from .parser import replace, normalize
from .models import Emoji

__version__ = '0.2'
__version__ = '0.4'

__all__ = [
'RE_PATTERN_TEMPLATE',
Expand Down
22 changes: 17 additions & 5 deletions emoji_unicode/data_parser.py
Expand Up @@ -57,13 +57,25 @@ def _parse(line):
)


EMOJI_EXCLUDE = {str(n) for n in range(0, 10)} | {'#', '*'}


def parse():
with io.open(os.path.join(DIR, 'emoji-data.txt'), mode='r', encoding='utf-8') as fh:
return [
_parse(line)
for line in fh.readlines()
if not line.startswith('#')
]
cps = []

for line in fh.readlines():
if line.startswith('#'):
continue

cp = _parse(line)

if cp.split('-')[0] in EMOJI_EXCLUDE:
continue

cps.append(cp)

return cps


def read_template():
Expand Down
428 changes: 218 additions & 210 deletions emoji_unicode/emoji-data.txt

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions emoji_unicode/pattern.py
Expand Up @@ -3,15 +3,15 @@
from __future__ import unicode_literals


CODE_POINTS = '\xa9\xae\u203c\u2049\u2122\u2139\u2194-\u2199\u21a9-\u21aa\u231a-\u231b\u2328\u23cf\u23e9-\u23f3\u23f8-\u23fa\u24c2\u25aa-\u25ab\u25b6\u25c0\u25fb-\u25fe\u2600-\u2604\u260e\u2611\u2614-\u2615\u2618\u261d\u2620\u2622-\u2623\u2626\u262a\u262e-\u262f\u2638-\u263a\u2640\u2642\u2648-\u2653\u2660\u2663\u2665-\u2666\u2668\u267b\u267f\u2692-\u2697\u2699\u269b-\u269c\u26a0-\u26a1\u26aa-\u26ab\u26b0-\u26b1\u26bd-\u26be\u26c4-\u26c5\u26c8\u26ce\u26cf\u26d1\u26d3-\u26d4\u26e9-\u26ea\u26f0-\u26f5\u26f7-\u26fa\u26fd\u2702\u2705\u2708-\u2709\u270a-\u270b\u270c-\u270d\u270f\u2712\u2714\u2716\u271d\u2721\u2728\u2733-\u2734\u2744\u2747\u274c\u274e\u2753-\u2755\u2757\u2763-\u2764\u2795-\u2797\u27a1\u27b0\u27bf\u2934-\u2935\u2b05-\u2b07\u2b1b-\u2b1c\u2b50\u2b55\u3030\u303d\u3297\u3299\U0001f004\U0001f0cf\U0001f170-\U0001f171\U0001f17e\U0001f17f\U0001f18e\U0001f191-\U0001f19a\U0001f1e6-\U0001f1ff\U0001f201-\U0001f202\U0001f21a\U0001f22f\U0001f232-\U0001f23a\U0001f250-\U0001f251\U0001f300-\U0001f320\U0001f321\U0001f324-\U0001f32c\U0001f32d-\U0001f32f\U0001f330-\U0001f335\U0001f336\U0001f337-\U0001f37c\U0001f37d\U0001f37e-\U0001f37f\U0001f380-\U0001f393\U0001f396-\U0001f397\U0001f399-\U0001f39b\U0001f39e-\U0001f39f\U0001f3a0-\U0001f3c4\U0001f3c5\U0001f3c6-\U0001f3ca\U0001f3cb-\U0001f3ce\U0001f3cf-\U0001f3d3\U0001f3d4-\U0001f3df\U0001f3e0-\U0001f3f0\U0001f3f3-\U0001f3f5\U0001f3f7\U0001f3f8-\U0001f3ff\U0001f400-\U0001f43e\U0001f43f\U0001f440\U0001f441\U0001f442-\U0001f4f7\U0001f4f8\U0001f4f9-\U0001f4fc\U0001f4fd\U0001f4ff\U0001f500-\U0001f53d\U0001f549-\U0001f54a\U0001f54b-\U0001f54e\U0001f550-\U0001f567\U0001f56f-\U0001f570\U0001f573-\U0001f579\U0001f57a\U0001f587\U0001f58a-\U0001f58d\U0001f590\U0001f595-\U0001f596\U0001f5a4\U0001f5a5\U0001f5a8\U0001f5b1-\U0001f5b2\U0001f5bc\U0001f5c2-\U0001f5c4\U0001f5d1-\U0001f5d3\U0001f5dc-\U0001f5de\U0001f5e1\U0001f5e3\U0001f5e8\U0001f5ef\U0001f5f3\U0001f5fa\U0001f5fb-\U0001f5ff\U0001f600\U0001f601-\U0001f610\U0001f611\U0001f612-\U0001f6
14\U0001f615\U0001f616\U0001f617\U0001f618\U0001f619\U0001f61a\U0001f61b\U0001f61c-\U0001f61e\U0001f61f\U0001f620-\U0001f625\U0001f626-\U0001f627\U0001f628-\U0001f62b\U0001f62c\U0001f62d\U0001f62e-\U0001f62f\U0001f630-\U0001f633\U0001f634\U0001f635-\U0001f640\U0001f641-\U0001f642\U0001f643-\U0001f644\U0001f645-\U0001f64f\U0001f680-\U0001f6c5\U0001f6cb-\U0001f6cf\U0001f6d0\U0001f6d1-\U0001f6d2\U0001f6e0-\U0001f6e5\U0001f6e9\U0001f6eb-\U0001f6ec\U0001f6f0\U0001f6f3\U0001f6f4-\U0001f6f6\U0001f910-\U0001f918\U0001f919-\U0001f91e\U0001f920-\U0001f927\U0001f930\U0001f933-\U0001f93a\U0001f93c-\U0001f93e\U0001f940-\U0001f945\U0001f947-\U0001f94b\U0001f950-\U0001f95e\U0001f980-\U0001f984\U0001f985-\U0001f991\U0001f9c0' # Template string
CODE_POINTS = '\xa9\xae\u203c\u2049\u2122\u2139\u2194-\u2199\u21a9-\u21aa\u231a-\u231b\u2328\u23cf\u23e9-\u23f3\u23f8-\u23fa\u24c2\u25aa-\u25ab\u25b6\u25c0\u25fb-\u25fe\u2600-\u2604\u260e\u2611\u2614-\u2615\u2618\u261d\u2620\u2622-\u2623\u2626\u262a\u262e-\u262f\u2638-\u263a\u2640\u2642\u2648-\u2653\u2660\u2663\u2665-\u2666\u2668\u267b\u267f\u2692-\u2697\u2699\u269b-\u269c\u26a0-\u26a1\u26aa-\u26ab\u26b0-\u26b1\u26bd-\u26be\u26c4-\u26c5\u26c8\u26ce\u26cf\u26d1\u26d3-\u26d4\u26e9-\u26ea\u26f0-\u26f5\u26f7-\u26fa\u26fd\u2702\u2705\u2708-\u2709\u270a-\u270b\u270c-\u270d\u270f\u2712\u2714\u2716\u271d\u2721\u2728\u2733-\u2734\u2744\u2747\u274c\u274e\u2753-\u2755\u2757\u2763-\u2764\u2795-\u2797\u27a1\u27b0\u27bf\u2934-\u2935\u2b05-\u2b07\u2b1b-\u2b1c\u2b50\u2b55\u3030\u303d\u3297\u3299\U0001f004\U0001f0cf\U0001f170-\U0001f171\U0001f17e\U0001f17f\U0001f18e\U0001f191-\U0001f19a\U0001f1e6-\U0001f1ff\U0001f201-\U0001f202\U0001f21a\U0001f22f\U0001f232-\U0001f23a\U0001f250-\U0001f251\U0001f300-\U0001f320\U0001f321\U0001f324-\U0001f32c\U0001f32d-\U0001f32f\U0001f330-\U0001f335\U0001f336\U0001f337-\U0001f37c\U0001f37d\U0001f37e-\U0001f37f\U0001f380-\U0001f393\U0001f396-\U0001f397\U0001f399-\U0001f39b\U0001f39e-\U0001f39f\U0001f3a0-\U0001f3c4\U0001f3c5\U0001f3c6-\U0001f3ca\U0001f3cb-\U0001f3ce\U0001f3cf-\U0001f3d3\U0001f3d4-\U0001f3df\U0001f3e0-\U0001f3f0\U0001f3f3-\U0001f3f5\U0001f3f7\U0001f3f8-\U0001f3ff\U0001f400-\U0001f43e\U0001f43f\U0001f440\U0001f441\U0001f442-\U0001f4f7\U0001f4f8\U0001f4f9-\U0001f4fc\U0001f4fd\U0001f4ff\U0001f500-\U0001f53d\U0001f549-\U0001f54a\U0001f54b-\U0001f54e\U0001f550-\U0001f567\U0001f56f-\U0001f570\U0001f573-\U0001f579\U0001f57a\U0001f587\U0001f58a-\U0001f58d\U0001f590\U0001f595-\U0001f596\U0001f5a4\U0001f5a5\U0001f5a8\U0001f5b1-\U0001f5b2\U0001f5bc\U0001f5c2-\U0001f5c4\U0001f5d1-\U0001f5d3\U0001f5dc-\U0001f5de\U0001f5e1\U0001f5e3\U0001f5e8\U0001f5ef\U0001f5f3\U0001f5fa\U0001f5fb-\U0001f5ff\U0001f600\U0001f601-\U0001f610\U0001f611\U0001f612-\U0001f6
14\U0001f615\U0001f616\U0001f617\U0001f618\U0001f619\U0001f61a\U0001f61b\U0001f61c-\U0001f61e\U0001f61f\U0001f620-\U0001f625\U0001f626-\U0001f627\U0001f628-\U0001f62b\U0001f62c\U0001f62d\U0001f62e-\U0001f62f\U0001f630-\U0001f633\U0001f634\U0001f635-\U0001f640\U0001f641-\U0001f642\U0001f643-\U0001f644\U0001f645-\U0001f64f\U0001f680-\U0001f6c5\U0001f6cb-\U0001f6cf\U0001f6d0\U0001f6d1-\U0001f6d2\U0001f6e0-\U0001f6e5\U0001f6e9\U0001f6eb-\U0001f6ec\U0001f6f0\U0001f6f3\U0001f6f4-\U0001f6f6\U0001f6f7-\U0001f6f8\U0001f910-\U0001f918\U0001f919-\U0001f91e\U0001f91f\U0001f920-\U0001f927\U0001f928-\U0001f92f\U0001f930\U0001f931-\U0001f932\U0001f933-\U0001f93a\U0001f93c-\U0001f93e\U0001f940-\U0001f945\U0001f947-\U0001f94b\U0001f94c\U0001f950-\U0001f95e\U0001f95f-\U0001f96b\U0001f980-\U0001f984\U0001f985-\U0001f991\U0001f992-\U0001f997\U0001f9c0\U0001f9d0-\U0001f9e6' # Template string

TXT_VARIATION = '\uFE0E'
EMO_VARIATION = '\uFE0F'
FITZ_MODIFIER = '\U0001F3FB-\U0001F3FF'
KC_MODIFIER = '\u20E3'
ZWJ = '\u200D'
FLAGS = '\U0001F1E6-\U0001F1FF'
KEY_CAPS = '0-9\*#'
KEY_CAPS = r'0-9\*#'

RE_PATTERN_TEMPLATE = (
r'(?P<emoji>'
Expand Down
2 changes: 1 addition & 1 deletion emoji_unicode/pattern_template.py
Expand Up @@ -11,7 +11,7 @@
KC_MODIFIER = '\u20E3'
ZWJ = '\u200D'
FLAGS = '\U0001F1E6-\U0001F1FF'
KEY_CAPS = '0-9\*#'
KEY_CAPS = r'0-9\*#'

RE_PATTERN_TEMPLATE = (
r'(?P<emoji>'
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -14,7 +14,7 @@

setup(
name='emoji-unicode',
version='0.3',
version='0.4',
description='Replace unicode emojis by its corresponding image representation. Supports Unicode 9 standard.',
author='Esteban Castro Borsani',
author_email='ecastroborsani@gmail.com',
Expand Down
3 changes: 3 additions & 0 deletions tests/tests.py
Expand Up @@ -229,6 +229,9 @@ def tearDown(self):

def test_parse(self):
res = set(data_parser.parse())
self.assertFalse('\u0023' in res)
self.assertFalse('\u002A' in res)
self.assertFalse('\u0030-\u0039' in res)
self.assertTrue('\u00A9' in res)
self.assertTrue('\u2194-\u2199' in res) # range

Expand Down

0 comments on commit aad3468

Please sign in to comment.