Skip to content

Commit

Permalink
feat(emoji): request data from url and store clean json dictionary
Browse files Browse the repository at this point in the history
This module requests emoji data from unicode.org and stores a clean
dictionary of all emoji data in a `.json` file. For now it only gets the
`full-emoji-list` and `full-emoji-modifiers` data from the URL.

* Function `emoji_get_url_data` requests the emoji data URL, raises
for a bad status and, if the status is OK, returns the response text for
further processing. This function is used by the function that cleans the data.
* Function `emoji_clean_url_data` cleans up the previously requested
data and stores it as a dictionary in a `.json` file in the `json`
folder. By default this function saves the `emoji-list` data. You can
use the parameters to debug what data is stored. By default the
function only prints the stats at the end.

In the folder `json/` there are already generated json data by
calling functions `emoji_clean_url_data(emoji_url=emoji_lis_url)`
and `emoji_clean_url_data(emoji_url=emoji_mod_url)`.

TODO
* Now we need to do some functions that extract this json data.
* Also we need to do a function that asks if you want to check
for new data on the unicode website and download and clean it.
  • Loading branch information
mdsanima committed Jun 7, 2021
1 parent ab24e34 commit bfa0a94
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 0 deletions.
193 changes: 193 additions & 0 deletions src/mdsanima_dev/emoji.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
"""
# Emoji Module
Request data of Emoji from unicode.org and store json dict all data.
"""

import json
import requests
import pathlib
from datetime import datetime
from colors import get_complex_color

# Directory that contains this module; the json output path is built from it.
HERE = pathlib.Path(__file__).parent

# unicode.org chart pages that are downloaded and parsed: the full emoji
# list and the full emoji modifiers list.
emoji_lis_url = 'http://www.unicode.org/emoji/charts/full-emoji-list.html'
emoji_mod_url = 'http://www.unicode.org/emoji/charts/full-emoji-modifiers.html'

# emo_data = 'json/emoji-list-test.txt'

# def read_file(path_file):
# with open(path_file, 'r', encoding='utf-8') as r:
# lines = r.read().splitlines()
# lines_len = len(lines)
# return lines, lines_len


def emoji_get_url_data(emoji_url, timeout=30):
    """
    Request the emoji data url and return the response body as text.

    The response status is checked with `raise_for_status`, so the text is
    only returned when the server answered successfully.

    Args:
        emoji_url (str): Link to emoticons.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout `requests.get` may wait forever on a stalled
            connection.
    Returns:
        str: Html code as string text.
    Raises:
        requests.HTTPError: When the response status is an error status.
        requests.Timeout: When the server does not answer within `timeout`.
    Usage:
        Assigning function calling to a variable.
    .. code::
        eud = emoji_get_url_data(emoji_lis_url)
    """
    res = requests.get(str(emoji_url), timeout=timeout)
    res.raise_for_status()

    return res.text


def emoji_clean_url_data(
    bhead: bool = False,
    mhead: bool = False,
    ucode: bool = False,
    emoji: bool = False,
    ename: bool = False,
    emoji_url: str = emoji_lis_url
) -> dict:
    """
    Download an emoji chart page, parse it line by line and store the
    cleaned data as a json file in the `json` folder next to this module.

    The page is scanned for `bighead` / `mediumhead` headings and for the
    `code`, `chars` and `name` table cells. Every emoji name is stored
    under its big heading and medium heading together with a running
    number, its code points and the emoji character. A summary with the
    counters is printed at the end.

    Args:
        bhead (bool): End each printed big heading with a new line.
        mhead (bool): End each printed medium heading with a new line.
        ucode (bool): End each printed code with a new line.
        emoji (bool): End each printed emoji with a new line.
        ename (bool): End each printed name with a new line.
        emoji_url (str): Page to download. When it equals `emoji_lis_url`
            the result is written to `json/emoji-list.json`, otherwise to
            `json/emoji-modifiers.json`.
    Returns:
        dict: The stored data, `{'emo': ..., 'src': ...}`, where `emo`
        holds the parsed emoji and `src` holds the source info and stats.
    Usage:
        Generating both json files.
    .. code::
        emoji_clean_url_data(emoji_url=emoji_lis_url)
        emoji_clean_url_data(emoji_url=emoji_mod_url)
    """
    mds = get_complex_color

    eud = emoji_get_url_data(emoji_url)
    lines = eud.splitlines()

    cnt_bighead = 0
    cnt_mediumhead = 0
    cnt_emoji = 0
    emoji_version = ''
    # Pre-set like `emoji_version`, so the summary and the stored metadata
    # do not fail on an unbound name when the page has no `<h1>` line.
    emoji_list_dt = ''

    n_line = '\n'
    l_line = ' '

    # Progress lines start with '\r'. With the flag off the terminator is
    # `l_line` instead of a new line.
    # NOTE(review): presumably the third argument of `get_complex_color`
    # is the print terminator - confirm against the `colors` module.
    bhead_end = n_line if bhead else l_line
    mhead_end = n_line if mhead else l_line
    ucode_end = n_line if ucode else l_line
    emoji_end = n_line if emoji else l_line
    ename_end = n_line if ename else l_line

    emo = {}
    emodt = {}

    # The first 15 lines of the page are not parsed.
    for line in lines[15:]:
        # Attribute values are single quoted, so splitting on the quote
        # puts the cell content at a fixed index for every kind of line.
        line_split = line.rsplit("'")

        if line.find("h1>") >= 1:
            emo_version = (
                line.replace("<h1>", "").replace("</h1>", "").split(", ")
            )
            emoji_version = emo_version[1]
            emoji_list_dt = emo_version[0]

        if line.find("'bighead'") >= 1:
            cnt_bighead += 1
            emo_bighead = (
                line_split[7].replace("-", " ").replace("&amp;_", "")
            )
            mds('\rBIG HEAD'.ljust(13), 34, '-> ')
            mds(emo_bighead.ljust(94), 88, str(bhead_end))

            emo[emo_bighead] = {}

        if line.find("'mediumhead'") >= 1:
            cnt_mediumhead += 1
            emo_mediumhead = (
                line_split[7]
                .replace("-", " ").replace("&amp;_", "")
                .replace(" ", "_")
            )
            mds('\r MEDIUM'.ljust(13), 24, '-> ')
            mds(emo_mediumhead.ljust(94), 36, str(mhead_end))

            emo[emo_bighead][emo_mediumhead] = {}

        if line.find("'code'") >= 1:
            raw_code = (
                line_split[6].replace(">", "").replace("</a</td", "")
            )
            # Pad every code point to eight hex digits: a 7 character
            # `U+XXXXX` needs three zeros, shorter ones get four.
            emo_code = ' '.join(
                co_de.replace("U+", 'U000' if len(co_de) == 7 else 'U0000')
                for co_de in raw_code.split(' ')
            )
            mds('\r CODE'.ljust(13), 100, '=> ')
            mds(emo_code.ljust(94), 239, str(ucode_end))

        if line.find("'chars'") >= 1:
            cnt_emoji += 1
            emo_emoji = line_split[2].replace(">", '').replace("</td", '')
            mds('\r EMOJI'.ljust(13), 149, '=> ')
            print(emo_emoji.ljust(94), end=emoji_end)

        if line.find("class='name'") >= 1:
            # Strip markup and punctuation, then build a lower case key
            # with underscores instead of spaces.
            emo_name = (
                line_split[2]
                .replace(">", '').replace("</td", '')
                .replace("-", ' ').replace(":", "")
                .replace(",", '').replace(".", '')
                .replace("(", '').replace(")", '')
                .replace("’", '').replace("“", '')
                .replace("”", '').replace("&amp;", '')
                .replace(" ", ' ').replace(" ", ' ')
                .replace("!", '').replace(" ", "_").lower()
            )
            # NOTE(review): spaces were already replaced by underscores
            # above, so as written this comparison looks like it can never
            # be true - confirm the intended literals.
            if emo_name == " colspan=":
                part = (
                    line_split[14]
                    .replace(">", '').replace("</td", '')
                    .partition(' ')
                )
                emo_name = part[2].replace(":", "").replace(" ", '_')
            mds('\r NAME'.ljust(13), 148, '=> ')
            mds(emo_name.ljust(94), 244, str(ename_end))

            emo[emo_bighead][emo_mediumhead][emo_name] = {
                'number': cnt_emoji, 'code': emo_code, 'emoji': emo_emoji
            }

    mds('\r' + '-' * 30, 228)
    mds('\r' + 'FOUND BIG HEAD'.ljust(20), 34, ' -> ')
    mds(str(cnt_bighead).ljust(94), 32)
    mds('FOUND MEDIUM HEAD'.ljust(20), 31, ' -> ')
    mds(str(cnt_mediumhead).ljust(10), 29)
    mds('FOUND EMOJI'.ljust(20), 149, ' -> ')
    mds(str(cnt_emoji).ljust(10), 197)
    mds('EMOJI VERSION'.ljust(20), 106, ' -> ')
    mds(str(emoji_version).ljust(10), 112)
    mds(str(emoji_list_dt)[:20].ljust(20).upper(), 117, ' -> ')
    mds('DONE'.ljust(18), 107)
    mds('-' * 30, 228)

    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    emodt['emoji_data'] = {
        'emoji_version': emoji_version,
        'emoji_src': emoji_list_dt,
        # Record the page that was actually downloaded, not always the
        # emoji list page.
        'emoji_url': emoji_url,
        'json_generated_dt': now
    }

    emodt['emoji_stats'] = {
        'big_head': cnt_bighead,
        'medium_head': cnt_mediumhead,
        'emoji': cnt_emoji
    }

    emo_dic = {'emo': emo, 'src': emodt}

    em_li = 'json/emoji-list.json'
    em_mo = 'json/emoji-modifiers.json'
    wri_emo = em_li if emoji_url == emoji_lis_url else em_mo

    out_path = HERE / wri_emo
    # Make sure the output folder exists before the file is opened.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    with open(out_path, 'w', encoding='utf-8') as w:
        json.dump(emo_dic, w, ensure_ascii=False)

    return emo_dic


#emoji_clean_url_data(emoji_url=emoji_lis_url)
#emoji_clean_url_data(emoji_url=emoji_mod_url)
1 change: 1 addition & 0 deletions src/mdsanima_dev/json/emoji-list.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/mdsanima_dev/json/emoji-modifiers.json

Large diffs are not rendered by default.

0 comments on commit bfa0a94

Please sign in to comment.