Skip to content

Commit

Permalink
Merge pull request #8 from nsoma97/add_readme
Browse files Browse the repository at this point in the history
Add readme
  • Loading branch information
nsomabalint committed Dec 27, 2020
2 parents 25a0bbf + 51443ff commit f96bca6
Show file tree
Hide file tree
Showing 22 changed files with 120 additions and 27 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/datetime-parser-cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ jobs:
pip install -r requirements.txt
- name: Static analysis with mypy
run: |
python -m mypy --ignore-missing-imports src
python -m mypy --ignore-missing-imports hun_date_parser
- name: Static analysis with flake8
run: |
python -m flake8 --max-line-length=120 --per-file-ignores='patterns.py:E501' src
python -m flake8 --max-line-length=120 --per-file-ignores='patterns.py:E501' hun_date_parser
- name: Test with pytest
run: |
pytest
67 changes: 66 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,66 @@
# hun-date-parser
<h1 align="center">Hungarian Date Parser</h1>

<p align="center">
<i>A tool for extracting datetime intervals from Hungarian sentences and turning datetime objects into Hungarian text.</i>
</p>


<div align="center">
<img src="https://img.shields.io/github/stars/nsoma97/hun-date-parser" alt="Stars Badge"/>
<img src="https://img.shields.io/github/issues/nsoma97/hun-date-parser" alt="Issues Badge"/>
<img src="https://img.shields.io/github/license/nsoma97/hun-date-parser?color=2b9348" alt="License Badge"/>
<img src="https://img.shields.io/github/workflow/status/nsoma97/hun-date-parser/Datetime%20Parser%20Pipeline" alt="Tests"/>
</div>

<br>


Install and try the package with `pip install hun-date-parser`.

## :fire: Usage

Unless specified otherwise, relative dates (e.g. tomorrow, next week) are calculated relative to the current datetime at the moment the DatetimeExtractor is instantiated.

```python
from hun_date_parser import DatetimeExtractor

datetime_extractor = DatetimeExtractor()

datetime_extractor.parse_datetime('találkozzunk jövő kedd délután!')
# {'start_date': datetime.datetime(2020, 12, 29, 12, 0), 'end_date': datetime.datetime(2020, 12, 29, 17, 59, 59)}

datetime_extractor.parse_datetime('találkozzunk szombaton háromnegyed nyolc előtt két perccel')
# {'start_date': datetime.datetime(2020, 12, 26, 7, 43), 'end_date': datetime.datetime(2020, 12, 26, 7, 43, 59)}
```
The date parser is also capable of parsing explicit intervals from the text even when only one side of the interval is specified.
```python
datetime_extractor.parse_datetime('2020 decemberétől 2021 januárig')
# {'start_date': datetime.datetime(2020, 12, 1, 0, 0), 'end_date': datetime.datetime(2021, 1, 31, 23, 59, 59)}


datetime_extractor.parse_datetime('2020 decemberéig')
# {'start_date': None, 'end_date': datetime.datetime(2020, 12, 31, 23, 59, 59)}
```

The library is also capable of turning datetime objects into their Hungarian text representation.

```python
from datetime import datetime
from hun_date_parser import DatetimeTextualizer

datetime_textualizer = DatetimeTextualizer()

datetime_textualizer.generate_candidates()

datetime_textualizer.generate_candidates(datetime(2020, 12, 20, 18, 34), time_precision=2)
# {'date': ['ezen a héten vasárnap', '2020 december 20'],
# 'times': ['tizennyolc óra harmincnégy perc', '18:34', 'este hat óra harmincnégy perc', 'este fél 7 után 4 perccel']}
```

## :pencil: License

This project is licensed under the [Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) license. Feel free to use it in your own projects.

## :wrench: Contribute

Any help or feedback in further developing the library is welcome!
4 changes: 4 additions & 0 deletions hun_date_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Package entry point: re-export the two user-facing classes at the package
# root, so that `from hun_date_parser import DatetimeExtractor` works.
from hun_date_parser.date_textualizer.datetime_textualizer import DatetimeTextualizer
from hun_date_parser.date_parser.datetime_extractor import DatetimeExtractor

# Names exported by `from hun_date_parser import *`.
__all__ = ["DatetimeTextualizer", "DatetimeExtractor"]
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from .patterns import (R_ISO_DATE, R_NAMED_MONTH, R_TODAY, R_TOMORROW, R_NTOMORROW, R_YESTERDAY, R_NYESTERDAY,
R_WEEKDAY, R_WEEK, R_YEAR)
from src.utils import remove_accent, word_to_num, Year, Month, Week, Day
from hun_date_parser.utils import remove_accent, word_to_num, Year, Month, Week, Day


def match_iso_date(s: str) -> List[Dict[str, Any]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from typing import Dict, List, Union
from copy import copy

from src.date_parser.structure_parsers import match_multi_match, match_interval
from src.date_parser.date_parsers import (match_named_month, match_iso_date, match_weekday, match_relative_day,
match_week, match_named_year)
from src.date_parser.time_parsers import match_digi_clock, match_time_words
from src.utils import Year, Month, Week, Day, Daypart, Hour, Minute, monday_of_calenderweek
from hun_date_parser.date_parser.structure_parsers import match_multi_match, match_interval
from hun_date_parser.date_parser.date_parsers import (match_named_month, match_iso_date, match_weekday,
match_relative_day,
match_week, match_named_year)
from hun_date_parser.date_parser.time_parsers import match_digi_clock, match_time_words
from hun_date_parser.utils import Year, Month, Week, Day, Daypart, Hour, Minute, monday_of_calenderweek

daypart_mapping = [
(3, 5),
Expand All @@ -25,8 +26,6 @@ def assamble_datetime(now: datetime, dateparts: Union[List[Union[Year, Month, We
bottom: bool = True):
res_dt = []

print(dateparts)

if dateparts == 'OPEN':
return None
if not dateparts:
Expand Down Expand Up @@ -78,10 +77,12 @@ def assamble_datetime(now: datetime, dateparts: Union[List[Union[Year, Month, We
dp = dp_match[0][0]
if bottom:
res_dt.append(daypart_mapping[dp][0])
else:
y, m, d, h, mi, se = res_dt
next_day = datetime(y, m, d, h, mi, se) + timedelta(days=1)
elif dp == 5:
y, m, d = res_dt
next_day = datetime(y, m, d) + timedelta(days=1)
res_dt = [next_day.year, next_day.month, next_day.day, daypart_mapping[dp][1]]
else:
res_dt.append(daypart_mapping[dp][1])

if date_type == Hour and len(res_dt) == 3:
if dp_match:
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Dict, List, Any

from .patterns import R_DIGI, R_HOUR_MIN, R_HOUR_MIN_REV
from src.utils import remove_accent, word_to_num, Hour, Minute, Daypart
from hun_date_parser.utils import remove_accent, word_to_num, Hour, Minute, Daypart

NAN = -1

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from datetime import datetime

from src.date_textualizer.date2text import date2text, date2full_text
from src.date_textualizer.time2text import time2absolutetexttime, time2digi, time2relitivetexttime, time2lifelike
from hun_date_parser.date_textualizer.date2text import date2text, date2full_text
from hun_date_parser.date_textualizer.time2text import time2absolutetexttime, time2digi, time2relitivetexttime, \
time2lifelike


class DatetimeTextualizer:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import time

from src.utils import num_to_word
from hun_date_parser.utils import num_to_word

hours_word = {
0: 'nulla',
Expand Down
File renamed without changes.
22 changes: 22 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import setuptools

# The README is reused verbatim as the long description of the distribution.
with open("README.md", mode="r", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

# Distribution metadata, collected in one mapping and handed to setup() below.
package_metadata = {
    "name": "hun-date-parser",
    "version": "0.0.1",
    "author": "Soma Nagy",
    "author_email": "nagysomabalint@gmail.com",
    "description": (
        "A tool for extracting datetime intervals from Hungarian sentences "
        "and turning datetime objects into Hungarian text."
    ),
    "long_description": long_description,
    # Tells the package index to render the long description as Markdown.
    "long_description_content_type": "text/markdown",
    "url": "https://github.com/nsoma97/hun-date-parser",
    # Automatically discover every package under the project root.
    "packages": setuptools.find_packages(),
    "classifiers": [
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    "python_requires": ">=3.6",
}

setuptools.setup(**package_metadata)
Empty file removed src/date_textualizer/__init__.py
Empty file.
6 changes: 3 additions & 3 deletions test/test_date_parsers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime

from src.date_parser.date_parsers import Year, Month, Week, Day
from src.date_parser.date_parsers import (match_iso_date, match_named_month, match_relative_day, match_weekday,
match_week, match_named_year)
from hun_date_parser.date_parser.date_parsers import Year, Month, Week, Day
from hun_date_parser.date_parser.date_parsers import (match_iso_date, match_named_month, match_relative_day, match_weekday,
match_week, match_named_year)


def test_match_iso_date():
Expand Down
2 changes: 1 addition & 1 deletion test/test_datetime2text.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import time

from src.date_textualizer.time2text import time2lifelike
from hun_date_parser.date_textualizer.time2text import time2lifelike


def test_time2lifelike():
Expand Down
2 changes: 1 addition & 1 deletion test/test_datetime_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime

from src.date_parser.datetime_extractor import DatetimeExtractor, assamble_datetime
from hun_date_parser.date_parser.datetime_extractor import DatetimeExtractor, assamble_datetime


def test_datetime_extractor():
Expand Down
2 changes: 1 addition & 1 deletion test/test_structure_parsers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.date_parser.structure_parsers import match_interval, match_multi_match
from hun_date_parser.date_parser.structure_parsers import match_interval, match_multi_match


def test_match_interval():
Expand Down
4 changes: 2 additions & 2 deletions test/test_time_parsers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from src.utils import Daypart, Hour, Minute
from src.date_parser.time_parsers import match_digi_clock, match_time_words
from hun_date_parser.utils import Daypart, Hour, Minute
from hun_date_parser.date_parser.time_parsers import match_digi_clock, match_time_words


def test_match_digi_clock():
Expand Down
2 changes: 1 addition & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.utils import word_to_num, num_to_word, remove_accent
from hun_date_parser.utils import word_to_num, num_to_word, remove_accent


def test_remove_accent():
Expand Down

0 comments on commit f96bca6

Please sign in to comment.