Skip to content

Commit

Permalink
Merge branch 'master' into adding-swedish-language
Browse files Browse the repository at this point in the history
  • Loading branch information
waqasshabbir committed May 21, 2017
2 parents 58c8ffc + 29ba09b commit f563ee2
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 13 deletions.
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ Supported languages
* Hebrew
* Hindi
* Hungarian
* Georgian
* German
* Indonesian
* Italian
Expand Down
92 changes: 92 additions & 0 deletions data/languagefiles/ka.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
name: Georgian

dateorder: DMY

skip: ["ის", "ზე", "დაახლოებით", "და"]

monday:
- ორშაბათი
- ორშ
tuesday:
- სამშაბათი
- სამ
wednesday:
- ოთხშაბათი
- ოთხ
thursday:
- ხუთშაბათი
- ხუთ
friday:
- პარასკევი
- პარ
saturday:
- შაბათი
- შაბ
sunday:
- კვირა
- კვი

january:
- იანვარი
- იან
february:
- თებერვალი
- თებ
march:
- მარტი
- მარ
april:
- აპრილი
- აპრ
may:
- მაისი
- მაი
june:
- ივნისი
- ივნ
july:
- ივლისი
- ივლ
august:
- აგვისტო
- აგვ
september:
- სექტემბერი
- სექ
october:
- ოქტომბერი
- ოქტ
november:
- ნოემბერი
- ნოე
december:
- დეკემბერი
- დეკ

year:
- წლის
- წ.
month:
- თვე
week:
- კვირა
day:
- დღე
hour:
- საათი
minute:
- წუთი
second:
- წამი

ago:
- წინ
in:
- დღეიდან

simplifications:
- ახლა: now
- გუშინ: 1 დღე
- ხვალ: დღეიდან 1 დღე
- დღეს: 0 დღე
- ერთ: 1
4 changes: 3 additions & 1 deletion data/languages.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
base:
skip: [" ", ".", ",", ";", "-", "/", "'", "|", "@", "[", "]", ","]

languageorder: ['en', 'ar', 'be', 'bg', 'bn', 'cs', 'da', 'de', 'es', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'th', 'tl', 'tr', 'uk', 'vi', 'zh']

languageorder: ['en', 'ar', 'be', 'bg', 'bn', 'cs', 'da', 'de', 'es', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'th', 'tl', 'tr', 'uk', 'vi', 'zh']

ar: !include languagefiles/ar.yaml
be: !include languagefiles/be.yaml
Expand All @@ -21,6 +22,7 @@ hu: !include languagefiles/hu.yaml
id: !include languagefiles/id.yaml
it: !include languagefiles/it.yaml
ja: !include languagefiles/ja.yaml
ka: !include languagefiles/ka.yaml
nl: !include languagefiles/nl.yaml
pl: !include languagefiles/pl.yaml
pt: !include languagefiles/pt.yaml
Expand Down
23 changes: 17 additions & 6 deletions dateparser/languages/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Language(object):
_dictionary = None
_normalized_dictionary = None
_simplifications = None
_simplification_patterns = None
_normalized_simplifications = None
_splitters = None
_wordchars = None
Expand Down Expand Up @@ -59,14 +60,24 @@ def translate(self, date_string, keep_formatting=False, settings=None):
def _simplify(self, date_string, settings=None):
# Lower-cases date_string, applies each simplification returned by
# self._get_simplifications(settings=settings) in order, and returns the
# resulting string (lower-cased again after every substitution).
date_string = date_string.lower()
for simplification in self._get_simplifications(settings=settings):
# NOTE(review): this hunk is a flattened diff without +/- markers. The six
# lines below look like the previous inline implementation (pattern built
# and passed to re.sub on every call) — confirm against the repository.
pattern, replacement = list(simplification.items())[0]
if not self.info.get('no_word_spacing', False):
replacement = wrap_replacement_for_regex(replacement, pattern)
pattern = r'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % pattern
date_string = re.sub(
pattern, replacement, date_string, flags=re.IGNORECASE | re.UNICODE).lower()
# NOTE(review): the two lines below look like the replacement for the six
# lines above: the pattern/replacement pair now comes from
# self._get_simplification_substitution and the pattern object's own
# .sub() is used — confirm against the repository.
pattern, replacement = self._get_simplification_substitution(simplification)
date_string = pattern.sub(replacement, date_string).lower()
return date_string

def _get_simplification_substitution(self, simplification):
    """Return ``(compiled_pattern, replacement)`` for one simplification.

    ``simplification`` is a mapping whose first item is a
    ``pattern -> replacement`` pair. Unless ``self.info`` has a truthy
    ``'no_word_spacing'`` entry, the replacement is passed through
    ``wrap_replacement_for_regex`` and the pattern is surrounded by two
    boundary groups (start/end of string, digit, underscore or non-word
    character).

    Compiled patterns are stored in ``self._simplification_patterns``,
    keyed by the final pattern string, so each distinct pattern is
    compiled only once per object.
    """
    first_entry = list(simplification.items())[0]
    raw_pattern, replacement = first_entry

    if self.info.get('no_word_spacing', False):
        regex_source = raw_pattern
    else:
        replacement = wrap_replacement_for_regex(replacement, raw_pattern)
        regex_source = r'(\A|\d|_|\W)%s(\d|_|\W|\Z)' % raw_pattern

    # Create the cache lazily on first use.
    cache = self._simplification_patterns
    if cache is None:
        cache = {}
        self._simplification_patterns = cache

    try:
        compiled = cache[regex_source]
    except KeyError:
        compiled = re.compile(regex_source, flags=re.IGNORECASE | re.UNICODE)
        cache[regex_source] = compiled
    return compiled, replacement

def _clear_future_words(self, words):
freshness_words = set(['day', 'week', 'month', 'year', 'hour', 'minute', 'second'])
if set(words).isdisjoint(freshness_words):
Expand Down
3 changes: 3 additions & 0 deletions tests/test_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,9 @@ def setUp(self):
param('ग्यारह जुलाई 1994, 11:12',datetime(1994, 7, 11, 11, 12)),
param('१७ अक्टूबर २०१८',datetime(2018, 10, 17, 0, 0)),
param('12 जनवरी 1997 11:08 अपराह्न',datetime(1997, 1, 12, 23, 8)),
# Georgian dates
param('2011 წლის 17 მარტი, ოთხშაბათი', datetime(2011, 3, 17, 0, 0)),
param('2015 წ. 12 ივნ, 15:34', datetime(2015, 6, 12, 15, 34))
])
def test_dates_parsing(self, date_string, expected):
self.given_parser(settings={'NORMALIZE': False,
Expand Down
11 changes: 6 additions & 5 deletions tests/test_freshness_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timedelta, date, time
from functools import wraps
import pytz
import regex as re

from dateutil.relativedelta import relativedelta
from mock import Mock, patch
Expand Down Expand Up @@ -308,7 +309,7 @@ def setUp(self):
ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
period='day'),
# param('এখন', ago={'seconds': 0}, period='day'),
# Hindi dates
param('1 घंटे पहले', ago={'hours': 1},period='day'),
param('15 मिनट पहले',ago={'minutes':15},period='day'),
Expand All @@ -317,7 +318,7 @@ def setUp(self):
param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'),
param('आज', ago={'days': 0}, period='day'),
])

def test_relative_past_dates(self, date_string, ago, period):
self.given_parser(settings={'NORMALIZE': False})
self.given_date_string(date_string)
Expand Down Expand Up @@ -581,7 +582,7 @@ def test_relative_past_dates(self, date_string, ago, period):
ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
period='day'),
# param('এখন', ago={'seconds': 0}, period='day'),
# Hindi dates
param('1 घंटे पहले', ago={'hours': 1},period='day'),
param('15 मिनट पहले',ago={'minutes':15},period='day'),
Expand Down Expand Up @@ -680,7 +681,7 @@ def test_normalized_relative_dates(self, date_string, ago, period):
param('gelecek hafta', in_future={'weeks': 1}, period='week'),
param('gelecek ay', in_future={'months': 1}, period='month'),
param('gelecek yıl', in_future={'years': 1}, period='year'),
# Hindi dates
#param('1 वर्ष 10 महीने में', in_future={'years': 1, 'months': 10}, period='month'),
param('15 घंटे बाद', in_future={'hours': 15}, period='day'),
Expand Down Expand Up @@ -716,7 +717,7 @@ def test_dates_not_supported_by_date_time(self, date_string):
self.given_parser()
self.given_date_string(date_string)
self.when_date_is_parsed()
self.then_error_was_raised(ValueError, ['year is out of range',
self.then_error_was_raised(ValueError, ['is out of range',
"('year must be in 1..9999'"])

@parameterized.expand([
Expand Down
6 changes: 5 additions & 1 deletion tests/test_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,11 @@ def test_translation(self, shortname, datetime_string, expected_translation):
param('sv', "förrgår", "2 day"),
param('sv', "förra månaden", "1 month"),
param('sv', "nästa månad", "in 1 month"),
# Georgian
param('ka', 'გუშინ', '1 day'),
param('ka', 'დღეს', '0 day'),
param('ka', 'ერთ თვე', '1 month'),
param('ka', 'დღეიდან ერთ კვირა', 'in 1 week'),
])
def test_freshness_translation(self, shortname, datetime_string, expected_translation):
# Finnish language uses "t" as hour, so empty SKIP_TOKENS.
Expand Down

0 comments on commit f563ee2

Please sign in to comment.