In [53]:
import arrow
from datetime import datetime as dt 

In [60]:
from enum import Enum
class Token(Enum):
    """Kinds of tokens produced when a date phrase is tokenized."""

    # A numeric quantity; the amount travels alongside the token.
    NUM = 0

    # Direction of the shift relative to the base date.
    PLUS = 1
    MINUS = 2

    # Calendar units a quantity can be expressed in.
    DAY = 3
    WEEK = 4
    MONTH = 5

    # Marks the end of the phrase.
    END = 6

In [79]:
class DateTranslator:
    """Translate a relative Hebrew date phrase into an absolute ``yyyymmdd`` date.

    A phrase such as "in three days." or "two months and one day ago."
    (written in Hebrew) is tokenized and then applied, unit by unit, to a
    base date.

    Attributes:
        data: The base date (an Arrow object) until a phrase has been
            translated; afterwards the most recent result as a ``yyyymmdd``
            string. Kept for backward compatibility with existing callers.
    """

    def __init__(self, init_date=None):
        """Set the base date that phrases are resolved against.

        Args:
            init_date: Base date as a ``yyyymmdd`` string. When ``None`` the
                current time (``arrow.now()``) is used.

        Raises:
            ValueError: If ``init_date`` does not match ``%Y%m%d``.
        """
        if init_date is None:
            base = arrow.now()
        else:
            base = arrow.get(dt.strptime(init_date, "%Y%m%d"))

        # The base date is stored separately from ``data`` so that every
        # translation starts from the same date, even after ``data`` has been
        # replaced by a result string.
        self._base_date = base
        self.data = base

    def tokenizer(self, date_phrase):
        """Split a Hebrew date phrase into a list of ``(Token, value)`` pairs.

        Args:
            date_phrase: The phrase to tokenize. Surrounding whitespace and a
                single trailing period are ignored.

        Returns:
            A list of 2-tuples. The second item is the amount for
            ``Token.NUM`` and ``0`` for every other token. The list always
            ends with ``(Token.END, 0)``.

        Raises:
            KeyError: If a word is neither a known keyword nor a known number.
        """
        # The tables are built here (not at class level) so that ``Token`` is
        # only needed when the method is actually called.
        # Values are either a single Token or a tuple of ready-made
        # (Token, value) pairs that the word expands to.
        token_map = {
            'מחר': ((Token.PLUS, 0), (Token.NUM, 1), (Token.DAY, 0)),    # tomorrow
            'אתמול': ((Token.MINUS, 0), (Token.NUM, 1), (Token.DAY, 0)),  # yesterday
            'היום': (),                                                   # today: no shift
            'בעוד': Token.PLUS,                                           # in (future)
            'לפני': Token.MINUS,                                          # ago (past)
            'חודשים': Token.MONTH,                                        # months
            'שבועות': Token.WEEK,                                         # weeks
            'ימים': Token.DAY,                                            # days
            'חודש': ((Token.NUM, 1), (Token.MONTH, 0)),                   # a month
            'שבוע': ((Token.NUM, 1), (Token.WEEK, 0)),                    # a week
            'יום': ((Token.NUM, 1), (Token.DAY, 0)),                      # a day
            'חודשיים': ((Token.NUM, 2), (Token.MONTH, 0)),                # two months
            'שבועיים': ((Token.NUM, 2), (Token.WEEK, 0)),                 # two weeks
            'יומיים': ((Token.NUM, 2), (Token.DAY, 0)),                   # two days
        }

        num_to_int = {
            'אחד': 1,
            'שניים': 2,
            'שני': 2,
            'שנים': 2,
            'שלושה': 3,
            'ארבעה': 4,
            'חמישה': 5,
            'שישה': 6,
            'שבעה': 7,
            'שמונה': 8,
            'תשעה': 9,
            'עשרה': 10,
            'עשר': 10,
            'עשרים': 20,
            'שלושים': 30,
        }

        # Drop only a real trailing period; a phrase without one must not
        # lose its last letter.
        text = date_phrase.strip()
        if text.endswith('.'):
            text = text[:-1]

        tokens = []
        for word in text.split():
            known = word in token_map or word in num_to_int
            # A leading vav is the conjunction "and"; remove it to reach the
            # underlying keyword or number.
            if not known and word.startswith('ו'):
                word = word[1:]

            if word in token_map:
                entry = token_map[word]
                if isinstance(entry, tuple):
                    tokens.extend(entry)
                else:
                    tokens.append((entry, 0))
            elif word in num_to_int:
                tokens.append((Token.NUM, num_to_int[word]))
            else:
                raise KeyError(
                    'unrecognized word in date phrase: {!r}'.format(word))

        tokens.append((Token.END, 0))
        return tokens

    def translate_date(self, date_tokens):
        """Apply a token list to the base date.

        Numbers accumulate until a unit token is seen (so "one" followed by
        "ten" counts as eleven); the unit token then shifts the date by
        ``sign * amount`` and resets the amount. If no direction token was
        seen the sign is 0 and the date is not shifted.

        Args:
            date_tokens: Iterable of ``(Token, value)`` pairs, as produced by
                ``tokenizer``.

        Returns:
            The resulting date formatted as ``yyyymmdd``. The same string is
            also stored in ``self.data``.
        """
        shift_unit = {
            Token.DAY: 'days',
            Token.WEEK: 'weeks',
            Token.MONTH: 'months',
        }

        # Always start from the untouched base date so the translator can be
        # reused for several phrases.
        current = self._base_date
        sign = 0
        amount = 0
        for kind, value in date_tokens:
            if kind == Token.MINUS:
                sign = -1
            elif kind == Token.PLUS:
                sign = 1
            elif kind == Token.NUM:
                amount += value
            elif kind in shift_unit:
                current = current.shift(**{shift_unit[kind]: sign * amount})
                amount = 0

        self.data = current.format('YYYYMMDD')
        return self.data

    def run(self, date_phrase):
        """Tokenize ``date_phrase`` and return the translated ``yyyymmdd`` date.

        Args:
            date_phrase: Hebrew relative-date phrase.

        Returns:
            The translated date as a ``yyyymmdd`` string.

        Raises:
            KeyError: If the phrase contains an unrecognized word.
        """
        return self.translate_date(self.tokenizer(date_phrase))

In [80]:
def main():
    """Driver: translate one sample phrase relative to 2020-06-11 and print it."""
    translator = DateTranslator('20200611')
    # The phrase reads "in three days."
    translated = translator.run("בעוד שלושה ימים.")
    print(translated)


main()

20200614


In [62]:
# Sample Hebrew phrases for the driver; each line carries an English gloss.
data = ["לפני חמישה ימים.",  # "five days ago."
"לפני יומיים.",  # "two days ago."
"בעוד שלושה שבועות.",  # "in three weeks."
"בעוד שבוע וארבעה ימים.",  # "in a week and four days."
"לפני שלושה חודשים וארבעה ימים.",  # "three months and four days ago."
"לפני חודשיים ויום אחד.",  # "two months and one day ago."
"לפני חודש אחד עשר שבועות ויומיים."]  # "a month, eleven weeks and two days ago."

In [73]:
def main():
    """Driver: print the translation of every phrase in ``data``.

    A new translator anchored at 2020-06-11 is created for each phrase.
    """
    for sample in data:
        print(DateTranslator('20200611').run(sample))


main()

20200606
20200609
20200702
20200622
20200307
20200410
20200222
