Add support for Portuguese (Portugal) (PT) (#198)

* add support for Portuguese (Portugal) (PT) * FIX ADD SUPPORT TO PORTUGUESE (PORTUGAL) [PT] removed venv/ from .gitignore. correction from "milião" to "milhão" done on merge function. removed hack for negword, creating a backup of the original one. * add support PT (Portugal) (PT) * add support PT (Portugal) (PT) * add support PT (Portugal) (PT) * add support PT (Portugal) (PT) new tests
savoirfairelinux · Oct 20, 2018 · 0f63859 · 0f63859
1 parent 6d30cb9
commit 0f63859
Show file tree

Hide file tree

Showing 5 changed files with 711 additions and 105 deletions.
diff --git a/README.rst b/README.rst
@@ -96,6 +96,7 @@ Besides the numerical argument, there are two main optional arguments.
 * ``lv`` (Latvian)
 * ``no`` (Norwegian)
 * ``pl`` (Polish)
+* ``pt`` (Portuguese)
 * ``pt_BR`` (Portuguese - Brazilian)
 * ``ro`` (Romanian)
 * ``ru`` (Russian)

diff --git a/num2words/__init__.py b/num2words/__init__.py
@@ -36,6 +36,7 @@
 from . import lang_JA
 from . import lang_NO
 from . import lang_DK
+from . import lang_PT
 from . import lang_PT_BR
 from . import lang_HE
 from . import lang_IT
@@ -72,6 +73,7 @@
     'sl': lang_SL.Num2Word_SL(),
     'no': lang_NO.Num2Word_NO(),
     'dk': lang_DK.Num2Word_DK(),
+    'pt': lang_PT.Num2Word_PT(),
     'pt_BR': lang_PT_BR.Num2Word_PT_BR(),
     'he': lang_HE.Num2Word_HE(),
     'it': lang_IT.Num2Word_IT(),

diff --git a/num2words/lang_PT.py b/num2words/lang_PT.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2003, Taro Ogawa.  All Rights Reserved.
+# Copyright (c) 2013, Savoir-faire Linux inc.  All Rights Reserved.
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301 USA
+
+from __future__ import division, unicode_literals
+
+import re
+
+from .lang_EU import Num2Word_EU
+
+DOLLAR = ('dólar', 'dólares')
+CENTS = ('cêntimo', 'cêntimos')
+
+
+class Num2Word_PT(Num2Word_EU):
+
+    CURRENCY_FORMS = {
+        'AUD': (DOLLAR, CENTS),
+        'CAD': (DOLLAR, CENTS),
+        'EUR': (('euro', 'euros'), CENTS),
+        'GBP': (('libra', 'libras'), ('péni', 'pence')),
+        'USD': (DOLLAR, CENTS),
+    }
+
+    GIGA_SUFFIX = None
+    MEGA_SUFFIX = "ilião"
+
+    def setup(self):
+        super(Num2Word_PT, self).setup()
+        lows = ["quatr", "tr", "b", "m"]
+        self.high_numwords = self.gen_high_numwords([], [], lows)
+        self.negword = "menos "
+        self.pointword = "vírgula"
+        self.exclude_title = ["e", "vírgula", "menos"]
+
+        self.mid_numwords = [
+            (1000, "mil"), (100, "cem"), (90, "noventa"),
+            (80, "oitenta"), (70, "setenta"), (60, "sessenta"),
+            (50, "cinquenta"), (40, "quarenta"), (30, "trinta")
+        ]
+        self.low_numwords = [
+            "vinte", "dezanove", "dezoito", "dezassete", "dezasseis",
+            "quinze", "catorze", "treze", "doze", "onze", "dez",
+            "nove", "oito", "sete", "seis", "cinco", "quatro", "três", "dois",
+            "um", "zero"
+        ]
+        self.ords = [
+            {
+                0: "",
+                1: "primeiro",
+                2: "segundo",
+                3: "terceiro",
+                4: "quarto",
+                5: "quinto",
+                6: "sexto",
+                7: "sétimo",
+                8: "oitavo",
+                9: "nono",
+            },
+            {
+                0: "",
+                1: "décimo",
+                2: "vigésimo",
+                3: "trigésimo",
+                4: "quadragésimo",
+                5: "quinquagésimo",
+                6: "sexagésimo",
+                7: "septuagésimo",
+                8: "octogésimo",
+                9: "nonagésimo",
+            },
+            {
+                0: "",
+                1: "centésimo",
+                2: "ducentésimo",
+                3: "tricentésimo",
+                4: "quadrigentésimo",
+                5: "quingentésimo",
+                6: "seiscentésimo",
+                7: "septigentésimo",
+                8: "octigentésimo",
+                9: "nongentésimo",
+            },
+        ]
+        self.thousand_separators = {
+            3: "milésimo",
+            6: "milionésimo",
+            9: "milésimo milionésimo",
+            12: "bilionésimo",
+            15: "milésimo bilionésimo"
+        }
+        self.hundreds = {
+            1: "cento",
+            2: "duzentos",
+            3: "trezentos",
+            4: "quatrocentos",
+            5: "quinhentos",
+            6: "seiscentos",
+            7: "setecentos",
+            8: "oitocentos",
+            9: "novecentos",
+        }
+
+    def merge(self, curr, next):
+        ctext, cnum, ntext, nnum = curr + next
+
+        if cnum == 1:
+            if nnum < 1000000:
+                return next
+            ctext = "um"
+        elif cnum == 100 and not nnum % 1000 == 0:
+            ctext = "cento"
+
+        if nnum < cnum:
+            if cnum < 100:
+                return ("%s e %s" % (ctext, ntext), cnum + nnum)
+            return ("%s e %s" % (ctext, ntext), cnum + nnum)
+
+        elif (not nnum % 1000000000) and cnum > 1:
+            ntext = ntext[:-4] + "liões"
+        elif (not nnum % 1000000) and cnum > 1:
+            ntext = ntext[:-4] + "lhões"
+        # correct "milião" to "milhão"
+        if ntext == 'milião':
+            ntext = 'milhão'
+        if nnum == 100:
+            ctext = self.hundreds[cnum]
+            ntext = ""
+
+        else:
+            ntext = " " + ntext
+
+        return (ctext + ntext, cnum * nnum)
+
+    def to_cardinal(self, value):
+        result = super(Num2Word_PT, self).to_cardinal(value)
+
+        # Transforms "mil e cento e catorze" into "mil cento e catorze"
+        # Transforms "cem milhões e duzentos mil e duzentos e dez" em "cem
+        # milhões duzentos mil duzentos e dez" but "cem milhões e duzentos
+        # mil e duzentos" in "cem milhões duzentos mil e duzentos" and not in
+        # "cem milhões duzentos mil duzentos"
+        for ext in (
+                'mil', 'milhão', 'milhões', 'mil milhões',
+                'bilião', 'biliões', 'mil biliões'):
+            if re.match('.*{} e \w*entos? (?=.*e)'.format(ext), result):
+                result = result.replace(
+                    '{} e'.format(ext), '{}'.format(ext)
+                )
+
+        return result
+
+    # for the ordinal conversion the code is similar to pt_BR code,
+    # although there are other rules that are probably more correct in
+    # Portugal. Concerning numbers from 2000th on, saying "dois
+    # milésimos" instead of "segundo milésimo" (the first number
+    # would be used in the cardinal form instead of the ordinal) is better.
+    # This was not implemented.
+    # source:
+    # https://ciberduvidas.iscte-iul.pt/consultorio/perguntas/a-forma-por-extenso-de-2000-e-de-outros-ordinais/16428
+    def to_ordinal(self, value):
+        # Before changing this function remember this is used by pt-BR
+        # so act accordingly
+        self.verify_ordinal(value)
+
+        result = []
+        value = str(value)
+        thousand_separator = ''
+
+        for idx, char in enumerate(value[::-1]):
+            if idx and idx % 3 == 0:
+                thousand_separator = self.thousand_separators[idx]
+
+            if char != '0' and thousand_separator:
+                # avoiding "segundo milionésimo milésimo" for 6000000,
+                # for instance
+                result.append(thousand_separator)
+                thousand_separator = ''
+
+            result.append(self.ords[idx % 3][int(char)])
+
+        result = ' '.join(result[::-1])
+        result = result.strip()
+        result = re.sub('\s+', ' ', result)
+
+        if result.startswith('primeiro') and value != '1':
+            # avoiding "primeiro milésimo", "primeiro milionésimo" and so on
+            result = result[9:]
+
+        return result
+
+    def to_ordinal_num(self, value):
+        # Before changing this function remember this is used by pt-BR
+        # so act accordingly
+        self.verify_ordinal(value)
+        return "%sº" % (value)
+
+    def to_year(self, val, longval=True):
+        # Before changing this function remember this is used by pt-BR
+        # so act accordingly
+        if val < 0:
+            return self.to_cardinal(abs(val)) + ' antes de Cristo'
+        return self.to_cardinal(val)
+
+    def to_currency(self, val, currency='EUR', cents=True, seperator=' e',
+                    adjective=False):
+        # change negword because base.to_currency() does not need space after
+        backup_negword = self.negword
+        self.negword = self.negword[:-1]
+        result = super(Num2Word_PT, self).to_currency(
+            val, currency=currency, cents=cents, seperator=seperator,
+            adjective=adjective)
+        # undo the change on negword
+        self.negword = backup_negword
+
+        # transforms "milhões euros" em "milhões de euros"
+        try:
+            cr1, _ = self.CURRENCY_FORMS[currency]
+        except KeyError:
+            raise NotImplementedError(
+                'Currency code "%s" not implemented for "%s"' %
+                (currency, self.__class__.__name__))
+
+        for ext in (
+                'milhão', 'milhões', 'bilião',
+                'biliões', 'trilião', 'triliões'):
+            if re.match('.*{} (?={})'.format(ext, cr1[1]), result):
+                result = result.replace(
+                    '{}'.format(ext), '{} de'.format(ext), 1
+                )
+        # do not print "e zero cêntimos"
+        result = result.replace(' e zero cêntimos', '')
+        return result