From ac56fcc66f984401799df6ea350bf16b548e742c Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 May 2020 12:57:06 -0400 Subject: [PATCH] PEP 615: Tests and implementation for zoneinfo This is the initial implementation of PEP 615, the zoneinfo module, ported from the standalone reference implementation (see https://www.python.org/dev/peps/pep-0615/#reference-implementation for a link, which has a more detailed commit history). This includes (hopefully) all functional elements described in the PEP, but documentation is found in a separate PR. This includes: 1. A pure python implementation of the ZoneInfo class 2. A C accelerated implementation of the ZoneInfo class 3. Tests with 100% branch coverage for the Python code (though C code coverage is less than 100%). 4. A compile-time configuration option on Linux (though not on Windows) Differences from the reference implementation: - The module is arranged slightly differently: the accelerated module is `_zoneinfo` rather than `zoneinfo._czoneinfo`, which also necessitates some changes in the test support function. (Suggested by Victor Stinner and Steve Dower.) - The tests are arranged slightly differently and do not include the property tests. The tests live at test/test_zoneinfo/test_zoneinfo.py rather than test/test_zoneinfo.py or test/test_zoneinfo/__init__.py because we may do some refactoring in the future that would likely require this separation anyway; we may: - include the property tests - automatically run all the tests against both pure Python and C, rather than manually constructing C and Python test classes (similar to the way this works with test_datetime.py, which generates C and Python test cases from datetimetester.py). - This includes a compile-time configuration option on Linux (though not on Windows); added with much help from Thomas Wouters. - Integration into the CPython build system is obviously different from building a standalone zoneinfo module wheel. 
- This includes configuration to install the tzdata package as part of CI, though only on the coverage jobs. Introducing a PyPI dependency as part of the CI build was controversial, and this is seen as less of a major change, since the coverage jobs already depend on pip and PyPI. Additional changes that were introduced as part of this PR, most / all of which were backported to the reference implementation: - Fixed reference and memory leaks With much debugging help from Pablo Galindo - Added smoke tests ensuring that the C and Python modules are built The import machinery can be somewhat fragile, and the "seamlessly falls back to pure Python" nature of this module makes it so that a problem building the C extension or a failure to import the pure Python version might easily go unnoticed. - Adjustments to zoneinfo.__dir__ Suggested by Petr Viktorin. - Slight refactorings as suggested by Steve Dower. - Removed unnecessary if check on std_abbr Discovered this because of a missing line in branch coverage. 
--- .github/workflows/coverage.yml | 1 + .travis.yml | 1 + Lib/sysconfig.py | 1 + Lib/test/test_zoneinfo/__init__.py | 1 + Lib/test/test_zoneinfo/__main__.py | 3 + Lib/test/test_zoneinfo/_support.py | 76 + .../test_zoneinfo/data/update_test_data.py | 122 + .../test_zoneinfo/data/zoneinfo_data.json | 190 ++ Lib/test/test_zoneinfo/test_zoneinfo.py | 1994 ++++++++++++ Lib/zoneinfo/__init__.py | 29 + Lib/zoneinfo/_common.py | 166 + Lib/zoneinfo/_tzpath.py | 110 + Lib/zoneinfo/_zoneinfo.py | 755 +++++ Makefile.pre.in | 3 + Misc/requirements-test.txt | 1 + Modules/Setup | 1 + Modules/_zoneinfo.c | 2695 +++++++++++++++++ PCbuild/_zoneinfo.vcxproj | 109 + PCbuild/_zoneinfo.vcxproj.filters | 16 + PCbuild/lib.pyproj | 8 + PCbuild/pcbuild.proj | 2 +- PCbuild/pcbuild.sln | 2 + PCbuild/readme.txt | 1 + Tools/msi/lib/lib_files.wxs | 2 +- configure | 46 + configure.ac | 36 + setup.py | 14 + 27 files changed, 6383 insertions(+), 2 deletions(-) create mode 100644 Lib/test/test_zoneinfo/__init__.py create mode 100644 Lib/test/test_zoneinfo/__main__.py create mode 100644 Lib/test/test_zoneinfo/_support.py create mode 100644 Lib/test/test_zoneinfo/data/update_test_data.py create mode 100644 Lib/test/test_zoneinfo/data/zoneinfo_data.json create mode 100644 Lib/test/test_zoneinfo/test_zoneinfo.py create mode 100644 Lib/zoneinfo/__init__.py create mode 100644 Lib/zoneinfo/_common.py create mode 100644 Lib/zoneinfo/_tzpath.py create mode 100644 Lib/zoneinfo/_zoneinfo.py create mode 100644 Misc/requirements-test.txt create mode 100644 Modules/_zoneinfo.c create mode 100644 PCbuild/_zoneinfo.vcxproj create mode 100644 PCbuild/_zoneinfo.vcxproj.filters diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 75bdf83f6c5dbb..6dd973bf8e4ad7 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -48,6 +48,7 @@ jobs: ./python -m venv .venv source ./.venv/bin/activate python -m pip install -U coverage + python -m pip install -r 
Misc/requirements-test.txt python -m test.pythoninfo - name: 'Tests with coverage' run: > diff --git a/.travis.yml b/.travis.yml index 3c2fb4bdc78755..133385fbf5c719 100644 --- a/.travis.yml +++ b/.travis.yml @@ -87,6 +87,7 @@ matrix: # Need a venv that can parse covered code. - ./python -m venv venv - ./venv/bin/python -m pip install -U coverage + - ./venv/bin/python -m pip install -r Misc/requirements-test.txt - ./venv/bin/python -m test.pythoninfo script: # Skip tests that re-run the entire test suite. diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py index eaee837f10e330..bf04ac541e6b02 100644 --- a/Lib/sysconfig.py +++ b/Lib/sysconfig.py @@ -546,6 +546,7 @@ def get_config_vars(*args): if os.name == 'nt': _init_non_posix(_CONFIG_VARS) + _CONFIG_VARS['TZPATH'] = '' if os.name == 'posix': _init_posix(_CONFIG_VARS) # For backward compatibility, see issue19555 diff --git a/Lib/test/test_zoneinfo/__init__.py b/Lib/test/test_zoneinfo/__init__.py new file mode 100644 index 00000000000000..98cc4412ae16c2 --- /dev/null +++ b/Lib/test/test_zoneinfo/__init__.py @@ -0,0 +1 @@ +from .test_zoneinfo import * diff --git a/Lib/test/test_zoneinfo/__main__.py b/Lib/test/test_zoneinfo/__main__.py new file mode 100644 index 00000000000000..5cc4e055d5e660 --- /dev/null +++ b/Lib/test/test_zoneinfo/__main__.py @@ -0,0 +1,3 @@ +import unittest + +unittest.main('test.test_zoneinfo') diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py new file mode 100644 index 00000000000000..6bd8d8dc0fbfee --- /dev/null +++ b/Lib/test/test_zoneinfo/_support.py @@ -0,0 +1,76 @@ +import contextlib +import functools +import sys +import threading +import unittest +from test.support import import_fresh_module + +OS_ENV_LOCK = threading.Lock() +TZPATH_LOCK = threading.Lock() +TZPATH_TEST_LOCK = threading.Lock() + + +def call_once(f): + """Decorator that ensures a function is only ever called once.""" + lock = threading.Lock() + cached = functools.lru_cache(None)(f) + + 
@functools.wraps(f) + def inner(): + with lock: + return cached() + + return inner + + +@call_once +def get_modules(): + """Retrieve two copies of zoneinfo: pure Python and C accelerated. + + Because this function manipulates the import system in a way that might + be fragile or do unexpected things if it is run many times, it uses a + `call_once` decorator to ensure that this is only ever called exactly + one time — in other words, when using this function you will only ever + get one copy of each module rather than a fresh import each time. + """ + import zoneinfo as c_module + + py_module = import_fresh_module("zoneinfo", blocked=["_zoneinfo"]) + + return py_module, c_module + + +@contextlib.contextmanager +def set_zoneinfo_module(module): + """Make sure sys.modules["zoneinfo"] refers to `module`. + + This is necessary because `pickle` will refuse to serialize + a type calling itself `zoneinfo.ZoneInfo` unless `zoneinfo.ZoneInfo` + refers to the same object. + """ + + NOT_PRESENT = object() + old_zoneinfo = sys.modules.get("zoneinfo", NOT_PRESENT) + sys.modules["zoneinfo"] = module + yield + if old_zoneinfo is not NOT_PRESENT: + sys.modules["zoneinfo"] = old_zoneinfo + else: # pragma: nocover + sys.modules.pop("zoneinfo") + + +class ZoneInfoTestBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.klass = cls.module.ZoneInfo + super().setUpClass() + + @contextlib.contextmanager + def tzpath_context(self, tzpath, lock=TZPATH_LOCK): + with lock: + old_path = self.module.TZPATH + try: + self.module.reset_tzpath(tzpath) + yield + finally: + self.module.reset_tzpath(old_path) diff --git a/Lib/test/test_zoneinfo/data/update_test_data.py b/Lib/test/test_zoneinfo/data/update_test_data.py new file mode 100644 index 00000000000000..f531ab316a1f21 --- /dev/null +++ b/Lib/test/test_zoneinfo/data/update_test_data.py @@ -0,0 +1,122 @@ +""" +Script to automatically generate a JSON file containing time zone information. 
+ +This is done to allow "pinning" a small subset of the tzdata in the tests, +since we are testing properties of a file that may be subject to change. For +example, the behavior in the far future of any given zone is likely to change, +but "does this give the right answer for this file in 2040" is still an +important property to test. + +This must be run from a computer with zoneinfo data installed. +""" +from __future__ import annotations + +import base64 +import functools +import json +import lzma +import pathlib +import textwrap +import typing + +import zoneinfo + +KEYS = [ + "Africa/Abidjan", + "Africa/Casablanca", + "America/Los_Angeles", + "America/Santiago", + "Asia/Tokyo", + "Australia/Sydney", + "Europe/Dublin", + "Europe/Lisbon", + "Europe/London", + "Pacific/Kiritimati", + "UTC", +] + +TEST_DATA_LOC = pathlib.Path(__file__).parent + + +@functools.lru_cache(maxsize=None) +def get_zoneinfo_path() -> pathlib.Path: + """Get the first zoneinfo directory on TZPATH containing the "UTC" zone.""" + key = "UTC" + for path in map(pathlib.Path, zoneinfo.TZPATH): + if (path / key).exists(): + return path + else: + raise OSError("Cannot find time zone data.") + + +def get_zoneinfo_metadata() -> typing.Dict[str, str]: + path = get_zoneinfo_path() + + tzdata_zi = path / "tzdata.zi" + if not tzdata_zi.exists(): + # tzdata.zi is necessary to get the version information + raise OSError("Time zone data does not include tzdata.zi.") + + with open(tzdata_zi, "r") as f: + version_line = next(f) + + _, version = version_line.strip().rsplit(" ", 1) + + if ( + not version[0:4].isdigit() + or len(version) < 5 + or not version[4:].isalpha() + ): + raise ValueError( + "Version string should be YYYYx, " + + "where YYYY is the year and x is a letter; " + + f"found: {version}" + ) + + return {"version": version} + + +def get_zoneinfo(key: str) -> bytes: + path = get_zoneinfo_path() + + with open(path / key, "rb") as f: + return f.read() + + +def encode_compressed(data: bytes) -> 
typing.List[str]: + compressed_zone = lzma.compress(data) + raw = base64.b85encode(compressed_zone) + + raw_data_str = raw.decode("utf-8") + + data_str = textwrap.wrap(raw_data_str, width=70) + return data_str + + +def load_compressed_keys() -> typing.Dict[str, typing.List[str]]: + output = {key: encode_compressed(get_zoneinfo(key)) for key in KEYS} + + return output + + +def update_test_data(fname: str = "zoneinfo_data.json") -> None: + TEST_DATA_LOC.mkdir(exist_ok=True, parents=True) + + # Annotation required: https://github.com/python/mypy/issues/8772 + json_kwargs: typing.Dict[str, typing.Any] = dict( + indent=2, sort_keys=True, + ) + + compressed_keys = load_compressed_keys() + metadata = get_zoneinfo_metadata() + output = { + "metadata": metadata, + "data": compressed_keys, + } + + with open(TEST_DATA_LOC / fname, "w") as f: + json.dump(output, f, **json_kwargs) + + +if __name__ == "__main__": + update_test_data() diff --git a/Lib/test/test_zoneinfo/data/zoneinfo_data.json b/Lib/test/test_zoneinfo/data/zoneinfo_data.json new file mode 100644 index 00000000000000..ec4414a0cdedbe --- /dev/null +++ b/Lib/test/test_zoneinfo/data/zoneinfo_data.json @@ -0,0 +1,190 @@ +{ + "data": { + "Africa/Abidjan": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~f{VGF<>F7KxBg5R*{Ksocg8-YYVul=v7vZzaHN", + "uC=da5UI2rH18c!OnjV{y4u(+A!!VBKmY&$ORw>7UO^(500B;v0RR91bXh%WvBYQl0ssI2", + "00dcD" + ], + "Africa/Casablanca": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0b&Kz+C_;7KxBg5R*{N&yjMUR~;C-fDaSOU;q-~", + "FqW+4{YBjbcw}`a!dW>b)R2-0a+uwf`P3{_Y@HuCz}S$J$ZJ>R_V<~|Fk>sgX4=%0vUrh-", + "lt@YP^Wrus;j?`Th#xRPzf<<~Hp4DH^gZX>d{+WOp~HNu8!{uWu}&XphAd{j1;rB4|9?R!", + "pqruAFUMt8#*WcrVS{;kLlY(cJRV$w?d2car%Rs>q9BgTU4", + "Ht-tQKZ7Z`9QqOb?R#b%z?rk>!CkH7jy3wja4NG2q)H}fNRKg8v{);Em;K3Cncf4C6&Oaj", + "V+DbX%o4+)CV3+e!Lm6dutu(0BQpH1T?W(~cQtKV*^_Pdx!LirjpTs?Bmt@vktjLq4;)O!", + "rrly=c*rwTwMJFd0I57`hgkc?=nyI4RZf9W$6DCWugmf&)wk^tWH17owj=#PGH7Xv-?9$j", + "njwDlkOE+BFNR9YXEmBpO;rqEw=e2IR-8^(W;8ma?M3JVd($2T>IW+0tk|Gm8>ftukRQ9J", 
+ "8k3brzqMnVyjsLI-CKneFa)Lxvp_aq40f}0J3VVoWL5rox", + "`Kptivcp}o5xA^@>qNI%?zo=Yj4AMV?kbAA)j(1%)+Pp)bSn+7Yk`M{oE}L-Z!G6OMr5G+h", + "p)$3Lg{ono{4cN>Vr&>L4kXH;_VnBL5U!LgzqE%P7QQ*tue}O`3(TZ0`aKn&~8trOQ-rBXCp)f@P6RMO4l0+;b|5-pk9_ryNh}Zc*v%mvz_#", + "yd6fjB0g9{MmMnu8bG%#C~ugXK^S^k@?ab#", + "O|aE>dDTt4s4n69(~@t~!wniV%g7khFx~I*4>Y|V$4j5%KPF*-FyKIi@!Ho&", + "x8QQsksYt8)D+W)Ni!=G`ogSu^vLL-l#7A7=iIAKL2SuZk9F}NfNk86VI)9WZE?%2wC-ya", + "F~z#Qsq)LH0|_D8^5fU8X%GeQ4TB>R-dlziA&tZe&1ada208!$nk`7bOFO2S00G`Z@1A~t&lyL{p{eM{5)QGf7Mo5FW9==mlyXJt2", + "UwpntR7H0eSq!(aYq#aqUz&RM*tvuMI)AsM?K3-dV3-TT{t)!Iy#JTo=tXkzAM9~j2YbiO", + "ls3(H8Dc>Y|D1aqL51vjLbpYG;GvGTQB4bXuJ%mA;(B4eUpu$$@zv2vVcq-Y)VKbzp^tei", + "uzy}R{LuvDjpuVb`79O+CBmg{Wx!bvx$eu4zRE&", + "PehMb=&G<9$>iZ|bFE)0=4I?KLFGBC0I(0_svgw0%FiMsT%koo*!nEYc6GY@QnU}&4Isg;", + "l=|khi(!VaiSE2=Ny`&&tpi~~;{$uN}%f|7mBhAy;s3YT^sy!$eG~?`9mNJC9@4Bac_p^BZh)Yd_rWW5qh-?tKY(>5VHO", + "L*iT8P@wCavLj^yYbnDR+4ukhS+xPrpl)iqB?u)bj9a2aW==g6G3lCJd>(+Blfr)~^40F4f>cRZ^UF;RibfZ>0m73hR", + "C{$vTfC(STN`g7(B<=Z2556{}0`?p&|Akkst!4Xy4OT;A@c$XTUI3FRRjy*KA7uC56FD)z", + "^X{WV*sr(w!c$W357o!&eLO2wTDNOyw@gf(&R<t;=-Tu1TV{>%8ZVATC9tjD8|(&`$9YHvZ9bVe#>w", + "|8c;Tg|xE&)`*}LwM*E}q}q8^Qja%p`_U)*5DdLI9O@!e=3jFjOCrCq28b_bb;s>%D#iJB", + "CWJi{JH!Js;6nfayos$kq^OEX00HO-lokL0!mqm{vBYQl0ssI200dcD" + ], + "America/Santiago": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0fRZ<6QtM7KxBg84(fsEAUJ$J{f-TXlPEUec5Ee", + "n+hsD4lC(QYax=JdSpoyje8%VM`GW}{bJ8@y$A8O&*$pw{(f~Os#}2w", + "eX6^Rgi$IT%n^V^85L>$_c7{cB^#ogV=rHBJGiz-RQNFGK?gdPi|q)j`&8)}KJ{qo6dixa", + "9@yYyVg+%lo0nO+Tw0-w2hJ%mafyWL)|", + ")?W6Bi%FWuGPA1Dru$XR4SZANsAthU2EoKHF6oEtKq`rwP", + "(VNegnI_NI%;ma$)wj{k!@KFB30Yo)IOrl>)$)D|+(5h&+%2vuwGuy^@S8FT^s21V5};>VA9Iu;?8bHz#r<;JtfZDI1(FT@edh0#", + "MYW$A1qkMGIwTZqqdYNE3gl#zp&NbL9Mp=voqN|;?gqR&4$)1`znddtEyuKS*^nMMD=0^>", + "7^z6-C4P67UWOXuMBubP>j6i~03aR@jD^-Y`JSYu#Yp0P8dLLJ0QOPE8=BoiuRX59YW7xg", + 
"WiexjHX%&0?`ZQCdxCdL^qd1v@kOjQKaWo2Y1++~LcA%FTq?5o%}fX1-RIvlB)1#iTNomGnUL=nM!>Ix|AGtON7!F1O?53kqlC2o-`ZGw*+s", + "NM$^9znsIJMwlgscE`|O3|;BRgsQMYm~`uv+nvuv`nigRa}X=BX=A5Sw$)WEklF7&c>_~$", + "zJ(m--bqXgiN^w-U=BJH9C0Qro(x90zo@rK;&TJ$nI@&k$ORgOb2s%gWbc}ok_27)Eoku~Fq|B-Ps+4J_", + "HPJMLJ2^_)cOU$p&3kNAlrV!)%~6r$BJ>OOi~=-<6byle{?zd4J{NG}o8tw|+#ZNLcpNwk", + "TuPE~sbJB8_RZb2DopStO+Wwux~F#S59zm%00I98;S&G=b(j+6vBYQl0ssI200dcD" + ], + "Asia/Tokyo": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~luMgIxeB7KxBg5R*;y?l4Rl4neXH3cv!OtfK@h", + "KZzauI)S!FSDREPhhBS6Fb$&Vv#7%;?Te|>pF^0HBr&z_Tk<%vMW_QqjevRZOp8XVFgP<8", + "TkT#`9H&0Ua;gT1#rZLV0HqbAKK;_z@nO;6t0L}hOdk<>TdUa07R(LPI6@!GU$ty4=mwqHG-XVe*n(Yvgdlr+FqIU18!osi)48t~eWX8)&L", + "G)Ud^0zz@*AF+2r7E}Nf9Y72K~o-T%}D&z%}#7g2br?oH6ZiYH^%>J3D)TPKV(JY*bwjuw5=DsPB@~CrROZeN", + "x>A*H&CHrWt0`EP`m!F%waepl#|w#&`XgVc?~2M3uw$fGX~tf_Il!q#Aa<*8xlzQ2+7r6Z", + "^;Laa9F(WB_O&Dy2r>~@kSi16W{=6+i5GV=Uq~KX*~&HUN4oz7*O(gXIr}sDVcD`Ikgw#|", + "50ssal8s)Qy;?YGCf;*UKKKN!T4!Kqy_G;7PfQapugqvVBKy12v3TVH^L2", + "0?#5*VP~MOYfe$h`*L!7@tiW|_^X1N%<}`7YahiUYtMu5XwmOf3?dr+@zXHwW`z}ZDqZlT", + "<2Cs(<1%M!i6o&VK89BY0J7HPIo;O62s=|IbV^@y$N&#=>i^F00FcHoDl#3", + "Mdv&xvBYQl0ssI200dcD" + ], + "Europe/Dublin": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0>b$_+0=h7KxBg5R*;&J77#T_U2R5sleVWFDmK~", + "Kzj5oh@`QKHvW^6V{jU-w>qg1tSt0c^vh;?qAqA0%t?;#S~6U8Qi", + "v&f1s9IH#g$m1k1a#3+lylw4mwT4QnEUUQdwg+xnEcBlgu31bAVabn41OMZVLGz6NDwG%X", + "uQar!b>GI{qSahE`AG}$kRWbuI~JCt;38)Xwbb~Qggs55t+MAHIxgDxzTJ;2xXx99+qCy4", + "45kC#v_l8fx|G&jlVvaciR<-wwf22l%4(t@S6tnX39#_K(4S0fu$FUs$isud9IKzCXB78NkARYq@9Dc0TGkhz);NtM_SSzEffN", + "l{2^*CKGdp52h!52A)6q9fUSltXF{T*Ehc9Q7u8!W7pE(Fv$D$cKUAt6wY=DA1mGgxC*VX", + "q_If3G#FY6-Voj`fIKk`0}Cc72_SD{v>468LV{pyBI33^p0E?}RwDA6Pkq--C~0jF&Z@Pv", + "!dx_1SN_)jwz@P$(oK%P!Tk9?fRjK88yxhxlcFtTjjZ$DYssSsa#ufYrR+}}nKS+r384o~", + "!Uw$nwTbF~qgRsgr0N#d@KIinx%hQB(SJyjJtDtIy(%mDm}ZBGN}dV6K~om|=U", + "VGkbciQ=^$_14|gT21!YQ)@y*Rd0i_lS6gtPBE9+ah%WIJPwzUTjIr+J1XckkmA!6WE16%", + 
"CVAl{Dn&-)=G$Bjh?bh0$Xt1UDcgXJjXzzojuw0>paV~?Sa`VN3FysqFxTzfKVAu*ucq#+m=|KSSMvp_#@-lwd+q*ue", + "FQ^5<|<0R-u4qYMbRqzSn&", + "Q7jSuvc%b+EZc%>nI(+&0Tl1Y>a6v4`uNFD-7$QrhHgS7Wnv~rDgfH;rQw3+m`LJxoM4v#", + "gK@?|B{RHJ*VxZgk#!p<_&-sjxOda0YaiJ1UnG41VPv(Et%ElzKRMcO$AfgU+Xnwg5p2_+", + "NrnZ1WfEj^fmHd^sx@%JWKkh#zaK0ox%rdP)zUmGZZnqmZ_9L=%6R8ibJH0bOT$AGhDo6{", + "fJ?;_U;D|^>5by2ul@i4Zf()InfFN}00EQ=q#FPL>RM>svBYQl0ssI200dcD" + ], + "Europe/Lisbon": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0=rf*IfWA7KxBg5R*;*X|PN+G3LqthM?xgkNUN_", + ")gCt1Sc%YT6^TTomk4yVHXeyvQj8}l<;q&s7K}#Vnc8lII1?)AHh$*>OKUU4S;*h>v*ep0", + "xTi1cK2{aY*|2D*-~K<;-{_W+r@NvZ7-|NZv($ek_C%VfP0xjWeZP#CPXD`IKkakjh(kUd", + "&H)m;^Q(jGjIyiyrcUMtOP)u3A>sw6ux;Bmp3x$4QvQKMx5TrCx_!$srWQuXNs&`9=^IY1", + "yc&C31!sQh7P=Mk*#6x8Z@5^%ehR8UW$OWw0KMw}P1ycI^", + "4eh12oBUOV?S>n*d!+EM@>x#9PZD12iD=zaC;7`8dTfkU_6d}OZvSFSbGgXeKw}XyX@D=(", + ")D0!^DBGr8pXWBT$S-yhLP>Z3ys^VW3}RQ6{NGGVJG6vf*MH93vvNW6yLjie1;{4tVhg-KnSf|G`!", + "Z;j$7gJ1ows~RD=@n7I6aFd8rOR_7Y?E-$clI%1o5gA@O!KPa^(8^iFFeFykI-+z>E$mvp", + "E_h`vbHPjqkLs`Dn-0FV`R@z|h!S(Lb;M&|Exr!biY`%bfp$6`hK;GDhdP|^Q", + "*Ty*}1d41K>H2B{jrjE9aFK>yAQJBX9CD%-384S;0fw`PlprHGS`^b$oS-`I4VH7ji8ou-", + "g|060jfb1XcxiInT0oOoeR7#%e5Ug5#KW)nVSRvLHNe$SQHM@2)`S9L7>RL@Qx%fmm7?3u7P5TywFQ}C@S(pq}|", + "eLPT{C^{<0Q?uU&kSVd%!~8q3;Z0s3OqzF`$HRkePL5Ywgiwn{R(zi+jmOBFrVpW;)@UsU#%$8BcV#h@}m$#!Fglo&bwb78aYqOG_W7h{eb(+39&-mk4EIXq_", + "_`30=8sfA3=!3TO_TyS5X22~?6nKngZ|bq=grdq=9X)3xAkA42L!~rmS)n3w-~;lgz%Fhn", + "(?rXdp2ho~9?wmVs2JwVt~?@FVD%`tN69{(i3oQa;O0$E$lF&~Y#_H6bu6(BiwblJ>;-Fs", + "gA$Y$*?=X)n1pFkKn}F~`>=4)+LLQk?L*P!bhAm0;`N~z3QbUIyVrm%kOZ(n1JJsm0pyb8", + "!GV{d*C!9KXv;4vD4Q>-k#+x(!V5L@w5M>v2V5a`B>t(|B", + "|Fqr4^-{S*%Ep~ojUtx_CRbSQ(uFwu2=KH)Q@EBs@ZqRXn4mU;B!68;;IQs3Ub=n&UU%*m", + "k&zwD36&JSwsN(%k&x?H+tN^6)23c`I0=5^N_R0~1>tsFZ`^`3z~rXSXT&qcwa#n!%+Z#P", + "PG}(D^_CCILXnF|GKwabBh*xFS?4rwGo2vtJUwzrbv_$5PO+`?$l{H-jGB@X%S!OAhw;D4", + 
"XFycN3!XqQ&EorJOD3>~^U%Luw!jF<;6_q-f-S|6{cQDfZ2(4Xf1MMLr1=SA=MwVf2%Pp%VP;jn)|5Tf!-DbUGn%I-rkYaH7?$$O!t)wwClAisr3eUoeB^~T=U*_P~Y2*KdnO87>B!19sV=xZ5", + "yApq26RxgqA|*tmsvtL#OhcF(C<0EGWHP)BFl?h)_*7!{LoJiv%RsOs!q->n+DcV%9~B@RbC_1G_1g6`Yd~8|%-=2l~oGN!~TVv2Bnk>7wW8L@^?vX$f3AiT)(4nrCuTm9%(XC6Nai", + "E(;}7&=YZagjAN$O-cN;1u{dTkElmB0GT$|Wa)QMmKrx<|LCJ9qlUoFsUbD^H^6_8(w<0{", + "ftj&O1~p_%lh5z;zNV&sP+", + "NF2>iK{8KMUf+)<-)VxXbLxD(alL}N$AT-ogNbJSMMYeX+Z{jS)b8TK^PB=FxyBxzfmFto", + "eo0R`a(%NO?#aEH9|?Cv00000NIsFh6BW2800DjO0RR918Pu^`vBYQl0ssI200dcD" + ], + "UTC": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~e#|9bEt_7KxBg5R*|3h1|xhHLji!C57qW6L*|H", + "pEErm00000ygu;I+>V)?00B92fhY-(AGY&-0RR9100dcD" + ] + }, + "metadata": { + "version": "2020a" + } +} \ No newline at end of file diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py new file mode 100644 index 00000000000000..05db03abf25e99 --- /dev/null +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -0,0 +1,1994 @@ +from __future__ import annotations + +import base64 +import contextlib +import dataclasses +import importlib.metadata +import io +import json +import lzma +import os +import pathlib +import pickle +import re +import shutil +import struct +import tempfile +import unittest +from datetime import date, datetime, time, timedelta, timezone + +from . 
import _support as test_support +from ._support import ( + OS_ENV_LOCK, + TZPATH_LOCK, + TZPATH_TEST_LOCK, + ZoneInfoTestBase, +) + +py_zoneinfo, c_zoneinfo = test_support.get_modules() + +try: + importlib.metadata.metadata("tzdata") + HAS_TZDATA_PKG = True +except importlib.metadata.PackageNotFoundError: + HAS_TZDATA_PKG = False + +ZONEINFO_DATA = None +ZONEINFO_DATA_V1 = None +TEMP_DIR = None +DATA_DIR = pathlib.Path(__file__).parent / "data" +ZONEINFO_JSON = DATA_DIR / "zoneinfo_data.json" + +# Useful constants +ZERO = timedelta(0) +ONE_H = timedelta(hours=1) + + +def setUpModule(): + global TEMP_DIR + global ZONEINFO_DATA + global ZONEINFO_DATA_V1 + + TEMP_DIR = pathlib.Path(tempfile.mkdtemp(prefix="zoneinfo")) + ZONEINFO_DATA = ZoneInfoData(ZONEINFO_JSON, TEMP_DIR / "v2") + ZONEINFO_DATA_V1 = ZoneInfoData(ZONEINFO_JSON, TEMP_DIR / "v1", v1=True) + + +def tearDownModule(): + shutil.rmtree(TEMP_DIR) + + +class TzPathUserMixin: + """ + Adds a setUp() and tearDown() to make TZPATH manipulations thread-safe. + + Any tests that require manipulation of the TZPATH global are necessarily + thread unsafe, so we will acquire a lock and reset the TZPATH variable + to the default state before each test and release the lock after the test + is through. + """ + + @property + def tzpath(self): # pragma: nocover + return None + + def setUp(self): + with contextlib.ExitStack() as stack: + stack.enter_context( + self.tzpath_context(self.tzpath, lock=TZPATH_TEST_LOCK) + ) + self.addCleanup(stack.pop_all().close) + + super().setUp() + + +class DatetimeSubclassMixin: + """ + Replaces all ZoneTransition transition dates with a datetime subclass. 
+ """ + + class DatetimeSubclass(datetime): + @classmethod + def from_datetime(cls, dt): + return cls( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + tzinfo=dt.tzinfo, + fold=dt.fold, + ) + + def load_transition_examples(self, key): + transition_examples = super().load_transition_examples(key) + for zt in transition_examples: + dt = zt.transition + new_dt = self.DatetimeSubclass.from_datetime(dt) + new_zt = dataclasses.replace(zt, transition=new_dt) + yield new_zt + + +class ZoneInfoTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + class_name = "ZoneInfo" + + def setUp(self): + super().setUp() + + # This is necessary because various subclasses pull from different + # data sources (e.g. tzdata, V1 files, etc). + self.klass.clear_cache() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return [self.zoneinfo_data.tzpath] + + def zone_from_key(self, key): + return self.klass(key) + + def zones(self): + return ZoneDumpData.transition_keys() + + def fixed_offset_zones(self): + return ZoneDumpData.fixed_offset_zones() + + def load_transition_examples(self, key): + return ZoneDumpData.load_transition_examples(key) + + def test_str(self): + # Zones constructed with a key must have str(zone) == key + for key in self.zones(): + with self.subTest(key): + zi = self.zone_from_key(key) + + self.assertEqual(str(zi), key) + + # Zones with no key constructed should have str(zone) == repr(zone) + file_key = self.zoneinfo_data.keys[0] + file_path = self.zoneinfo_data.path_from_key(file_key) + + with open(file_path, "rb") as f: + with self.subTest(test_name="Repr test", path=file_path): + zi_ff = self.klass.from_file(f) + self.assertEqual(str(zi_ff), repr(zi_ff)) + + def test_repr(self): + # The repr is not guaranteed, but I think we can insist that it at + # least contain the name of the class. 
+ key = next(iter(self.zones())) + + zi = self.klass(key) + class_name = self.class_name + with self.subTest(name="from key"): + self.assertRegex(repr(zi), class_name) + + file_key = self.zoneinfo_data.keys[0] + file_path = self.zoneinfo_data.path_from_key(file_key) + with open(file_path, "rb") as f: + zi_ff = self.klass.from_file(f, key=file_key) + + with self.subTest(name="from file with key"): + self.assertRegex(repr(zi_ff), class_name) + + with open(file_path, "rb") as f: + zi_ff_nk = self.klass.from_file(f) + + with self.subTest(name="from file without key"): + self.assertRegex(repr(zi_ff_nk), class_name) + + def test_key_attribute(self): + key = next(iter(self.zones())) + + def from_file_nokey(key): + with open(self.zoneinfo_data.path_from_key(key), "rb") as f: + return self.klass.from_file(f) + + constructors = ( + ("Primary constructor", self.klass, key), + ("no_cache", self.klass.no_cache, key), + ("from_file", from_file_nokey, None), + ) + + for msg, constructor, expected in constructors: + zi = constructor(key) + + # Ensure that the key attribute is set to the input to ``key`` + with self.subTest(msg): + self.assertEqual(zi.key, expected) + + # Ensure that the key attribute is read-only + with self.subTest(f"{msg}: readonly"): + with self.assertRaises(AttributeError): + zi.key = "Some/Value" + + def test_bad_keys(self): + bad_keys = [ + "Eurasia/Badzone", # Plausible but does not exist + "BZQ", + "America.Los_Angeles", + "🇨🇦", # Non-ascii + "America/New\ud800York", # Contains surrogate character + ] + + for bad_key in bad_keys: + with self.assertRaises(self.module.ZoneInfoNotFoundError): + self.klass(bad_key) + + def test_bad_keys_paths(self): + bad_keys = [ + "/America/Los_Angeles", # Absolute path + "America/Los_Angeles/", # Trailing slash - not normalized + "../zoneinfo/America/Los_Angeles", # Traverses above TZPATH + "America/../America/Los_Angeles", # Not normalized + "America/./Los_Angeles", + ] + + for bad_key in bad_keys: + with 
self.assertRaises(ValueError): + self.klass(bad_key) + + def test_bad_zones(self): + bad_zones = [ + b"", # Empty file + b"AAAA3" + b" " * 15, # Bad magic + ] + + for bad_zone in bad_zones: + fobj = io.BytesIO(bad_zone) + with self.assertRaises(ValueError): + self.klass.from_file(fobj) + + def test_fromutc_errors(self): + key = next(iter(self.zones())) + zone = self.zone_from_key(key) + + bad_values = [ + (datetime(2019, 1, 1, tzinfo=timezone.utc), ValueError), + (datetime(2019, 1, 1), ValueError), + (date(2019, 1, 1), TypeError), + (time(0), TypeError), + (0, TypeError), + ("2019-01-01", TypeError), + ] + + for val, exc_type in bad_values: + with self.subTest(val=val): + with self.assertRaises(exc_type): + zone.fromutc(val) + + def test_utc(self): + zi = self.klass("UTC") + dt = datetime(2020, 1, 1, tzinfo=zi) + + self.assertEqual(dt.utcoffset(), ZERO) + self.assertEqual(dt.dst(), ZERO) + self.assertEqual(dt.tzname(), "UTC") + + def test_unambiguous(self): + test_cases = [] + for key in self.zones(): + for zone_transition in self.load_transition_examples(key): + test_cases.append( + ( + key, + zone_transition.transition - timedelta(days=2), + zone_transition.offset_before, + ) + ) + + test_cases.append( + ( + key, + zone_transition.transition + timedelta(days=2), + zone_transition.offset_after, + ) + ) + + for key, dt, offset in test_cases: + with self.subTest(key=key, dt=dt, offset=offset): + tzi = self.zone_from_key(key) + dt = dt.replace(tzinfo=tzi) + + self.assertEqual(dt.tzname(), offset.tzname, dt) + self.assertEqual(dt.utcoffset(), offset.utcoffset, dt) + self.assertEqual(dt.dst(), offset.dst, dt) + + def test_folds_and_gaps(self): + test_cases = [] + for key in self.zones(): + tests = {"folds": [], "gaps": []} + for zt in self.load_transition_examples(key): + if zt.fold: + test_group = tests["folds"] + elif zt.gap: + test_group = tests["gaps"] + else: + # Assign a random variable here to disable the peephole + # optimizer so that coverage can see this 
line. + # See bpo-2506 for more information. + no_peephole_opt = None + continue + + # Cases are of the form key, dt, fold, offset + dt = zt.anomaly_start - timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_before)) + + dt = zt.anomaly_start + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_start + timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end - timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end + test_group.append((dt, 0, zt.offset_after)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end + timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_after)) + test_group.append((dt, 1, zt.offset_after)) + + for grp, test_group in tests.items(): + test_cases.append(((key, grp), test_group)) + + for (key, grp), tests in test_cases: + with self.subTest(key=key, grp=grp): + tzi = self.zone_from_key(key) + + for dt, fold, offset in tests: + dt = dt.replace(fold=fold, tzinfo=tzi) + + self.assertEqual(dt.tzname(), offset.tzname, dt) + self.assertEqual(dt.utcoffset(), offset.utcoffset, dt) + self.assertEqual(dt.dst(), offset.dst, dt) + + def test_folds_from_utc(self): + tests = [] + for key in self.zones(): + zi = self.zone_from_key(key) + with self.subTest(key=key): + for zt in self.load_transition_examples(key): + if not zt.fold: + continue + + dt_utc = zt.transition_utc + dt_before_utc = dt_utc - timedelta(seconds=1) + dt_after_utc = dt_utc + timedelta(seconds=1) + + dt_before = dt_before_utc.astimezone(zi) + self.assertEqual(dt_before.fold, 0, (dt_before, dt_utc)) + + dt_after = dt_after_utc.astimezone(zi) + self.assertEqual(dt_after.fold, 1, (dt_after, dt_utc)) + + def test_time_variable_offset(self): + # self.zones() only ever returns variable-offset zones 
+ for key in self.zones(): + zi = self.zone_from_key(key) + t = time(11, 15, 1, 34471, tzinfo=zi) + + with self.subTest(key=key): + self.assertIs(t.tzname(), None) + self.assertIs(t.utcoffset(), None) + self.assertIs(t.dst(), None) + + def test_time_fixed_offset(self): + for key, offset in self.fixed_offset_zones(): + zi = self.zone_from_key(key) + + t = time(11, 15, 1, 34471, tzinfo=zi) + + with self.subTest(key=key): + self.assertEqual(t.tzname(), offset.tzname) + self.assertEqual(t.utcoffset(), offset.utcoffset) + self.assertEqual(t.dst(), offset.dst) + + +class CZoneInfoTest(ZoneInfoTest): + module = c_zoneinfo + + def test_fold_mutate(self): + """Test that fold isn't mutated when no change is necessary. + + The underlying C API is capable of mutating datetime objects, and + may rely on the fact that addition of a datetime object returns a + new datetime; this test ensures that the input datetime to fromutc + is not mutated. + """ + + def to_subclass(dt): + class SameAddSubclass(type(dt)): + def __add__(self, other): + if other == timedelta(0): + return self + + return super().__add__(other) # pragma: nocover + + return SameAddSubclass( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + fold=dt.fold, + tzinfo=dt.tzinfo, + ) + + subclass = [False, True] + + key = "Europe/London" + zi = self.zone_from_key(key) + for zt in self.load_transition_examples(key): + if zt.fold and zt.offset_after.utcoffset == ZERO: + example = zt.transition_utc.replace(tzinfo=zi) + break + + for subclass in [False, True]: + if subclass: + dt = to_subclass(example) + else: + dt = example + + with self.subTest(subclass=subclass): + dt_fromutc = zi.fromutc(dt) + + self.assertEqual(dt_fromutc.fold, 1) + self.assertEqual(dt.fold, 0) + + +class ZoneInfoDatetimeSubclassTest(DatetimeSubclassMixin, ZoneInfoTest): + pass + + +class CZoneInfoDatetimeSubclassTest(DatetimeSubclassMixin, CZoneInfoTest): + pass + + +class ZoneInfoTestSubclass(ZoneInfoTest): + 
class CZoneInfoTestSubclass(ZoneInfoTestSubclass):
    # Derive from ZoneInfoTestSubclass rather than ZoneInfoTest so that its
    # setUpClass swaps ``klass`` for a ZISubclass of the C ZoneInfo and
    # test_subclass_own_cache runs against the C module.  Inheriting
    # directly from ZoneInfoTest only repeated what CZoneInfoTest covers.
    module = c_zoneinfo
+ """ + + @property + def tzpath(self): + return [] + + def zone_from_key(self, key): + return self.klass(key=key) + + +@unittest.skipIf( + not HAS_TZDATA_PKG, "Skipping tzdata-specific tests: tzdata not installed" +) +class CTZDataTests(TZDataTests): + module = c_zoneinfo + + +class WeirdZoneTest(ZoneInfoTestBase): + module = py_zoneinfo + + def test_one_transition(self): + LMT = ZoneOffset("LMT", -timedelta(hours=6, minutes=31, seconds=2)) + STD = ZoneOffset("STD", -timedelta(hours=6)) + + transitions = [ + ZoneTransition(datetime(1883, 6, 9, 14), LMT, STD), + ] + + after = "STD6" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dt0 = datetime(1883, 6, 9, 1, tzinfo=zi) + dt1 = datetime(1883, 6, 10, 1, tzinfo=zi) + + for dt, offset in [(dt0, LMT), (dt1, STD)]: + with self.subTest(name="local", dt=dt): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + dts = [ + ( + datetime(1883, 6, 9, 1, tzinfo=zi), + datetime(1883, 6, 9, 7, 31, 2, tzinfo=timezone.utc), + ), + ( + datetime(2010, 4, 1, 12, tzinfo=zi), + datetime(2010, 4, 1, 18, tzinfo=timezone.utc), + ), + ] + + for dt_local, dt_utc in dts: + with self.subTest(name="fromutc", dt=dt_local): + dt_actual = dt_utc.astimezone(zi) + self.assertEqual(dt_actual, dt_local) + + dt_utc_actual = dt_local.astimezone(timezone.utc) + self.assertEqual(dt_utc_actual, dt_utc) + + def test_one_zone_dst(self): + DST = ZoneOffset("DST", ONE_H, ONE_H) + transitions = [ + ZoneTransition(datetime(1970, 1, 1), DST, DST), + ] + + after = "STD0DST-1,0/0,J365/25" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dts = [ + datetime(1900, 3, 1), + datetime(1965, 9, 12), + datetime(1970, 1, 1), + datetime(2010, 11, 3), + datetime(2040, 1, 1), + ] + + for dt in dts: + dt = dt.replace(tzinfo=zi) + with self.subTest(dt=dt): + self.assertEqual(dt.tzname(), DST.tzname) + 
def test_no_tz_str(self):
    """Zone built from explicit transitions with an empty footer TZ string."""
    std = ZoneOffset("STD", ONE_H, ZERO)
    dst = ZoneOffset("DST", 2 * ONE_H, ONE_H)

    # For each year 1996-1999: STD -> DST on March 1st, then DST -> STD on
    # November 1st, both at 02:00.
    transitions = [
        ZoneTransition(datetime(year, month, 1, 2), before, after)
        for year in range(1996, 2000)
        for month, before, after in ((3, std, dst), (11, dst, std))
    ]

    zf = self.construct_zone(transitions, "")

    # According to RFC 8536, local times after the last transition time
    # with an empty TZ string are unspecified. We will go with "hold the
    # last transition", but the most we should promise is "doesn't crash."
    zi = self.klass.from_file(zf)

    expectations = (
        (datetime(1995, 1, 1), std),
        (datetime(1996, 4, 1), dst),
        (datetime(1996, 11, 2), std),
        (datetime(2001, 1, 1), std),
    )

    for naive, expected in expectations:
        aware = naive.replace(tzinfo=zi)
        with self.subTest(dt=aware):
            self.assertEqual(aware.tzname(), expected.tzname)
            self.assertEqual(aware.utcoffset(), expected.utcoffset)
            self.assertEqual(aware.dst(), expected.dst)

    # Test that offsets return None when using a datetime.time
    zoned_time = time(0, tzinfo=zi)
    with self.subTest("Testing datetime.time"):
        for query in (zoned_time.tzname, zoned_time.utcoffset, zoned_time.dst):
            self.assertIs(query(), None)
+ transitions = [ + ZoneTransition(None, offset, offset), + ] + + after = "" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dts = [ + datetime(1900, 1, 1), + datetime(1970, 1, 1), + datetime(2000, 1, 1), + ] + + for dt in dts: + dt = dt.replace(tzinfo=zi) + with self.subTest(offset=offset, dt=dt): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + def test_empty_zone(self): + zf = self.construct_zone([], "") + + with self.assertRaises(ValueError): + self.klass.from_file(zf) + + def test_zone_very_large_timestamp(self): + """Test when a transition is in the far past or future. + + Particularly, this is a concern if something: + + 1. Attempts to call ``datetime.timestamp`` for a datetime outside + of ``[datetime.min, datetime.max]``. + 2. Attempts to construct a timedelta outside of + ``[timedelta.min, timedelta.max]``. + + This actually occurs "in the wild", as some time zones on Ubuntu (at + least as of 2020) have an initial transition added at ``-2**58``. 
+ """ + + LMT = ZoneOffset("LMT", timedelta(seconds=-968)) + GMT = ZoneOffset("GMT", ZERO) + + transitions = [ + (-(1 << 62), LMT, LMT), + ZoneTransition(datetime(1912, 1, 1), LMT, GMT), + ((1 << 62), GMT, GMT), + ] + + after = "GMT0" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf, key="Africa/Abidjan") + + offset_cases = [ + (datetime.min, LMT), + (datetime.max, GMT), + (datetime(1911, 12, 31), LMT), + (datetime(1912, 1, 2), GMT), + ] + + for dt_naive, offset in offset_cases: + dt = dt_naive.replace(tzinfo=zi) + with self.subTest(name="offset", dt=dt, offset=offset): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + utc_cases = [ + (datetime.min, datetime.min + timedelta(seconds=968)), + (datetime(1898, 12, 31, 23, 43, 52), datetime(1899, 1, 1)), + ( + datetime(1911, 12, 31, 23, 59, 59, 999999), + datetime(1912, 1, 1, 0, 16, 7, 999999), + ), + (datetime(1912, 1, 1, 0, 16, 8), datetime(1912, 1, 1, 0, 16, 8)), + (datetime(1970, 1, 1), datetime(1970, 1, 1)), + (datetime.max, datetime.max), + ] + + for naive_dt, naive_dt_utc in utc_cases: + dt = naive_dt.replace(tzinfo=zi) + dt_utc = naive_dt_utc.replace(tzinfo=timezone.utc) + + self.assertEqual(dt_utc.astimezone(zi), dt) + self.assertEqual(dt, dt_utc) + + def test_fixed_offset_phantom_transition(self): + UTC = ZoneOffset("UTC", ZERO, ZERO) + + transitions = [ZoneTransition(datetime(1970, 1, 1), UTC, UTC)] + + after = "UTC0" + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf, key="UTC") + + dt = datetime(2020, 1, 1, tzinfo=zi) + with self.subTest("datetime.datetime"): + self.assertEqual(dt.tzname(), UTC.tzname) + self.assertEqual(dt.utcoffset(), UTC.utcoffset) + self.assertEqual(dt.dst(), UTC.dst) + + t = time(0, tzinfo=zi) + with self.subTest("datetime.time"): + self.assertEqual(t.tzname(), UTC.tzname) + self.assertEqual(t.utcoffset(), UTC.utcoffset) + 
def construct_zone(self, transitions, after=None, version=3):
    """Build an in-memory TZif file from a list of transitions.

    Args:
        transitions: Iterable of objects exposing ``transition``,
            ``transition_utc``, ``offset_before`` and ``offset_after``, or
            raw ``(timestamp, offset_before, offset_after)`` tuples.  The
            tuple form allows timestamps that are outside the valid range
            for datetimes but still valid 64-bit timestamps.  A timestamp
            of ``None`` registers its offsets as time types without
            emitting a transition time.
        after: TZ string written in the footer of the 64-bit data block.
            ``None`` is written as an empty TZ string.
        version: Version digit written in each header.  ``version=1``
            emits only the 32-bit data block; larger values also emit the
            64-bit block followed by the footer.

    Returns:
        An ``io.BytesIO`` holding the file, positioned at offset 0.
    """
    # These are not used for anything, so we're not going to include
    # them for now.
    isutc = []
    isstd = []
    leap_seconds = []

    # Index 0 is the 32-bit data block, index 1 the 64-bit one.  The upper
    # bounds are the largest values the struct codes "l" and "q" can pack;
    # a transition outside a block's range is left out of that block.
    ranges = [
        (-(2 ** 31), 2 ** 31 - 1),
        (-(2 ** 63), 2 ** 63 - 1),
    ]
    time_types = ("l", "q")

    offset_lists = [[], []]
    trans_times_lists = [[], []]
    trans_idx_lists = [[], []]

    def zt_as_tuple(zt):
        # zt may be a tuple (timestamp, offset_before, offset_after) or
        # a ZoneTransition object — this is to allow the timestamp to be
        # values that are outside the valid range for datetimes but still
        # valid 64-bit timestamps.
        if isinstance(zt, tuple):
            return zt

        if zt.transition:
            trans_time = int(zt.transition_utc.timestamp())
        else:
            trans_time = None

        return (trans_time, zt.offset_before, zt.offset_after)

    def sort_key(zt):
        # Timestamp-less entries sort first and are never compared with
        # integers, which would raise TypeError.
        trans_time = zt[0]
        return (trans_time is not None, trans_time or 0)

    transitions = sorted(map(zt_as_tuple, transitions), key=sort_key)

    for trans_time, offset_before, offset_after in transitions:
        for v, (dt_min, dt_max) in enumerate(ranges):
            offsets = offset_lists[v]

            if trans_time is not None and not (
                dt_min <= trans_time <= dt_max
            ):
                continue

            if offset_before not in offsets:
                offsets.append(offset_before)

            if offset_after not in offsets:
                offsets.append(offset_after)

            if trans_time is not None:
                trans_times_lists[v].append(trans_time)
                trans_idx_lists[v].append(offsets.index(offset_after))

    isutcnt = len(isutc)
    isstdcnt = len(isstd)
    leapcnt = len(leap_seconds)

    # The default ``after=None`` used to fail on ``None.encode``.
    footer = b"" if after is None else after.encode("ascii")

    zonefile = io.BytesIO()

    for v in range(min((version, 2))):
        offsets = offset_lists[v]
        trans_times = trans_times_lists[v]
        trans_idx = trans_idx_lists[v]
        time_type = time_types[v]

        # Translate the offsets into something closer to the C values
        abbrstr = bytearray()
        ttinfos = []

        for offset in offsets:
            utcoff = int(offset.utcoffset.total_seconds())
            isdst = bool(offset.dst)
            abbrind = len(abbrstr)

            ttinfos.append((utcoff, isdst, abbrind))
            abbrstr += offset.tzname.encode("ascii") + b"\x00"
        abbrstr = bytes(abbrstr)

        typecnt = len(offsets)
        timecnt = len(trans_times)
        charcnt = len(abbrstr)

        # Write the header
        zonefile.write(b"TZif")
        zonefile.write(b"%d" % version)
        zonefile.write(b" " * 15)
        zonefile.write(
            struct.pack(
                ">6l", isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt
            )
        )

        # Now the transition data
        zonefile.write(struct.pack(f">{timecnt}{time_type}", *trans_times))
        zonefile.write(struct.pack(f">{timecnt}B", *trans_idx))

        for ttinfo in ttinfos:
            zonefile.write(struct.pack(">lbb", *ttinfo))

        zonefile.write(abbrstr)

        # Now the metadata and leap seconds
        zonefile.write(struct.pack(f"{isutcnt}b", *isutc))
        zonefile.write(struct.pack(f"{isstdcnt}b", *isstd))
        zonefile.write(struct.pack(f">{leapcnt}l", *leap_seconds))

        # Finally we write the TZ string if we're writing a Version 2+ file
        if v > 0:
            zonefile.write(b"\x0A")
            zonefile.write(footer)
            zonefile.write(b"\x0A")

    zonefile.seek(0)
    return zonefile
def zone_from_tzstr(self, tzstr):
    """Creates a zoneinfo file following a POSIX rule."""
    header = self._tzif_header

    # The footer is the TZ string enclosed in newline bytes; it goes
    # directly after the pre-built header.
    footer = b"\x0A" + tzstr.encode("ascii") + b"\x0A"

    # A BytesIO created from initial bytes starts at position 0, ready to
    # be read by from_file.
    zonefile = io.BytesIO(header + footer)

    return self.klass.from_file(zonefile, key=tzstr)
Unquoted alphanumeric in DST + "PST8PDT,M3.2.0/2", # Only one transition rule + # Invalid offsets + "STD+25", + "STD-25", + "STD+374", + "STD+374DST,M3.2.0/2,M11.1.0/3", + "STD+23DST+25,M3.2.0/2,M11.1.0/3", + "STD-23DST-25,M3.2.0/2,M11.1.0/3", + # Completely invalid dates + "AAA4BBB,M1443339,M11.1.0/3", + "AAA4BBB,M3.2.0/2,0349309483959c", + # Invalid months + "AAA4BBB,M13.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M13.1.1/2", + "AAA4BBB,M0.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M0.1.1/2", + # Invalid weeks + "AAA4BBB,M1.6.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M1.6.1/2", + # Invalid weekday + "AAA4BBB,M1.1.7/2,M2.1.1/2", + "AAA4BBB,M1.1.1/2,M2.1.7/2", + # Invalid numeric offset + "AAA4BBB,-1/2,20/2", + "AAA4BBB,1/2,-1/2", + "AAA4BBB,367,20/2", + "AAA4BBB,1/2,367/2", + # Invalid julian offset + "AAA4BBB,J0/2,J20/2", + "AAA4BBB,J20/2,J366/2", + ] + + for invalid_tzstr in invalid_tzstrs: + with self.subTest(tzstr=invalid_tzstr): + # Not necessarily a guaranteed property, but we should show + # the problematic TZ string if that's the cause of failure. + tzstr_regex = re.escape(invalid_tzstr) + with self.assertRaisesRegex(ValueError, tzstr_regex): + self.zone_from_tzstr(invalid_tzstr) + + @classmethod + def _populate_test_cases(cls): + # This method uses a somewhat unusual style in that it populates the + # test cases for each tzstr by using a decorator to automatically call + # a function that mutates the current dictionary of test cases. + # + # The population of the test cases is done in individual functions to + # give each set of test cases its own namespace in which to define + # its offsets (this way we don't have to worry about variable reuse + # causing problems if someone makes a typo). + # + # The decorator for calling is used to make it more obvious that each + # function is actually called (if it's not decorated, it's not called). + def call(f): + """Decorator to call the addition methods. 
+ + This will call a function which adds at least one new entry into + the `cases` dictionary. The decorator will also assert that + something was added to the dictionary. + """ + prev_len = len(cases) + f() + assert len(cases) > prev_len, "Function did not add a test case!" + + NORMAL = cls.NORMAL + FOLD = cls.FOLD + GAP = cls.GAP + + cases = {} + + @call + def _add(): + # Transition to EDT on the 2nd Sunday in March at 4 AM, and + # transition back on the first Sunday in November at 3AM + tzstr = "EST5EDT,M3.2.0/4:00,M11.1.0/3:00" + + EST = ZoneOffset("EST", timedelta(hours=-5), ZERO) + EDT = ZoneOffset("EDT", timedelta(hours=-4), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 3, 9), EST, NORMAL), + (datetime(2019, 3, 10, 3, 59), EST, NORMAL), + (datetime(2019, 3, 10, 4, 0, fold=0), EST, GAP), + (datetime(2019, 3, 10, 4, 0, fold=1), EDT, GAP), + (datetime(2019, 3, 10, 4, 1, fold=0), EST, GAP), + (datetime(2019, 3, 10, 4, 1, fold=1), EDT, GAP), + (datetime(2019, 11, 2), EDT, NORMAL), + (datetime(2019, 11, 3, 1, 59, fold=1), EDT, NORMAL), + (datetime(2019, 11, 3, 2, 0, fold=0), EDT, FOLD), + (datetime(2019, 11, 3, 2, 0, fold=1), EST, FOLD), + (datetime(2020, 3, 8, 3, 59), EST, NORMAL), + (datetime(2020, 3, 8, 4, 0, fold=0), EST, GAP), + (datetime(2020, 3, 8, 4, 0, fold=1), EDT, GAP), + (datetime(2020, 11, 1, 1, 59, fold=1), EDT, NORMAL), + (datetime(2020, 11, 1, 2, 0, fold=0), EDT, FOLD), + (datetime(2020, 11, 1, 2, 0, fold=1), EST, FOLD), + ) + + @call + def _add(): + # Transition to BST happens on the last Sunday in March at 1 AM GMT + # and the transition back happens the last Sunday in October at 2AM BST + tzstr = "GMT0BST-1,M3.5.0/1:00,M10.5.0/2:00" + + GMT = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + + cases[tzstr] = ( + (datetime(2019, 3, 30), GMT, NORMAL), + (datetime(2019, 3, 31, 0, 59), GMT, NORMAL), + (datetime(2019, 3, 31, 2, 0), BST, NORMAL), + (datetime(2019, 10, 26), BST, NORMAL), + (datetime(2019, 10, 27, 0, 59, fold=1), 
BST, NORMAL), + (datetime(2019, 10, 27, 1, 0, fold=0), BST, GAP), + (datetime(2019, 10, 27, 2, 0, fold=1), GMT, GAP), + (datetime(2020, 3, 29, 0, 59), GMT, NORMAL), + (datetime(2020, 3, 29, 2, 0), BST, NORMAL), + (datetime(2020, 10, 25, 0, 59, fold=1), BST, NORMAL), + (datetime(2020, 10, 25, 1, 0, fold=0), BST, FOLD), + (datetime(2020, 10, 25, 2, 0, fold=1), GMT, NORMAL), + ) + + @call + def _add(): + # Austrialian time zone - DST start is chronologically first + tzstr = "AEST-10AEDT,M10.1.0/2,M4.1.0/3" + + AEST = ZoneOffset("AEST", timedelta(hours=10), ZERO) + AEDT = ZoneOffset("AEDT", timedelta(hours=11), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 4, 6), AEDT, NORMAL), + (datetime(2019, 4, 7, 1, 59), AEDT, NORMAL), + (datetime(2019, 4, 7, 1, 59, fold=1), AEDT, NORMAL), + (datetime(2019, 4, 7, 2, 0, fold=0), AEDT, FOLD), + (datetime(2019, 4, 7, 2, 1, fold=0), AEDT, FOLD), + (datetime(2019, 4, 7, 2, 0, fold=1), AEST, FOLD), + (datetime(2019, 4, 7, 2, 1, fold=1), AEST, FOLD), + (datetime(2019, 4, 7, 3, 0, fold=0), AEST, NORMAL), + (datetime(2019, 4, 7, 3, 0, fold=1), AEST, NORMAL), + (datetime(2019, 10, 5, 0), AEST, NORMAL), + (datetime(2019, 10, 6, 1, 59), AEST, NORMAL), + (datetime(2019, 10, 6, 2, 0, fold=0), AEST, GAP), + (datetime(2019, 10, 6, 2, 0, fold=1), AEDT, GAP), + (datetime(2019, 10, 6, 3, 0), AEDT, NORMAL), + ) + + @call + def _add(): + # Irish time zone - negative DST + tzstr = "IST-1GMT0,M10.5.0,M3.5.0/1" + + GMT = ZoneOffset("GMT", ZERO, -ONE_H) + IST = ZoneOffset("IST", ONE_H, ZERO) + + cases[tzstr] = ( + (datetime(2019, 3, 30), GMT, NORMAL), + (datetime(2019, 3, 31, 0, 59), GMT, NORMAL), + (datetime(2019, 3, 31, 2, 0), IST, NORMAL), + (datetime(2019, 10, 26), IST, NORMAL), + (datetime(2019, 10, 27, 0, 59, fold=1), IST, NORMAL), + (datetime(2019, 10, 27, 1, 0, fold=0), IST, FOLD), + (datetime(2019, 10, 27, 1, 0, fold=1), GMT, FOLD), + (datetime(2019, 10, 27, 2, 0, fold=1), GMT, NORMAL), + (datetime(2020, 3, 29, 0, 59), GMT, NORMAL), + 
(datetime(2020, 3, 29, 2, 0), IST, NORMAL), + (datetime(2020, 10, 25, 0, 59, fold=1), IST, NORMAL), + (datetime(2020, 10, 25, 1, 0, fold=0), IST, FOLD), + (datetime(2020, 10, 25, 2, 0, fold=1), GMT, NORMAL), + ) + + @call + def _add(): + # Pacific/Kosrae: Fixed offset zone with a quoted numerical tzname + tzstr = "<+11>-11" + + cases[tzstr] = ( + ( + datetime(2020, 1, 1), + ZoneOffset("+11", timedelta(hours=11)), + NORMAL, + ), + ) + + @call + def _add(): + # Quoted STD and DST, transitions at 24:00 + tzstr = "<-04>4<-03>,M9.1.6/24,M4.1.6/24" + + M04 = ZoneOffset("-04", timedelta(hours=-4)) + M03 = ZoneOffset("-03", timedelta(hours=-3), ONE_H) + + cases[tzstr] = ( + (datetime(2020, 5, 1), M04, NORMAL), + (datetime(2020, 11, 1), M03, NORMAL), + ) + + @call + def _add(): + # Permanent daylight saving time is modeled with transitions at 0/0 + # and J365/25, as mentioned in RFC 8536 Section 3.3.1 + tzstr = "EST5EDT,0/0,J365/25" + + EDT = ZoneOffset("EDT", timedelta(hours=-4), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 1, 1), EDT, NORMAL), + (datetime(2019, 6, 1), EDT, NORMAL), + (datetime(2019, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + (datetime(2020, 1, 1), EDT, NORMAL), + (datetime(2020, 3, 1), EDT, NORMAL), + (datetime(2020, 6, 1), EDT, NORMAL), + (datetime(2020, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + (datetime(2400, 1, 1), EDT, NORMAL), + (datetime(2400, 3, 1), EDT, NORMAL), + (datetime(2400, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + ) + + @call + def _add(): + # Transitions on March 1st and November 1st of each year + tzstr = "AAA3BBB,J60/12,J305/12" + + AAA = ZoneOffset("AAA", timedelta(hours=-3)) + BBB = ZoneOffset("BBB", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 1, 1), AAA, NORMAL), + (datetime(2019, 2, 28), AAA, NORMAL), + (datetime(2019, 3, 1, 11, 59), AAA, NORMAL), + (datetime(2019, 3, 1, 12, fold=0), AAA, GAP), + (datetime(2019, 3, 1, 12, fold=1), BBB, GAP), + (datetime(2019, 3, 1, 13), BBB, NORMAL), + (datetime(2019, 
11, 1, 10, 59), BBB, NORMAL), + (datetime(2019, 11, 1, 11, fold=0), BBB, FOLD), + (datetime(2019, 11, 1, 11, fold=1), AAA, FOLD), + (datetime(2019, 11, 1, 12), AAA, NORMAL), + (datetime(2019, 12, 31, 23, 59, 59, 999999), AAA, NORMAL), + (datetime(2020, 1, 1), AAA, NORMAL), + (datetime(2020, 2, 29), AAA, NORMAL), + (datetime(2020, 3, 1, 11, 59), AAA, NORMAL), + (datetime(2020, 3, 1, 12, fold=0), AAA, GAP), + (datetime(2020, 3, 1, 12, fold=1), BBB, GAP), + (datetime(2020, 3, 1, 13), BBB, NORMAL), + (datetime(2020, 11, 1, 10, 59), BBB, NORMAL), + (datetime(2020, 11, 1, 11, fold=0), BBB, FOLD), + (datetime(2020, 11, 1, 11, fold=1), AAA, FOLD), + (datetime(2020, 11, 1, 12), AAA, NORMAL), + (datetime(2020, 12, 31, 23, 59, 59, 999999), AAA, NORMAL), + ) + + @call + def _add(): + # Taken from America/Godthab, this rule has a transition on the + # Saturday before the last Sunday of March and October, at 22:00 + # and 23:00, respectively. This is encoded with negative start + # and end transition times. 
+ tzstr = "<-03>3<-02>,M3.5.0/-2,M10.5.0/-1" + + N03 = ZoneOffset("-03", timedelta(hours=-3)) + N02 = ZoneOffset("-02", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2020, 3, 27), N03, NORMAL), + (datetime(2020, 3, 28, 21, 59, 59), N03, NORMAL), + (datetime(2020, 3, 28, 22, fold=0), N03, GAP), + (datetime(2020, 3, 28, 22, fold=1), N02, GAP), + (datetime(2020, 3, 28, 23), N02, NORMAL), + (datetime(2020, 10, 24, 21), N02, NORMAL), + (datetime(2020, 10, 24, 22, fold=0), N02, FOLD), + (datetime(2020, 10, 24, 22, fold=1), N03, FOLD), + (datetime(2020, 10, 24, 23), N03, NORMAL), + ) + + @call + def _add(): + # Transition times with minutes and seconds + tzstr = "AAA3BBB,M3.2.0/01:30,M11.1.0/02:15:45" + + AAA = ZoneOffset("AAA", timedelta(hours=-3)) + BBB = ZoneOffset("BBB", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2012, 3, 11, 1, 0), AAA, NORMAL), + (datetime(2012, 3, 11, 1, 30, fold=0), AAA, GAP), + (datetime(2012, 3, 11, 1, 30, fold=1), BBB, GAP), + (datetime(2012, 3, 11, 2, 30), BBB, NORMAL), + (datetime(2012, 11, 4, 1, 15, 44, 999999), BBB, NORMAL), + (datetime(2012, 11, 4, 1, 15, 45, fold=0), BBB, FOLD), + (datetime(2012, 11, 4, 1, 15, 45, fold=1), AAA, FOLD), + (datetime(2012, 11, 4, 2, 15, 45), AAA, NORMAL), + ) + + cls.test_cases = cases + + +class CTZStrTest(TZStrTest): + module = c_zoneinfo + + +class ZoneInfoCacheTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + + def setUp(self): + self.klass.clear_cache() + super().setUp() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return [self.zoneinfo_data.tzpath] + + def test_ephemeral_zones(self): + self.assertIs( + self.klass("America/Los_Angeles"), self.klass("America/Los_Angeles") + ) + + def test_strong_refs(self): + tz0 = self.klass("Australia/Sydney") + tz1 = self.klass("Australia/Sydney") + + self.assertIs(tz0, tz1) + + def test_no_cache(self): + + tz0 = self.klass("Europe/Lisbon") + tz1 = 
self.klass.no_cache("Europe/Lisbon") + + self.assertIsNot(tz0, tz1) + + def test_cache_reset_tzpath(self): + """Test that the cache persists when tzpath has been changed. + + The PEP specifies that as long as a reference exists to one zone + with a given key, the primary constructor must continue to return + the same object. + """ + zi0 = self.klass("America/Los_Angeles") + with self.tzpath_context([]): + zi1 = self.klass("America/Los_Angeles") + + self.assertIs(zi0, zi1) + + def test_clear_cache_explicit_none(self): + la0 = self.klass("America/Los_Angeles") + self.klass.clear_cache(only_keys=None) + la1 = self.klass("America/Los_Angeles") + + self.assertIsNot(la0, la1) + + def test_clear_cache_one_key(self): + """Tests that you can clear a single key from the cache.""" + la0 = self.klass("America/Los_Angeles") + dub0 = self.klass("Europe/Dublin") + + self.klass.clear_cache(only_keys=["America/Los_Angeles"]) + + la1 = self.klass("America/Los_Angeles") + dub1 = self.klass("Europe/Dublin") + + self.assertIsNot(la0, la1) + self.assertIs(dub0, dub1) + + def test_clear_cache_two_keys(self): + la0 = self.klass("America/Los_Angeles") + dub0 = self.klass("Europe/Dublin") + tok0 = self.klass("Asia/Tokyo") + + self.klass.clear_cache( + only_keys=["America/Los_Angeles", "Europe/Dublin"] + ) + + la1 = self.klass("America/Los_Angeles") + dub1 = self.klass("Europe/Dublin") + tok1 = self.klass("Asia/Tokyo") + + self.assertIsNot(la0, la1) + self.assertIsNot(dub0, dub1) + self.assertIs(tok0, tok1) + + +class CZoneInfoCacheTest(ZoneInfoCacheTest): + module = c_zoneinfo + + +class ZoneInfoPickleTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + + def setUp(self): + self.klass.clear_cache() + + with contextlib.ExitStack() as stack: + stack.enter_context(test_support.set_zoneinfo_module(self.module)) + self.addCleanup(stack.pop_all().close) + + super().setUp() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return 
[self.zoneinfo_data.tzpath] + + def test_cache_hit(self): + zi_in = self.klass("Europe/Dublin") + pkl = pickle.dumps(zi_in) + zi_rt = pickle.loads(pkl) + + with self.subTest(test="Is non-pickled ZoneInfo"): + self.assertIs(zi_in, zi_rt) + + zi_rt2 = pickle.loads(pkl) + with self.subTest(test="Is unpickled ZoneInfo"): + self.assertIs(zi_rt, zi_rt2) + + def test_cache_miss(self): + zi_in = self.klass("Europe/Dublin") + pkl = pickle.dumps(zi_in) + + del zi_in + self.klass.clear_cache() # Induce a cache miss + zi_rt = pickle.loads(pkl) + zi_rt2 = pickle.loads(pkl) + + self.assertIs(zi_rt, zi_rt2) + + def test_no_cache(self): + zi_no_cache = self.klass.no_cache("Europe/Dublin") + + pkl = pickle.dumps(zi_no_cache) + zi_rt = pickle.loads(pkl) + + with self.subTest(test="Not the pickled object"): + self.assertIsNot(zi_rt, zi_no_cache) + + zi_rt2 = pickle.loads(pkl) + with self.subTest(test="Not a second unpickled object"): + self.assertIsNot(zi_rt, zi_rt2) + + zi_cache = self.klass("Europe/Dublin") + with self.subTest(test="Not a cached object"): + self.assertIsNot(zi_rt, zi_cache) + + def test_from_file(self): + key = "Europe/Dublin" + with open(self.zoneinfo_data.path_from_key(key), "rb") as f: + zi_nokey = self.klass.from_file(f) + + f.seek(0) + zi_key = self.klass.from_file(f, key=key) + + test_cases = [ + (zi_key, "ZoneInfo with key"), + (zi_nokey, "ZoneInfo without key"), + ] + + for zi, test_name in test_cases: + with self.subTest(test_name=test_name): + with self.assertRaises(pickle.PicklingError): + pickle.dumps(zi) + + def test_pickle_after_from_file(self): + # This may be a bit of paranoia, but this test is to ensure that no + # global state is maintained in order to handle the pickle cache and + # from_file behavior, and that it is possible to interweave the + # constructors of each of these and pickling/unpickling without issues. 
@staticmethod
@contextlib.contextmanager
def python_tzpath_context(value):
    """Temporarily set the ``PYTHONTZPATH`` environment variable to *value*.

    On exit the variable is restored to the value it had on entry, or
    removed again if it was not set on entry.
    """
    path_var = "PYTHONTZPATH"

    # Distinguishes "the environment was never read" from "the variable was
    # not set" (for which os.environ.get returns None).
    unset_env_sentinel = object()
    old_env = unset_env_sentinel
    try:
        # NOTE(review): the lock is held while the caller's body runs —
        # confirm this matches the intended nesting of ``yield``.
        with OS_ENV_LOCK:
            old_env = os.environ.get(path_var, None)
            os.environ[path_var] = value
            yield
    finally:
        if old_env is unset_env_sentinel:
            # Nothing was changed, so there is nothing to restore.
            # Previously this path raised UnboundLocalError and hid the
            # original exception.
            pass
        elif old_env is None:
            # pop() rather than del: the assignment above may have failed
            # before the variable was created.
            os.environ.pop(path_var, None)
        else:
            os.environ[path_var] = old_env  # pragma: nocover
def test_tzpath_type_error(self):
    """reset_tzpath raises TypeError for a bare str, bytes or int argument."""
    rejected = (
        "/etc/zoneinfo:/usr/share/zoneinfo",
        b"/etc/zoneinfo:/usr/share/zoneinfo",
        0,
    )

    for candidate in rejected:
        with self.subTest(value=candidate), self.assertRaises(TypeError):
            self.module.reset_tzpath(candidate)
test_tzpath_attribute(self): + tzpath_0 = ["/one", "/two"] + tzpath_1 = ["/three"] + + with self.tzpath_context(tzpath_0): + query_0 = self.module.TZPATH + + with self.tzpath_context(tzpath_1): + query_1 = self.module.TZPATH + + self.assertSequenceEqual(tzpath_0, query_0) + self.assertSequenceEqual(tzpath_1, query_1) + + +class CTzPathTest(TzPathTest): + module = c_zoneinfo + + +class TestModule(ZoneInfoTestBase): + module = py_zoneinfo + + def test_getattr_error(self): + with self.assertRaises(AttributeError): + self.module.NOATTRIBUTE + + def test_dir_contains_all(self): + """dir(self.module) should at least contain everything in __all__.""" + module_all_set = set(self.module.__all__) + module_dir_set = set(dir(self.module)) + + difference = module_all_set - module_dir_set + + self.assertFalse(difference) + + def test_dir_unique(self): + """Test that there are no duplicates in dir(self.module)""" + module_dir = dir(self.module) + module_unique = set(module_dir) + + self.assertCountEqual(module_dir, module_unique) + + +class CTestModule(TestModule): + module = c_zoneinfo + + +class ExtensionBuiltTest(unittest.TestCase): + """Smoke test to ensure that the C and Python extensions are both tested. + + Because the intention is for the Python and C versions of ZoneInfo to + behave identically, these tests necessarily rely on implementation details, + so the tests may need to be adjusted if the implementations change. Do not + rely on these tests as an indication of stable properties of these classes. + """ + + def test_cache_location(self): + # The pure Python version stores caches on attributes, but the C + # extension stores them in C globals (at least for now) + self.assertFalse(hasattr(c_zoneinfo.ZoneInfo, "_weak_cache")) + self.assertTrue(hasattr(py_zoneinfo.ZoneInfo, "_weak_cache")) + + def test_gc_tracked(self): + # The pure Python version is tracked by the GC but (for now) the C + # version is not. 
class ZoneInfoData:
    """Materialise TZif files on disk from a JSON fixture.

    The JSON document is expected to hold a ``"data"`` mapping of time zone
    key -> list of text chunks. The chunks, joined together, are the
    base85-encoded, LZMA-compressed contents of the TZif file for that key.

    Parameters:
        source_json: path of the JSON fixture to read.
        tzpath: directory under which one file per key is written.
        v1: when true, every file is rewritten as a version 1 TZif file
            before being stored.
    """

    def __init__(self, source_json, tzpath, v1=False):
        self.tzpath = pathlib.Path(tzpath)
        self.keys = []
        self.v1 = v1
        self._populate_tzpath(source_json)

    def path_from_key(self, key):
        """Return the location of the file written for *key*."""
        return self.tzpath / key

    def _populate_tzpath(self, source_json):
        """Decode every entry of the fixture and write it below ``tzpath``."""
        with open(source_json, "rb") as f:
            zoneinfo_dict = json.load(f)

        zoneinfo_data = zoneinfo_dict["data"]

        for key, value in zoneinfo_data.items():
            self.keys.append(key)
            raw_data = self._decode_text(value)
            data = self._convert_to_v1(raw_data) if self.v1 else raw_data

            destination = self.path_from_key(key)
            # Keys such as "Europe/Dublin" need their parent directory
            destination.parent.mkdir(exist_ok=True, parents=True)
            with open(destination, "wb") as out:
                out.write(data)

    def _decode_text(self, contents):
        """Join the text chunks, base85-decode and LZMA-decompress them."""
        raw_data = b"".join(map(str.encode, contents))
        decoded = base64.b85decode(raw_data)

        return lzma.decompress(decoded)

    def _convert_to_v1(self, contents):
        """Return the version 1 portion of a version 2+ TZif byte string.

        The result is the leading header plus 32-bit data block of
        *contents*, with the version byte replaced by NUL.

        Raises:
            AssertionError: if the magic is missing, if *contents* already is
                a version 1 file, or if the version 1 block is not directly
                followed by a second ``TZif`` header.
        """
        assert contents[0:4] == b"TZif", "Invalid TZif data found!"

        # Version 1 files store a NUL byte here rather than an ASCII digit;
        # int(b"\x00") would raise ValueError before the check below could
        # report the real problem.
        raw_version = contents[4:5]
        version = 1 if raw_version == b"\x00" else int(raw_version)

        header_start = 4 + 16
        header_end = header_start + 24  # 6l == 24 bytes
        assert version >= 2, "Version 1 file found: no conversion necessary"
        isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt = struct.unpack(
            ">6l", contents[header_start:header_end]
        )

        file_size = (
            timecnt * 5
            + typecnt * 6
            + charcnt
            + leapcnt * 8
            + isstdcnt
            + isutcnt
        )
        file_size += header_end
        out = b"TZif" + b"\x00" + contents[5:file_size]

        assert (
            contents[file_size : (file_size + 4)] == b"TZif"
        ), "Version 2 file not truncated at Version 2 header"

        return out
def _America_Los_Angeles():
    """Return sample transitions for the America/Los_Angeles zone.

    Each entry records the local wall time of the transition together with
    the offsets in effect before and after it.
    """
    LMT = ZoneOffset("LMT", timedelta(seconds=-28378), ZERO)
    PST = ZoneOffset("PST", timedelta(hours=-8), ZERO)
    PDT = ZoneOffset("PDT", timedelta(hours=-7), ONE_H)
    PWT = ZoneOffset("PWT", timedelta(hours=-7), ONE_H)
    PPT = ZoneOffset("PPT", timedelta(hours=-7), ONE_H)

    return [
        ZoneTransition(datetime(1883, 11, 18, 12, 7, 2), LMT, PST),
        # The 1918 spring-forward entry used to be listed twice; one copy
        # is enough.
        ZoneTransition(datetime(1918, 3, 31, 2), PST, PDT),
        ZoneTransition(datetime(1918, 10, 27, 2), PDT, PST),
        # Transition to Pacific War Time
        ZoneTransition(datetime(1942, 2, 9, 2), PST, PWT),
        # Transition from Pacific War Time to Pacific Peace Time
        ZoneTransition(datetime(1945, 8, 14, 16), PWT, PPT),
        ZoneTransition(datetime(1945, 9, 30, 2), PPT, PST),
        ZoneTransition(datetime(2015, 3, 8, 2), PST, PDT),
        ZoneTransition(datetime(2015, 11, 1, 2), PDT, PST),
        # After 2038: Rules continue indefinitely
        ZoneTransition(datetime(2450, 3, 13, 2), PST, PDT),
        ZoneTransition(datetime(2450, 11, 6, 2), PDT, PST),
    ]
ZoneTransition(datetime(2040, 4, 8), N03, N04), + ZoneTransition(datetime(2040, 9, 2), N04, N03), + ] + + def _Asia_Tokyo(): + JST = ZoneOffset("JST", timedelta(seconds=32400), ZERO) + JDT = ZoneOffset("JDT", timedelta(seconds=36000), ONE_H) + + # Japan had DST from 1948 to 1951, and it was unusual in that + # the transition from DST to STD occurred at 25:00, and is + # denominated as such in the time zone database + return [ + ZoneTransition(datetime(1948, 5, 2), JST, JDT), + ZoneTransition(datetime(1948, 9, 12, 1), JDT, JST), + ZoneTransition(datetime(1951, 9, 9, 1), JDT, JST), + ] + + def _Australia_Sydney(): + LMT = ZoneOffset("LMT", timedelta(seconds=36292), ZERO) + AEST = ZoneOffset("AEST", timedelta(seconds=36000), ZERO) + AEDT = ZoneOffset("AEDT", timedelta(seconds=39600), ONE_H) + + return [ + ZoneTransition(datetime(1895, 2, 1), LMT, AEST), + ZoneTransition(datetime(1917, 1, 1, 0, 1), AEST, AEDT), + ZoneTransition(datetime(1917, 3, 25, 2), AEDT, AEST), + ZoneTransition(datetime(2012, 4, 1, 3), AEDT, AEST), + ZoneTransition(datetime(2012, 10, 7, 2), AEST, AEDT), + ZoneTransition(datetime(2040, 4, 1, 3), AEDT, AEST), + ZoneTransition(datetime(2040, 10, 7, 2), AEST, AEDT), + ] + + def _Europe_Dublin(): + LMT = ZoneOffset("LMT", timedelta(seconds=-1500), ZERO) + DMT = ZoneOffset("DMT", timedelta(seconds=-1521), ZERO) + IST_0 = ZoneOffset("IST", timedelta(seconds=2079), ONE_H) + GMT_0 = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + GMT_1 = ZoneOffset("GMT", ZERO, -ONE_H) + IST_1 = ZoneOffset("IST", ONE_H, ZERO) + + return [ + ZoneTransition(datetime(1880, 8, 2, 0), LMT, DMT), + ZoneTransition(datetime(1916, 5, 21, 2), DMT, IST_0), + ZoneTransition(datetime(1916, 10, 1, 3), IST_0, GMT_0), + ZoneTransition(datetime(1917, 4, 8, 2), GMT_0, BST), + ZoneTransition(datetime(2016, 3, 27, 1), GMT_1, IST_1), + ZoneTransition(datetime(2016, 10, 30, 2), IST_1, GMT_1), + ZoneTransition(datetime(2487, 3, 30, 1), GMT_1, IST_1), + 
ZoneTransition(datetime(2487, 10, 26, 2), IST_1, GMT_1), + ] + + def _Europe_Lisbon(): + WET = ZoneOffset("WET", ZERO, ZERO) + WEST = ZoneOffset("WEST", ONE_H, ONE_H) + CET = ZoneOffset("CET", ONE_H, ZERO) + CEST = ZoneOffset("CEST", timedelta(seconds=7200), ONE_H) + + return [ + ZoneTransition(datetime(1992, 3, 29, 1), WET, WEST), + ZoneTransition(datetime(1992, 9, 27, 2), WEST, CET), + ZoneTransition(datetime(1993, 3, 28, 2), CET, CEST), + ZoneTransition(datetime(1993, 9, 26, 3), CEST, CET), + ZoneTransition(datetime(1996, 3, 31, 2), CET, WEST), + ZoneTransition(datetime(1996, 10, 27, 2), WEST, WET), + ] + + def _Europe_London(): + LMT = ZoneOffset("LMT", timedelta(seconds=-75), ZERO) + GMT = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + + return [ + ZoneTransition(datetime(1847, 12, 1), LMT, GMT), + ZoneTransition(datetime(2005, 3, 27, 1), GMT, BST), + ZoneTransition(datetime(2005, 10, 30, 2), BST, GMT), + ZoneTransition(datetime(2043, 3, 29, 1), GMT, BST), + ZoneTransition(datetime(2043, 10, 25, 2), BST, GMT), + ] + + def _Pacific_Kiritimati(): + LMT = ZoneOffset("LMT", timedelta(seconds=-37760), ZERO) + N1040 = ZoneOffset("-1040", timedelta(seconds=-38400), ZERO) + N10 = ZoneOffset("-10", timedelta(seconds=-36000), ZERO) + P14 = ZoneOffset("+14", timedelta(seconds=50400), ZERO) + + # This is literally every transition in Christmas Island history + return [ + ZoneTransition(datetime(1901, 1, 1), LMT, N1040), + ZoneTransition(datetime(1979, 10, 1), N1040, N10), + # They skipped December 31, 1994 + ZoneTransition(datetime(1994, 12, 31), N10, P14), + ] + + cls._ZONEDUMP_DATA = { + "Africa/Abidjan": _Africa_Abidjan(), + "Africa/Casablanca": _Africa_Casablanca(), + "America/Los_Angeles": _America_Los_Angeles(), + "America/Santiago": _America_Santiago(), + "Australia/Sydney": _Australia_Sydney(), + "Asia/Tokyo": _Asia_Tokyo(), + "Europe/Dublin": _Europe_Dublin(), + "Europe/Lisbon": _Europe_Lisbon(), + "Europe/London": _Europe_London(), + 
"Pacific/Kiritimati": _Pacific_Kiritimati(), + } + + _ZONEDUMP_DATA = None + _FIXED_OFFSET_ZONES = None diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py new file mode 100644 index 00000000000000..81a2d5ea97be0f --- /dev/null +++ b/Lib/zoneinfo/__init__.py @@ -0,0 +1,29 @@ +__all__ = [ + "ZoneInfo", + "reset_tzpath", + "TZPATH", + "ZoneInfoNotFoundError", + "InvalidTZPathWarning", +] + +from . import _tzpath +from ._common import ZoneInfoNotFoundError + +try: + from _zoneinfo import ZoneInfo +except ImportError: # pragma: nocover + from ._zoneinfo import ZoneInfo + +reset_tzpath = _tzpath.reset_tzpath +InvalidTZPathWarning = _tzpath.InvalidTZPathWarning + + +def __getattr__(name): + if name == "TZPATH": + return _tzpath.TZPATH + else: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return sorted(list(globals()) + ["TZPATH"]) diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py new file mode 100644 index 00000000000000..3d35d4f4b463f8 --- /dev/null +++ b/Lib/zoneinfo/_common.py @@ -0,0 +1,166 @@ +import struct + + +def load_tzdata(key): + import importlib.resources + + components = key.split("/") + package_name = ".".join(["tzdata.zoneinfo"] + components[:-1]) + resource_name = components[-1] + + try: + return importlib.resources.open_binary(package_name, resource_name) + except (ImportError, FileNotFoundError, UnicodeEncodeError): + # There are three types of exception that can be raised that all amount + # to "we cannot find this key": + # + # ImportError: If package_name doesn't exist (e.g. if tzdata is not + # installed, or if there's an error in the folder name like + # Amrica/New_York) + # FileNotFoundError: If resource_name doesn't exist in the package + # (e.g. Europe/Krasnoy) + # UnicodeEncodeError: If package_name or resource_name are not UTF-8, + # such as keys containing a surrogate character. 
+ raise ZoneInfoNotFoundError(f"No time zone found with key {key}") + + +def load_data(fobj): + header = _TZifHeader.from_file(fobj) + + if header.version == 1: + time_size = 4 + time_type = "l" + else: + # Version 2+ has 64-bit integer transition times + time_size = 8 + time_type = "q" + + # Version 2+ also starts with a Version 1 header and data, which + # we need to skip now + skip_bytes = ( + header.timecnt * 5 # Transition times and types + + header.typecnt * 6 # Local time type records + + header.charcnt # Time zone designations + + header.leapcnt * 8 # Leap second records + + header.isstdcnt # Standard/wall indicators + + header.isutcnt # UT/local indicators + ) + + fobj.seek(skip_bytes, 1) + + # Now we need to read the second header, which is not the same + # as the first + header = _TZifHeader.from_file(fobj) + + typecnt = header.typecnt + timecnt = header.timecnt + charcnt = header.charcnt + + # The data portion starts with timecnt transitions and indices + if timecnt: + trans_list_utc = struct.unpack( + f">{timecnt}{time_type}", fobj.read(timecnt * time_size) + ) + trans_idx = struct.unpack(f">{timecnt}B", fobj.read(timecnt)) + else: + trans_list_utc = () + trans_idx = () + + # Read the ttinfo struct, (utoff, isdst, abbrind) + if typecnt: + utcoff, isdst, abbrind = zip( + *(struct.unpack(">lbb", fobj.read(6)) for i in range(typecnt)) + ) + else: + utcoff = () + isdst = () + abbrind = () + + # Now read the abbreviations. They are null-terminated strings, indexed + # not by position in the array but by position in the unsplit + # abbreviation string. I suppose this makes more sense in C, which uses + # null to terminate the strings, but it's inconvenient here... 
+ char_total = 0 + abbr_vals = {} + abbr_chars = fobj.read(charcnt) + + def get_abbr(idx): + # Gets a string starting at idx and running until the next \x00 + # + # We cannot pre-populate abbr_vals by splitting on \x00 because there + # are some zones that use subsets of longer abbreviations, like so: + # + # LMT\x00AHST\x00HDT\x00 + # + # Where the idx to abbr mapping should be: + # + # {0: "LMT", 4: "AHST", 5: "HST", 9: "HDT"} + if idx not in abbr_vals: + span_end = abbr_chars.find(b"\x00", idx) + abbr_vals[idx] = abbr_chars[idx:span_end].decode() + + return abbr_vals[idx] + + abbr = tuple(get_abbr(idx) for idx in abbrind) + + # The remainder of the file consists of leap seconds (currently unused) and + # the standard/wall and ut/local indicators, which are metadata we don't need. + # In version 2 files, we need to skip the unnecessary data to get at the TZ string: + if header.version >= 2: + # Each leap second record has size (time_size + 4) + skip_bytes = header.isutcnt + header.isstdcnt + header.leapcnt * 12 + fobj.seek(skip_bytes, 1) + + c = fobj.read(1) # Should be \n + assert c == b"\n", c + + tz_bytes = b"" + while (c := fobj.read(1)) != b"\n": + tz_bytes += c + + tz_str = tz_bytes + else: + tz_str = None + + return trans_idx, trans_list_utc, utcoff, isdst, abbr, tz_str + + +class _TZifHeader: + __slots__ = [ + "version", + "isutcnt", + "isstdcnt", + "leapcnt", + "timecnt", + "typecnt", + "charcnt", + ] + + def __init__(self, *args): + assert len(self.__slots__) == len(args) + for attr, val in zip(self.__slots__, args): + setattr(self, attr, val) + + @classmethod + def from_file(cls, stream): + # The header starts with a 4-byte "magic" value + if stream.read(4) != b"TZif": + raise ValueError("Invalid TZif file: magic not found") + + _version = stream.read(1) + if _version == b"\x00": + version = 1 + else: + version = int(_version) + stream.read(15) + + args = (version,) + + # Slots are defined in the order that the bytes are arranged + args = args + 
struct.unpack(">6l", stream.read(24)) + + return cls(*args) + + +class ZoneInfoNotFoundError(KeyError): + """Exception raised when a ZoneInfo key is not found.""" diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py new file mode 100644 index 00000000000000..8cff0b171bf32f --- /dev/null +++ b/Lib/zoneinfo/_tzpath.py @@ -0,0 +1,110 @@ +import os +import sys +import sysconfig + + +def reset_tzpath(to=None): + global TZPATH + + tzpaths = to + if tzpaths is not None: + if isinstance(tzpaths, (str, bytes)): + raise TypeError( + f"tzpaths must be a list or tuple, " + + f"not {type(tzpaths)}: {tzpaths!r}" + ) + elif not all(map(os.path.isabs, tzpaths)): + raise ValueError(_get_invalid_paths_message(tzpaths)) + base_tzpath = tzpaths + else: + env_var = os.environ.get("PYTHONTZPATH", None) + if env_var is not None: + base_tzpath = _parse_python_tzpath(env_var) + else: + base_tzpath = _parse_python_tzpath( + sysconfig.get_config_var("TZPATH") + ) + + TZPATH = tuple(base_tzpath) + + +def _parse_python_tzpath(env_var): + if not env_var: + return () + + raw_tzpath = env_var.split(os.pathsep) + new_tzpath = tuple(filter(os.path.isabs, raw_tzpath)) + + # If anything has been filtered out, we will warn about it + if len(new_tzpath) != len(raw_tzpath): + import warnings + + msg = _get_invalid_paths_message(raw_tzpath) + + warnings.warn( + "Invalid paths specified in PYTHONTZPATH environment variable." 
+ + msg, + InvalidTZPathWarning, + ) + + return new_tzpath + + +def _get_invalid_paths_message(tzpaths): + invalid_paths = (path for path in tzpaths if not os.path.isabs(path)) + + prefix = "\n " + indented_str = prefix + prefix.join(invalid_paths) + + return ( + "Paths should be absolute but found the following relative paths:" + + indented_str + ) + + +def find_tzfile(key): + """Retrieve the path to a TZif file from a key.""" + _validate_tzfile_path(key) + for search_path in TZPATH: + filepath = os.path.join(search_path, key) + if os.path.isfile(filepath): + return filepath + + return None + + +_TEST_PATH = os.path.normpath(os.path.join("_", "_"))[:-1] + + +def _validate_tzfile_path(path, _base=_TEST_PATH): + if os.path.isabs(path): + raise ValueError( + f"ZoneInfo keys may not be absolute paths, got: {path}" + ) + + # We only care about the kinds of path normalizations that would change the + # length of the key - e.g. a/../b -> a/b, or a/b/ -> a/b. On Windows, + # normpath will also change from a/b to a\b, but that would still preserve + # the length. + new_path = os.path.normpath(path) + if len(new_path) != len(path): + raise ValueError( + f"ZoneInfo keys must be normalized relative paths, got: {path}" + ) + + resolved = os.path.normpath(os.path.join(_base, new_path)) + if not resolved.startswith(_base): + raise ValueError( + f"ZoneInfo keys must refer to subdirectories of TZPATH, got: {path}" + ) + + +del _TEST_PATH + + +class InvalidTZPathWarning(RuntimeWarning): + """Warning raised if an invalid path is specified in PYTHONTZPATH.""" + + +TZPATH = () +reset_tzpath() diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py new file mode 100644 index 00000000000000..69133ae80a4932 --- /dev/null +++ b/Lib/zoneinfo/_zoneinfo.py @@ -0,0 +1,755 @@ +import bisect +import calendar +import collections +import functools +import os +import re +import struct +import sys +import weakref +from datetime import datetime, timedelta, timezone, tzinfo + +from . 
import _common, _tzpath + +EPOCH = datetime(1970, 1, 1) +EPOCHORDINAL = datetime(1970, 1, 1).toordinal() + +# It is relatively expensive to construct new timedelta objects, and in most +# cases we're looking at the same deltas, like integer numbers of hours, etc. +# To improve speed and memory use, we'll keep a dictionary with references +# to the ones we've already used so far. +# +# Loading every time zone in the 2020a version of the time zone database +# requires 447 timedeltas, which requires approximately the amount of space +# that ZoneInfo("America/New_York") with 236 transitions takes up, so we will +# set the cache size to 512 so that in the common case we always get cache +# hits, but specifically crafted ZoneInfo objects don't leak arbitrary amounts +# of memory. +@functools.lru_cache(maxsize=512) +def _load_timedelta(seconds): + return timedelta(seconds=seconds) + + +class ZoneInfo(tzinfo): + _strong_cache_size = 8 + _strong_cache = collections.OrderedDict() + _weak_cache = weakref.WeakValueDictionary() + __module__ = "zoneinfo" + + def __init_subclass__(cls): + cls._strong_cache = collections.OrderedDict() + cls._weak_cache = weakref.WeakValueDictionary() + + def __new__(cls, key): + instance = cls._weak_cache.get(key, None) + if instance is None: + instance = cls._weak_cache.setdefault(key, cls._new_instance(key)) + instance._from_cache = True + + # Update the "strong" cache + cls._strong_cache[key] = cls._strong_cache.pop(key, instance) + + if len(cls._strong_cache) > cls._strong_cache_size: + cls._strong_cache.popitem(last=False) + + return instance + + @classmethod + def no_cache(cls, key): + obj = cls._new_instance(key) + obj._from_cache = False + + return obj + + @classmethod + def _new_instance(cls, key): + obj = super().__new__(cls) + obj._key = key + obj._file_path = obj._find_tzfile(key) + + if obj._file_path is not None: + file_obj = open(obj._file_path, "rb") + else: + file_obj = _common.load_tzdata(key) + + with file_obj as f: + 
obj._load_file(f) + + return obj + + @classmethod + def from_file(cls, fobj, /, key=None): + obj = super().__new__(cls) + obj._key = key + obj._file_path = None + obj._load_file(fobj) + obj._file_repr = repr(fobj) + + # Disable pickling for objects created from files + obj.__reduce__ = obj._file_reduce + + return obj + + @classmethod + def clear_cache(cls, *, only_keys=None): + if only_keys is not None: + for key in only_keys: + cls._weak_cache.pop(key, None) + cls._strong_cache.pop(key, None) + + else: + cls._weak_cache.clear() + cls._strong_cache.clear() + + @property + def key(self): + return self._key + + def utcoffset(self, dt): + return self._find_trans(dt).utcoff + + def dst(self, dt): + return self._find_trans(dt).dstoff + + def tzname(self, dt): + return self._find_trans(dt).tzname + + def fromutc(self, dt): + """Convert from datetime in UTC to datetime in local time""" + + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + timestamp = self._get_local_timestamp(dt) + num_trans = len(self._trans_utc) + + if num_trans >= 1 and timestamp < self._trans_utc[0]: + tti = self._tti_before + fold = 0 + elif ( + num_trans == 0 or timestamp > self._trans_utc[-1] + ) and not isinstance(self._tz_after, _ttinfo): + tti, fold = self._tz_after.get_trans_info_fromutc( + timestamp, dt.year + ) + elif num_trans == 0: + tti = self._tz_after + fold = 0 + else: + idx = bisect.bisect_right(self._trans_utc, timestamp) + + if num_trans > 1 and timestamp >= self._trans_utc[1]: + tti_prev, tti = self._ttinfos[idx - 2 : idx] + elif timestamp > self._trans_utc[-1]: + tti_prev = self._ttinfos[-1] + tti = self._tz_after + else: + tti_prev = self._tti_before + tti = self._ttinfos[0] + + # Detect fold + shift = tti_prev.utcoff - tti.utcoff + fold = shift.total_seconds() > timestamp - self._trans_utc[idx - 1] + dt += tti.utcoff + if fold: + return dt.replace(fold=1) + else: 
+ return dt + + def _find_trans(self, dt): + if dt is None: + if self._fixed_offset: + return self._tz_after + else: + return _NO_TTINFO + + ts = self._get_local_timestamp(dt) + + lt = self._trans_local[dt.fold] + + num_trans = len(lt) + + if num_trans and ts < lt[0]: + return self._tti_before + elif not num_trans or ts > lt[-1]: + if isinstance(self._tz_after, _TZStr): + return self._tz_after.get_trans_info(ts, dt.year, dt.fold) + else: + return self._tz_after + else: + # idx is the transition that occurs after this timestamp, so we + # subtract off 1 to get the current ttinfo + idx = bisect.bisect_right(lt, ts) - 1 + assert idx >= 0 + return self._ttinfos[idx] + + def _get_local_timestamp(self, dt): + return ( + (dt.toordinal() - EPOCHORDINAL) * 86400 + + dt.hour * 3600 + + dt.minute * 60 + + dt.second + ) + + def __str__(self): + if self._key is not None: + return f"{self._key}" + else: + return repr(self) + + def __repr__(self): + if self._key is not None: + return f"{self.__class__.__name__}(key={self._key!r})" + else: + return f"{self.__class__.__name__}.from_file({self._file_repr})" + + def __reduce__(self): + return (self.__class__._unpickle, (self._key, self._from_cache)) + + def _file_reduce(self): + import pickle + + raise pickle.PicklingError( + "Cannot pickle a ZoneInfo file created from a file stream." 
+ ) + + @classmethod + def _unpickle(cls, key, from_cache, /): + if from_cache: + return cls(key) + else: + return cls.no_cache(key) + + def _find_tzfile(self, key): + return _tzpath.find_tzfile(key) + + def _load_file(self, fobj): + # Retrieve all the data as it exists in the zoneinfo file + trans_idx, trans_utc, utcoff, isdst, abbr, tz_str = _common.load_data( + fobj + ) + + # Infer the DST offsets (needed for .dst()) from the data + dstoff = self._utcoff_to_dstoff(trans_idx, utcoff, isdst) + + # Convert all the transition times (UTC) into "seconds since 1970-01-01 local time" + trans_local = self._ts_to_local(trans_idx, trans_utc, utcoff) + + # Construct `_ttinfo` objects for each transition in the file + _ttinfo_list = [ + _ttinfo( + _load_timedelta(utcoffset), _load_timedelta(dstoffset), tzname + ) + for utcoffset, dstoffset, tzname in zip(utcoff, dstoff, abbr) + ] + + self._trans_utc = trans_utc + self._trans_local = trans_local + self._ttinfos = [_ttinfo_list[idx] for idx in trans_idx] + + # Find the first non-DST transition + for i in range(len(isdst)): + if not isdst[i]: + self._tti_before = _ttinfo_list[i] + break + else: + if self._ttinfos: + self._tti_before = self._ttinfos[0] + else: + self._tti_before = None + + # Set the "fallback" time zone + if tz_str is not None and tz_str != b"": + self._tz_after = _parse_tz_str(tz_str.decode()) + else: + if not self._ttinfos and not _ttinfo_list: + raise ValueError("No time zone information found.") + + if self._ttinfos: + self._tz_after = self._ttinfos[-1] + else: + self._tz_after = _ttinfo_list[-1] + + # Determine if this is a "fixed offset" zone, meaning that the output + # of the utcoffset, dst and tzname functions does not depend on the + # specific datetime passed. + # + # We make three simplifying assumptions here: + # + # 1. 
If _tz_after is not a _ttinfo, it has transitions that might + # actually occur (it is possible to construct TZ strings that + # specify STD and DST but no transitions ever occur, such as + # AAA0BBB,0/0,J365/25). + # 2. If _ttinfo_list contains more than one _ttinfo object, the objects + # represent different offsets. + # 3. _ttinfo_list contains no unused _ttinfos (in which case an + # otherwise fixed-offset zone with extra _ttinfos defined may + # appear to *not* be a fixed offset zone). + # + # Violations to these assumptions would be fairly exotic, and exotic + # zones should almost certainly not be used with datetime.time (the + # only thing that would be affected by this). + if len(_ttinfo_list) > 1 or not isinstance(self._tz_after, _ttinfo): + self._fixed_offset = False + elif not _ttinfo_list: + self._fixed_offset = True + else: + self._fixed_offset = _ttinfo_list[0] == self._tz_after + + @staticmethod + def _utcoff_to_dstoff(trans_idx, utcoffsets, isdsts): + # Now we must transform our ttis and abbrs into `_ttinfo` objects, + # but there is an issue: .dst() must return a timedelta with the + # difference between utcoffset() and the "standard" offset, but + # the "base offset" and "DST offset" are not encoded in the file; + # we can infer what they are from the isdst flag, but it is not + # sufficient to to just look at the last standard offset, because + # occasionally countries will shift both DST offset and base offset. + + typecnt = len(isdsts) + dstoffs = [0] * typecnt # Provisionally assign all to 0. 
+ dst_cnt = sum(isdsts) + dst_found = 0 + + for i in range(1, len(trans_idx)): + if dst_cnt == dst_found: + break + + idx = trans_idx[i] + + dst = isdsts[idx] + + # We're only going to look at daylight saving time + if not dst: + continue + + # Skip any offsets that have already been assigned + if dstoffs[idx] != 0: + continue + + dstoff = 0 + utcoff = utcoffsets[idx] + + comp_idx = trans_idx[i - 1] + + if not isdsts[comp_idx]: + dstoff = utcoff - utcoffsets[comp_idx] + + if not dstoff and idx < (typecnt - 1): + comp_idx = trans_idx[i + 1] + + # If the following transition is also DST and we couldn't + # find the DST offset by this point, we're going ot have to + # skip it and hope this transition gets assigned later + if isdsts[comp_idx]: + continue + + dstoff = utcoff - utcoffsets[comp_idx] + + if dstoff: + dst_found += 1 + dstoffs[idx] = dstoff + else: + # If we didn't find a valid value for a given index, we'll end up + # with dstoff = 0 for something where `isdst=1`. This is obviously + # wrong - one hour will be a much better guess than 0 + for idx in range(typecnt): + if not dstoffs[idx] and isdsts[idx]: + dstoffs[idx] = 3600 + + return dstoffs + + @staticmethod + def _ts_to_local(trans_idx, trans_list_utc, utcoffsets): + """Generate number of seconds since 1970 *in the local time*. 
+ + This is necessary to easily find the transition times in local time""" + if not trans_list_utc: + return [[], []] + + # Start with the timestamps and modify in-place + trans_list_wall = [list(trans_list_utc), list(trans_list_utc)] + + if len(utcoffsets) > 1: + offset_0 = utcoffsets[0] + offset_1 = utcoffsets[trans_idx[0]] + if offset_1 > offset_0: + offset_1, offset_0 = offset_0, offset_1 + else: + offset_0 = offset_1 = utcoffsets[0] + + trans_list_wall[0][0] += offset_0 + trans_list_wall[1][0] += offset_1 + + for i in range(1, len(trans_idx)): + offset_0 = utcoffsets[trans_idx[i - 1]] + offset_1 = utcoffsets[trans_idx[i]] + + if offset_1 > offset_0: + offset_1, offset_0 = offset_0, offset_1 + + trans_list_wall[0][i] += offset_0 + trans_list_wall[1][i] += offset_1 + + return trans_list_wall + + +class _ttinfo: + __slots__ = ["utcoff", "dstoff", "tzname"] + + def __init__(self, utcoff, dstoff, tzname): + self.utcoff = utcoff + self.dstoff = dstoff + self.tzname = tzname + + def __eq__(self, other): + return ( + self.utcoff == other.utcoff + and self.dstoff == other.dstoff + and self.tzname == other.tzname + ) + + def __repr__(self): # pragma: nocover + return ( + f"{self.__class__.__name__}" + + f"({self.utcoff}, {self.dstoff}, {self.tzname})" + ) + + +_NO_TTINFO = _ttinfo(None, None, None) + + +class _TZStr: + __slots__ = ( + "std", + "dst", + "start", + "end", + "get_trans_info", + "get_trans_info_fromutc", + "dst_diff", + ) + + def __init__( + self, std_abbr, std_offset, dst_abbr, dst_offset, start=None, end=None + ): + self.dst_diff = dst_offset - std_offset + std_offset = _load_timedelta(std_offset) + self.std = _ttinfo( + utcoff=std_offset, dstoff=_load_timedelta(0), tzname=std_abbr + ) + + self.start = start + self.end = end + + dst_offset = _load_timedelta(dst_offset) + delta = _load_timedelta(self.dst_diff) + self.dst = _ttinfo(utcoff=dst_offset, dstoff=delta, tzname=dst_abbr) + + # These are assertions because the constructor should only be called + # 
by functions that would fail before passing start or end + assert start is not None, "No transition start specified" + assert end is not None, "No transition end specified" + + self.get_trans_info = self._get_trans_info + self.get_trans_info_fromutc = self._get_trans_info_fromutc + + def transitions(self, year): + start = self.start.year_to_epoch(year) + end = self.end.year_to_epoch(year) + return start, end + + def _get_trans_info(self, ts, year, fold): + """Get the information about the current transition - tti""" + start, end = self.transitions(year) + + # With fold = 0, the period (denominated in local time) with the + # smaller offset starts at the end of the gap and ends at the end of + # the fold; with fold = 1, it runs from the start of the gap to the + # beginning of the fold. + # + # So in order to determine the DST boundaries we need to know both + # the fold and whether DST is positive or negative (rare), and it + # turns out that this boils down to fold XOR is_positive. + if fold == (self.dst_diff >= 0): + end -= self.dst_diff + else: + start += self.dst_diff + + if start < end: + isdst = start <= ts < end + else: + isdst = not (end <= ts < start) + + return self.dst if isdst else self.std + + def _get_trans_info_fromutc(self, ts, year): + start, end = self.transitions(year) + start -= self.std.utcoff.total_seconds() + end -= self.dst.utcoff.total_seconds() + + if start < end: + isdst = start <= ts < end + else: + isdst = not (end <= ts < start) + + # For positive DST, the ambiguous period is one dst_diff after the end + # of DST; for negative DST, the ambiguous period is one dst_diff before + # the start of DST. 
+ if self.dst_diff > 0: + ambig_start = end + ambig_end = end + self.dst_diff + else: + ambig_start = start + ambig_end = start - self.dst_diff + + fold = ambig_start <= ts < ambig_end + + return (self.dst if isdst else self.std, fold) + + +def _post_epoch_days_before_year(year): + """Get the number of days between 1970-01-01 and YEAR-01-01""" + y = year - 1 + return y * 365 + y // 4 - y // 100 + y // 400 - EPOCHORDINAL + + +class _DayOffset: + __slots__ = ["d", "julian", "hour", "minute", "second"] + + def __init__(self, d, julian, hour=2, minute=0, second=0): + if not (0 + julian) <= d <= 365: + min_day = 0 + julian + raise ValueError(f"d must be in [{min_day}, 365], not: {d}") + + self.d = d + self.julian = julian + self.hour = hour + self.minute = minute + self.second = second + + def year_to_epoch(self, year): + days_before_year = _post_epoch_days_before_year(year) + + d = self.d + if self.julian and d >= 59 and calendar.isleap(year): + d += 1 + + epoch = (days_before_year + d) * 86400 + epoch += self.hour * 3600 + self.minute * 60 + self.second + + return epoch + + +class _CalendarOffset: + __slots__ = ["m", "w", "d", "hour", "minute", "second"] + + _DAYS_BEFORE_MONTH = ( + -1, + 0, + 31, + 59, + 90, + 120, + 151, + 181, + 212, + 243, + 273, + 304, + 334, + ) + + def __init__(self, m, w, d, hour=2, minute=0, second=0): + if not 0 < m <= 12: + raise ValueError("m must be in (0, 12]") + + if not 0 < w <= 5: + raise ValueError("w must be in (0, 5]") + + if not 0 <= d <= 6: + raise ValueError("d must be in [0, 6]") + + self.m = m + self.w = w + self.d = d + self.hour = hour + self.minute = minute + self.second = second + + @classmethod + def _ymd2ord(cls, year, month, day): + return ( + _post_epoch_days_before_year(year) + + cls._DAYS_BEFORE_MONTH[month] + + (month > 2 and calendar.isleap(year)) + + day + ) + + # TODO: These are not actually epoch dates as they are expressed in local time + def year_to_epoch(self, year): + """Calculates the datetime of the 
occurrence from the year""" + # We know year and month, we need to convert w, d into day of month + # + # Week 1 is the first week in which day `d` (where 0 = Sunday) appears. + # Week 5 represents the last occurrence of day `d`, so we need to know + # the range of the month. + first_day, days_in_month = calendar.monthrange(year, self.m) + + # This equation seems magical, so I'll break it down: + # 1. calendar says 0 = Monday, POSIX says 0 = Sunday + # so we need first_day + 1 to get 1 = Monday -> 7 = Sunday, + # which is still equivalent because this math is mod 7 + # 2. Get first day - desired day mod 7: -1 % 7 = 6, so we don't need + # to do anything to adjust negative numbers. + # 3. Add 1 because month days are a 1-based index. + month_day = (self.d - (first_day + 1)) % 7 + 1 + + # Now use a 0-based index version of `w` to calculate the w-th + # occurrence of `d` + month_day += (self.w - 1) * 7 + + # month_day will only be > days_in_month if w was 5, and `w` means + # "last occurrence of `d`", so now we just check if we over-shot the + # end of the month and if so knock off 1 week. + if month_day > days_in_month: + month_day -= 7 + + ordinal = self._ymd2ord(year, self.m, month_day) + epoch = ordinal * 86400 + epoch += self.hour * 3600 + self.minute * 60 + self.second + return epoch + + +def _parse_tz_str(tz_str): + # The tz string has the format: + # + # std[offset[dst[offset],start[/time],end[/time]]] + # + # std and dst must be 3 or more characters long and must not contain + # a leading colon, embedded digits, commas, nor a plus or minus signs; + # The spaces between "std" and "offset" are only for display and are + # not actually present in the string. 
+ # + # The format of the offset is ``[+|-]hh[:mm[:ss]]`` + + offset_str, *start_end_str = tz_str.split(",", 1) + + # fmt: off + parser_re = re.compile( + r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + + r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" + + r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + + r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" + + r")?" + # dst + r")?$" # stdoff + ) + # fmt: on + + m = parser_re.match(offset_str) + + if m is None: + raise ValueError(f"{tz_str} is not a valid TZ string") + + std_abbr = m.group("std") + dst_abbr = m.group("dst") + dst_offset = None + + std_abbr = std_abbr.strip("<>") + + if dst_abbr: + dst_abbr = dst_abbr.strip("<>") + + if std_offset := m.group("stdoff"): + try: + std_offset = _parse_tz_delta(std_offset) + except ValueError as e: + raise ValueError(f"Invalid STD offset in {tz_str}") from e + else: + std_offset = 0 + + if dst_abbr is not None: + if dst_offset := m.group("dstoff"): + try: + dst_offset = _parse_tz_delta(dst_offset) + except ValueError as e: + raise ValueError(f"Invalid DST offset in {tz_str}") from e + else: + dst_offset = std_offset + 3600 + + if not start_end_str: + raise ValueError(f"Missing transition rules: {tz_str}") + + start_end_strs = start_end_str[0].split(",", 1) + try: + start, end = (_parse_dst_start_end(x) for x in start_end_strs) + except ValueError as e: + raise ValueError(f"Invalid TZ string: {tz_str}") from e + + return _TZStr(std_abbr, std_offset, dst_abbr, dst_offset, start, end) + elif start_end_str: + raise ValueError(f"Transition rule present without DST: {tz_str}") + else: + # This is a static ttinfo, don't return _TZStr + return _ttinfo( + _load_timedelta(std_offset), _load_timedelta(0), std_abbr + ) + + +def _parse_dst_start_end(dststr): + date, *time = dststr.split("/") + if date[0] == "M": + n_is_julian = False + m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date) + if m is None: + raise ValueError(f"Invalid dst start/end date: {dststr}") + date_offset = tuple(map(int, m.groups())) + offset = 
_CalendarOffset(*date_offset) + else: + if date[0] == "J": + n_is_julian = True + date = date[1:] + else: + n_is_julian = False + + doy = int(date) + offset = _DayOffset(doy, n_is_julian) + + if time: + time_components = list(map(int, time[0].split(":"))) + n_components = len(time_components) + if n_components < 3: + time_components.extend([0] * (3 - n_components)) + offset.hour, offset.minute, offset.second = time_components + + return offset + + +def _parse_tz_delta(tz_delta): + match = re.match( + r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + tz_delta, + ) + # Anything passed to this function should already have hit an equivalent + # regular expression to find the section to parse. + assert match is not None, tz_delta + + h, m, s = ( + int(v) if v is not None else 0 + for v in map(match.group, ("h", "m", "s")) + ) + + total = h * 3600 + m * 60 + s + + if not -86400 < total < 86400: + raise ValueError( + "Offset must be strictly between -24h and +24h:" + tz_delta + ) + + # Yes, +5 maps to an offset of -5h + if match.group("sign") != "-": + total *= -1 + + return total diff --git a/Makefile.pre.in b/Makefile.pre.in index d545a9efb3cd99..dbfd805f1a02fe 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -197,6 +197,9 @@ OPENSSL_INCLUDES=@OPENSSL_INCLUDES@ OPENSSL_LIBS=@OPENSSL_LIBS@ OPENSSL_LDFLAGS=@OPENSSL_LDFLAGS@ +# Default zoneinfo.TZPATH. Added here to expose it in sysconfig.get_config_var +TZPATH=@TZPATH@ + # Modes for directories, executables and data files created by the # install process. Default to user-only-writable for all file types. 
DIRMODE= 755 diff --git a/Misc/requirements-test.txt b/Misc/requirements-test.txt new file mode 100644 index 00000000000000..6e46c12e4f9d13 --- /dev/null +++ b/Misc/requirements-test.txt @@ -0,0 +1 @@ +tzdata==2020.1rc0 diff --git a/Modules/Setup b/Modules/Setup index 87e73bac78faec..02cfb67518df74 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -181,6 +181,7 @@ _symtable symtablemodule.c #_elementtree -I$(srcdir)/Modules/expat -DHAVE_EXPAT_CONFIG_H -DUSE_PYEXPAT_CAPI _elementtree.c # elementtree accelerator #_pickle _pickle.c # pickle accelerator #_datetime _datetimemodule.c # datetime accelerator +#_zoneinfo _zoneinfo.c # zoneinfo accelerator #_bisect _bisectmodule.c # Bisection algorithms #_heapq _heapqmodule.c # Heap queue algorithm #_asyncio _asynciomodule.c # Fast asyncio Future diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c new file mode 100644 index 00000000000000..9f5e64d8486ccc --- /dev/null +++ b/Modules/_zoneinfo.c @@ -0,0 +1,2695 @@ +#include "Python.h" +#include "structmember.h" + +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> + +#include "datetime.h" + +// Imports +static PyObject *io_open = NULL; +static PyObject *_tzpath_find_tzfile = NULL; +static PyObject *_common_mod = NULL; + +typedef struct TransitionRuleType TransitionRuleType; +typedef struct StrongCacheNode StrongCacheNode; + +typedef struct { + PyObject *utcoff; + PyObject *dstoff; + PyObject *tzname; + long utcoff_seconds; +} _ttinfo; + +typedef struct { + _ttinfo std; + _ttinfo dst; + int dst_diff; + TransitionRuleType *start; + TransitionRuleType *end; + unsigned char std_only; +} _tzrule; + +typedef struct { + PyDateTime_TZInfo base; + PyObject *key; + PyObject *file_repr; + PyObject *weakreflist; + unsigned int num_transitions; + unsigned int num_ttinfos; + int64_t *trans_list_utc; + int64_t *trans_list_wall[2]; + _ttinfo **trans_ttinfos; // References to the ttinfo for each transition + _ttinfo *ttinfo_before; + _tzrule tzrule_after; + _ttinfo *_ttinfos; // Unique array of ttinfos for ease 
of deallocation + unsigned char fixed_offset; + unsigned char source; +} PyZoneInfo_ZoneInfo; + +struct TransitionRuleType { + int64_t (*year_to_timestamp)(TransitionRuleType *, int); +}; + +typedef struct { + TransitionRuleType base; + uint8_t month; + uint8_t week; + uint8_t day; + int8_t hour; + int8_t minute; + int8_t second; +} CalendarRule; + +typedef struct { + TransitionRuleType base; + uint8_t julian; + unsigned int day; + int8_t hour; + int8_t minute; + int8_t second; +} DayRule; + +struct StrongCacheNode { + StrongCacheNode *next; + StrongCacheNode *prev; + PyObject *key; + PyObject *zone; +}; + +static PyTypeObject PyZoneInfo_ZoneInfoType; + +// Globals +static PyObject *TIMEDELTA_CACHE = NULL; +static PyObject *ZONEINFO_WEAK_CACHE = NULL; +static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL; +static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8; + +static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0}; + +// Constants +static const int EPOCHORDINAL = 719163; +static int DAYS_IN_MONTH[] = { + -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, +}; + +static int DAYS_BEFORE_MONTH[] = { + -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, +}; + +static const int SOURCE_NOCACHE = 0; +static const int SOURCE_CACHE = 1; +static const int SOURCE_FILE = 2; + +// Forward declarations +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj); +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos); +static int +ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, + int64_t *trans_local[2], size_t num_ttinfos, + size_t num_transitions); + +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out); + +static ssize_t +parse_abbr(const char *const p, PyObject **abbr); +static ssize_t +parse_tz_delta(const char *const p, long *total_seconds); +static ssize_t +parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, + int8_t 
*second); +static ssize_t +parse_transition_rule(const char *const p, TransitionRuleType **out); + +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold); + +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out); +static void +xdecref_ttinfo(_ttinfo *ttinfo); +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1); + +static int +build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, + long dst_offset, TransitionRuleType *start, + TransitionRuleType *end, _tzrule *out); +static void +free_tzrule(_tzrule *tzrule); + +static PyObject * +load_timedelta(long seconds); + +static int +get_local_timestamp(PyObject *dt, int64_t *local_ts); +static _ttinfo * +find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt); + +static int +ymd_to_ord(int y, int m, int d); +static int +is_leap_year(int year); + +static size_t +_bisect(const int64_t value, const int64_t *arr, size_t size); + +static void +eject_from_strong_cache(const PyTypeObject *const type, PyObject *key); +static void +clear_strong_cache(const PyTypeObject *const type); +static void +update_strong_cache(const PyTypeObject *const type, PyObject *key, + PyObject *zone); +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *key); + +static PyObject * +zoneinfo_new_instance(PyTypeObject *type, PyObject *key) +{ + PyObject *file_obj = NULL; + PyObject *file_path = NULL; + + file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL); + if (file_path == NULL) { + return NULL; + } + else if (file_path == Py_None) { + file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); + if (file_obj == NULL) { + Py_DECREF(file_path); + return NULL; + } + } + + PyObject *self = (PyObject *)(type->tp_alloc(type, 0)); + if (self == NULL) { + goto error; + } + + if 
(file_obj == NULL) { + file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb"); + if (file_obj == NULL) { + goto error; + } + } + + if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) { + goto error; + } + + PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(file_obj); + file_obj = NULL; + if (rv == NULL) { + goto error; + } + Py_DECREF(rv); + + ((PyZoneInfo_ZoneInfo *)self)->key = key; + Py_INCREF(key); + + goto cleanup; +error: + Py_XDECREF(self); + self = NULL; +cleanup: + if (file_obj != NULL) { + PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(tmp); + Py_DECREF(file_obj); + } + Py_DECREF(file_path); + return self; +} + +static PyObject * +get_weak_cache(PyTypeObject *type) +{ + if (type == &PyZoneInfo_ZoneInfoType) { + return ZONEINFO_WEAK_CACHE; + } + else { + PyObject *cache = + PyObject_GetAttrString((PyObject *)type, "_weak_cache"); + // We are assuming that the type lives at least as long as the function + // that calls get_weak_cache, and that it holds a reference to the + // cache, so we'll return a "borrowed reference". 
+ Py_XDECREF(cache); + return cache; + } +} + +static PyObject * +zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) +{ + PyObject *key = NULL; + static char *kwlist[] = {"key", NULL}; + if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { + return NULL; + } + + PyObject *instance = zone_from_strong_cache(type, key); + if (instance != NULL) { + return instance; + } + + PyObject *weak_cache = get_weak_cache(type); + instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None); + if (instance == NULL) { + return NULL; + } + + if (instance == Py_None) { + Py_DECREF(instance); + PyObject *tmp = zoneinfo_new_instance(type, key); + if (tmp == NULL) { + return NULL; + } + + instance = + PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); + ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; + + Py_DECREF(tmp); + + if (instance == NULL) { + return NULL; + } + } + + update_strong_cache(type, key, instance); + return instance; +} + +static void +zoneinfo_dealloc(PyObject *obj_self) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + if (self->weakreflist != NULL) { + PyObject_ClearWeakRefs(obj_self); + } + + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + } + + for (size_t i = 0; i < 2; i++) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < self->num_ttinfos; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + } + + free_tzrule(&(self->tzrule_after)); + + Py_XDECREF(self->key); + Py_XDECREF(self->file_repr); + + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject * +zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *file_obj = NULL; + PyObject *file_repr = NULL; + PyObject *key = Py_None; + PyZoneInfo_ZoneInfo *self = NULL; + 
+ static char *kwlist[] = {"", "key", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj, + &key)) { + return NULL; + } + + PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0)); + self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self == NULL) { + return NULL; + } + + file_repr = PyUnicode_FromFormat("%R", file_obj); + if (file_repr == NULL) { + goto error; + } + + if (load_data(self, file_obj)) { + goto error; + } + + self->source = SOURCE_FILE; + self->file_repr = file_repr; + self->key = key; + Py_INCREF(key); + + return obj_self; +error: + Py_XDECREF(file_repr); + Py_XDECREF(self); + return NULL; +} + +static PyObject * +zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = {"key", NULL}; + PyObject *key = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) { + return NULL; + } + + PyObject *out = zoneinfo_new_instance(cls, key); + if (out != NULL) { + ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE; + } + + return out; +} + +static PyObject * +zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs) +{ + PyObject *only_keys = NULL; + static char *kwlist[] = {"only_keys", NULL}; + + if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist, + &only_keys))) { + return NULL; + } + + PyTypeObject *type = (PyTypeObject *)cls; + PyObject *weak_cache = get_weak_cache(type); + + if (only_keys == NULL || only_keys == Py_None) { + PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL); + if (rv != NULL) { + Py_DECREF(rv); + } + + clear_strong_cache(type); + ZONEINFO_STRONG_CACHE = NULL; + } + else { + PyObject *item = NULL; + PyObject *pop = PyUnicode_FromString("pop"); + if (pop == NULL) { + return NULL; + } + + PyObject *iter = PyObject_GetIter(only_keys); + if (iter == NULL) { + Py_DECREF(pop); + return NULL; + } + + while ((item = PyIter_Next(iter))) { + // Remove from strong cache + eject_from_strong_cache(type, item); + + // Remove 
from weak cache + PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item, + Py_None, NULL); + + Py_DECREF(item); + if (tmp == NULL) { + break; + } + Py_DECREF(tmp); + } + Py_DECREF(iter); + Py_DECREF(pop); + } + + if (PyErr_Occurred()) { + return NULL; + } + + Py_RETURN_NONE; +} + +static PyObject * +zoneinfo_utcoffset(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->utcoff); + return tti->utcoff; +} + +static PyObject * +zoneinfo_dst(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->dstoff); + return tti->dstoff; +} + +static PyObject * +zoneinfo_tzname(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->tzname); + return tti->tzname; +} + +#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo) +#define GET_DT_TZINFO(p) \ + (HASTZINFO(p) ? 
((PyDateTime_DateTime *)(p))->tzinfo : Py_None) + +static PyObject * +zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) +{ + if (!PyDateTime_Check(dt)) { + PyErr_SetString(PyExc_TypeError, + "fromutc: argument must be a datetime"); + return NULL; + } + if (GET_DT_TZINFO(dt) != obj_self) { + PyErr_SetString(PyExc_ValueError, + "fromutc: dt.tzinfo " + "is not self"); + return NULL; + } + + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + int64_t timestamp; + if (get_local_timestamp(dt, &timestamp)) { + return NULL; + } + size_t num_trans = self->num_transitions; + + _ttinfo *tti = NULL; + unsigned char fold = 0; + + if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) { + tti = self->ttinfo_before; + } + else if (num_trans == 0 || + timestamp > self->trans_list_utc[num_trans - 1]) { + tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp, + PyDateTime_GET_YEAR(dt), &fold); + + // Immediately after the last manual transition, the fold/gap is + // between self->trans_ttinfos[num_transitions - 1] and whatever + // ttinfo applies immediately after the last transition, not between + // the STD and DST rules in the tzrule_after, so we may need to + // adjust the fold value. 
+ if (num_trans) { + _ttinfo *tti_prev = NULL; + if (num_trans == 1) { + tti_prev = self->ttinfo_before; + } + else { + tti_prev = self->trans_ttinfos[num_trans - 2]; + } + int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds; + if (diff > 0 && + timestamp < (self->trans_list_utc[num_trans - 1] + diff)) { + fold = 1; + } + } + } + else { + size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans); + _ttinfo *tti_prev = NULL; + + if (idx >= 2) { + tti_prev = self->trans_ttinfos[idx - 2]; + tti = self->trans_ttinfos[idx - 1]; + } + else { + tti_prev = self->ttinfo_before; + tti = self->trans_ttinfos[0]; + } + + // Detect fold + int64_t shift = + (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds); + if (shift > (timestamp - self->trans_list_utc[idx - 1])) { + fold = 1; + } + } + + PyObject *tmp = PyNumber_Add(dt, tti->utcoff); + if (tmp == NULL) { + return NULL; + } + + if (fold) { + if (PyDateTime_CheckExact(tmp)) { + ((PyDateTime_DateTime *)tmp)->fold = 1; + dt = tmp; + } + else { + PyObject *replace = PyObject_GetAttrString(tmp, "replace"); + PyObject *args = PyTuple_New(0); + PyObject *kwargs = PyDict_New(); + + Py_DECREF(tmp); + if (args == NULL || kwargs == NULL || replace == NULL) { + Py_XDECREF(args); + Py_XDECREF(kwargs); + Py_XDECREF(replace); + return NULL; + } + + dt = NULL; + if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) { + dt = PyObject_Call(replace, args, kwargs); + } + + Py_DECREF(args); + Py_DECREF(kwargs); + Py_DECREF(replace); + + if (dt == NULL) { + return NULL; + } + } + } + else { + dt = tmp; + } + return dt; +} + +static PyObject * +zoneinfo_repr(PyZoneInfo_ZoneInfo *self) +{ + PyObject *rv = NULL; + const char *type_name = Py_TYPE((PyObject *)self)->tp_name; + if (!(self->key == Py_None)) { + rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key); + } + else { + assert(PyUnicode_Check(self->file_repr)); + rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name, + self->file_repr); + } + + return rv; +} + 
+static PyObject * +zoneinfo_str(PyZoneInfo_ZoneInfo *self) +{ + if (!(self->key == Py_None)) { + Py_INCREF(self->key); + return self->key; + } + else { + return zoneinfo_repr(self); + } +} + +/* Pickles the ZoneInfo object by key and source. + * + * ZoneInfo objects are pickled by reference to the TZif file that they came + * from, which means that the exact transitions may be different or the file + * may not un-pickle if the data has changed on disk in the interim. + * + * It is necessary to include a bit indicating whether or not the object + * was constructed from the cache, because from-cache objects will hit the + * unpickling process's cache, whereas no-cache objects will bypass it. + * + * Objects constructed from ZoneInfo.from_file cannot be pickled. + */ +static PyObject * +zoneinfo_reduce(PyObject *obj_self, PyObject *unused) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self->source == SOURCE_FILE) { + // Objects constructed from files cannot be pickled. + PyObject *pickle = PyImport_ImportModule("pickle"); + if (pickle == NULL) { + return NULL; + } + + PyObject *pickle_error = + PyObject_GetAttrString(pickle, "PicklingError"); + Py_DECREF(pickle); + if (pickle_error == NULL) { + return NULL; + } + + PyErr_Format(pickle_error, + "Cannot pickle a ZoneInfo file from a file stream."); + Py_DECREF(pickle_error); + return NULL; + } + + unsigned char from_cache = self->source == SOURCE_CACHE ? 
1 : 0; + PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle"); + + if (constructor == NULL) { + return NULL; + } + + PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache); + Py_DECREF(constructor); + return rv; +} + +static PyObject * +zoneinfo__unpickle(PyTypeObject *cls, PyObject *args) +{ + PyObject *key; + unsigned char from_cache; + if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) { + return NULL; + } + + if (from_cache) { + PyObject *val_args = Py_BuildValue("(O)", key); + if (val_args == NULL) { + return NULL; + } + + PyObject *rv = zoneinfo_new(cls, val_args, NULL); + + Py_DECREF(val_args); + return rv; + } + else { + return zoneinfo_new_instance(cls, key); + } +} + +/* It is relatively expensive to construct new timedelta objects, and in most + * cases we're looking at a relatively small number of timedeltas, such as + * integer number of hours, etc. We will keep a cache so that we construct + * a minimal number of these. + * + * Possibly this should be replaced with an LRU cache so that it's not possible + * for the memory usage to explode from this, but in order for this to be a + * serious problem, one would need to deliberately craft a malicious time zone + * file with many distinct offsets. As of tzdb 2019c, loading every single zone + * fills the cache with ~450 timedeltas for a total size of ~12kB. + * + * This returns a new reference to the timedelta. 
+ */ +static PyObject * +load_timedelta(long seconds) +{ + PyObject *rv = NULL; + PyObject *pyoffset = PyLong_FromLong(seconds); + if (pyoffset == NULL) { + return NULL; + } + int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset); + if (contains == -1) { + goto error; + } + + if (!contains) { + PyObject *tmp = PyDateTimeAPI->Delta_FromDelta( + 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); + + if (tmp == NULL) { + goto error; + } + + rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp); + Py_DECREF(tmp); + } + else { + rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset); + } + + Py_DECREF(pyoffset); + Py_INCREF(rv); + return rv; +error: + Py_DECREF(pyoffset); + return NULL; +} + +/* Constructor for _ttinfo object - this starts by initializing the _ttinfo + * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially + * initialized _ttinfo objects. + */ +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out) +{ + out->utcoff = NULL; + out->dstoff = NULL; + out->tzname = NULL; + + out->utcoff_seconds = utcoffset; + out->utcoff = load_timedelta(utcoffset); + if (out->utcoff == NULL) { + return -1; + } + + out->dstoff = load_timedelta(dstoffset); + if (out->dstoff == NULL) { + return -1; + } + + out->tzname = tzname; + Py_INCREF(tzname); + + return 0; +} + +/* Decrease reference count on any non-NULL members of a _ttinfo */ +static void +xdecref_ttinfo(_ttinfo *ttinfo) +{ + if (ttinfo != NULL) { + Py_XDECREF(ttinfo->utcoff); + Py_XDECREF(ttinfo->dstoff); + Py_XDECREF(ttinfo->tzname); + } +} + +/* Equality function for _ttinfo. 
*/ +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) +{ + int rv; + if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) < + 1) { + goto end; + } +end: + return rv; +} + +/* Given a file-like object, this populates a ZoneInfo object + * + * The current version calls into a Python function to read the data from + * file into Python objects, and this translates those Python objects into + * C values and calculates derived values (e.g. dstoff) in C. + * + * This returns 0 on success and -1 on failure. + * + * The function will never return while `self` is partially initialized — + * the object only needs to be freed / deallocated if this succeeds. + */ +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) +{ + PyObject *data_tuple = NULL; + + long *utcoff = NULL; + long *dstoff = NULL; + size_t *trans_idx = NULL; + unsigned char *isdst = NULL; + + self->trans_list_utc = NULL; + self->trans_list_wall[0] = NULL; + self->trans_list_wall[1] = NULL; + self->trans_ttinfos = NULL; + self->_ttinfos = NULL; + self->file_repr = NULL; + + size_t ttinfos_allocated = 0; + + data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); + + if (data_tuple == NULL) { + goto error; + } + + if (!PyTuple_CheckExact(data_tuple)) { + PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", + data_tuple); + goto error; + } + + // Unpack the data tuple + PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0); + if (trans_idx_list == NULL) { + goto error; + } + + PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1); + if (trans_utc == NULL) { + goto error; + } + + PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2); + if (utcoff_list == NULL) { + goto error; + } + + PyObject *isdst_list = 
PyTuple_GetItem(data_tuple, 3); + if (isdst_list == NULL) { + goto error; + } + + PyObject *abbr = PyTuple_GetItem(data_tuple, 4); + if (abbr == NULL) { + goto error; + } + + PyObject *tz_str = PyTuple_GetItem(data_tuple, 5); + if (tz_str == NULL) { + goto error; + } + + // Load the relevant sizes + Py_ssize_t num_transitions = PyTuple_Size(trans_utc); + if (num_transitions == -1) { + goto error; + } + + Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list); + if (num_ttinfos == -1) { + goto error; + } + + self->num_transitions = (size_t)num_transitions; + self->num_ttinfos = (size_t)num_ttinfos; + + // Load the transition indices and list + self->trans_list_utc = + PyMem_Malloc(self->num_transitions * sizeof(int64_t)); + trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t)); + + for (Py_ssize_t i = 0; i < self->num_transitions; ++i) { + PyObject *num = PyTuple_GetItem(trans_utc, i); + if (num == NULL) { + goto error; + } + self->trans_list_utc[i] = PyLong_AsLongLong(num); + if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = PyTuple_GetItem(trans_idx_list, i); + if (num == NULL) { + goto error; + } + + Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num); + if (cur_trans_idx == -1) { + goto error; + } + + trans_idx[i] = (size_t)cur_trans_idx; + if (trans_idx[i] > self->num_ttinfos) { + PyErr_Format( + PyExc_ValueError, + "Invalid transition index found while reading TZif: %zd", + cur_trans_idx); + + goto error; + } + } + + // Load UTC offsets and isdst (size num_ttinfos) + utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long)); + isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char)); + + if (utcoff == NULL || isdst == NULL) { + goto error; + } + for (Py_ssize_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *num = PyTuple_GetItem(utcoff_list, i); + if (num == NULL) { + goto error; + } + + utcoff[i] = PyLong_AsLong(num); + if (utcoff[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = 
PyTuple_GetItem(isdst_list, i); + if (num == NULL) { + goto error; + } + + int isdst_with_error = PyObject_IsTrue(num); + if (isdst_with_error == -1) { + goto error; + } + else { + isdst[i] = (unsigned char)isdst_with_error; + } + } + + dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long)); + if (dstoff == NULL) { + goto error; + } + + // Derive dstoff and trans_list_wall from the information we've loaded + utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions, + self->num_ttinfos); + + if (ts_to_local(trans_idx, self->trans_list_utc, utcoff, + self->trans_list_wall, self->num_ttinfos, + self->num_transitions)) { + goto error; + } + + // Build _ttinfo objects from utcoff, dstoff and abbr + self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo)); + for (size_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *tzname = PyTuple_GetItem(abbr, i); + if (tzname == NULL) { + goto error; + } + + ttinfos_allocated++; + if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) { + goto error; + } + } + + // Build our mapping from transition to the ttinfo that applies + self->trans_ttinfos = + PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *)); + for (size_t i = 0; i < self->num_transitions; ++i) { + size_t ttinfo_idx = trans_idx[i]; + assert(ttinfo_idx < self->num_ttinfos); + self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]); + } + + // Set ttinfo_before to the first non-DST transition + for (size_t i = 0; i < self->num_ttinfos; ++i) { + if (!isdst[i]) { + self->ttinfo_before = &(self->_ttinfos[i]); + break; + } + } + + // If there are only DST ttinfos, pick the first one, if there are no + // ttinfos at all, set ttinfo_before to NULL + if (self->ttinfo_before == NULL && self->num_ttinfos > 0) { + self->ttinfo_before = &(self->_ttinfos[0]); + } + + if (tz_str != Py_None && PyObject_IsTrue(tz_str)) { + if (parse_tz_str(tz_str, &(self->tzrule_after))) { + goto error; + } + } + else { + if (!self->num_ttinfos) { + 
PyErr_Format(PyExc_ValueError, "No time zone information found."); + goto error; + } + + size_t idx; + if (!self->num_transitions) { + idx = self->num_ttinfos - 1; + } + else { + idx = trans_idx[self->num_transitions - 1]; + } + + _ttinfo *tti = &(self->_ttinfos[idx]); + build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL, + &(self->tzrule_after)); + + // We've abused the build_tzrule constructor to construct an STD-only + // rule mimicking whatever ttinfo we've picked up, but it's possible + // that the one we've picked up is a DST zone, so we need to make sure + // that the dstoff is set correctly in that case. + if (PyObject_IsTrue(tti->dstoff)) { + _ttinfo *tti_after = &(self->tzrule_after.std); + Py_DECREF(tti_after->dstoff); + tti_after->dstoff = tti->dstoff; + Py_INCREF(tti_after->dstoff); + } + } + + // Determine if this is a "fixed offset" zone, meaning that the output of + // the utcoffset, dst and tzname functions does not depend on the specific + // datetime passed. + // + // We make three simplifying assumptions here: + // + // 1. If tzrule_after is not std_only, it has transitions that might occur + // (it is possible to construct TZ strings that specify STD and DST but + // no transitions ever occur, such as AAA0BBB,0/0,J365/25). + // 2. If self->_ttinfos contains more than one _ttinfo object, the objects + // represent different offsets. + // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise + // fixed-offset zone with extra _ttinfos defined may appear to *not* be + // a fixed offset zone). + // + // Violations to these assumptions would be fairly exotic, and exotic + // zones should almost certainly not be used with datetime.time (the + // only thing that would be affected by this). 
+ if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) { + self->fixed_offset = 0; + } + else if (self->num_ttinfos == 0) { + self->fixed_offset = 1; + } + else { + int constant_offset = + ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std); + if (constant_offset < 0) { + goto error; + } + else { + self->fixed_offset = constant_offset; + } + } + + int rv = 0; + goto cleanup; +error: + // These resources only need to be freed if we have failed, if we succeed + // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc + // method to free the relevant resources. + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + self->trans_list_utc = NULL; + } + + for (size_t i = 0; i < 2; ++i) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + self->trans_list_wall[i] = NULL; + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < ttinfos_allocated; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + self->_ttinfos = NULL; + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + self->trans_ttinfos = NULL; + } + + rv = -1; +cleanup: + Py_XDECREF(data_tuple); + + if (utcoff != NULL) { + PyMem_Free(utcoff); + } + + if (dstoff != NULL) { + PyMem_Free(dstoff); + } + + if (isdst != NULL) { + PyMem_Free(isdst); + } + + if (trans_idx != NULL) { + PyMem_Free(trans_idx); + } + + return rv; +} + +/* Function to calculate the local timestamp of a transition from the year. */ +int64_t +calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + CalendarRule *self = (CalendarRule *)base_self; + + // We want (year, month, day of month); we have year and month, but we + // need to turn (week, day-of-week) into day-of-month + // + // Week 1 is the first week in which day `day` (where 0 = Sunday) appears. 
+ // Week 5 represents the last occurrence of day `day`, so we need to know + // the first weekday of the month and the number of days in the month. + int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7; + uint8_t days_in_month = DAYS_IN_MONTH[self->month]; + if (self->month == 2 && is_leap_year(year)) { + days_in_month += 1; + } + + // This equation seems magical, so I'll break it down: + // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day + // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent + // because this math is mod 7 + // 2. Get first day - desired day mod 7 (adjusting by 7 for negative + // numbers so that -1 % 7 = 6). + // 3. Add 1 because month days are a 1-based index. + int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7; + if (month_day < 0) { + month_day += 7; + } + month_day += 1; + + // Now use a 0-based index version of `week` to calculate the w-th + // occurrence of `day` + month_day += ((int8_t)(self->week) - 1) * 7; + + // month_day will only be > days_in_month if w was 5, and `w` means "last + // occurrence of `d`", so now we just check if we over-shot the end of the + // month and if so knock off 1 week. + if (month_day > days_in_month) { + month_day -= 7; + } + + int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; + return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + + (int64_t)(self->minute * 60) + (int64_t)(self->second)); +} + +/* Constructor for CalendarRule. */ +int +calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, + int8_t minute, int8_t second, CalendarRule *out) +{ + // These bounds come from the POSIX standard, which describes an Mm.n.d + // rule as: + // + // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <= + // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which + // may occur in either the fourth or the fifth week). Week 1 is the first + // week in which the d'th day occurs. 
Day zero is Sunday. + if (month <= 0 || month > 12) { + PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); + return -1; + } + + if (week <= 0 || week > 5) { + PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); + return -1; + } + + // day is an unsigned integer, so day < 0 should always return false, but + // if day's type changes to a signed integer *without* changing this value, + // it may create a bug. Considering that the compiler should be able to + // optimize out the first comparison if day is an unsigned integer anyway, + // we will leave this comparison in place and disable the compiler warning. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + if (day < 0 || day > 6) { +#pragma GCC diagnostic pop + PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); + return -1; + } + + TransitionRuleType base = {&calendarrule_year_to_timestamp}; + + CalendarRule new_offset = { + .base = base, + .month = month, + .week = week, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = new_offset; + return 0; +} + +/* Function to calculate the local timestamp of a transition from the year. + * + * This translates the day of the year into a local timestamp — either a + * 1-based Julian day, not including leap days, or the 0-based year-day, + * including leap days. + * */ +int64_t +dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + // The function signature requires a TransitionRuleType pointer, but this + // function is only applicable to DayRule* objects. + DayRule *self = (DayRule *)base_self; + + // ymd_to_ord calculates the number of days since 0001-01-01, but we want + // to know the number of days since 1970-01-01, so we must subtract off + // the equivalent of ymd_to_ord(1970, 1, 1). 
+ // + // We subtract off an additional 1 day to account for January 1st (we want + // the number of full days *before* the date of the transition - partial + // days are accounted for in the hour, minute and second portions. + int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1; + + // The Julian day specification skips over February 29th in leap years, + // from the POSIX standard: + // + // Leap days shall not be counted. That is, in all years-including leap + // years-February 28 is day 59 and March 1 is day 60. It is impossible to + // refer explicitly to the occasional February 29. + // + // This is actually more useful than you'd think — if you want a rule that + // always transitions on a given calendar day (other than February 29th), + // you would use a Julian day, e.g. J91 always refers to April 1st and J365 + // always refers to December 31st. + unsigned int day = self->day; + if (self->julian && day >= 59 && is_leap_year(year)) { + day += 1; + } + + return ((days_before_year + day) * 86400) + (self->hour * 3600) + + (self->minute * 60) + self->second; +} + +/* Constructor for DayRule. */ +static int +dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, + int8_t second, DayRule *out) +{ + // The POSIX standard specifies that Julian days must be in the range (1 <= + // n <= 365) and that non-Julian (they call it "0-based Julian") days must + // be in the range (0 <= n <= 365). + if (day < julian || day > 365) { + PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", + julian, day); + return -1; + } + + TransitionRuleType base = { + &dayrule_year_to_timestamp, + }; + + DayRule tmp = { + .base = base, + .julian = julian, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = tmp; + + return 0; +} + +/* Calculate the start and end rules for a _tzrule in the given year. 
*/ +static void +tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end) +{ + assert(rule->start != NULL); + assert(rule->end != NULL); + *start = rule->start->year_to_timestamp(rule->start, year); + *end = rule->end->year_to_timestamp(rule->end, year); +} + +/* Calculate the _ttinfo that applies at a given local time from a _tzrule. + * + * This takes a local timestamp and fold for disambiguation purposes; the year + * could technically be calculated from the timestamp, but given that the + * callers of this function already have the year information accessible from + * the datetime struct, it is taken as an additional parameter to reduce + * unncessary calculation. + * */ +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year) +{ + if (rule->std_only) { + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + + tzrule_transitions(rule, year, &start, &end); + + // With fold = 0, the period (denominated in local time) with the smaller + // offset starts at the end of the gap and ends at the end of the fold; + // with fold = 1, it runs from the start of the gap to the beginning of the + // fold. + // + // So in order to determine the DST boundaries we need to know both the + // fold and whether DST is positive or negative (rare), and it turns out + // that this boils down to fold XOR is_positive. + if (fold == (rule->dst_diff >= 0)) { + end -= rule->dst_diff; + } + else { + start += rule->dst_diff; + } + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time. + * + * This function can determine the _ttinfo that applies at a given epoch time, + * (analogous to trans_list_utc), and whether or not the datetime is in a fold. + * This is to be used in the .fromutc() function. 
+ * + * The year is technically a redundant parameter, because it can be calculated + * from the timestamp, but all callers of this function should have the year + * in the datetime struct anyway, so taking it as a parameter saves unnecessary + * calculation. + **/ +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold) +{ + if (rule->std_only) { + *fold = 0; + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + tzrule_transitions(rule, year, &start, &end); + start -= rule->std.utcoff_seconds; + end -= rule->dst.utcoff_seconds; + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + // For positive DST, the ambiguous period is one dst_diff after the end of + // DST; for negative DST, the ambiguous period is one dst_diff before the + // start of DST. + int64_t ambig_start, ambig_end; + if (rule->dst_diff > 0) { + ambig_start = end; + ambig_end = end + rule->dst_diff; + } + else { + ambig_start = start; + ambig_end = start - rule->dst_diff; + } + + *fold = (ts >= ambig_start) && (ts < ambig_end); + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Parse a TZ string in the format specified by the POSIX standard: + * + * std offset[dst[offset],start[/time],end[/time]] + * + * std and dst must be 3 or more characters long and must not contain a + * leading colon, embedded digits, commas, nor a plus or minus signs; The + * spaces between "std" and "offset" are only for display and are not actually + * present in the string. 
+ * + * The format of the offset is ``[+|-]hh[:mm[:ss]]`` + * + * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3: + * + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html + */ +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out) +{ + PyObject *std_abbr = NULL; + PyObject *dst_abbr = NULL; + TransitionRuleType *start = NULL; + TransitionRuleType *end = NULL; + long std_offset, dst_offset; + + char *tz_str = PyBytes_AsString(tz_str_obj); + if (tz_str == NULL) { + return -1; + } + char *p = tz_str; + + // Read the `std` abbreviation, which must be at least 3 characters long. + ssize_t num_chars = parse_abbr(p, &std_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + goto error; + } + + p += num_chars; + + // Now read the STD offset, which is required + num_chars = parse_tz_delta(p, &std_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); + goto error; + } + p += num_chars; + + // If the string ends here, there is no DST, otherwise we must parse the + // DST abbreviation and start and end dates and times. + if (*p == '\0') { + goto complete; + } + + num_chars = parse_abbr(p, &dst_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + goto error; + } + p += num_chars; + + if (*p == ',') { + // From the POSIX standard: + // + // If no offset follows dst, the alternative time is assumed to be one + // hour ahead of standard time. 
+ dst_offset = std_offset + 3600; + } + else { + num_chars = parse_tz_delta(p, &dst_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", + tz_str_obj); + goto error; + } + + p += num_chars; + } + + TransitionRuleType **transitions[2] = {&start, &end}; + for (size_t i = 0; i < 2; ++i) { + if (*p != ',') { + PyErr_Format(PyExc_ValueError, + "Missing transition rules in TZ string: %R", + tz_str_obj); + goto error; + } + p++; + + num_chars = parse_transition_rule(p, transitions[i]); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, + "Malformed transition rule in TZ string: %R", + tz_str_obj); + goto error; + } + p += num_chars; + } + + if (*p != '\0') { + PyErr_Format(PyExc_ValueError, + "Extraneous characters at end of TZ string: %R", + tz_str_obj); + goto error; + } + +complete: + build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out); + Py_DECREF(std_abbr); + Py_XDECREF(dst_abbr); + + return 0; +error: + Py_XDECREF(std_abbr); + if (dst_abbr != NULL && dst_abbr != Py_None) { + Py_DECREF(dst_abbr); + } + + if (start != NULL) { + PyMem_Free(start); + } + + if (end != NULL) { + PyMem_Free(end); + } + + return -1; +} + +static ssize_t +parse_uint(const char *const p) +{ + if (!isdigit(*p)) { + return -1; + } + + return (*p) - '0'; +} + +/* Parse the STD and DST abbreviations from a TZ string. */ +static ssize_t +parse_abbr(const char *const p, PyObject **abbr) +{ + const char *ptr = p; + char buff = *ptr; + const char *str_start; + const char *str_end; + + if (*ptr == '<') { + ptr++; + str_start = ptr; + while ((buff = *ptr) != '>') { + // From the POSIX standard: + // + // In the quoted form, the first character shall be the less-than + // ( '<' ) character and the last character shall be the + // greater-than ( '>' ) character. 
All characters between these + // quoting characters shall be alphanumeric characters from the + // portable character set in the current locale, the plus-sign ( + // '+' ) character, or the minus-sign ( '-' ) character. The std + // and dst fields in this case shall not include the quoting + // characters. + if (!isalpha(buff) && !isdigit(buff) && buff != '+' && + buff != '-') { + return -1; + } + ptr++; + } + str_end = ptr; + ptr++; + } + else { + str_start = p; + // From the POSIX standard: + // + // In the unquoted form, all characters in these fields shall be + // alphabetic characters from the portable character set in the + // current locale. + while (isalpha(*ptr)) { + ptr++; + } + str_end = ptr; + } + + *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); + if (abbr == NULL) { + return -1; + } + + return ptr - p; +} + +/* Parse a UTC offset from a TZ str. */ +static ssize_t +parse_tz_delta(const char *const p, long *total_seconds) +{ + // From the POSIX spec: + // + // Indicates the value added to the local time to arrive at Coordinated + // Universal Time. The offset has the form: + // + // hh[:mm[:ss]] + // + // One or more digits may be used; the value is always interpreted as a + // decimal number. + // + // The POSIX spec says that the values for `hour` must be between 0 and 24 + // hours, but RFC 8536 §3.3.1 specifies that the hours part of the + // transition times may be signed and range from -167 to 167. + long sign = -1; + long hours = 0; + long minutes = 0; + long seconds = 0; + + const char *ptr = p; + char buff = *ptr; + if (buff == '-' || buff == '+') { + // Negative numbers correspond to *positive* offsets, from the spec: + // + // If preceded by a '-', the timezone shall be east of the Prime + // Meridian; otherwise, it shall be west (which may be indicated by + // an optional preceding '+' ). 
+ if (buff == '-') { + sign = 1; + } + + ptr++; + } + + // The hour can be 1 or 2 numeric characters + for (size_t i = 0; i < 2; ++i) { + buff = *ptr; + if (!isdigit(buff)) { + if (i == 0) { + return -1; + } + else { + break; + } + } + + hours *= 10; + hours += buff - '0'; + ptr++; + } + + if (hours > 24 || hours < 0) { + return -1; + } + + // Minutes and seconds always of the format ":dd" + long *outputs[2] = {&minutes, &seconds}; + for (size_t i = 0; i < 2; ++i) { + if (*ptr != ':') { + goto complete; + } + ptr++; + + for (size_t j = 0; j < 2; ++j) { + buff = *ptr; + if (!isdigit(buff)) { + return -1; + } + *(outputs[i]) *= 10; + *(outputs[i]) += buff - '0'; + ptr++; + } + } + +complete: + *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); + + return ptr - p; +} + +/* Parse the date portion of a transition rule. */ +static ssize_t +parse_transition_rule(const char *const p, TransitionRuleType **out) +{ + // The full transition rule indicates when to change back and forth between + // STD and DST, and has the form: + // + // date[/time],date[/time] + // + // This function parses an individual date[/time] section, and returns + // the number of characters that contributed to the transition rule. This + // does not include the ',' at the end of the first rule. + // + // The POSIX spec states that if *time* is not given, the default is 02:00. + const char *ptr = p; + int8_t hour = 2; + int8_t minute = 0; + int8_t second = 0; + + // Rules come in one of three flavors: + // + // 1. Jn: Julian day n, with no leap days. + // 2. n: Day of year (0-based, with leap days) + // 3. Mm.n.d: Specifying by month, week and day-of-week. 
+ + if (*ptr == 'M') { + uint8_t month, week, day; + ptr++; + ssize_t tmp = parse_uint(ptr); + if (tmp < 0) { + return -1; + } + month = (uint8_t)tmp; + ptr++; + if (*ptr != '.') { + tmp = parse_uint(ptr); + if (tmp < 0) { + return -1; + } + + month *= 10; + month += (uint8_t)tmp; + ptr++; + } + + uint8_t *values[2] = {&week, &day}; + for (size_t i = 0; i < 2; ++i) { + if (*ptr != '.') { + return -1; + } + ptr++; + + tmp = parse_uint(ptr); + if (tmp < 0) { + return -1; + } + ptr++; + + *(values[i]) = tmp; + } + + if (*ptr == '/') { + ptr++; + ssize_t num_chars = + parse_transition_time(ptr, &hour, &minute, &second); + if (num_chars < 0) { + return -1; + } + ptr += num_chars; + } + + CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); + if (rv == NULL) { + return -1; + } + + if (calendarrule_new(month, week, day, hour, minute, second, rv)) { + PyMem_Free(rv); + return -1; + } + + *out = (TransitionRuleType *)rv; + } + else { + uint8_t julian = 0; + unsigned int day = 0; + if (*ptr == 'J') { + julian = 1; + ptr++; + } + + for (size_t i = 0; i < 3; ++i) { + if (!isdigit(*ptr)) { + if (i == 0) { + return -1; + } + break; + } + day *= 10; + day += (*ptr) - '0'; + ptr++; + } + + if (*ptr == '/') { + ptr++; + ssize_t num_chars = + parse_transition_time(ptr, &hour, &minute, &second); + if (num_chars < 0) { + return -1; + } + ptr += num_chars; + } + + DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); + if (rv == NULL) { + return -1; + } + + if (dayrule_new(julian, day, hour, minute, second, rv)) { + PyMem_Free(rv); + return -1; + } + *out = (TransitionRuleType *)rv; + } + + return ptr - p; +} + +/* Parse the time portion of a transition rule (e.g. following an /) */ +static ssize_t +parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, + int8_t *second) +{ + // From the spec: + // + // The time has the same format as offset except that no leading sign + // ( '-' or '+' ) is allowed. 
+ // + // The format for the offset is: + // + // h[h][:mm[:ss]] + // + // RFC 8536 also allows transition times to be signed and to range from + // -167 to +167, but the current version only supports [0, 99]. + // + // TODO: Support the full range of transition hours. + int8_t *components[3] = {hour, minute, second}; + const char *ptr = p; + int8_t sign = 1; + + if (*ptr == '-' || *ptr == '+') { + if (*ptr == '-') { + sign = -1; + } + ptr++; + } + + for (size_t i = 0; i < 3; ++i) { + if (i > 0) { + if (*ptr != ':') { + break; + } + ptr++; + } + + uint8_t buff = 0; + for (size_t j = 0; j < 2; j++) { + if (!isdigit(*ptr)) { + if (i == 0 && j > 0) { + break; + } + return -1; + } + + buff *= 10; + buff += (*ptr) - '0'; + ptr++; + } + + *(components[i]) = sign * buff; + } + + return ptr - p; +} + +/* Constructor for a _tzrule. + * + * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which + * case `dst_offset` will be ignored and `start` and `end` are expected to be + * NULL as well. + * + * Returns 0 on success. + */ +static int +build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, + long dst_offset, TransitionRuleType *start, + TransitionRuleType *end, _tzrule *out) +{ + _tzrule rv = {0}; + + rv.start = start; + rv.end = end; + + if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) { + goto error; + } + + if (dst_abbr != NULL) { + rv.dst_diff = dst_offset - std_offset; + if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) { + goto error; + } + } + else { + rv.std_only = 1; + } + + *out = rv; + + return 0; +error: + xdecref_ttinfo(&rv.std); + xdecref_ttinfo(&rv.dst); + return -1; +} + +/* Destructor for _tzrule. 
*/ +static void +free_tzrule(_tzrule *tzrule) +{ + xdecref_ttinfo(&(tzrule->std)); + if (!tzrule->std_only) { + xdecref_ttinfo(&(tzrule->dst)); + } + + if (tzrule->start != NULL) { + PyMem_Free(tzrule->start); + } + + if (tzrule->end != NULL) { + PyMem_Free(tzrule->end); + } +} + +/* Calculate DST offsets from transitions and UTC offsets + * + * This is necessary because each C `ttinfo` only contains the UTC offset, + * time zone abbreviation and an isdst boolean - it does not include the + * amount of the DST offset, but we need the amount for the dst() function. + * + * Thus function uses heuristics to infer what the offset should be, so it + * is not guaranteed that this will work for all zones. If we cannot assign + * a value for a given DST offset, we'll assume it's 1H rather than 0H, so + * bool(dt.dst()) will always match ttinfo.isdst. + */ +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos) +{ + size_t dst_count = 0; + size_t dst_found = 0; + for (size_t i = 0; i < num_ttinfos; ++i) { + dst_count++; + } + + for (size_t i = 1; i < num_transitions; ++i) { + if (dst_count == dst_found) { + break; + } + + size_t idx = trans_idx[i]; + size_t comp_idx = trans_idx[i - 1]; + + // Only look at DST offsets that have nto been assigned already + if (!isdsts[idx] || dstoffs[idx] != 0) { + continue; + } + + long dstoff = 0; + long utcoff = utcoffs[idx]; + + if (!isdsts[comp_idx]) { + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (!dstoff && idx < (num_ttinfos - 1)) { + comp_idx = trans_idx[i + 1]; + + // If the following transition is also DST and we couldn't find + // the DST offset by this point, we're going to have to skip it + // and hope this transition gets assigned later + if (isdsts[comp_idx]) { + continue; + } + + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (dstoff) { + dst_found++; + dstoffs[idx] = dstoff; + } + } + + if (dst_found < dst_count) { + // If 
there are time zones we didn't find a value for, we'll end up + // with dstoff = 0 for something where isdst=1. This is obviously + // wrong — one hour will be a much better guess than 0. + for (size_t idx = 0; idx < num_ttinfos; ++idx) { + if (isdsts[idx] && !dstoffs[idx]) { + dstoffs[idx] = 3600; + } + } + } +} + +#define _swap(x, y, buffer) \ + buffer = x; \ + x = y; \ + y = buffer; + +/* Calculate transitions in local time from UTC time and offsets. + * + * We want to know when each transition occurs, denominated in the number of + * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in + * *local time* (note: this is *not* equivalent to the output of + * datetime.timestamp, which is the total number of seconds actual elapsed + * since 1970-01-01T00:00:00Z in UTC). + * + * This is an ambiguous question because "local time" can be ambiguous — but it + * is disambiguated by the `fold` parameter, so we allocate two arrays: + * + * trans_local[0]: The wall-time transitions for fold=0 + * trans_local[1]: The wall-time transitions for fold=1 + * + * This returns 0 on success and a negative number of failure. The trans_local + * arrays must be freed if they are not NULL. 
+ */ +static int +ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, + int64_t *trans_local[2], size_t num_ttinfos, + size_t num_transitions) +{ + if (num_transitions == 0) { + return 0; + } + + // Copy the UTC transitions into each array to be modified in place later + for (size_t i = 0; i < 2; ++i) { + trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t)); + if (trans_local[i] == NULL) { + return -1; + } + + memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t)); + } + + int64_t offset_0, offset_1, buff; + if (num_ttinfos > 1) { + offset_0 = utcoff[0]; + offset_1 = utcoff[trans_idx[0]]; + + if (offset_1 > offset_0) { + _swap(offset_0, offset_1, buff); + } + } + else { + offset_0 = utcoff[0]; + offset_1 = utcoff[0]; + } + + trans_local[0][0] += offset_0; + trans_local[1][0] += offset_1; + + for (size_t i = 1; i < num_transitions; ++i) { + offset_0 = utcoff[trans_idx[i - 1]]; + offset_1 = utcoff[trans_idx[i]]; + + if (offset_1 > offset_0) { + _swap(offset_1, offset_0, buff); + } + + trans_local[0][i] += offset_0; + trans_local[1][i] += offset_1; + } + + return 0; +} + +/* Simple bisect_right binary search implementation */ +static size_t +_bisect(const int64_t value, const int64_t *arr, size_t size) +{ + size_t lo = 0; + size_t hi = size; + size_t m; + + while (lo < hi) { + m = (lo + hi) / 2; + if (arr[m] > value) { + hi = m; + } + else { + lo = m + 1; + } + } + + return hi; +} + +/* Find the ttinfo rules that apply at a given local datetime. */ +static _ttinfo * +find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt) +{ + // datetime.time has a .tzinfo attribute that passes None as the dt + // argument; it only really has meaning for fixed-offset zones. 
+ if (dt == Py_None) { + if (self->fixed_offset) { + return &(self->tzrule_after.std); + } + else { + return &NO_TTINFO; + } + } + + int64_t ts; + if (get_local_timestamp(dt, &ts)) { + return NULL; + } + + unsigned char fold = PyDateTime_DATE_GET_FOLD(dt); + assert(fold < 2); + int64_t *local_transitions = self->trans_list_wall[fold]; + size_t num_trans = self->num_transitions; + + if (num_trans && ts < local_transitions[0]) { + return self->ttinfo_before; + } + else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) { + return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold, + PyDateTime_GET_YEAR(dt)); + } + else { + size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1; + assert(idx < self->num_transitions); + return self->trans_ttinfos[idx]; + } +} + +static int +is_leap_year(int year) +{ + const unsigned int ayear = (unsigned int)year; + return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0); +} + +/* Calculates ordinal datetime from year, month and day. */ +static int +ymd_to_ord(int y, int m, int d) +{ + y -= 1; + int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400); + int yearday = DAYS_BEFORE_MONTH[m]; + if (m > 2 && is_leap_year(y + 1)) { + yearday += 1; + } + + return days_before_year + yearday + d; +} + +/* Calculate the number of seconds since 1970-01-01 in local time. + * + * This gets a datetime in the same "units" as self->trans_list_wall so that we + * can easily determine which transitions a datetime falls between. See the + * comment above ts_to_local for more information. 
+ * */ +static int +get_local_timestamp(PyObject *dt, int64_t *local_ts) +{ + assert(local_ts != NULL); + + int hour, minute, second; + int ord; + if (PyDateTime_CheckExact(dt)) { + int y = PyDateTime_GET_YEAR(dt); + int m = PyDateTime_GET_MONTH(dt); + int d = PyDateTime_GET_DAY(dt); + hour = PyDateTime_DATE_GET_HOUR(dt); + minute = PyDateTime_DATE_GET_MINUTE(dt); + second = PyDateTime_DATE_GET_SECOND(dt); + + ord = ymd_to_ord(y, m, d); + } + else { + PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL); + if (num == NULL) { + return -1; + } + + ord = PyLong_AsLong(num); + Py_DECREF(num); + if (ord == -1 && PyErr_Occurred()) { + return -1; + } + + num = PyObject_GetAttrString(dt, "hour"); + if (num == NULL) { + return -1; + } + hour = PyLong_AsLong(num); + Py_DECREF(num); + if (hour == -1) { + return -1; + } + + num = PyObject_GetAttrString(dt, "minute"); + if (num == NULL) { + return -1; + } + minute = PyLong_AsLong(num); + Py_DECREF(num); + if (minute == -1) { + return -1; + } + + num = PyObject_GetAttrString(dt, "second"); + if (num == NULL) { + return -1; + } + second = PyLong_AsLong(num); + Py_DECREF(num); + if (second == -1) { + return -1; + } + } + + *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + + (int64_t)(hour * 3600 + minute * 60 + second); + + return 0; +} + +///// +// Functions for cache handling + +/* Constructor for StrongCacheNode */ +static StrongCacheNode * +strong_cache_node_new(PyObject *key, PyObject *zone) +{ + StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode)); + if (node == NULL) { + return NULL; + } + + Py_INCREF(key); + Py_INCREF(zone); + + node->next = NULL; + node->prev = NULL; + node->key = key; + node->zone = zone; + + return node; +} + +/* Destructor for StrongCacheNode */ +void +strong_cache_node_free(StrongCacheNode *node) +{ + Py_XDECREF(node->key); + Py_XDECREF(node->zone); + + PyMem_Free(node); +} + +/* Frees all nodes at or after a specified root in the strong cache. 
+ * + * This can be used on the root node to free the entire cache or it can be used + * to clear all nodes that have been expired (which, if everything is going + * right, will actually only be 1 node at a time). + */ +void +strong_cache_free(StrongCacheNode *root) +{ + StrongCacheNode *node = root; + StrongCacheNode *next_node; + while (node != NULL) { + next_node = node->next; + strong_cache_node_free(node); + + node = next_node; + } +} + +/* Removes a node from the cache and update its neighbors. + * + * This is used both when ejecting a node from the cache and when moving it to + * the front of the cache. + */ +static void +remove_from_strong_cache(StrongCacheNode *node) +{ + if (ZONEINFO_STRONG_CACHE == node) { + ZONEINFO_STRONG_CACHE = node->next; + } + + if (node->prev != NULL) { + node->prev->next = node->next; + } + + if (node->next != NULL) { + node->next->prev = node->prev; + } + + node->next = NULL; + node->prev = NULL; +} + +/* Retrieves the node associated with a key, if it exists. + * + * This traverses the strong cache until it finds a matching key and returns a + * pointer to the relevant node if found. Returns NULL if no node is found. + * + * root may be NULL, indicating an empty cache. + */ +static StrongCacheNode * +find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key) +{ + const StrongCacheNode *node = root; + while (node != NULL) { + if (PyObject_RichCompareBool(key, node->key, Py_EQ)) { + return (StrongCacheNode *)node; + } + + node = node->next; + } + + return NULL; +} + +/* Ejects a given key from the class's strong cache, if applicable. + * + * This function is used to enable the per-key functionality in clear_cache. 
+ */ +static void +eject_from_strong_cache(const PyTypeObject *const type, PyObject *key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + if (node != NULL) { + remove_from_strong_cache(node); + + strong_cache_node_free(node); + } +} + +/* Moves a node to the front of the LRU cache. + * + * The strong cache is an LRU cache, so whenever a given node is accessed, if + * it is not at the front of the cache, it needs to be moved there. + */ +static void +move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node) +{ + StrongCacheNode *root_p = *root; + if (root_p == node) { + return; + } + + remove_from_strong_cache(node); + + node->prev = NULL; + node->next = root_p; + + if (root_p != NULL) { + root_p->prev = node; + } + + *root = node; +} + +/* Retrieves a ZoneInfo from the strong cache if it's present. + * + * This function finds the ZoneInfo by key and if found will move the node to + * the front of the LRU cache and return a new reference to it. It returns NULL + * if the key is not in the cache. + * + * The strong cache is currently only implemented for the base class, so this + * always returns a cache miss for subclasses. + */ +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return NULL; // Strong cache currently only implemented for base class + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + + if (node != NULL) { + move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node); + Py_INCREF(node->zone); + return node->zone; + } + + return NULL; // Cache miss +} + +/* Inserts a new key into the strong LRU cache. + * + * This function is only to be used after a cache miss — it creates a new node + * at the front of the cache and ejects any stale entries (keeping the size of + * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE). 
+ *
+ * If a node cannot be allocated the cache is simply left unchanged.
+ */
+static void
+update_strong_cache(const PyTypeObject *const type, PyObject *key,
+                    PyObject *zone)
+{
+    if (type != &PyZoneInfo_ZoneInfoType) {
+        return;
+    }
+
+    StrongCacheNode *new_node = strong_cache_node_new(key, zone);
+    if (new_node == NULL) {
+        // Allocation failed: skip caching rather than dereference NULL below.
+        return;
+    }
+
+    move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
+
+    // Walk forward to the first node past the size limit, if there is one.
+    StrongCacheNode *node = new_node->next;
+    for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
+        if (node == NULL) {
+            return;
+        }
+        node = node->next;
+    }
+
+    // Everything beyond this point needs to be freed
+    if (node != NULL) {
+        if (node->prev != NULL) {
+            node->prev->next = NULL;
+        }
+        strong_cache_free(node);
+    }
+}
+
+/* Clears all entries into a type's strong cache.
+ *
+ * Because the strong cache is not implemented for subclasses, this is a no-op
+ * for everything except the base class.
+ */
+void
+clear_strong_cache(const PyTypeObject *const type)
+{
+    if (type != &PyZoneInfo_ZoneInfoType) {
+        return;
+    }
+
+    strong_cache_free(ZONEINFO_STRONG_CACHE);
+    // Every node has just been freed; reset the head so that later lookups
+    // and insertions do not walk freed memory.
+    ZONEINFO_STRONG_CACHE = NULL;
+}
+
+/* Creates a new weakref.WeakValueDictionary.
+ *
+ * Returns a new reference, or NULL with an exception set on failure.
+ */
+static PyObject *
+new_weak_cache()
+{
+    PyObject *weakref_module = PyImport_ImportModule("weakref");
+    if (weakref_module == NULL) {
+        return NULL;
+    }
+
+    PyObject *weak_cache =
+        PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
+    Py_DECREF(weakref_module);
+    return weak_cache;
+}
+
+/* Creates the timedelta cache and the weak ZoneInfo cache if they do not
+ * exist yet; otherwise takes an additional reference to each existing cache.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+initialize_caches()
+{
+    if (TIMEDELTA_CACHE == NULL) {
+        TIMEDELTA_CACHE = PyDict_New();
+    }
+    else {
+        Py_INCREF(TIMEDELTA_CACHE);
+    }
+
+    if (TIMEDELTA_CACHE == NULL) {
+        return -1;
+    }
+
+    if (ZONEINFO_WEAK_CACHE == NULL) {
+        ZONEINFO_WEAK_CACHE = new_weak_cache();
+    }
+    else {
+        Py_INCREF(ZONEINFO_WEAK_CACHE);
+    }
+
+    if (ZONEINFO_WEAK_CACHE == NULL) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Gives each subclass its own `_weak_cache` attribute holding a fresh
+ * WeakValueDictionary.
+ *
+ * Returns None on success, or NULL with an exception set if the cache cannot
+ * be created or stored on the class.
+ */
+static PyObject *
+zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
+{
+    PyObject *weak_cache = new_weak_cache();
+    if (weak_cache == NULL) {
+        return NULL;
+    }
+
+    // PyObject_SetAttrString does not steal the reference, so ours is
+    // released on both the success and the failure path.
+    int rv = PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
+    Py_DECREF(weak_cache);
+    if (rv < 0) {
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/////
+// 
Specify the ZoneInfo type
+static PyMethodDef zoneinfo_methods[] = {
+    {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
+     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
+     PyDoc_STR("Clear the ZoneInfo cache.")},
+    {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
+     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
+     PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
+    {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
+     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
+     PyDoc_STR("Create a ZoneInfo file from a file object.")},
+    {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
+     PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
+               "the given datetime.")},
+    {"dst", (PyCFunction)zoneinfo_dst, METH_O,
+     PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
+               "in a zone at the given datetime.")},
+    {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
+     PyDoc_STR("Retrieve a string containing the abbreviation for the time "
+               "zone that applies in a zone at a given datetime.")},
+    {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
+     PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
+               "datetime in local time.")},
+    {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
+     PyDoc_STR("Function for serialization with the pickle protocol.")},
+    {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
+     PyDoc_STR("Private method used in unpickling.")},
+    // zoneinfo_init_subclass takes the class as its first argument, so it
+    // must be registered with METH_CLASS: a C-level __init_subclass__ is not
+    // implicitly turned into a class method.
+    {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
+     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
+     PyDoc_STR("Function to initialize subclasses.")},
+    {NULL} /* Sentinel */
+};
+
+static PyMemberDef zoneinfo_members[] = {
+    {.name = "key",
+     .offset = offsetof(PyZoneInfo_ZoneInfo, key),
+     .type = T_OBJECT_EX,
+     .flags = READONLY,
+     .doc = NULL},
+    {NULL}, /* Sentinel */
+};
+
+static PyTypeObject PyZoneInfo_ZoneInfoType = {
+    PyVarObject_HEAD_INIT(NULL, 0)  //
+    .tp_name = "zoneinfo.ZoneInfo",
+    .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
+    .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
+    .tp_repr = (reprfunc)zoneinfo_repr,
+    .tp_str = (reprfunc)zoneinfo_str,
+    .tp_getattro = PyObject_GenericGetAttr,
+    .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
+    /* .tp_doc = zoneinfo_doc, */
+    .tp_methods = zoneinfo_methods,
+    .tp_members = zoneinfo_members,
+    .tp_new = zoneinfo_new,
+    .tp_dealloc = zoneinfo_dealloc,
+};
+
+/////
+// Specify the _zoneinfo module
+static PyMethodDef module_methods[] = {{NULL, NULL}};
+
+/* Releases the module-level references taken in zoneinfomodule_exec.
+ *
+ * The timedelta cache and the weak cache gain one reference per call to
+ * initialize_caches, so each call here drops one reference and the global
+ * pointer is only cleared when the last reference goes away.
+ */
+static void
+module_free()
+{
+    Py_XDECREF(_tzpath_find_tzfile);
+    _tzpath_find_tzfile = NULL;
+
+    Py_XDECREF(_common_mod);
+    _common_mod = NULL;
+
+    Py_XDECREF(io_open);
+    io_open = NULL;
+
+    xdecref_ttinfo(&NO_TTINFO);
+
+    // Inspect the reference count *before* dropping our reference, and never
+    // touch a NULL or already-freed object.
+    if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
+        Py_DECREF(TIMEDELTA_CACHE);
+    }
+    else {
+        Py_CLEAR(TIMEDELTA_CACHE);
+    }
+
+    if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
+        Py_DECREF(ZONEINFO_WEAK_CACHE);
+    }
+    else {
+        Py_CLEAR(ZONEINFO_WEAK_CACHE);
+    }
+
+    strong_cache_free(ZONEINFO_STRONG_CACHE);
+    ZONEINFO_STRONG_CACHE = NULL;
+}
+
+/* Module exec slot: readies the ZoneInfo type, adds it to the module and
+ * populates the module-level imports and caches.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+zoneinfomodule_exec(PyObject *m)
+{
+    PyDateTime_IMPORT;
+    if (PyDateTimeAPI == NULL) {
+        // Importing the datetime C API failed.
+        goto error;
+    }
+    PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
+    if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
+        goto error;
+    }
+
+    // PyModule_AddObject only steals the reference on success.
+    Py_INCREF(&PyZoneInfo_ZoneInfoType);
+    if (PyModule_AddObject(m, "ZoneInfo",
+                           (PyObject *)&PyZoneInfo_ZoneInfoType) < 0) {
+        Py_DECREF(&PyZoneInfo_ZoneInfoType);
+        goto error;
+    }
+
+    /* Populate imports */
+    PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
+    if (_tzpath_module == NULL) {
+        goto error;
+    }
+
+    _tzpath_find_tzfile =
+        PyObject_GetAttrString(_tzpath_module, "find_tzfile");
+    Py_DECREF(_tzpath_module);
+    if (_tzpath_find_tzfile == NULL) {
+        goto error;
+    }
+
+    PyObject *io_module = PyImport_ImportModule("io");
+    if (io_module == NULL) {
+        goto error;
+    }
+
+    io_open = PyObject_GetAttrString(io_module, "open");
+    Py_DECREF(io_module);
+    if (io_open == NULL) {
+        goto error;
+    }
+
+    _common_mod =
PyImport_ImportModule("zoneinfo._common"); + if (_common_mod == NULL) { + goto error; + } + + if (NO_TTINFO.utcoff == NULL) { + NO_TTINFO.utcoff = Py_None; + NO_TTINFO.dstoff = Py_None; + NO_TTINFO.tzname = Py_None; + + for (size_t i = 0; i < 3; ++i) { + Py_INCREF(Py_None); + } + } + + if (initialize_caches()) { + goto error; + } + + return 0; + +error: + return -1; +} + +static PyModuleDef_Slot zoneinfomodule_slots[] = { + {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}}; + +static struct PyModuleDef zoneinfomodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_zoneinfo", + .m_doc = "C implementation of the zoneinfo module", + .m_size = 0, + .m_methods = module_methods, + .m_slots = zoneinfomodule_slots, + .m_free = (freefunc)module_free}; + +PyMODINIT_FUNC +PyInit__zoneinfo(void) +{ + return PyModuleDef_Init(&zoneinfomodule); +} diff --git a/PCbuild/_zoneinfo.vcxproj b/PCbuild/_zoneinfo.vcxproj new file mode 100644 index 00000000000000..6e6389c3773397 --- /dev/null +++ b/PCbuild/_zoneinfo.vcxproj @@ -0,0 +1,109 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + PGInstrument + ARM + + + PGInstrument + ARM64 + + + PGInstrument + Win32 + + + PGInstrument + x64 + + + PGUpdate + ARM + + + PGUpdate + ARM64 + + + PGUpdate + Win32 + + + PGUpdate + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {FCBE1EF2-E0F0-40B1-88B5-00A35D378742} + _zoneinfo + Win32Proj + + + + + DynamicLibrary + NotSet + + + + .pyd + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + + + + + + + + + + {cf7ac3d1-e2df-41d2-bea6-1e2556cdea26} + false + + + + + + diff --git a/PCbuild/_zoneinfo.vcxproj.filters b/PCbuild/_zoneinfo.vcxproj.filters new file mode 100644 index 00000000000000..e3c3ef4c72a7fc --- /dev/null +++ b/PCbuild/_zoneinfo.vcxproj.filters @@ -0,0 +1,16 @@ + + + + + + + + {2422278e-eeeb-4241-8182-433e2bc5a7fc} + + + + + Source Files + + + diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj index 
ee01d109f162d7..7ce88e5690b454 100644 --- a/PCbuild/lib.pyproj +++ b/PCbuild/lib.pyproj @@ -1396,6 +1396,10 @@ + + + + @@ -1563,6 +1567,10 @@ + + + + diff --git a/PCbuild/pcbuild.proj b/PCbuild/pcbuild.proj index 9c4d352b434488..4d416c589e4c47 100644 --- a/PCbuild/pcbuild.proj +++ b/PCbuild/pcbuild.proj @@ -51,7 +51,7 @@ - + diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index 6d4c9506e5ec1a..61db4e02ad3837 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -91,6 +91,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testconsole", "_testconsol EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_asyncio", "_asyncio.vcxproj", "{384C224A-7474-476E-A01B-750EA7DE918C}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_zoneinfo", "_zoneinfo.vcxproj", "{FCBE1EF2-E0F0-40B1-88B5-00A35D378742}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_queue", "_queue.vcxproj", "{78D80A15-BD8C-44E2-B49E-1F05B0A0A687}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblzma", "liblzma.vcxproj", "{12728250-16EC-4DC6-94D7-E21DD88947F8}" diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 5fe3e8c36ecf53..c44910e9bfcf04 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -132,6 +132,7 @@ library which are implemented in C; each one builds a DLL (renamed to _asyncio _ctypes _ctypes_test +_zoneinfo _decimal _elementtree _hashlib diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index 95541599b9bb29..037fc38f1d9cbf 100644 --- a/Tools/msi/lib/lib_files.wxs +++ b/Tools/msi/lib/lib_files.wxs @@ -1,6 +1,6 @@  - + diff --git a/configure b/configure index 26e9aa9fe454e2..5d290eca0c3039 100755 --- a/configure +++ b/configure @@ -658,6 +658,7 @@ LIBFFI_INCLUDEDIR PKG_CONFIG_LIBDIR PKG_CONFIG_PATH PKG_CONFIG +TZPATH SHLIBS CFLAGSFORSHARED LINKFORSHARED @@ -819,6 +820,7 @@ with_assertions enable_optimizations with_lto with_hash_algorithm +with_tzpath with_address_sanitizer 
with_memory_sanitizer with_undefined_behavior_sanitizer @@ -1524,6 +1526,9 @@ Optional Packages: --with-hash-algorithm=[fnv|siphash24] select hash algorithm for use in Python/pyhash.c (default is SipHash24) + --with-tzpath= + Select the default time zone search path for zoneinfo.TZPATH + --with-address-sanitizer enable AddressSanitizer memory error detector, 'asan' (default is no) @@ -10150,6 +10155,47 @@ $as_echo "default" >&6; } fi +validate_tzpath() { + # Checks that each element of the path is an absolute path + if test -z "$1"; then + # Empty string is allowed: it indicates no system TZPATH + return 0 + fi + + # Bad paths are those that don't start with / + if ( echo $1 | grep -qE '(^|:)([^/]|$)' ); then + as_fn_error $? "--with-tzpath must contain only absolute paths, not $1" "$LINENO" 5 + return 1; + fi +} + +TZPATH="/usr/share/zoneinfo:/usr/lib/zoneinfo:/usr/share/lib/zoneinfo:/etc/zoneinfo" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-tzpath" >&5 +$as_echo_n "checking for --with-tzpath... " >&6; } + +# Check whether --with-tzpath was given. +if test "${with_tzpath+set}" = set; then : + withval=$with_tzpath; +case "$withval" in + yes) + as_fn_error $? "--with-tzpath requires a value" "$LINENO" 5 + ;; + *) + validate_tzpath "$withval" + TZPATH="$withval" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$withval\"" >&5 +$as_echo "\"$withval\"" >&6; } + ;; +esac + +else + validate_tzpath "$TZPATH" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$TZPATH\"" >&5 +$as_echo "\"$TZPATH\"" >&6; } +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-address-sanitizer" >&5 $as_echo_n "checking for --with-address-sanitizer... 
" >&6; } diff --git a/configure.ac b/configure.ac index acb6d4bfa8da10..0b5ca88c7f5556 100644 --- a/configure.ac +++ b/configure.ac @@ -2946,6 +2946,42 @@ esac ], [AC_MSG_RESULT(default)]) +validate_tzpath() { + # Checks that each element of the path is an absolute path + if test -z "$1"; then + # Empty string is allowed: it indicates no system TZPATH + return 0 + fi + + # Bad paths are those that don't start with / + dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output + if ( echo $1 | grep -qE '(^|:)(@<:@^/@:>@|$)' ); then + AC_MSG_ERROR([--with-tzpath must contain only absolute paths, not $1]) + return 1; + fi +} + +TZPATH="/usr/share/zoneinfo:/usr/lib/zoneinfo:/usr/share/lib/zoneinfo:/etc/zoneinfo" +AC_MSG_CHECKING(for --with-tzpath) +AC_ARG_WITH(tzpath, + AS_HELP_STRING([--with-tzpath=] + [Select the default time zone search path for zoneinfo.TZPATH]), +[ +case "$withval" in + yes) + AC_MSG_ERROR([--with-tzpath requires a value]) + ;; + *) + validate_tzpath "$withval" + TZPATH="$withval" + AC_MSG_RESULT("$withval") + ;; +esac +], +[validate_tzpath "$TZPATH" + AC_MSG_RESULT("$TZPATH")]) +AC_SUBST(TZPATH) + AC_MSG_CHECKING(for --with-address-sanitizer) AC_ARG_WITH(address_sanitizer, AS_HELP_STRING([--with-address-sanitizer], diff --git a/setup.py b/setup.py index 878372154d411a..2a16ed053daee0 100644 --- a/setup.py +++ b/setup.py @@ -304,6 +304,17 @@ def find_library_file(compiler, libname, std_dirs, paths): else: assert False, "Internal error: Path not found in std_dirs or paths" +def validate_tzpath(): + base_tzpath = sysconfig.get_config_var('TZPATH') + if not base_tzpath: + return + + tzpaths = base_tzpath.split(os.pathsep) + bad_paths = [tzpath for tzpath in tzpaths if not os.path.isabs(tzpath)] + if bad_paths: + raise ValueError('TZPATH must contain only absolute paths, ' + + f'found:\n{tzpaths!r}\nwith invalid paths:\n' + + f'{bad_paths!r}') def find_module_file(module, dirlist): """Find a module in a set of possible folders. 
If it is not found @@ -807,6 +818,8 @@ def detect_simple_extensions(self): # uses modf(). self.add(Extension('_datetime', ['_datetimemodule.c'], libraries=['m'])) + # zoneinfo module + self.add(Extension('_zoneinfo', ['_zoneinfo.c'])), # random number generator implemented in C self.add(Extension("_random", ["_randommodule.c"], extra_compile_args=['-DPy_BUILD_CORE_MODULE'])) @@ -2449,6 +2462,7 @@ class DummyProcess: ProcessPoolExecutor = None sys.modules['concurrent.futures.process'] = DummyProcess + validate_tzpath() # turn off warnings when deprecated modules are imported import warnings