Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

PERF: optimize NaT lookups in cython modules #24008

Merged
merged 9 commits into from Dec 2, 2018
4 changes: 2 additions & 2 deletions pandas/_libs/tslib.pyx
Expand Up @@ -40,8 +40,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject,
tz_convert_utc_to_tzlocal)

# many modules still look for NaT and iNaT here despite them not being needed
from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821
from tslibs.nattype cimport checknull_with_nat, NPY_NAT
from tslibs.nattype import nat_strings, iNaT # noqa:F821
from tslibs.nattype cimport checknull_with_nat, NPY_NAT, NAT as NaT

from tslibs.offsets cimport to_offset

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Expand Up @@ -39,8 +39,8 @@ from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
from timezones import UTC
from parsing import parse_datetime_string

from nattype import nat_strings, NaT
from nattype cimport NPY_NAT, checknull_with_nat
from nattype import nat_strings
from nattype cimport NPY_NAT, checknull_with_nat, NAT as NaT

# ----------------------------------------------------------------------
# Constants
Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/nattype.pxd
@@ -1,9 +1,20 @@
# -*- coding: utf-8 -*-

from cpython.datetime cimport datetime

from numpy cimport int64_t
cdef int64_t NPY_NAT

cdef bint _nat_scalar_rules[6]


cdef class _NaT(datetime):
cdef readonly:
int64_t value
object freq

cdef _NaT NAT


cdef bint checknull_with_nat(object val)
cdef bint is_null_datetimelike(object val)
75 changes: 38 additions & 37 deletions pandas/_libs/tslibs/nattype.pyx
Expand Up @@ -47,7 +47,7 @@ def _make_nan_func(func_name, doc):

def _make_nat_func(func_name, doc):
def f(*args, **kwargs):
return NaT
return NAT
f.__name__ = func_name
f.__doc__ = doc
return f
Expand All @@ -67,10 +67,10 @@ def _make_error_func(func_name, cls):


cdef _nat_divide_op(self, other):
if PyDelta_Check(other) or is_timedelta64_object(other) or other is NaT:
if PyDelta_Check(other) or is_timedelta64_object(other) or other is NAT:
return np.nan
if is_integer_object(other) or is_float_object(other):
return NaT
return NAT
return NotImplemented


Expand All @@ -82,15 +82,15 @@ cdef _nat_rdivide_op(self, other):

def __nat_unpickle(*args):
# return constant defined in the module
return NaT
return NAT

# ----------------------------------------------------------------------


cdef class _NaT(datetime):
cdef readonly:
int64_t value
object freq
# cdef readonly:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove these commented-out declarations?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or do we do this elsewhere as a reminder of the attributes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the pattern we used for _TSObject

# int64_t value
# object freq

def __hash__(_NaT self):
# py3k needs this defined here
Expand All @@ -116,18 +116,18 @@ cdef class _NaT(datetime):

def __add__(self, other):
if PyDateTime_Check(other):
return NaT
return NAT

elif hasattr(other, 'delta'):
# Timedelta, offsets.Tick, offsets.Week
return NaT
return NAT
elif getattr(other, '_typ', None) in ['dateoffset', 'series',
'period', 'datetimeindex',
'timedeltaindex']:
# Duplicate logic in _Timestamp.__add__ to avoid needing
# to subclass; allows us to @final(_Timestamp.__add__)
return NotImplemented
return NaT
return NAT

def __sub__(self, other):
# Duplicate some logic from _Timestamp.__sub__ to avoid needing
Expand Down Expand Up @@ -184,19 +184,6 @@ cdef class _NaT(datetime):
""" Returns a numpy.datetime64 object with 'ns' precision """
return np.datetime64('NaT', 'ns')


class NaTType(_NaT):
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""

def __new__(cls):
cdef _NaT base

base = _NaT.__new__(cls, 1, 1, 1)
base.value = NPY_NAT
base.freq = None

return base

def __repr__(self):
return 'NaT'

Expand All @@ -216,20 +203,11 @@ class NaTType(_NaT):
def __long__(self):
return NPY_NAT

def __reduce_ex__(self, protocol):
# python 3.6 compat
# http://bugs.python.org/issue28730
# now __reduce_ex__ is defined and higher priority than __reduce__
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))

def total_seconds(self):
"""
Total duration of timedelta in seconds (to ns precision)
"""
# GH 10939
# GH#10939
return np.nan

@property
Expand Down Expand Up @@ -260,6 +238,28 @@ class NaTType(_NaT):
def is_year_end(self):
return False


class NaTType(_NaT):
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""

def __new__(cls):
cdef _NaT base

base = _NaT.__new__(cls, 1, 1, 1)
base.value = NPY_NAT
base.freq = None

return base

def __reduce_ex__(self, protocol):
# python 3.6 compat
# http://bugs.python.org/issue28730
# now __reduce_ex__ is defined and higher priority than __reduce__
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))

def __rdiv__(self, other):
return _nat_rdivide_op(self, other)

Expand All @@ -271,7 +271,7 @@ class NaTType(_NaT):

def __rmul__(self, other):
if is_integer_object(other) or is_float_object(other):
return NaT
return NAT
return NotImplemented

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -659,14 +659,15 @@ class NaTType(_NaT):
""")


NaT = NaTType()
NAT = NaTType() # C-visible
NaT = NAT # Python-visible


# ----------------------------------------------------------------------

cdef inline bint checknull_with_nat(object val):
""" utility to check if a value is a nat or not """
return val is None or util.is_nan(val) or val is NaT
return val is None or util.is_nan(val) or val is NAT


cdef inline bint is_null_datetimelike(object val):
Expand All @@ -683,7 +684,7 @@ cdef inline bint is_null_datetimelike(object val):
"""
if val is None or util.is_nan(val):
return True
elif val is NaT:
elif val is NAT:
return True
elif util.is_timedelta64_object(val):
return val.view('int64') == NPY_NAT
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/timestamps.pyx
Expand Up @@ -26,8 +26,7 @@ from conversion import tz_localize_to_utc, normalize_i8_timestamps
from conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject, convert_datetime_to_tsobject)
from fields import get_start_end_field, get_date_name_field
from nattype import NaT
from nattype cimport NPY_NAT
from nattype cimport NPY_NAT, NAT as NaT
from np_datetime import OutOfBoundsDatetime
from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds,
npy_datetimestruct, dt64_to_dtstruct)
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timezones.pyx
Expand Up @@ -2,6 +2,8 @@

from cython import Py_ssize_t

from cpython.datetime cimport tzinfo

# dateutil compat
from dateutil.tz import (
tzutc as _dateutil_tzutc,
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/datetimes.py
Expand Up @@ -6,10 +6,9 @@
from pytz import utc

from pandas._libs import lib, tslib
from pandas._libs.tslib import NaT, Timestamp, iNaT
from pandas._libs.tslibs import (
ccalendar, conversion, fields, normalize_date, resolution as libresolution,
timezones)
NaT, Timestamp, ccalendar, conversion, fields, iNaT, normalize_date,
resolution as libresolution, timezones)
import pandas.compat as compat
from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender, cache_readonly
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/period.py
Expand Up @@ -4,8 +4,7 @@

import numpy as np

from pandas._libs.tslib import NaT, iNaT
from pandas._libs.tslibs import period as libperiod
from pandas._libs.tslibs import NaT, iNaT, period as libperiod
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period, get_period_field_arr,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/scalar/test_nat.py
Expand Up @@ -4,7 +4,7 @@
import pytest
import pytz

from pandas._libs.tslib import iNaT
from pandas._libs.tslibs import iNaT

from pandas import (
DatetimeIndex, Index, NaT, Period, Series, Timedelta, TimedeltaIndex,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/scalar/timedelta/test_timedelta.py
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pytest

from pandas._libs.tslib import NaT, iNaT
from pandas._libs.tslibs import NaT, iNaT
import pandas.compat as compat

import pandas as pd
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/tseries/offsets/test_offsets.py
Expand Up @@ -5,13 +5,11 @@
import pytest
import pytz

import pandas._libs.tslib as tslib
from pandas._libs.tslib import NaT, Timestamp
from pandas._libs.tslibs import conversion, timezones
from pandas._libs.tslibs import (
NaT, OutOfBoundsDatetime, Timedelta, Timestamp, conversion, timezones)
from pandas._libs.tslibs.frequencies import (
INVALID_FREQ_ERR_MSG, get_freq_code, get_freq_str)
import pandas._libs.tslibs.offsets as liboffsets
from pandas._libs.tslibs.timedeltas import Timedelta
import pandas.compat as compat
from pandas.compat import range
from pandas.compat.numpy import np_datetime64_compat
Expand Down Expand Up @@ -124,7 +122,7 @@ def test_apply_out_of_range(self, tz_naive_fixture):
assert isinstance(result, datetime)
assert t.tzinfo == result.tzinfo

except tslib.OutOfBoundsDatetime:
except OutOfBoundsDatetime:
raise
except (ValueError, KeyError):
# we are creating an invalid offset
Expand Down