# Snowpark pandas Time series / date functionality
See https://pandas.pydata.org/docs/user_guide/timeseries.html as a reference.

In [1]:
from pathlib import Path
import sys
from snowflake.snowpark import Session
import modin.pandas as pd
# Not referenced by name below; presumably imported so the Snowpark pandas
# backend is registered with modin -- TODO confirm.
import snowflake.snowpark.modin.plugin  # noqa: F401

# `tests.parameters` is imported from the directory three levels above the
# current working directory, so the notebook must be started from its own
# folder. The previous `Path("__file__")` was a quoted literal (a relative
# path, not the `__file__` variable) and therefore resolved against the working
# directory as well; this spells that dependency out and yields the same path.
connection_parameters_path = str(Path.cwd().parent.parent.parent)
sys.path.append(connection_parameters_path)
from tests.parameters import CONNECTION_PARAMETERS

# Open the Snowpark session used by every cell below.
session = Session.builder.configs(CONNECTION_PARAMETERS).create()

In [2]:
import datetime
import numpy as np

##### Parsing time series information from various sources and formats

In [3]:
dti = pd.to_datetime(
    ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
)
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

##### Generate sequences of fixed-frequency dates and time spans

In [4]:
dti = pd.date_range("2018-01-01", periods=3, freq="h")
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq=None)

##### Manipulating and converting date times with timezone information

In [5]:
# TODO SNOW-783178: DatetimeIndex.tz_localize is not yet supported in Snowpark
# pandas, so this cell currently raises NotImplementedError and `dti` is left
# unchanged (still timezone-naive).
dti = dti.tz_localize("UTC")

NotImplementedError: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize

In [6]:
# TODO SNOW-1559264: DatetimeIndex.tz_convert is not yet supported in Snowpark
# pandas, so this cell currently raises NotImplementedError.
dti.tz_convert("US/Pacific")

NotImplementedError: Snowpark pandas does not yet support the method DatetimeIndex.tz_convert

##### Resampling or converting a time series to a particular frequency

In [7]:
# Five hourly timestamps starting at midnight on 2018-01-01, used as the index
# of an integer series 0..4.
idx = pd.date_range(start="2018-01-01", periods=5, freq="h")
ts = pd.Series(data=range(len(idx)), index=idx)
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: None, dtype: int64

In [8]:
ts.resample("2h").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: None, dtype: float64

##### Performing date and time arithmetic with absolute or relative time increments

In [9]:
friday = pd.Timestamp("2018-01-05")
friday.day_name()

'Friday'

In [10]:
# Add 1 day
saturday = friday + pd.Timedelta("1 day")
saturday.day_name()

'Saturday'

In [11]:
# Add 1 business day (Friday --> Monday)
monday = friday + pd.offsets.BDay()
monday.day_name()

'Monday'

In [12]:
rng = pd.date_range("2012-01-01", "2012-01-03")
ts = pd.Series(range(len(rng)), index=rng)
# `rng` holds only three daily timestamps, so taking the first five rows keeps
# the whole series.
ts = ts[:5]
# Shift the values down by one position; the first row has no predecessor and
# shows up as NaN.
ts.shift(1)

2012-01-01    NaN
2012-01-02    0.0
2012-01-03    1.0
Freq: None, dtype: float64

# Overview

##### For time series data, it’s conventional to represent the time component in the index of a Series or DataFrame so manipulations can be performed with respect to the time element.

In [13]:
pd.Series(range(3), index=pd.date_range("2000", freq="D", periods=3))

2000-01-01    0
2000-01-02    1
2000-01-03    2
Freq: None, dtype: int64

##### However, Series and DataFrame can also directly support the time component as data itself.

In [14]:
pd.Series(pd.date_range("2000", freq="D", periods=3))

0   2000-01-01
1   2000-01-02
2   2000-01-03
dtype: datetime64[ns]

##### Series and DataFrame have extended data type support and functionality for datetime, timedelta and Period data when passed into those constructors. DateOffset data however will be stored as object data.

In [15]:
pd.Series(pd.period_range("1/1/2011", freq="M", periods=3))

NotImplementedError: pandas type period[M] is not implemented

In [16]:
pd.Series([pd.DateOffset(1), pd.DateOffset(2)])

TypeError: Object of type DateOffset is not JSON serializable

In [17]:
pd.Series(pd.date_range("1/1/2011", freq="ME", periods=3))

0   2011-01-31
1   2011-02-28
2   2011-03-31
dtype: datetime64[ns]

##### Lastly, pandas represents null date times, time deltas, and time spans as NaT, which is useful for representing missing or null date-like values and behaves similarly to how np.nan does for float data.

In [18]:
pd.Timestamp(pd.NaT)

NaT

In [19]:
pd.Timedelta(pd.NaT)

NaT

In [20]:
pd.Period(pd.NaT)

NaT

In [21]:
# Equality acts as np.nan would
pd.NaT == pd.NaT

False

# Timestamps vs. time spans

##### Timestamp and Period can serve as an index. Lists of Timestamp and Period are automatically coerced to DatetimeIndex and PeriodIndex respectively.

In [22]:
# Three consecutive daily Timestamps; used as the index of a Series they are
# coerced to a DatetimeIndex.
dates = [pd.Timestamp(day) for day in ("2012-05-01", "2012-05-02", "2012-05-03")]

ts = pd.Series(data=np.random.randn(3), index=dates)

type(ts.index)

snowflake.snowpark.modin.plugin.extensions.datetime_index.DatetimeIndex

In [23]:
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [24]:
ts

2012-05-01    0.491895
2012-05-02   -1.479234
2012-05-03   -0.271105
Freq: None, dtype: float64

In [25]:
periods = [pd.Period("2012-01"), pd.Period("2012-02"), pd.Period("2012-03")]

ts = pd.Series(np.random.randn(3), periods)

type(ts.index)

NotImplementedError: pandas type period[M] is not implemented

# Converting to timestamps

In [26]:
# NOTE: no format is given, so Snowflake's automatic format detection is used;
# per the recorded output it does not recognize "Jul 31, 2009" and the cell
# raises SnowparkSQLException.
pd.to_datetime(pd.Series(["Jul 31, 2009", "Jan 10, 2010", None]))

Snowflake automatic format detection is used when a format is not provided.In this case Snowflake's auto format may yield different result values compared to pandas.See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details.


SnowparkSQLException: (1304): 01b6b451-0d07-1edc-0002-99038c96a20b: 100035 (22007): Timestamp 'Jul 31, 2009' is not recognized

In [27]:
pd.to_datetime(["2005/11/23", "2010/12/31"])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

##### If you use dates which start with the day first (i.e. European style), you can pass the dayfirst flag:

In [28]:
pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)

DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)

In [29]:
# dayfirst=True is not strict: month 14 does not exist, so this string is
# parsed month-first (the recorded output is 2012-04-14).
pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)



DatetimeIndex(['2012-04-14 10:00:00'], dtype='datetime64[ns]', freq=None)

##### You can also use the DatetimeIndex constructor directly:

In [30]:
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)

##### The string ‘infer’ can be passed in order to set the frequency of the index as the inferred frequency upon creation:

In [31]:
# NOTE(review): the recorded output still reports freq=None, so the inferred
# frequency does not show up in the repr -- confirm whether freq="infer" is
# honored by Snowpark pandas.
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer")

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)

## Providing a format argument

In [32]:
pd.to_datetime("2010/11/12", format="%Y/%m/%d")

Timestamp('2010-11-12 00:00:00')

In [33]:
pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M")

Timestamp('2010-11-12 00:00:00')

## Assembling datetime from multiple DataFrame columns

In [34]:
df = pd.DataFrame(
    {"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]}
)


pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [35]:
pd.to_datetime(df[["year", "month", "day"]])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

## Invalid data

##### The default behavior, errors='raise', is to raise when unparsable:

In [36]:
pd.to_datetime(['2009/07/31', 'asd'], errors='raise')

ValueError: time data "asd" doesn't match format "%Y/%m/%d", at position 1. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

##### Pass errors='coerce' to convert unparsable data to NaT (not a time):

In [37]:
pd.to_datetime(['2009/07/31', 'asd'], errors='coerce')

DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)

## Epoch timestamps

In [38]:
pd.to_datetime(
    [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s"
)


DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [39]:
pd.to_datetime(
    [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="ms"
)


DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',
               '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',
               '1970-01-16 15:01:05.705000'],
              dtype='datetime64[ns]', freq=None)

##### Constructing a Timestamp or DatetimeIndex with an epoch timestamp with the tz argument specified will raise a ValueError. If you have epochs in wall time in another timezone, you can read the epochs as timezone-naive timestamps and then localize to the appropriate timezone:

In [40]:
pd.Timestamp(1262347200000000000).tz_localize("US/Pacific")

Timestamp('2010-01-01 12:00:00-0800', tz='US/Pacific')

In [41]:
pd.DatetimeIndex([1262347200000000000]).tz_localize("US/Pacific")

NotImplementedError: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize

### From timestamps to epoch

In [42]:
stamps = pd.date_range("2012-10-08 18:15:05", periods=4, freq="D")
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq=None)

#### We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by the “unit” (1 second).

In [43]:
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

## Using the origin parameter

In [44]:
pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01"))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

In [45]:
pd.to_datetime([1, 2, 3], unit="D")

DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)

# Generating ranges of timestamps

In [46]:
dates = [
    datetime.datetime(2012, 5, 1),
    datetime.datetime(2012, 5, 2),
    datetime.datetime(2012, 5, 3),
]


# Note the frequency information
index = pd.DatetimeIndex(dates)

index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [47]:
# Automatically converted to DatetimeIndex
index = pd.Index(dates)

index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

##### In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular frequency, we can use the date_range() and bdate_range() functions to create a DatetimeIndex. The default frequency for date_range is a calendar day while the default for bdate_range is a business day:

In [48]:
start = datetime.datetime(2011, 1, 1)

end = datetime.datetime(2012, 1, 1)

index = pd.date_range(start, end)

index

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq=None)

In [49]:
index = pd.bdate_range(start, end)

index

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq=None)

##### Convenience functions like date_range and bdate_range can utilize a variety of frequency aliases:

In [50]:
pd.date_range(start, periods=1000, freq="ME")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq=None)

In [51]:
pd.bdate_range(start, periods=250, freq="BQS")

DatetimeIndex(['2011-01-03', '2011-04-01', '2011-07-01', '2011-10-03',
               '2012-01-02', '2012-04-02', '2012-07-02', '2012-10-01',
               '2013-01-01', '2013-04-01',
               ...
               '2071-01-01', '2071-04-01', '2071-07-01', '2071-10-01',
               '2072-01-01', '2072-04-01', '2072-07-01', '2072-10-03',
               '2073-01-02', '2073-04-03'],
              dtype='datetime64[ns]', length=250, freq=None)

##### date_range and bdate_range make it easy to generate a range of dates using various combinations of parameters like start, end, periods, and freq. The start and end dates are strictly inclusive, so dates outside of those specified will not be generated:

In [52]:
pd.date_range(start, end, freq="BME")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq=None)

In [53]:
pd.date_range(start, end, freq="W")

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq=None)

In [54]:
# NOTE(review): the recorded output lists only 14 dates although periods=20 is
# requested; stock pandas returns 20 business days for this call -- looks like
# a Snowpark pandas discrepancy, confirm.
pd.bdate_range(end=end, periods=20)

DatetimeIndex(['2011-12-13', '2011-12-14', '2011-12-15', '2011-12-16',
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', freq=None)

In [55]:
# NOTE(review): the recorded output lists only 14 dates although periods=20 is
# requested; stock pandas returns 20 business days for this call -- looks like
# a Snowpark pandas discrepancy, confirm.
pd.bdate_range(start=start, periods=20)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14', '2011-01-17', '2011-01-18',
               '2011-01-19', '2011-01-20'],
              dtype='datetime64[ns]', freq=None)

##### Specifying start, end, and periods will generate a range of evenly spaced dates from start to end inclusively, with periods number of elements in the resulting DatetimeIndex:



In [56]:
pd.date_range("2018-01-01", "2018-01-05", periods=5)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq=None)

In [57]:
pd.date_range("2018-01-01", "2018-01-05", periods=10)

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 10:40:00',
               '2018-01-01 21:20:00', '2018-01-02 08:00:00',
               '2018-01-02 18:40:00', '2018-01-03 05:20:00',
               '2018-01-03 16:00:00', '2018-01-04 02:40:00',
               '2018-01-04 13:20:00', '2018-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

## Custom frequency ranges

##### bdate_range can also generate a range of custom frequency dates by using the weekmask and holidays parameters. These parameters will only be used if a custom frequency string is passed.



In [58]:
weekmask = "Mon Wed Fri"

holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]

pd.bdate_range(start, end, freq="C", weekmask=weekmask, holidays=holidays)

NotImplementedError: custom frequency is not supported in Snowpark pandas API

# Indexing

In [59]:
rng = pd.date_range(start, end, freq="BME")

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq=None)

In [60]:
rng = pd.date_range(start, end, freq="ME")

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],
              dtype='datetime64[ns]', freq=None)

In [61]:
ts[:5].index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31'],
              dtype='datetime64[ns]', freq=None)

In [62]:
ts[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-31',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq=None)

##### To provide convenience for accessing longer time series, you can also pass in the year or year and month as strings:

In [63]:
ts["2011"]

2011-01-31    0.356705
2011-02-28   -0.334667
2011-03-31   -0.918495
2011-04-30    0.661803
2011-05-31   -0.639713
2011-06-30    0.413448
2011-07-31   -0.679730
2011-08-31   -0.455768
2011-09-30   -1.908129
2011-10-31   -0.316739
2011-11-30   -1.619617
2011-12-31   -2.583194
Freq: None, dtype: float64

In [64]:
ts["2011-6"]

2011-06-30    0.413448
Freq: None, dtype: float64

##### This type of slicing will work on a DataFrame with a DatetimeIndex as well. Since the partial string selection is a form of label slicing, the endpoints will be included. This would include matching times on an included date:

In [65]:
# Seed NumPy's global RNG so the random values in `dft` are reproducible.
np.random.seed(0)

# 100,000 rows of standard-normal values in a single column "A", indexed at
# one-minute frequency starting 2013-01-01.
dft = pd.DataFrame(
    np.random.randn(100000, 1),
    columns=["A"],
    index=pd.date_range("20130101", periods=100000, freq="min"),
)


dft

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-03-11 10:35:00,-0.337715
2013-03-11 10:36:00,-2.028548
2013-03-11 10:37:00,0.726182
2013-03-11 10:38:00,-1.167831


In [66]:
dft.loc["2013"]

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-03-11 10:35:00,-0.337715
2013-03-11 10:36:00,-2.028548
2013-03-11 10:37:00,0.726182
2013-03-11 10:38:00,-1.167831


##### This starts on the very first time in the month, and includes the last date and time for the month:

In [67]:
dft["2013-1":"2013-2"]

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-02-28 23:55:00,-3.284701
2013-02-28 23:56:00,0.475275
2013-02-28 23:57:00,0.501877
2013-02-28 23:58:00,0.222138


##### This specifies a stop time that includes all of the times on the last day:

In [68]:
dft["2013-1":"2013-2-28"]

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-02-28 23:55:00,-3.284701
2013-02-28 23:56:00,0.475275
2013-02-28 23:57:00,0.501877
2013-02-28 23:58:00,0.222138


##### This specifies an exact stop time (and is not the same as the above):

In [69]:
dft["2013-1":"2013-2-28 00:00:00"]

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-02-27 23:56:00,-0.036098
2013-02-27 23:57:00,-1.679458
2013-02-27 23:58:00,0.443969
2013-02-27 23:59:00,1.390478


##### We are stopping on the included end-point as it is part of the index:

In [70]:
dft["2013-1-15":"2013-1-15 12:30:00"]

Unnamed: 0,A
2013-01-15 00:00:00,-1.195459
2013-01-15 00:01:00,1.543360
2013-01-15 00:02:00,0.237914
2013-01-15 00:03:00,0.767214
2013-01-15 00:04:00,-2.109814
...,...
2013-01-15 12:26:00,0.817564
2013-01-15 12:27:00,-0.649760
2013-01-15 12:28:00,1.245159
2013-01-15 12:29:00,0.300473


##### DatetimeIndex partial string indexing also works on a DataFrame with a MultiIndex:

In [71]:
dft2 = pd.DataFrame(
    np.random.randn(20, 1),
    columns=["A"],
    index=pd.MultiIndex.from_product(
        [pd.date_range("20130101", periods=10, freq="12h"), ["a", "b"]]
    ),
)


dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,-0.483797
2013-01-01 00:00:00,b,1.288057
2013-01-01 12:00:00,a,-0.129879
2013-01-01 12:00:00,b,-0.198078
2013-01-02 00:00:00,a,-0.334488
2013-01-02 00:00:00,b,-0.391443
2013-01-02 12:00:00,a,-0.612406
2013-01-02 12:00:00,b,-0.676524
2013-01-03 00:00:00,a,1.32723
2013-01-03 00:00:00,b,-0.448695


In [72]:
dft2.loc["2013-01-05"]

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,1.122017
2013-01-05 00:00:00,b,0.112339
2013-01-05 12:00:00,a,1.37234
2013-01-05 12:00:00,b,2.062562


In [73]:
# NOTE: this rebinds `idx` (a DatetimeIndex in an earlier cell) to
# pd.IndexSlice.
idx = pd.IndexSlice

# DataFrame.swaplevel is not yet supported in Snowpark pandas, so this line
# raises NotImplementedError and `dft2` keeps its original level order.
dft2 = dft2.swaplevel(0, 1).sort_index()

dft2.loc[idx[:, "2013-01-05"], :]

NotImplementedError: Snowpark pandas does not yet support the method DataFrame.swaplevel

##### Slicing with string indexing also honors UTC offset.

## Slice vs. exact match

##### The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match.

Consider a Series object with a minute resolution index:

In [74]:
series_minute = pd.Series(
    [1, 2, 3],
    pd.DatetimeIndex(
        ["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"]
    ),
)


series_minute.index.resolution

NotImplementedError: Index.resolution is not yet implemented

##### A timestamp string less accurate than a minute gives a Series object.

In [75]:
# "2011-12-31 23" has hour resolution, which is coarser than the
# minute-resolution index, so it is treated as a slice and a Series is returned.
series_minute["2011-12-31 23"]

2011-12-31 23:59:00    1
Freq: None, dtype: int64

## Exact indexing

In [76]:
# Slice with exact datetime endpoints rather than partial date strings.
exact_start = datetime.datetime(2013, 1, 1)
exact_stop = datetime.datetime(2013, 2, 28)
dft[exact_start:exact_stop]

Unnamed: 0,A
2013-01-01 00:00:00,1.764052
2013-01-01 00:01:00,0.400157
2013-01-01 00:02:00,0.978738
2013-01-01 00:03:00,2.240893
2013-01-01 00:04:00,1.867558
...,...
2013-02-27 23:56:00,-0.036098
2013-02-27 23:57:00,-1.679458
2013-02-27 23:58:00,0.443969
2013-02-27 23:59:00,1.390478


In [77]:
# Slice between exact datetimes that carry a time of day (10:12:00).
minute_start = datetime.datetime(2013, 1, 1, 10, 12, 0)
minute_stop = datetime.datetime(2013, 2, 28, 10, 12, 0)
dft[minute_start:minute_stop]

Unnamed: 0,A
2013-01-01 10:12:00,0.605120
2013-01-01 10:13:00,0.895556
2013-01-01 10:14:00,-0.131909
2013-01-01 10:15:00,0.404762
2013-01-01 10:16:00,0.223844
...,...
2013-02-28 10:08:00,0.746108
2013-02-28 10:09:00,1.754498
2013-02-28 10:10:00,-0.622373
2013-02-28 10:11:00,-0.449793


## Truncating & fancy indexing

In [78]:
rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")

ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)

ts2.truncate(before="2011-11", after="2011-12")

NotImplementedError: Snowpark pandas does not yet support the method Series.truncate

In [79]:
ts2["2011-11":"2011-12"]

2011-11-06   -0.433416
2011-11-13    0.773872
2011-11-20   -0.834212
2011-11-27   -0.728240
2011-12-04    0.674975
2011-12-11   -0.477772
2011-12-18    1.492301
2011-12-25   -0.658391
Freq: None, dtype: float64

# Time/date components

In [80]:
idx = pd.date_range(start="2019-12-29", freq="D", periods=4)

idx.isocalendar()

NotImplementedError: Index.isocalendar is not yet implemented

In [81]:
idx.to_series().dt.isocalendar()

Unnamed: 0,year,week,day
2019-12-29,2019,52,7
2019-12-30,2020,1,1
2019-12-31,2020,1,2
2020-01-01,2020,1,3


# DateOffset objects

In [82]:
ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki")

ts + pd.Timedelta(days=1)

Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')

In [83]:
ts + pd.DateOffset(days=1)

Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')

# Time Series-related instance methods

In [84]:
# Reuses `rng`, the 2011 month-end date range created in the Indexing section.
ts = pd.Series(range(len(rng)), index=rng)

# Keep only the first five rows.
ts = ts[:5]

# Shift the values down by one position; the first row becomes NaN.
ts.shift(1)

2011-01-31    NaN
2011-02-28    0.0
2011-03-31    1.0
2011-04-30    2.0
2011-05-31    3.0
Freq: None, dtype: float64

In [85]:
ts.shift(5, freq="D")

NotImplementedError: shifting index values not yet supported.

In [86]:
ts.shift(5, freq=pd.offsets.BDay())

NotImplementedError: shifting index values not yet supported.

In [87]:
ts.shift(5, freq="BME")

NotImplementedError: shifting index values not yet supported.

## Frequency conversion

In [88]:
dr = pd.date_range("1/1/2010", periods=3, freq=3 * pd.offsets.BDay())

ts = pd.Series(np.random.randn(3), index=dr)

ts.asfreq(pd.offsets.BDay())

ValueError: Length of values (3) does not match length of index (1)

# Converting between representations

In [89]:
rng = pd.date_range("1/1/2012", periods=5, freq="ME")

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts

2012-01-31   -1.384413
2012-02-29    2.278423
2012-03-31    1.384368
2012-04-30   -0.940811
2012-05-31    0.021496
Freq: None, dtype: float64

In [90]:
ps = ts.to_period()

ps

NotImplementedError: Snowpark pandas does not yet support the method Series.to_period

In [91]:
# Depends on `ps` from the previous cell; Series.to_period is not yet supported
# there, so `ps` is never bound and this cell fails with NameError.
ps.to_timestamp()

NameError: name 'ps' is not defined

### Timestamp Binary Operations

In [92]:
pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')

Timedelta('396 days 03:00:00')

In [93]:
pd.Timestamp("2014-08-01 10:00") - pd.Timestamp("2014-07-26 03:00")

Timedelta('6 days 07:00:00')

In [94]:
# Difference between two Timestamps built from keyword components.
later = pd.Timestamp(year=2017, month=1, day=1, hour=12)
earlier = pd.Timestamp(year=2015, month=2, day=19, hour=9)
later - earlier

Timedelta('682 days 03:00:00')

In [95]:
# Subtracting a later timestamp from an earlier one gives a negative Timedelta.
pd.to_datetime("2018-8-26 15:09:02") - pd.to_datetime("2018-09-26 12:00:00")

Timedelta('-31 days +03:09:02')