# Snowpark pandas Time series / date functionality

In [1]:
from pathlib import Path
import sys
from snowflake.snowpark import Session
import modin.pandas as pd
import snowflake.snowpark.modin.plugin

# Make `tests.parameters` importable by adding a directory three levels above
# the kernel's current working directory to sys.path (presumably the
# repository root — confirm). Notebooks have no `__file__`, so the location is
# derived from the working directory rather than from this file's path.
connection_parameters_path = str(Path.cwd().parent.parent.parent)
if connection_parameters_path not in sys.path:
    # Guard keeps re-runs of this cell from appending duplicate entries.
    sys.path.append(connection_parameters_path)
from tests.parameters import CONNECTION_PARAMETERS

# Create the Snowflake session from the imported connection parameters.
session = Session.builder.configs(CONNECTION_PARAMETERS).create()

In [2]:
import datetime
import numpy as np

### Parsing time series information from various sources and formats

In [3]:
# One calendar day given as a string, a NumPy datetime64 and a
# standard-library datetime, all parsed by a single to_datetime call.
dti = pd.to_datetime(
    [
        "1/1/2018",
        np.datetime64("2018-01-01"),
        datetime.datetime(2018, 1, 1),
    ]
)
dti

`to_datetime` implementation has mismatches with pandas:
Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. In this case Snowflake's auto format may yield different result values compared to pandas..


DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

### Generate sequences of fixed-frequency dates and time spans

In [4]:
# Three hourly timestamps starting at midnight on 2018-01-01.
dti = pd.date_range(start="2018-01-01", periods=3, freq="h")
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq=None)

### Manipulating and converting date times with timezone information

In [5]:
# TODO SNOW-1635620: uncomment when Timedelta is implemented
# NOTE(review): tz_localize is a timezone operation; confirm that Timedelta
# support (SNOW-1635620) is really what blocks it.
# dti = dti.tz_localize("UTC")

In [6]:
# TODO SNOW-1635620: uncomment when Timedelta is implemented
# NOTE(review): tz_convert presumably needs the tz-aware `dti` produced by the
# commented-out tz_localize cell above, so enable both together — confirm.
# dti.tz_convert("US/Pacific")

### Resampling or converting a time series to a particular frequency

In [7]:
# Five hourly timestamps indexing a simple 0..4 counter series.
idx = pd.date_range(start="2018-01-01", periods=5, freq="h")
ts = pd.Series(data=range(len(idx)), index=idx)
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: None, dtype: int64

In [8]:
# Downsample the hourly series into 2-hour bins and take the mean of each bin.
ts.resample("2h").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: None, dtype: float64

### Performing date and time arithmetic with absolute or relative time increments

In [9]:
# 2018-01-05 falls on a Friday; day_name() returns the weekday name.
friday = pd.Timestamp("2018-01-05")
friday.day_name()

'Friday'

In [10]:
# Adding a one-day Timedelta moves Friday to the next calendar day.
saturday = friday + pd.Timedelta("1 day")
saturday.day_name()

'Saturday'

In [11]:
# A business-day offset skips the weekend, so Friday + 1 BDay lands on Monday.
monday = friday + pd.offsets.BDay()
monday.day_name()

'Monday'

In [12]:
# Three daily timestamps (2012-01-01 through 2012-01-03) indexing a 0..2 counter.
rng = pd.date_range(start="2012-01-01", end="2012-01-03")
ts = pd.Series(data=range(len(rng)), index=rng)
# Keep at most the first five rows; the series only has three, so nothing is dropped.
ts = ts[:5]
# Shift the values one position forward; the first slot becomes NaN.
ts.shift(1)

2012-01-01    NaN
2012-01-02    0.0
2012-01-03    1.0
Freq: None, dtype: float64

### Time Series-related instance methods

### From timestamps to epoch

In [13]:
# Four daily timestamps that all share the 18:15:05 time of day.
stamps = pd.date_range(start="2012-10-08 18:15:05", periods=4, freq="D")
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [14]:
# TODO SNOW-1635620: uncomment when Timedelta is implemented
# Seconds since the Unix epoch: subtract 1970-01-01, then floor-divide by one second.
# (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

### DateOffset objects

In [15]:
# Midnight on 2016-10-30 in Helsinki. Note: this rebinds `ts`, which earlier
# cells used for a Series.
ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki")

# Timedelta(days=1) adds 24 elapsed hours; on this date the recorded result is
# 23:00 the same day (presumably because DST ends in Helsinki on 2016-10-30).
ts + pd.Timedelta(days=1)

Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')

In [16]:
# DateOffset(days=1) does calendar arithmetic: same wall-clock time on the next day.
ts + pd.DateOffset(days=1)

Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')

### Timestamp Binary Operations

In [17]:
# Elapsed time between two parsed datetimes (later minus earlier).
(
    pd.to_datetime("2018-10-26 12:00:00")
    - pd.to_datetime("2017-09-25 09:00:00")
)

Timedelta('396 days 03:00:00')

In [18]:
# Subtracting one Timestamp from another yields a Timedelta.
pd.Timestamp("2014-08-01 10:00") - pd.Timestamp("2014-07-26 03:00")

Timedelta('6 days 07:00:00')

In [19]:
# Timestamps built from keyword components; their difference is a Timedelta.
(
    pd.Timestamp(year=2017, month=1, day=1, hour=12)
    - pd.Timestamp(year=2015, month=2, day=19, hour=9)
)

Timedelta('682 days 03:00:00')

In [20]:
# Earlier minus later gives a negative Timedelta.
pd.to_datetime("2018-8-26 15:09:02") - pd.to_datetime("2018-09-26 12:00:00")

Timedelta('-31 days +03:09:02')