# Pandas Time Series

In [None]:
from pandas import DataFrame
from pandas import Series
from numpy.random import randint
from numpy.random import randn
import numpy as np
import pandas as pd
import copy

## Background

I find Pandas' handling of time much easier to work with than the time handling in standard Python 3.x.

Pandas captures four general time-related concepts:

1. Date times: A specific date and time with timezone support. Similar to datetime.datetime from the standard library.
2. Time deltas: An absolute time duration. Similar to datetime.timedelta from the standard library.
3. Time spans: A span of time defined by a point in time and its associated frequency.
4. Date offsets: A relative time duration that respects calendar arithmetic. Similar to dateutil.relativedelta.relativedelta from the dateutil package.

![concepts](pandas_time_concepts.png) 

## Create `Series` with `D`ay Frequency

In [None]:
# Ten consecutive daily periods beginning 2019-03-01, wrapped in a Series.
time_index = pd.Series(pd.period_range(start='2019-3', periods=10, freq='D'))
# Show the Series, its container type, and the type of a single element.
print(time_index, type(time_index), type(time_index[0]), sep='\n')

## Create `DataFrame` Using `time_index`

In [None]:
# 10x4 frame of random integers drawn from 1..10, indexed by the daily periods.
df_time1 = DataFrame(
    randint(1, 11, size=(10, 4)),
    index=time_index,
    columns=['A', 'B', 'C', 'D'],
)
print(df_time1)
# Keep an independent deep copy of the freshly built frame.
df_time_hold = df_time1.copy(deep=True)

### Metadata About `DataFrame` 

In [None]:
# Types of: the frame, one cell, the index object, and one raw index value.
print(
    type(df_time1),
    type(df_time1.iat[0, 0]),
    type(df_time1.index),
    type(df_time1.index.values[0]),
    sep='\n',
)

## Converting Periodic Index to String Values 
- Needed for plot with Bokeh
- `date_str` is of type period

In [None]:
# Mirror the index into an ordinary column named 'date_str'.
df_time1['date_str'] = df_time1.index
# Show the frame, the column's container type, and the type of its first cell.
print(df_time1, type(df_time1['date_str']), type(df_time1.iat[0, 4]), sep='\n')

In [None]:
# Convert the column from Period values to Timestamps.
# The vectorized .dt accessor does the conversion for the whole column at
# once, replacing a per-element apply(lambda x: x.to_timestamp()).
df_time1['date_str'] = df_time1['date_str'].dt.to_timestamp()

In [None]:
# Display the 'date_str' column after the conversion.
df_time1['date_str']

In [None]:
# The statement below does not work here because the column is a Series,
# not a <class 'pandas.core.indexes.period.PeriodIndex'>:
#   df_time1['date_str'] = df_time1['date_str'].to_timestamp()
# Show the frame, the column's container type, and the type of its first cell.
print(df_time1, type(df_time1['date_str']), type(df_time1.iat[0, 4]), sep='\n')

In [None]:
# Render every timestamp in the column as a 'YYYY-MM-DD' string.
df_time1['date_str'] = df_time1['date_str'].dt.strftime('%Y-%m-%d')
# Show the frame, the column's container type, and the type of its first cell.
print(df_time1, type(df_time1['date_str']), type(df_time1.iat[0, 4]), sep='\n')

### In One Statement

In [None]:
# Start again from a frame without the helper column.
df_time1.drop('date_str', axis=1, inplace=True)

# df_time1.index is a <class 'pandas.core.indexes.period.PeriodIndex'>,
# so .to_timestamp() can be applied and yields a DatetimeIndex.
#
# An index has no .dt accessor (that exists only on a Series), but a
# DatetimeIndex exposes .strftime() directly. Calling it with an explicit
# format states the intended output, instead of relying on how
# .astype('str') happens to render the values.

df_time1['date_str'] = df_time1.index.to_timestamp().strftime('%Y-%m-%d')
print(df_time1)
print(type(df_time1['date_str']))
print(type(df_time1.iloc[0, 4]))

### Writing to CSV File

In [None]:
# Remove the helper column in place, then show what is left.
df_time1.drop(columns='date_str', inplace=True)
print(df_time1)

In [None]:
# Persist the frame (index included) as CSV in the working directory.
df_time1.to_csv(path_or_buf='./time_index.csv')

In [None]:
# IPython shell escape: print the raw contents of the CSV file.
!cat time_index.csv

## From CSV String to Index with Period
- Data in CSV stored as string 

In [None]:
# Load the CSV without designating a header row or an index column, so every
# line of the file arrives as ordinary data.
df_str = pd.read_csv('./time_index.csv', header=None, index_col=None)
print(df_str)

- **NOTICE:** The real index is column 0 

In [None]:
# Keep everything except the first row and the first column, cast to int64.
df_time2 = df_str.iloc[1:, 1:].astype(np.int64)
print(df_time2)

- The `.astype(np.int64)` is required here: because the header row was read in as data, every column holds strings until it is cast back to integers

In [None]:
# Parse the first column (skipping the first row) into datetimes for use as
# the new index. Clearing the Series name keeps the printed output easy to read.
date_time = pd.to_datetime(df_str.iloc[1:, 0]).rename(None)
print(date_time)

In [None]:
# Install the parsed datetimes as the row index (datetime64 type).
df_time2.set_index(date_time, inplace=True)
print(df_time2, df_time2.index, sep='\n')

In [None]:
# Display row 0 without its first cell, to see what the column labels are

df_str.iloc[0, 1:]

In [None]:
# Assigning the Series itself did not label the columns correctly, so the
# underlying array of values is used instead.
df_time2.columns = df_str.iloc[0, 1:].to_numpy()
print(df_time2, df_time2.index, sep='\n')

In [None]:
# Replace the datetime64 index with its daily-period (period[D]) equivalent.
df_time2.index = df_time2.index.to_period('D')
print(df_time2, df_time2.index, sep='\n')

## Dates Through Pandas

### Single Time State

In [None]:
# Parse one date-time string into a single pandas timestamp object.
d1 = pd.to_datetime('2017-03-08 13:34')
print(type(d1), d1, sep='\n')

### Current Instance Timestamp

In [None]:
# Capture the current time as a timezone-naive Timestamp.
now = pd.Timestamp.now(tz=None)
print(now)
# Bare expression: the notebook also renders the repr of the value.
now

### `DatetimeIndex` Data Type

In [None]:
# Ten consecutive day strings, '2019-03-01' through '2019-03-10'.
time_list = [f'2019-03-{day_of_month:02d}' for day_of_month in range(1, 11)]

# Converting a list of date strings yields a DatetimeIndex.
time_stamp_array = pd.to_datetime(time_list)
print(time_stamp_array)

## Time Math

### Time Deltas


In [None]:
# A one-day duration, parsed from its string form.
pd.Timedelta( "1D")

In [None]:
# A one-minute duration. The unit is spelled 'min' because a bare 'M' is
# ambiguous (month vs. minute) and recent pandas versions reject it for
# Timedelta; a Timedelta cannot represent a calendar month in any case.
pd.Timedelta("1min")

In [None]:
# A one-hour duration. Lowercase 'h' is used because the uppercase 'H'
# alias is deprecated in recent pandas releases.
pd.Timedelta("1h")

### Date Addition and Subtraction

In [None]:
# Two midnight timestamps one year apart, plus a one-hour duration.
d1 = pd.Timestamp('2017-03-08')
d2 = pd.Timestamp('2018-03-08')
one_hour = pd.Timedelta(hours=1)

In [None]:
# Shift the timestamp one hour forward, then one hour back.
print(d1 + one_hour, d1 - one_hour, sep='\n')

In [None]:
# Subtracting timestamps yields a signed duration; show both directions.
print(
    ' '.join(['d2 - d1', str(d2 - d1)]),
    ' '.join(['d1 - d2', str(d1 - d2)]),
    sep='\n',
)

### Date Division

In [None]:
# Divide the gap between the two dates by one_hour: how many hours it spans.
(d2 - d1) / one_hour

## Time Periods and Period Arithmetic

In [None]:
# Annual period for 2019, with the year ending in December.
yp = pd.Period('2019', freq='A-DEC')

# Show the period itself, followed by its first and last instants.
print(repr(yp), yp.start_time, yp.end_time, sep='\n')

- The `freq` is annual (`A`) ending in December (`DEC`)

In [None]:
# Annual period labelled 2019, with the year ending in January.
yp2 = pd.Period('2019', freq='A-JAN')

# Show the period itself, followed by its first and last instants.
print(repr(yp2), yp2.start_time, yp2.end_time, sep='\n')

In [None]:
# Quarterly frequency: first quarter of a year that ends in December.
q1 = pd.Period('2019Q1', freq='Q-DEC')

# Show the period itself, followed by its first and last instants.
print(repr(q1), q1.start_time, q1.end_time, sep='\n')

- The `freq` is quarterly (`Q`) for the year with the year ending in December `DEC`

- **Different Time Span**

    `M` is month<br>
    `D` is day<br>
    `H` is hours<br>
    `T` is minute<br>
    `S` is seconds

### Changing Frequency

In [None]:
# Show the annual period before converting it.
print(repr(yp2))

# Re-express it at monthly frequency, anchored at the start of the span.
ym2 = yp2.asfreq('M', how='start')

print(repr(ym2))

- To see all frequencies go to http://pandas.pydata.org/pandas-docs/stable/timeseries.html#period



## Addition, Subtraction 


### Addition

In [None]:
# A daily period on 28 February of a leap year.
day = pd.Period('2020-02-28', freq='D')

# Leap-year flag, then the period one day later.
print(day.is_leap_year, day + 1, sep='\n')

- **NOTICE:** `Period` understands the calendar

## Time Offsets

In [None]:
# An hourly period. Lowercase 'h' is used because the uppercase 'H'
# frequency alias is deprecated in recent pandas releases.
hours = pd.Period('2020-02-28 01:00:00', freq='h')
print(repr(hours))

# A relative offset of three hours.
hours3 = pd.offsets.Hour(3)
print(repr(hours3))

# 12 * 3 hours = 36 hours after 01:00 on the 28th, i.e. 13:00 on the 29th.
sometime_tomorrow = hours + 12 * hours3
print(sometime_tomorrow)


- **NOTICE:** `hours` uses a 24-hour clock

### Subtraction

- Subtraction works as you would expect it to

## Period Indexes

In [None]:
# Quarterly periods spanning from the start label to the end label.
idx = pd.period_range(start='2014', end='2019', freq='Q-DEC')
print(idx)

In [None]:
# The same kind of quarterly range, defined by a start and a count of
# periods rather than by an end label.
idx2 = pd.period_range('2014', periods=21, freq='Q-DEC')
# Print the index built in this cell (the cell previously printed `idx`).
print(idx2)

In [None]:
# First and last instants covered by the final (21st) period.
print(idx2[20].start_time, idx2[20].end_time, sep='\n')

### Converting to `DatetimeIndex`

In [None]:
# Convert the PeriodIndex to timestamps taken at the start of each period.
idx3 = idx2.to_timestamp(how='start')
print(idx3)

- `.to_datetime()` has been deprecated; use `.to_timestamp()` to convert a `PeriodIndex` to a `DatetimeIndex`

### Converting to `PeriodIndex`

In [None]:
# Convert the timestamps back to periods; no frequency is passed explicitly.
idx4 = idx3.to_period(freq=None)
print(idx4)

## Resampling
- Aggregating higher frequency data to lower frequency is called downsampling
- Converting lower frequency to higher frequency is called upsampling

### Downsampling Example

In [None]:
# 1826 consecutive daily timestamps starting 2014-01-01 (ending 2018-12-31).
day_data = pd.date_range('1/1/2014', periods=1826, freq='D')

# First eleven dates.
print(day_data[:11])

# Last eleven dates. The slice runs to the end so the final date is shown;
# the earlier [-11:-1] slice stopped one short and omitted it.
print(day_data[-11:])

In [None]:
# One standard-normal random value per day, indexed by the daily timestamps.
ts = Series(randn(day_data.size), index=day_data)
# Peek at both ends of the series.
print(ts.head(), ts.tail(), sep='\n')

In [None]:
# Downsample the daily series to weekly bins, averaging the values in each.
ts_week = ts.resample(rule='W').mean()
# Peek at both ends of the weekly result.
print(ts_week.head(), ts_week.tail(), sep='\n')

# End of Notebook