# Timeseries

In [None]:
import numpy as np
import pandas as pd
%matplotlib inline
# Keep rendered frames compact: at most 12 rows and 8 columns are shown,
# and text output is wrapped at 80 characters.
pd.set_option('display.max_rows', 12)
pd.set_option('display.max_columns', 8)
pd.set_option('display.width', 80)

A nice blog post analyzing Seattle bikeshare data with pandas:

https://jakevdp.github.io/blog/2015/10/17/analyzing-pronto-cycleshare-data-with-python-and-pandas/

Download the NYC Citi Bike trip data for September 2015 from:

https://www.citibikenyc.com/system-data

```
unzip data/201509-citibike-tripdata.zip -d data
```

In [None]:
# Shell out to `head` to show the first five lines of the raw CSV file.
!head -5 data/201509-citibike-tripdata.csv

In [None]:
# Load the September 2015 trip records, parsing both timestamp columns into
# datetime values. `infer_datetime_format` is intentionally not passed: it is
# deprecated in pandas 2.x (a consistent format is inferred by default) and
# never affected the parsed values, only the parsing speed.
df = pd.read_csv(
    'data/201509-citibike-tripdata.csv',
    parse_dates=['starttime', 'stoptime'],
)
df

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Hourly ride counts: index the trips by start time, bucket them into
# one-hour bins and count the bike ids recorded in each bin.
# `resample(..., how=...)` no longer exists in current pandas; the
# aggregation is now a method on the resampler object. The lowercase 'h'
# alias is used because the uppercase 'H' spelling is deprecated in recent
# releases.
res = (df
   .set_index('starttime')
   .bikeid
   .resample('h')
   .count()
)
res

In [None]:
# Draw `res` as a line chart on a 12x12 figure.
res.plot.line(figsize=(12,12))

In [None]:
# Daily ride counts split by rider type: group the trips by calendar day of
# `starttime` and by `usertype`, count the non-null start times in each
# group, then move the `usertype` index level into the columns so there is
# one column per rider type.
# `.unstack('usertype')` replaces the earlier reset_index / pivot / double
# transpose sequence, which built the same wide table in a roundabout way.
res = (df
   .groupby([pd.Grouper(key='starttime', freq='D'), 'usertype'])
   .starttime
   .count()
   .unstack('usertype')
 )
res

In [None]:
# Draw `res` as a line chart on a 12x12 figure (one line per column when
# `res` is a DataFrame).
res.plot.line(figsize=(12,12))

In [None]:
# Zoom in on hourly ride counts for 7-14 September 2015.
# The index is sorted first because label slicing with date strings needs a
# monotonic DatetimeIndex; `.loc` makes the row selection explicit.
# The aggregation is a resampler method because `resample(how=...)` no
# longer exists in current pandas, and 'h' replaces the deprecated 'H' alias.
r = df.set_index('starttime').sort_index()
(r.loc['20150907':'20150914']
  .bikeid
  .resample('h')
  .count()
  .plot
  .line(figsize=(12,12))
 )

# Frequency

http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects

In [None]:
# Five timestamps starting 2013-01-01 09:00; no `freq` is given, so the
# date_range default spacing applies.
i = pd.date_range(start='20130101 09:00:00', periods=5)
i

In [None]:
# Add a one-hour offset to every timestamp in `i`.
i + pd.offsets.Hour(1)

In [None]:
# Five timestamps at month-start ('MS') frequency, beginning 2013-01-01 09:00.
i = pd.date_range(start='20130101 09:00:00', periods=5, freq='MS')
i

In [None]:
# Add a MonthEnd offset to every timestamp in `i`.
i + pd.offsets.MonthEnd()

In [None]:
# Ten timestamps starting at the first element of `i`, spaced by a compound
# frequency of one day plus ten microseconds.
# Fixes: `date_range` was never imported as a bare name (NameError), so it
# is called through `pd`; 'us' replaces the deprecated 'U' microsecond alias.
pd.date_range(i[0], periods=10, freq='1D10us')

# Timezones

http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-zone-handling

In [None]:
# Five timestamps localized to US/Eastern, wrapped in a Series so the `.dt`
# accessor can be used on them.
# Fix: `Series` was never imported as a bare name (NameError); use `pd.Series`.
s = pd.Series(pd.date_range('20130101 09:00:00', periods=5, tz='US/Eastern'))
s

In [None]:
# Convert the values of `s` to the UTC timezone via the `.dt` accessor.
s.dt.tz_convert('UTC')

In [None]:
# Produce timezone-naive datetimes holding the UTC wall-clock values.
# Casting a tz-aware Series straight to 'datetime64[ns]' raises TypeError in
# pandas 2.x; converting to UTC and then removing the zone is the explicit
# equivalent of what that cast used to do.
s.dt.tz_convert('UTC').dt.tz_localize(None)

# Timedeltas

http://pandas.pydata.org/pandas-docs/stable/timedeltas.html

In [None]:
# Five timedeltas starting at one day and increasing in two-second steps.
# Fix: `Series` was never imported as a bare name (NameError); use `pd.Series`.
s = pd.Series(pd.timedelta_range('1 day', periods=5, freq='2 s'))
s

In [None]:
# Look up the element of `s` labelled 0 and display it as a scalar.
s[0]

In [None]:
# Timedeltas can also come from datetime arithmetic: build a month-start
# DatetimeIndex, then subtract its first element from every entry.
dr = pd.date_range(start='20130101 09:00:00', periods=5, freq='MS')
dr - dr[0]

In [None]:
# Add the differences `dr - dr[0]` to `s` and keep the sum in `r`.
# NOTE(review): assumes `s` is still the five-element timedelta Series and
# `dr` the five-element date range defined earlier — confirm cell order.
r = s + (dr-dr[0])
r

In [None]:
# Cast `r` to second resolution.
# NOTE(review): the result dtype of this cast appears to depend on the pandas
# version (float seconds in older releases, a timedelta64[s] Series in newer
# ones) — confirm which is intended; `r.dt.total_seconds()` would give float
# seconds explicitly.
r.astype('timedelta64[s]')

In [None]:
# Show the `.dt.components` breakdown of the timedeltas in `r`.
r.dt.components

# Resampling Part 2

In [None]:
# Build a noisy, irregular sample series for the resampling demos:
# 1000 normally distributed values scaled by 0.1 and shifted to 50, indexed
# by timestamps drawn at random (with replacement) from 1000 consecutive
# millisecond ticks. The fixed seed makes the draw repeatable.
# Fix: `Series` was never imported as a bare name (NameError); use `pd.Series`.
np.random.seed(1234)
rng = pd.date_range('20130101 09:30:00', periods=1000, freq='ms')
s = pd.Series(np.random.randn(1000)*.1+50,
              index=rng.take(np.random.randint(0,
                                               len(rng),
                                               size=len(rng))))
s

In [None]:
# Sort `s` by its index before plotting it on a 12x12 figure.
s.sort_index().plot(figsize=(12,12))

In [None]:
# Open/high/low/close summary of `s` in one-millisecond bins.
# `resample(..., how='ohlc')` no longer exists in current pandas; the
# aggregation is now the `.ohlc()` method on the resampler object.
res = s.resample('1ms').ohlc()
res

# Missing Values

In [None]:
# Same one-millisecond OHLC table, with missing rows forward-filled from the
# previous row.
# `resample(..., how='ohlc')` no longer exists in current pandas; the
# aggregation is now the `.ohlc()` method on the resampler object.
s.resample('1ms').ohlc().ffill()

# Computational Tools

http://pandas.pydata.org/pandas-docs/stable/computation.html

In [None]:
# Plot a rolling mean (window of 1) and an expanding mean of the series
# after it has been averaged into 10 ms bins; the two curves come from
# consecutive .plot calls in the same cell.
# `pd.rolling_mean` / `pd.expanding_mean` and their `freq=` argument were
# removed from pandas. The replacement is an explicit resample(...).mean()
# (the binning that `freq=` used to do implicitly) followed by the
# .rolling(...) / .expanding() window objects.
s.sort_index().resample('10ms').mean().rolling(window=1).mean().plot(figsize=(12,12))
s.sort_index().resample('10ms').mean().expanding().mean().plot(figsize=(12,12))