# v0.15.x
October 2014

In [24]:
import pandas as pd
import numpy as np
# Keep rendered frames compact: show at most 8 rows and wrap at 100 characters.
pd.set_option('display.max_rows', 8)
pd.set_option('display.width', 100)

## Categoricals

In [4]:
# Convert a plain object Series to the category dtype; the categories are
# inferred from the distinct values. `Series` is reached through the `pd`
# namespace because only `import pandas as pd` is in scope.
s1 = pd.Series(list('daabbca')).astype('category')
s1

0    d
1    a
2    a
3    b
4    b
5    c
6    a
dtype: category
Categories (4, object): [a, b, c, d]

In [5]:
# Build an ordered categorical with an explicit category order (d < b < c < a).
# The target dtype is spelled out as a CategoricalDtype: passing `categories=`
# and `ordered=` as extra keywords to astype() is not accepted by current pandas.
s2 = pd.Series(list('daabbca')).astype(
    pd.CategoricalDtype(categories=list('dbca'), ordered=True)
)
s2

0    d
1    a
2    a
3    b
4    b
5    c
6    a
dtype: category
Categories (4, object): [d < b < c < a]

In [6]:
# Sort descending; the ordering used is the declared category order
# (d < b < c < a), not alphabetical order.
s2.sort_values(ascending=False)

6    a
2    a
1    a
5    c
4    b
3    b
0    d
dtype: category
Categories (4, object): [d < b < c < a]

In [7]:
# Largest value according to the declared category order.
s2.max()

'a'

In [8]:
# Smallest value according to the declared category order.
s2.min()

'd'

In [9]:
# Five-row frame with a float column and a string column, plus a categorical
# copy of the strings, repeated 1000 times so the memory comparison below is
# meaningful. Column A is random and unseeded, so its values differ per run.
# `DataFrame` / `Series` are reached through `pd`; the bare names are not imported.
df = pd.DataFrame({'A': np.random.randn(5),
                   'B': pd.Series(['a', 'foo', 'bar',
                                   'a really long string',
                                   'baz'])})
df['C'] = df['B'].astype('category')
df = pd.concat([df] * 1000, ignore_index=True)
df

Unnamed: 0,A,B,C
0,-0.448820,a,a
1,-0.211907,foo,foo
2,0.982746,bar,bar
3,0.695341,a really long string,a really long string
4,-0.810538,baz,baz
5,-0.448820,a,a
...,...,...,...
4994,-0.810538,baz,baz
4995,-0.448820,a,a
4996,-0.211907,foo,foo


In [10]:
# Per-column memory footprint in bytes; compare string column B with its
# categorical counterpart C.
df.memory_usage()

A    40000
B    40000
C     5040
dtype: int64

## Timedeltas

In [11]:
# Parse a human-readable duration string into a scalar Timedelta.
# `Timedelta` is reached through `pd`; the bare name is not imported.
pd.Timedelta('1 days 1 h 3 s 2 ms')

Timedelta('1 days 01:00:03.002000')

In [12]:
# Five timestamps spaced 20 seconds apart, starting at 2014-01-01 09:05:03.
# `Series` is reached through `pd`; the bare name is not imported.
s = pd.Series(pd.date_range('20140101 09:05:03', periods=5, freq='20s'))
s

0   2014-01-01 09:05:03
1   2014-01-01 09:05:23
2   2014-01-01 09:05:43
3   2014-01-01 09:06:03
4   2014-01-01 09:06:23
dtype: datetime64[ns]

In [13]:
# Difference between consecutive timestamps; the result is a timedelta Series
# whose first element is NaT because it has no predecessor.
s.diff()

0        NaT
1   00:00:20
2   00:00:20
3   00:00:20
4   00:00:20
dtype: timedelta64[ns]

In [14]:
# Express every timestamp as an offset from the first one.
# NOTE: this rebinds `s`; the cells that follow operate on this timedelta
# Series rather than on the original timestamps.
s = s-s.iloc[0]
s

0   00:00:00
1   00:00:20
2   00:00:40
3   00:01:00
4   00:01:20
dtype: timedelta64[ns]

In [15]:
# Break each timedelta into its fields (days, hours, minutes, ...) as a DataFrame.
s.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,0,0,0,0,0,0,0
1,0,0,0,20,0,0,0
2,0,0,0,40,0,0,0
3,0,0,1,0,0,0,0
4,0,0,1,20,0,0,0


In [16]:
# freq conversions
# Express each timedelta as a total number of seconds.
s.dt.total_seconds()

0     0
1    20
2    40
3    60
4    80
dtype: float64

In [17]:
# ops
# Add a scalar Timedelta of one microsecond to every element of the series.
# `Timedelta` is reached through `pd`; the bare name is not imported.
s + pd.Timedelta('1us')

0   00:00:00.000001
1   00:00:20.000001
2   00:00:40.000001
3   00:01:00.000001
4   00:01:20.000001
dtype: timedelta64[ns]

In [26]:
# Frame of 100 integers indexed by an hourly TimedeltaIndex starting at 1 day.
# `DataFrame` is reached through `pd`; the bare name is not imported.
df = (
    pd.DataFrame({'A': np.arange(100),
                  'B': pd.timedelta_range('1 day', freq='h', periods=100)})
    .set_index('B')
)
df

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
1 days 00:00:00,0
1 days 01:00:00,1
1 days 02:00:00,2
1 days 03:00:00,3
...,...
5 days 00:00:00,96
5 days 01:00:00,97
5 days 02:00:00,98
5 days 03:00:00,99


In [19]:
# Downsample the hourly timedelta index to one-day bins and sum each bin.
# The aggregation is chained as a method: the `how=` keyword is no longer
# accepted by resample().
df.resample('1D').sum()

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
1 days,276
2 days,852
3 days,1428
4 days,2004
5 days,390


## dt accessors

In [20]:
# Re-create the datetime series (earlier cells rebound `s` to timedeltas):
# five timestamps spaced 20 seconds apart, starting at 2014-01-01 09:05:03.
# `Series` is reached through `pd`; the bare name is not imported.
s = pd.Series(pd.date_range('20140101 09:05:03', periods=5, freq='20s'))
s

0   2014-01-01 09:05:03
1   2014-01-01 09:05:23
2   2014-01-01 09:05:43
3   2014-01-01 09:06:03
4   2014-01-01 09:06:23
dtype: datetime64[ns]

In [21]:
# Month number of each timestamp, via the .dt accessor.
s.dt.month

0    1
1    1
2    1
3    1
4    1
dtype: int64

In [22]:
# Attach the US/Eastern time zone to the timestamps; the result is displayed
# only and `s` itself is not reassigned.
s.dt.tz_localize('US/Eastern')

0   2014-01-01 09:05:03-05:00
1   2014-01-01 09:05:23-05:00
2   2014-01-01 09:05:43-05:00
3   2014-01-01 09:06:03-05:00
4   2014-01-01 09:06:23-05:00
dtype: datetime64[ns, US/Eastern]

In [23]:
# Boolean-mask selection: keep the timestamps whose seconds field is below 6.
s[s.dt.second<6]

0   2014-01-01 09:05:03
3   2014-01-01 09:06:03
dtype: datetime64[ns]