# v0.14.x
May 2014

In [1]:
import pandas as pd
import numpy as np
# cap DataFrame reprs at 12 rows so the large frames below are shown truncated
pd.set_option("display.max_rows", 12)

## multi indexing using slicers

In [12]:
# create the test data
def mklbl(prefix,n):
    """Return the labels ``prefix0 .. prefix<n-1>`` as a list of strings.

    ``prefix`` is rendered with ``%s`` and followed by each integer in
    ``range(n)``; ``n == 0`` yields an empty list.
    """
    labels = []
    for position in range(n):
        labels.append("%s%s" % (prefix, position))
    return labels

# Row index: full cartesian product of four label levels (4 * 2 * 4 * 2 = 64 rows).
# MultiIndex / DataFrame are reached through the `pd` namespace because the
# notebook only does `import pandas as pd`; the bare names are undefined.
miindex = pd.MultiIndex.from_product([mklbl('A',4),
                                      mklbl('B',2),
                                      mklbl('C',4),
                                      mklbl('D',2)],
                                     names=['first','second','third','fourth'])

# Column index: two levels (lvl0, lvl1), spelled out as explicit tuples.
micolumns = pd.MultiIndex.from_tuples([('a','foo'),('a','bar'),
                                       ('b','foo'),('b','bah')],
                                      names=['lvl0', 'lvl1'])

# Fill the frame with consecutive integers, then sort both axes so the
# MultiIndexes are lexsorted before the slicer-based .loc cells below.
# sort_index() replaces DataFrame.sortlevel(), which has been removed from pandas.
dfmi = pd.DataFrame(np.arange(len(miindex)*len(micolumns)).reshape((len(miindex),len(micolumns))),
                    index=miindex,
                    columns=micolumns).sort_index().sort_index(axis=1)

In [13]:
# show the sorted frame; the repr is truncated by the display.max_rows option set at the top
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
first,second,third,fourth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
A0,B0,C2,D1,21,20,23,22
...,...,...,...,...,...,...,...
A3,B1,C1,D0,233,232,235,234
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242


In [14]:
# selection: build the row and column slicers separately, then apply both via .loc
idx = pd.IndexSlice
row_slicer = idx[:, :, ['C1', 'C2'], 'D0']  # any first/second, third in C1/C2, fourth == D0
col_slicer = idx[:, 'foo']                  # any lvl0, lvl1 == foo
dfmi.loc[row_slicer, col_slicer]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
first,second,third,fourth,Unnamed: 4_level_2,Unnamed: 5_level_2
A0,B0,C1,D0,8,10
A0,B0,C2,D0,16,18
A0,B1,C1,D0,40,42
A0,B1,C2,D0,48,50
A1,B0,C1,D0,72,74
A1,B0,C2,D0,80,82
...,...,...,...,...,...
A2,B1,C1,D0,168,170
A2,B1,C2,D0,176,178
A3,B0,C1,D0,200,202


In [15]:
# assignment: multiply, in place, the cells addressed by the row slicer
# (third in C1/C2, fourth == D0) and the column slicer (lvl1 == foo).
# NOTE: this mutates dfmi, so the cell is not idempotent -- running it again
# multiplies the same cells by 10 once more.
dfmi.loc[idx[:,:,['C1','C2'],'D0'],idx[:,'foo']] *= 10
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
first,second,third,fourth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,80,11,100
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,80,19,100
A0,B0,C2,D1,21,20,23,22
...,...,...,...,...,...,...,...
A3,B1,C1,D0,233,1120,235,1140
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,1120,243,1140


In [16]:
# string-like selection: the query expression refers to the index levels
# 'third' and 'fourth' by name
level_filter = "third in ['C1','C2'] and fourth == 'D0'"
dfmi.query(level_filter)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
first,second,third,fourth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C1,D0,9,80,11,100
A0,B0,C2,D0,17,80,19,100
A0,B1,C1,D0,41,160,43,180
A0,B1,C2,D0,49,160,51,180
A1,B0,C1,D0,73,400,75,420
A1,B0,C2,D0,81,400,83,420
...,...,...,...,...,...,...,...
A2,B1,C1,D0,169,800,171,820
A2,B1,C2,D0,177,800,179,820
A3,B0,C1,D0,201,1040,203,1060


Possible future syntax (not yet supported), selecting by index-level name:
```python
dfmi.loc[dict(third=['C1','C2'], 
              fourth='D0')]
```

## select_dtypes

In [14]:
# Sample frame with one column per dtype family, used by the select_dtypes cells.
# DataFrame / Series are qualified with `pd.` because the notebook only does
# `import pandas as pd`; the bare names are undefined on a fresh kernel.
df = pd.DataFrame({'A' : [1,2,3],                                   # integers
                   'B' : [1.,2,3],                                  # floats (the 1. literal makes the column float)
                   'C' : 'foo',                                     # scalar string, repeated on every row
                   'D' : pd.date_range('20130101',periods=3),       # naive datetimes
                   'E' : pd.Series(list('aab')).astype('category'), # categorical
                   'F' : pd.timedelta_range('1 day',periods=3,freq='s'),          # timedeltas, one second apart
                   'G' : pd.date_range('20130101',periods=3,tz='US/Eastern')})    # tz-aware datetimes
df

Unnamed: 0,A,B,C,D,E,F,G
0,1,1,foo,2013-01-01,a,1 days 00:00:00,2013-01-01 00:00:00-05:00
1,2,2,foo,2013-01-02,a,1 days 00:00:01,2013-01-02 00:00:00-05:00
2,3,3,foo,2013-01-03,b,1 days 00:00:02,2013-01-03 00:00:00-05:00


In [15]:
# list the dtype of each column of the sample frame
df.dtypes

A                         int64
B                       float64
C                        object
D                datetime64[ns]
E                      category
F               timedelta64[ns]
G    datetime64[ns, US/Eastern]
dtype: object

In [16]:
# keep only the 'number' columns; in the recorded output this also picks up the timedelta column F
numeric_cols = df.select_dtypes(include=['number'])
numeric_cols.dtypes

A              int64
B            float64
F    timedelta64[ns]
dtype: object

In [17]:
# 'number' columns again, this time explicitly excluding timedeltas
numeric_no_timedelta = df.select_dtypes(include=['number'], exclude=['timedelta'])
numeric_no_timedelta.dtypes

A      int64
B    float64
dtype: object