In [2]:
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so the two random columns below (and
# every grouped result derived from them) are identical on each
# Restart & Run All. Outputs recorded before this change came from an
# unseeded run and must be regenerated.
np.random.seed(0)

# Toy frame: two categorical key columns plus two columns of standard-normal noise.
df = pd.DataFrame({'key1' : ['a', 'a', 'b', 'b', 'a'],
                   'key2' : ['one', 'two', 'one', 'two', 'one'],
                   'data1' : np.random.randn(5),
                   'data2' : np.random.randn(5)})
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,-1.225015,-1.060249
1,a,two,-0.760509,2.62262
2,b,one,-0.198431,-1.838573
3,b,two,-0.547586,0.940407
4,a,one,-0.04979,0.460289


In [3]:
# Split the data1 values into groups keyed by the matching key1 value.
# This only builds the GroupBy object; no aggregate is computed here.
grouped = df['data1'].groupby(by=df['key1'])
grouped

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000022708FF04E0>

In [4]:
# Average of the grouped values, one result per distinct group key.
grouped.mean()

key1
a    1.151773
b   -0.422509
Name: data1, dtype: float64

In [7]:
# Passing several key Series groups on each (key1, key2) combination,
# which gives the result a two-level index.
group_keys = [df['key1'], df['key2']]
means = df['data1'].groupby(group_keys).mean()
means

key1  key2
a     one     1.291772
      two     0.871777
b     one    -0.803105
      two    -0.041913
Name: data1, dtype: float64

In [8]:
# Pivot the innermost index level (key2) into columns.
means.unstack(level=-1)

key2,one,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,1.291772,0.871777
b,-0.803105,-0.041913


In [4]:
# Group keys do not have to be columns of the frame: any arrays with one
# entry per row can be used.
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
data1_by_state_year = df['data1'].groupby([states, years])
data1_by_state_year.mean()

California  2005   -0.760509
            2006   -0.198431
Ohio        2005   -0.886301
            2006   -0.049790
Name: data1, dtype: float64

In [5]:
# Per-key1 mean of the numeric columns. key2 holds strings and cannot be
# averaged; pandas >= 2.0 no longer drops such columns silently and raises
# TypeError instead, so restrict the aggregation explicitly.
df.groupby('key1').mean(numeric_only=True)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.678438,0.67422
b,-0.373009,-0.449083


In [6]:
# Column names can be given directly; both key columns become the index and
# the remaining columns are averaged per (key1, key2) pair.
df.groupby(by=['key1', 'key2']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,-0.637402,-0.29998
a,two,-0.760509,2.62262
b,one,-0.198431,-1.838573
b,two,-0.547586,0.940407


In [7]:
# Number of rows that fall into each (key1, key2) group.
df.groupby(by=['key1', 'key2']).size()

key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64

In [8]:
# Iterating a GroupBy yields (group key, sub-frame) pairs.
for name, group in df.groupby('key1'):
    print(name, group, sep='\n')

a
  key1 key2     data1     data2
0    a  one -1.225015 -1.060249
1    a  two -0.760509  2.622620
4    a  one -0.049790  0.460289
b
  key1 key2     data1     data2
2    b  one -0.198431 -1.838573
3    b  two -0.547586  0.940407


In [9]:
# With several keys, the first element of each pair is a tuple of key values.
for (k1, k2), group in df.groupby(['key1', 'key2']):
    print((k1, k2), group, sep='\n')

('a', 'one')
  key1 key2     data1     data2
0    a  one -1.225015 -1.060249
4    a  one -0.049790  0.460289
('a', 'two')
  key1 key2     data1    data2
1    a  two -0.760509  2.62262
('b', 'one')
  key1 key2     data1     data2
2    b  one -0.198431 -1.838573
('b', 'two')
  key1 key2     data1     data2
3    b  two -0.547586  0.940407


In [10]:
# Materialise the groups as a {key1 value: sub-frame} mapping for direct lookup.
pieces = {key: chunk for key, chunk in df.groupby('key1')}
pieces['b']

Unnamed: 0,key1,key2,data1,data2
2,b,one,-0.198431,-1.838573
3,b,two,-0.547586,0.940407


In [11]:
# Show the dtype of each column; the next cell groups the columns by these dtypes.
df.dtypes

key1      object
key2      object
data1    float64
data2    float64
dtype: object

In [13]:
# Group the *columns* of df by their dtype.
# `DataFrame.groupby(..., axis=1)` is deprecated since pandas 2.1, so group a
# Series of the column labels instead (df.dtypes shares its index, so the keys
# align label-by-label) and use each group's labels to select the columns.
# Selecting from df keeps every column's original dtype, unlike transposing.
grouped = df.columns.to_series().groupby(df.dtypes)
for dtype, labels in grouped:
    group = df[list(labels)]
    print(dtype)
    print(group)

float64
      data1     data2
0 -1.225015 -1.060249
1 -0.760509  2.622620
2 -0.198431 -1.838573
3 -0.547586  0.940407
4 -0.049790  0.460289
object
  key1 key2
0    a  one
1    a  two
2    b  one
3    b  two
4    a  one


In [20]:
# Long form: select the data1 Series first, then group it by the key1 Series.
data1_by_key1 = df['data1'].groupby(df['key1'])
data1_by_key1.mean()

key1
a   -0.678438
b   -0.373009
Name: data1, dtype: float64

In [22]:
# Short form: group the frame by column name, then pick the column to aggregate.
df.groupby(by='key1')['data1'].mean()

key1
a   -0.678438
b   -0.373009
Name: data1, dtype: float64

In [23]:
# Indexing the GroupBy with a single column name yields a Series result.
df.groupby(by=['key1', 'key2'])['data2'].mean()

key1  key2
a     one    -0.299980
      two     2.622620
b     one    -1.838573
      two     0.940407
Name: data2, dtype: float64

In [25]:
# Indexing the GroupBy with a list of names keeps the result a DataFrame.
df.groupby(by=['key1', 'key2'])[['data2']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data2
key1,key2,Unnamed: 2_level_1
a,one,-0.29998
a,two,2.62262
b,one,-1.838573
b,two,0.940407


In [27]:
# Twelve one-minute timestamps starting at midnight, valued 0..11.
# 'min' is the supported alias for minutely frequency; the older 'T' alias is
# deprecated since pandas 2.2. It also matches the '5min' rule used when
# resampling below.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [31]:
# Five-minute bins that include their right edge and are labelled by it.
five_minute_bins = ts.resample('5min', closed='right', label='right')
five_minute_bins.sum()

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [32]:
# Same right-closed, right-labelled five-minute sums, with every label moved
# back by one second to make it clearer which interval a timestamp refers to.
# The `loffset` argument of resample() was removed in pandas 2.0; the
# documented replacement is to shift the index of the result explicitly.
shifted = ts.resample('5min', closed='right', label='right').sum()
shifted.index = shifted.index + pd.Timedelta(seconds=-1)
shifted

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

In [33]:
# Open / high / low / close of the values inside each five-minute bin.
ts.resample(rule='5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


In [34]:
# Two weekly (Wednesday) observations for four states.
# Draw the values from a locally seeded generator so the frame is the same on
# every execution of this cell, without resetting NumPy's global random state.
frame = pd.DataFrame(
    np.random.RandomState(0).randn(2, 4),
    index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.755578,1.000904,0.258631,-0.329256
2000-01-12,-1.012095,-0.316186,0.647726,-0.218275


In [35]:
# Upsample the weekly frame to daily frequency without filling:
# the days between the two observations come out as NaN.
df_daily = frame.resample(rule='D').asfreq()
df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.755578,1.000904,0.258631,-0.329256
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-1.012095,-0.316186,0.647726,-0.218275


In [36]:
# Upsample to daily frequency, carrying the last observation forward into the gaps.
frame.resample(rule='D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.755578,1.000904,0.258631,-0.329256
2000-01-06,-0.755578,1.000904,0.258631,-0.329256
2000-01-07,-0.755578,1.000904,0.258631,-0.329256
2000-01-08,-0.755578,1.000904,0.258631,-0.329256
2000-01-09,-0.755578,1.000904,0.258631,-0.329256
2000-01-10,-0.755578,1.000904,0.258631,-0.329256
2000-01-11,-0.755578,1.000904,0.258631,-0.329256
2000-01-12,-1.012095,-0.316186,0.647726,-0.218275


In [38]:
# Same forward fill, but each observation is carried forward at most two days.
frame.resample(rule='D').ffill(limit=2)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.755578,1.000904,0.258631,-0.329256
2000-01-06,-0.755578,1.000904,0.258631,-0.329256
2000-01-07,-0.755578,1.000904,0.258631,-0.329256
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-1.012095,-0.316186,0.647726,-0.218275


In [39]:
# The target dates need not overlap the original ones: re-anchor the weekly
# data on Thursdays, forward-filling from the preceding Wednesday values.
frame.resample(rule='W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.755578,1.000904,0.258631,-0.329256
2000-01-13,-1.012095,-0.316186,0.647726,-0.218275
