In [3]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Let's learn how to use a dict or a Series with groupby

In [4]:
# Build a small example DataFrame: four animals (rows) by four
# lettered columns, filled with the integers 0..15.
animals = DataFrame(np.arange(16).reshape(4, 4),
                    columns=['W', 'X', 'Y', 'Z'],
                    index=['Dog', 'Cat', 'Bird', 'Mouse'])

# Now let's add some NaN values to the 'Cat' row.
# `.ix` was removed from pandas, so select by label with `.loc` instead.
# The two target columns are cast to float first so that NaN can be stored
# without relying on an implicit int -> float upcast during assignment.
animals = animals.astype({'W': 'float64', 'Y': 'float64'})
animals.loc['Cat', ['W', 'Y']] = np.nan

# Show
animals

Unnamed: 0,W,X,Y,Z
Dog,0.0,1,2.0,3
Cat,,5,,7
Bird,8.0,9,10.0,11
Mouse,12.0,13,14.0,15


In [14]:
# Suppose we have a dictionary of behavior values:
# each column letter (key) is mapped to a behavior label (value).
behavior_map = dict(W='good', X='bad', Y='good', Z='bad')

In [15]:
# Group the *columns* using the mapping. `groupby(..., axis=1)` is
# deprecated in recent pandas, so transpose the frame, group its rows
# (the original column labels) and transpose the result back.
animal_col = animals.T.groupby(behavior_map)

# Show the sum according to the groupby with the mapping.
# min_count=1 keeps a group whose values are all NaN as NaN instead of 0.
animal_col.sum(min_count=1).T

# For example [Dog][good] = [Dog][Y] + [Dog][W]

Unnamed: 0,bad,good
Dog,4,2.0
Cat,12,
Bird,20,18.0
Mouse,28,26.0


In [16]:
# The same mapping can be supplied as a Series instead of a dict
behav_series = pd.Series(data=behavior_map)

# Display it
behav_series

W    good
X     bad
Y    good
Z     bad
dtype: object

In [17]:
# Now let's group the columns by the Series.
# The frame is transposed so its rows (the original column labels) are
# grouped, avoiding the deprecated `axis=1` argument; each summary is
# transposed back before printing.
by_behavior = animals.T.groupby(behav_series)

# print(...) with a single argument is valid on both Python 2 and Python 3.
# min_count=1 keeps the sum of an all-NaN group as NaN rather than 0.
for summary in (by_behavior.count(),
                by_behavior.sum(min_count=1),
                by_behavior.mean()):
    print(summary.T)

       bad  good
Dog      2     2
Cat      2     0
Bird     2     2
Mouse    2     2
       bad  good
Dog      4     2
Cat     12   NaN
Bird    20    18
Mouse   28    26
       bad  good
Dog      2     1
Cat      6   NaN
Bird    10     9
Mouse   14    13


In [18]:
# We can also group by the result of a function applied to each index label.

# Show our DataFrame again for reference
animals

Unnamed: 0,W,X,Y,Z
Dog,0.0,1,2.0,3
Cat,,5,,7
Bird,8.0,9,10.0,11
Mouse,12.0,13,14.0,15


In [19]:
# Say we want to group by the length of the animal names: the built-in
# `len` can be handed straight to groupby as the grouping key.

# Display the per-length sums
animals.groupby(by=len).sum()

# Note: the index is now the number of letters in the animal name

Unnamed: 0,W,X,Y,Z
3,0,6,2,10
4,8,9,10,11
5,12,13,14,15


In [20]:
# Functions can be combined with arrays, dicts, and Series as grouping keys.

# One label per row, in row order
keys = list('ABAB')

# Group on (name length, label) pairs and show each group's maximum
animals.groupby(by=[len, keys]).max()

Unnamed: 0,Unnamed: 1,W,X,Y,Z
3,A,0.0,1,2.0,3
3,B,,5,,7
4,A,8.0,9,10.0,11
5,B,12.0,13,14.0,15


In [24]:
# groupby can also be used with hierarchical index levels.

# Build a two-level column index: City on top, District underneath
hier_col = pd.MultiIndex.from_arrays(
    [['NY', 'NY', 'NY', 'SF', 'SF'],
     [1, 2, 3, 1, 2]],
    names=['City', 'District'],
)

# Create the DataFrame on that hierarchical column index, with the
# values 0..24 multiplied by 100 for clarity
dframe_hr = DataFrame(np.arange(25).reshape(5, 5) * 100, columns=hier_col)

# Show
dframe_hr

City,NY,NY,NY,SF,SF
District,1,2,3,1,2
0,0,100,200,300,400
1,500,600,700,800,900
2,1000,1100,1200,1300,1400
3,1500,1600,1700,1800,1900
4,2000,2100,2200,2300,2400


In [None]:
#Up next: Data Aggregation!!