In [1]:
import numpy as np
import pandas as pd

In [5]:
# Seed a local generator so the frame is identical on every Restart & Run All.
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((10, 4)))
# Break df into three consecutive row slices (3, 4 and 3 rows).
pieces = [df[:3], df[3:7], df[7:]]
# Display the pieces so the cell actually produces the output shown below it.
pieces

[          0         1         2         3
 0  0.154526  0.947946 -0.702398 -0.679874
 1  0.950435  0.154106  1.129342 -0.250276
 2  1.324093 -1.768330 -1.662935  1.444312,
           0         1         2         3
 3 -0.271170  0.644278 -0.601540 -0.240102
 4 -0.685307 -1.435306  1.768309 -1.068874
 5  0.424016  0.175929 -1.304817 -1.843880
 6 -1.602742  0.651313 -0.331911 -1.064883,
           0         1         2         3
 7 -0.490204 -1.476149  0.980634  0.092740
 8 -1.306739  1.179804 -0.078659  0.823273
 9 -1.548689  0.655916  1.638697  0.141725]

## Concat
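`pd.concat([df1, df2], axis=1)` concatenates the DataFrames side by side, along the **columns** (`axis=1`).<br>
`pd.concat([df1, df2])` and `df1.append(df2)` concatenate along the **rows** (`axis=0`, the default). Note that `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0, so prefer `pd.concat`.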

In [16]:
# Stack every piece back together row-wise, renumbering the index from 0.
pd.concat(objs=pieces, sort=False, ignore_index=True)

Unnamed: 0,0,1,2,3
0,0.154526,0.947946,-0.702398,-0.679874
1,0.950435,0.154106,1.129342,-0.250276
2,1.324093,-1.76833,-1.662935,1.444312
3,-0.27117,0.644278,-0.60154,-0.240102
4,-0.685307,-1.435306,1.768309,-1.068874
5,0.424016,0.175929,-1.304817,-1.84388
6,-1.602742,0.651313,-0.331911,-1.064883
7,-0.490204,-1.476149,0.980634,0.09274
8,-1.306739,1.179804,-0.078659,0.823273
9,-1.548689,0.655916,1.638697,0.141725


In [15]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the row-wise equivalent and, with ignore_index=True,
# renumbers the resulting index from 0.
pd.concat([pieces[0], pieces[1]], ignore_index=True, sort=False)

Unnamed: 0,0,1,2,3
0,0.154526,0.947946,-0.702398,-0.679874
1,0.950435,0.154106,1.129342,-0.250276
2,1.324093,-1.76833,-1.662935,1.444312
3,-0.27117,0.644278,-0.60154,-0.240102
4,-0.685307,-1.435306,1.768309,-1.068874
5,0.424016,0.175929,-1.304817,-1.84388
6,-1.602742,0.651313,-0.331911,-1.064883


## Merge
`pd.merge(left, right)`

In [10]:
# Two small frames sharing a "key" column, built from (key, value) records.
left = pd.DataFrame([("foo", 1), ("bar", 2)], columns=["key", "lval"])

right = pd.DataFrame([("foo", 4), ("bar", 5)], columns=["key", "rval"])

In [11]:
# Inner-join the two frames on their shared "key" column.
left.merge(right, on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


## Join 
`df.join()`

In [19]:
# Left frame: columns A/B, labelled K0..K2.
left = pd.DataFrame(
    data={"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]},
    index=["K0", "K1", "K2"],
)

# Right frame: columns C/D, labelled K0, K2, K3 (K1 missing, K3 extra).
right = pd.DataFrame(
    data={"C": ["C0", "C2", "C3"], "D": ["D0", "D2", "D3"]},
    index=["K0", "K2", "K3"],
)

# Join on the index; "outer" keeps the union of labels and fills gaps with NaN.
result = left.join(other=right, how="outer")
result

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


## Grouping
By “group by” we are referring to a process involving one or more of the following steps:

- **Splitting** the data into groups based on some criteria  
- **Applying** a function to each group independently
- **Combining** the results into a data structure

`df.groupby()`

In [20]:
# Seed a local generator so the random columns are reproducible on re-run.
rng = np.random.default_rng(0)
# Two categorical key columns (A, B) plus two numeric columns (C, D) to aggregate.
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": rng.standard_normal(8),
        "D": rng.standard_normal(8),
    }
)
df

Unnamed: 0,A,B,C,D
0,foo,one,3.282145,1.11265
1,bar,one,-1.236156,0.615783
2,foo,two,-0.322419,-0.282678
3,bar,three,-1.10835,0.771539
4,foo,two,3.138969,0.002101
5,bar,two,-0.305809,-0.168753
6,foo,one,0.65688,1.547449
7,foo,three,-0.307284,-0.003852


In [21]:
# Select the numeric columns explicitly: since pandas 2.0, sum() no longer
# drops non-numeric columns, so the string column B would otherwise be
# concatenated into the result instead of being left out.
df.groupby("A")[["C", "D"]].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-2.650315,1.218569
foo,6.448292,2.37567


In [22]:
# Group on both key columns (yielding a hierarchical A/B index), then sum each group.
df.groupby(by=["A", "B"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-1.236156,0.615783
bar,three,-1.10835,0.771539
bar,two,-0.305809,-0.168753
foo,one,3.939025,2.660099
foo,three,-0.307284,-0.003852
foo,two,2.81655,-0.280577
