# Joining Tables
Reference (pandas merging guide): http://pandas.pydata.org/pandas-docs/stable/merging.html

In [1]:
import pandas as pd
import numpy as np

## 1. One-to-one join

In [46]:
# Left table for the one-to-one example: each key appears exactly once.
left = pd.DataFrame(
    data={
        'key': ['foo', 'bar'],
        'lval': [1, 2],
    }
)
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [47]:
# Right table for the one-to-one example: same keys, each appearing once.
right = pd.DataFrame(
    data={
        'key': ['foo', 'bar'],
        'rval': [4, 5],
    }
)
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [44]:
# Match rows of `left` and `right` that share the same value in 'key'.
left.merge(right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


## 2. Many-to-Many join

In [2]:
# Left table with a repeated key: 'bar' occurs twice.
left = pd.DataFrame(
    data={
        'key': ['foo', 'bar', 'bar'],
        'lval': [1, 2, 3],
    }
)
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2
2,bar,3


In [3]:
# Right table with a repeated key: 'foo' occurs twice.
right = pd.DataFrame(
    data={
        'key': ['foo', 'foo', 'bar'],
        'rval': [4, 5, 6],
    }
)
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5
2,bar,6


In [4]:
# A repeated key is paired with every matching row on the other side,
# so the result lists all key-wise combinations.
left.merge(right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,bar,2,6
3,bar,3,6


## 3. Left, right, inner, outer join

In [6]:
# Table A: keyed by 'lkey'; 'foo' appears twice and 'baz' has no match in B.
A = pd.DataFrame(
    data={
        'lkey': ['foo', 'bar', 'baz', 'foo'],
        'value': [1, 2, 3, 4],
    }
)
A

Unnamed: 0,lkey,value
0,foo,1
1,bar,2
2,baz,3
3,foo,4


In [7]:
# Table B: keyed by 'rkey'; 'bar' appears twice and 'qux' has no match in A.
B = pd.DataFrame(
    data={
        'rkey': ['foo', 'bar', 'qux', 'bar'],
        'value': [5, 6, 7, 8],
    }
)
B

Unnamed: 0,rkey,value
0,foo,5
1,bar,6
2,qux,7
3,bar,8


In [8]:
# Outer join: keep every key found in A or B (the union of the keys);
# cells with no counterpart on the other side are left empty.
A.merge(B, how='outer', left_on='lkey', right_on='rkey')

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1.0,foo,5.0
1,foo,4.0,foo,5.0
2,bar,2.0,bar,6.0
3,bar,2.0,bar,8.0
4,baz,3.0,,
5,,,qux,7.0


In [9]:
# Inner join: keep only keys present in both A and B (the intersection).
A.merge(B, how='inner', left_on='lkey', right_on='rkey')

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1,foo,5
1,foo,4,foo,5
2,bar,2,bar,6
3,bar,2,bar,8


In [25]:
# Left join: keep every row of A; rows without a match in B get empty B columns.
A.merge(B, how='left', left_on='lkey', right_on='rkey')

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1,foo,5.0
1,bar,2,bar,6.0
2,bar,2,bar,8.0
3,baz,3,,
4,foo,4,foo,5.0


In [10]:
# Debugging aid: indicator=True appends a `_merge` column that records
# whether each row came from both tables, the left only, or the right only.
A.merge(B, how='outer', left_on='lkey', right_on='rkey', indicator=True)

Unnamed: 0,lkey,value_x,rkey,value_y,_merge
0,foo,1.0,foo,5.0,both
1,foo,4.0,foo,5.0,both
2,bar,2.0,bar,6.0,both
3,bar,2.0,bar,8.0,both
4,baz,3.0,,,left_only
5,,,qux,7.0,right_only


In [11]:
# Both tables have a 'value' column; `suffixes` controls how the clash is
# renamed: A's column keeps its name, B's column becomes 'value2'.
A.merge(B, how='left', left_on='lkey', right_on='rkey', suffixes=['', '2'])

Unnamed: 0,lkey,value,rkey,value2
0,foo,1,foo,5.0
1,bar,2,bar,6.0
2,bar,2,bar,8.0
3,baz,3,,
4,foo,4,foo,5.0


## 4. Joining on multiple columns

In [56]:
# Left table identified by the composite key (key1, key2).
left = pd.DataFrame(
    data={
        'key1': ['foo', 'foo', 'bar'],
        'key2': ['one', 'two', 'one'],
        'lval': [1, 2, 3],
    }
)
left

Unnamed: 0,key1,key2,lval
0,foo,one,1
1,foo,two,2
2,bar,one,3


In [57]:
# Right table with the same composite key; ('foo', 'one') occurs twice.
right = pd.DataFrame(
    data={
        'key1': ['foo', 'foo', 'bar', 'bar'],
        'key2': ['one', 'one', 'one', 'two'],
        'rval': [4, 5, 6, 7],
    }
)
right

Unnamed: 0,key1,key2,rval
0,foo,one,4
1,foo,one,5
2,bar,one,6
3,bar,two,7


In [58]:
# Rows match only when both 'key1' and 'key2' agree.
left.merge(right, on=['key1', 'key2'])

Unnamed: 0,key1,key2,lval,rval
0,foo,one,1,4
1,foo,one,1,5
2,bar,one,3,6


In [59]:
# Same composite-key match, but keep key pairs that exist on only one side.
left.merge(right, how='outer', on=['key1', 'key2'])

Unnamed: 0,key1,key2,lval,rval
0,foo,one,1.0,4.0
1,foo,one,1.0,5.0
2,foo,two,2.0,
3,bar,one,3.0,6.0
4,bar,two,,7.0


## 5. Joining using the index

### 5.1 One table joins on a column, the other on its index

In [60]:
# Left table: the join key lives in an ordinary column named 'key'.
left1 = pd.DataFrame(
    data={
        'key': list('abaabc'),
        'value': range(6),
    }
)
left1

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


In [67]:
# Right table: the join key is the row index ('a', 'b'), not a column.
right1 = pd.DataFrame(
    data={'group_val': [3.5, 7]},
    index=list('ab'),
)
right1

Unnamed: 0,group_val
a,3.5
b,7.0


In [68]:
# Match left1's 'key' column against right1's index labels.
left1.merge(right1, left_on='key', right_index=True)

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0


### 5.2 Both tables join on their index

In [72]:
# Left table indexed by 'a', 'c', 'e' with columns OH and PA.
left2 = pd.DataFrame(
    data=[[1, 2], [3, 4], [5, 6]],
    columns=['OH', 'PA'],
    index=list('ace'),
)
left2

Unnamed: 0,OH,PA
a,1,2
c,3,4
e,5,6


In [73]:
# Right table indexed by 'b', 'c', 'd', 'e' with columns CT and NY.
right2 = pd.DataFrame(
    data=[[7, 8], [9, 10], [11, 12], [13, 14]],
    columns=['CT', 'NY'],
    index=list('bcde'),
)
right2

Unnamed: 0,CT,NY
b,7,8
c,9,10
d,11,12
e,13,14


In [74]:
# Align the two tables on their index labels, keeping labels from either side.
left2.merge(right2, how='outer', left_index=True, right_index=True)

Unnamed: 0,OH,PA,CT,NY
a,1.0,2.0,,
b,,,7.0,8.0
c,3.0,4.0,9.0,10.0
d,,,11.0,12.0
e,5.0,6.0,13.0,14.0
