# Demo of [pandas issue 7632](https://github.com/pandas-dev/pandas/issues/7632)

In [1]:
import pandas as pd
pd.__version__

'0.22.0'

## Example from [documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html)

In [2]:
# Two small frames from the pandas merge documentation. The join keys live in
# differently named columns ('lkey' on the left, 'rkey' on the right) and both
# frames carry a 'value' column.
A = pd.DataFrame({
    'lkey': ['foo', 'bar', 'baz', 'foo'],
    'value': [1, 2, 3, 4],
})
B = pd.DataFrame({
    'rkey': ['foo', 'bar', 'qux', 'bar'],
    'value': [5, 6, 7, 8],
})

In [3]:
A

Unnamed: 0,lkey,value
0,foo,1
1,bar,2
2,baz,3
3,foo,4


In [4]:
B

Unnamed: 0,rkey,value
0,foo,5
1,bar,6
2,qux,7
3,bar,8


In [5]:
# Reference result: outer join on the two key columns. Both key columns are
# kept, and the shared 'value' column is split into value_x / value_y.
r1 = pd.merge(A, B, how='outer', left_on='lkey', right_on='rkey')
r1

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1.0,foo,5.0
1,foo,4.0,foo,5.0
2,bar,2.0,bar,6.0
3,bar,2.0,bar,8.0
4,baz,3.0,,
5,,,qux,7.0


## Set indices

In [6]:
# Give B an index that is unrelated to the join key (labels 'a'..'d'),
# and move A's join key out of the columns into its index.
B = B.assign(another_key=list('abcd')).set_index('another_key')
A = A.set_index('lkey')

In [7]:
A

Unnamed: 0_level_0,value
lkey,Unnamed: 1_level_1
foo,1
bar,2
baz,3
foo,4


In [8]:
B

Unnamed: 0_level_0,rkey,value
another_key,Unnamed: 1_level_1,Unnamed: 2_level_1
a,foo,5
b,bar,6
c,qux,7
d,bar,8


## Merge again

In [9]:
# Same outer join as r1, but the left key is now taken from A's index.
# In the pandas 0.22 output shown below, reset_index() produces an
# 'another_key' column holding B's index labels; no 'lkey' column comes back,
# so simply renaming the reset column would not recover the left key.
r2 = pd.merge(A, B, how='outer', left_index=True, right_on='rkey').reset_index()
r2

Unnamed: 0,another_key,value_x,rkey,value_y
0,a,1.0,foo,5.0
1,a,4.0,foo,5.0
2,b,2.0,bar,6.0
3,d,2.0,bar,8.0
4,d,3.0,baz,
5,c,,qux,7.0


In [10]:
# Expectation: the merged frame should still expose the left key as 'lkey'.
# The failure is the point of this demo, so catch and print it instead of
# letting the exception stop a top-to-bottom run of the notebook.
try:
    assert 'lkey' in r2.columns
except AssertionError as err:
    print('AssertionError: {}'.format(err))

AssertionError: 

In [11]:
# r2 should equal the column-based merge r1. Report the mismatch rather than
# raising, so the cells after this one still execute on "Run All".
try:
    pd.testing.assert_frame_equal(r1, r2)
except AssertionError as err:
    print('AssertionError: {}'.format(err))

AssertionError: DataFrame.columns are different

DataFrame.columns values are different (25.0 %)
[left]:  Index(['lkey', 'value_x', 'rkey', 'value_y'], dtype='object')
[right]: Index(['another_key', 'value_x', 'rkey', 'value_y'], dtype='object')

## Workaround: reset the index and merge on columns

In [12]:
# Workaround: turn A's index back into an ordinary 'lkey' column first, then
# merge column-on-column exactly as was done for r1.
r3 = pd.merge(A.reset_index(), B, how='outer', left_on='lkey', right_on='rkey')
r3

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1.0,foo,5.0
1,foo,4.0,foo,5.0
2,bar,2.0,bar,6.0
3,bar,2.0,bar,8.0
4,baz,3.0,,
5,,,qux,7.0


In [13]:
# Raises AssertionError if the workaround result r3 differs from the reference
# merge r1; producing no output means the two frames are equal.
pd.testing.assert_frame_equal(r1, r3)

## Example by [matthiasjfrank](https://github.com/pandas-dev/pandas/issues/7632) from the GitHub issue

In [26]:
import numpy as np

# df1: 4 rows x 2 columns of float zeros, default integer index 0..3.
# df2: the integers 0..15 laid out as 8 rows x 2 columns, so column 'C'
#      holds the even numbers and 'D' the odd numbers.
df1 = pd.DataFrame(np.zeros((4, 2)), columns=['A', 'B'])
df2 = pd.DataFrame(np.arange(16).reshape(8, 2), columns=['C', 'D'])

In [27]:
df1

Unnamed: 0,A,B
0,0.0,0.0
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0


In [28]:
df2

Unnamed: 0,C,D
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9
5,10,11
6,12,13
7,14,15


In [29]:
# Example from the GitHub issue: left join of df1's index against df2['C'].
df1.merge(df2, how='left', left_index=True, right_on='C')

Unnamed: 0,A,B,C,D
0,0.0,0.0,0,1.0
7,0.0,0.0,1,
1,0.0,0.0,2,3.0
7,0.0,0.0,3,


In [18]:
# Workaround: copy the index into a helper column 'i', join on that column,
# and drop the helper from the merged result. Note that df1 itself keeps the
# 'i' column after this cell.
df1['i'] = pd.Series(df1.index)
(
    df1
    .merge(df2, how='left', left_on='i', right_on='C')
    .drop(columns=['i'])
)

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,1.0
1,0.0,0.0,,
2,0.0,0.0,2.0,3.0
3,0.0,0.0,,


In [19]:
# Based on my workaround: expose the index as a column via reset_index(),
# merge on that 'index' column, then drop it from the result.
#
# The helper column 'i' only exists if the previous cell has been run, so it
# is removed with errors='ignore'. That keeps this cell safe to re-run and
# safe to run on a df1 that never had the column.
df1 = df1.drop(columns=['i'], errors='ignore')
(
    df1
    .reset_index()
    .merge(df2, how='left', left_on='index', right_on='C')
    .drop(columns=['index'])
)

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,1.0
1,0.0,0.0,,
2,0.0,0.0,2.0,3.0
3,0.0,0.0,,


In [20]:
# Workaround by cwwalter: do the index-vs-column left join as in the issue
# example, then promote the 'C' column to be the index of the result.
df1.merge(df2, how='left', left_index=True, right_on='C').set_index('C')

Unnamed: 0_level_0,A,B,D
C,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.0,0.0,1.0
1,0.0,0.0,
2,0.0,0.0,3.0
3,0.0,0.0,
