# Pandas Documentation on Merge, Join and Concatenation

In this notebook, you will work through the Pandas documentation on merging, joining and concatenation.

## Imports

In [1]:
import numpy as np
import pandas as pd

## Merge, join and concatenation

In this notebook, you are going to learn how to use Pandas by typing the code from the Pandas documentation into this notebook.

* Go to the Pandas documentation on [merge, join, and concatenate](http://pandas.pydata.org/pandas-docs/stable/merging.html#merge-join-and-concatenate).
* Type all of the code from that section of the documentation into this notebook and get it working.
* **To learn this API well, you must type the code rather than copy and pasting it**.
* Create a new cell in this section for each `In[]` prompt in the documentation.
* Ignore the cells in the **Grading** section below.
* No Markdown comments are needed.
* Skip the following sub-sections:
  - More concatenating with group keys
  - Joining a single Index to a Multi-index
  - Joining with two multi-indexes
  - Joining multiple DataFrame or Panel objects

In [2]:
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                       'B': ['B0', 'B1', 'B2', 'B3'],
                       'C': ['C0', 'C1', 'C2', 'C3'],
                       'D': ['D0', 'D1', 'D2', 'D3']},
                       index=[0, 1, 2, 3])

In [3]:
   df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                       'B': ['B4', 'B5', 'B6', 'B7'],
                       'C': ['C4', 'C5', 'C6', 'C7'],
                       'D': ['D4', 'D5', 'D6', 'D7']},
                        index=[4, 5, 6, 7])

In [4]:
   df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                       'B': ['B8', 'B9', 'B10', 'B11'],
                       'C': ['C8', 'C9', 'C10', 'C11'],
                       'D': ['D8', 'D9', 'D10', 'D11']},
                       index=[8, 9, 10, 11])

In [5]:
frames = [df1, df2, df3]
result = pd.concat(frames)

In [6]:
result = pd.concat(frames, keys=['x', 'y', 'z'])

In [7]:
result.ix['y']

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [8]:
df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],
                    'D': ['D2', 'D3', 'D6', 'D7'],
                    'F': ['F2', 'F3', 'F6', 'F7']},
                   index=[2, 3, 6, 7])

In [9]:
result = pd.concat([df1, df4], axis=1)

In [10]:
result = pd.concat([df1, df4], axis=1, join='inner')

In [11]:
result = pd.concat([df1, df4], axis=1, join_axes=[df1.index])

In [12]:
result = df1.append(df2)


In [13]:
result = df1.append(df4)

In [14]:
result = df1.append([df2, df3])

In [15]:
result = pd.concat([df1, df4], ignore_index=True)

In [16]:
result = df1.append(df4, ignore_index=True)

In [17]:
s1 = pd.Series(['X0', 'X1', 'X2', 'X3'], name='X')

In [18]:
result = pd.concat([df1, s1], axis=1)

In [19]:
s2 = pd.Series(['_0', '_1', '_2', '_3'])

In [20]:
result = pd.concat([df1, s2, s2, s2], axis=1)

In [21]:
result = pd.concat([df1, s1], axis=1, ignore_index=True)

In [22]:
s3 = pd.Series([0, 1, 2, 3], name='foo')

In [23]:
s4 = pd.Series([0, 1, 2, 3])
   

In [24]:
s5 = pd.Series([0, 1, 4, 5])

In [25]:
pd.concat([s3, s4, s5], axis=1)

Unnamed: 0,foo,0,1
0,0,0,0
1,1,1,1
2,2,2,4
3,3,3,5


In [26]:
pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow'])

Unnamed: 0,red,blue,yellow
0,0,0,0
1,1,1,1
2,2,2,4
3,3,3,5


In [27]:
result = pd.concat(frames, keys=['x', 'y', 'z'])

In [28]:
pieces = {'x': df1, 'y': df2, 'z': df3}

In [29]:
result = pd.concat(pieces)

In [30]:
result = pd.concat(pieces, keys=['z', 'y'])

In [31]:
result.index.levels

FrozenList([['z', 'y'], [4, 5, 6, 7, 8, 9, 10, 11]])

In [32]:
result = pd.concat(pieces, keys=['x', 'y', 'z'],levels=[['z', 'y', 'x', 'w']],names=['group_key'])

In [33]:
result.index.levels

FrozenList([['z', 'y', 'x', 'w'], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]])

In [34]:
s2 = pd.Series(['X0', 'X1', 'X2', 'X3'], index=['A', 'B', 'C', 'D'])

In [35]:
result = df1.append(s2, ignore_index=True)

In [36]:
dicts = [{'A': 1, 'B': 2, 'C': 3, 'X': 4},
            {'A': 5, 'B': 6, 'C': 7, 'Y': 8}]

In [37]:
result = df1.append(dicts, ignore_index=True)

In [38]:
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})

   

In [39]:
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                         'C': ['C0', 'C1', 'C2', 'C3'],
                         'D': ['D0', 'D1', 'D2', 'D3']})

In [40]:
result = pd.merge(left, right, on='key')

In [41]:
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                        'key2': ['K0', 'K1', 'K0', 'K1'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})

   

In [42]:
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                         'key2': ['K0', 'K0', 'K0', 'K0'],
                         'C': ['C0', 'C1', 'C2', 'C3'],
                         'D': ['D0', 'D1', 'D2', 'D3']})

In [43]:
result = pd.merge(left, right, on=['key1', 'key2'])

In [44]:
result = pd.merge(left, right, how='left', on=['key1', 'key2'])

In [45]:
result = pd.merge(left, right, how='right', on=['key1', 'key2'])

In [46]:
result = pd.merge(left, right, how='outer', on=['key1', 'key2'])

In [47]:
result = pd.merge(left, right, how='inner', on=['key1', 'key2'])

In [48]:
df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']})

In [49]:
df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})

In [50]:
merge(df1, df2, on='col1', how='outer', indicator=True)

NameError: name 'merge' is not defined

In [51]:
pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')

Unnamed: 0,col1,col_left,col_right,indicator_column
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


In [52]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                        'B': ['B0', 'B1', 'B2']},
                        index=['K0', 'K1', 'K2'])

In [53]:
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                         'D': ['D0', 'D2', 'D3']},
                         index=['K0', 'K2', 'K3'])

In [54]:
result = left.join(right)

In [55]:
result = left.join(right, how='outer')

In [56]:
result = left.join(right, how='inner')

In [57]:
result = pd.merge(left, right, left_index=True, right_index=True, how='outer')

In [58]:
result = pd.merge(left, right, left_index=True, right_index=True, how='inner');

In [59]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'key': ['K0', 'K1', 'K0', 'K1']})

In [60]:
right = pd.DataFrame({'C': ['C0', 'C1'],
                         'D': ['D0', 'D1']},
                         index=['K0', 'K1'])

In [61]:
result = left.join(right, on='key')

In [62]:
result = pd.merge(left, right, left_on='key', right_index=True,
                     how='left', sort=False);

In [63]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'key1': ['K0', 'K0', 'K1', 'K2'],
                        'key2': ['K0', 'K1', 'K0', 'K1']})

In [64]:
index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'),
                                     ('K2', 'K0'), ('K2', 'K1')])

In [65]:
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']},
                     index=index)

In [66]:
result = left.join(right, on=['key1', 'key2'])

In [67]:
result = left.join(right, on=['key1', 'key2'], how='inner')

In [70]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                        'B': ['B0', 'B1', 'B2']},
                        index=pd.Index(['K0', 'K1', 'K2'], name='key'))

In [71]:
index = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
                                     ('K2', 'Y2'), ('K2', 'Y3')],
                                      names=['key', 'Y'])

In [72]:
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
                         'D': ['D0', 'D1', 'D2', 'D3']},
                         index=index)

In [73]:
result = left.join(right, how='inner')

In [74]:
result = pd.merge(left.reset_index(), right.reset_index(),
          on=['key'], how='inner').set_index(['key','Y'])

In [75]:
index = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
                                      ('K1', 'X2')],
                                       names=['key', 'X'])

In [76]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                        'B': ['B0', 'B1', 'B2']},
                         index=index)

In [77]:
result = pd.merge(left.reset_index(), right.reset_index(),
                     on=['key'], how='inner').set_index(['key','X','Y'])

In [78]:
 left = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'v': [1, 2, 3]})

In [79]:
right = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'v': [4, 5, 6]})

In [80]:
result = pd.merge(left, right, on='k')

In [81]:
result = pd.merge(left, right, on='k', suffixes=['_l', '_r'])

In [82]:
left = left.set_index('k')

In [83]:
right = right.set_index('k')

In [84]:
result = left.join(right, lsuffix='_l', rsuffix='_r')

In [85]:
right2 = pd.DataFrame({'v': [7, 8, 9]}, index=['K1', 'K1', 'K2'])

In [86]:
result = left.join([right, right2])

In [93]:
left = pd.DataFrame({'k': ['K0', 'K1', 'K1', 'K2'],
                     'lv': [1, 2, 3, 4],
                     's': ['a', 'b', 'c', 'd']})

In [94]:
right = pd.DataFrame({'k': ['K1', 'K2', 'K4'],
                      'rv': [1, 2, 3]})

In [95]:
result = pd.ordered_merge(left, right, fill_method='ffill', left_by='s')

In [96]:
df1 = pd.DataFrame([[np.nan, 3., 5.], [-4.6, np.nan, np.nan],
                      [np.nan, 7., np.nan]])

In [97]:
df2 = pd.DataFrame([[-42.6, np.nan, -8.2], [-5., 1.6, 4]],
                      index=[1, 2])

In [98]:
result = df1.combine_first(df2)

In [99]:
df1.update(df2)

## Grading

YOUR ANSWER HERE