In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file beneath the Kaggle input directory,
# then print the paths one per line in walk order.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Two small frames whose indexes only partly overlap (K0 and K2 are shared,
# K1 exists only in df1, K3 only in df2), so each join type gives a different result.
df1 = pd.DataFrame(
    data={
        'name': ['Mark', 'Juli', 'Denial'],
        'Place': ['Paris', 'London', 'Moscow'],
    },
    index=['K0', 'K1', 'K2'],
)
df2 = pd.DataFrame(
    data={
        'age': [25, 35, 28],
        'Gender': ['M', 'F', 'M'],
    },
    index=['K0', 'K2', 'K3'],
)

In [3]:
# join() defaults to a left join on the index; the default is spelled out here.
# Every row of df1 is kept, and labels missing from df2 (K1) get NaN.
df1.join(df2, how='left')


Unnamed: 0,name,Place,age,Gender
K0,Mark,Paris,25.0,M
K1,Juli,London,,
K2,Denial,Moscow,35.0,F


In [4]:
# Outer join: keep every index label that appears in either frame (K0..K3).
df1.join(other=df2, how='outer')

Unnamed: 0,name,Place,age,Gender
K0,Mark,Paris,25.0,M
K1,Juli,London,,
K2,Denial,Moscow,35.0,F
K3,,,28.0,M


In [5]:
# Inner join: keep only the index labels present in both frames (K0 and K2).
df1.join(other=df2, how='inner')

Unnamed: 0,name,Place,age,Gender
K0,Mark,Paris,25,M
K2,Denial,Moscow,35,F


# Joining key columns on an index
join() takes an optional `on` argument, which may be a single column name or a list of column names in the calling DataFrame. The values in that column (or those columns) are matched against the index of the passed DataFrame, instead of matching index on index.

In [6]:
# Left frame: the lookup values live in its 'key' column ('k0' appears twice).
df1 = pd.DataFrame(
    {
        'key': ['k0', 'k1', 'k0'],
        'name': ['Mark', 'Juli', 'Denial'],
        'Place': ['Paris', 'London', 'Moscow'],
    }
)
# Right frame: indexed by those same key values.
df2 = pd.DataFrame(
    {'age': [25, 35, 28], 'Gender': ['M', 'F', 'M']},
    index=['k0', 'k1', 'k2'],
)

# Match df1['key'] against df2's index; df1 keeps its own 0..2 row index.
Result = df1.join(df2, on='key')
Result

Unnamed: 0,key,name,Place,age,Gender
0,k0,Mark,Paris,25,M
1,k1,Juli,London,35,F
2,k0,Denial,Moscow,25,M


![m](https://studymachinelearning.com/wp-content/uploads/2019/11/pandas_join_ex_2.png)

# Joining Multiple DataFrames


In [7]:
# Three single-column frames that all use the column name 'A'.
# df2 and df3 contain repeated index labels (K0 and K1 respectively).
df1 = pd.DataFrame([1, 2, 3], index=['K0', 'K1', 'K2'], columns=['A'])
df2 = pd.DataFrame([4, 5, 6], index=['K0', 'K0', 'K3'], columns=['A'])
df3 = pd.DataFrame([7, 8, 9], index=['K1', 'K1', 'K2'], columns=['A'])

# Passing a list joins all frames onto df1's index in one call.
# Because the column name is shared, the result labels them A_x, A_y and A.
Result = df1.join(other=[df2, df3])
Result

Unnamed: 0,A_x,A_y,A
K0,1,4.0,
K0,1,5.0,
K1,2,,7.0
K1,2,,8.0
K2,3,,9.0


![m](https://studymachinelearning.com/wp-content/uploads/2019/11/pandas_join.png)

# pd.concat() vs merge() vs join()

merge() and join() are DataFrame methods, not Series methods (merge is also available as the top-level function pd.merge()). concat() is a top-level pandas function that combines pandas objects such as DataFrames and Series.

Merge – 

* The merge() function is used to merge DataFrames with database-style joins such as inner join, outer join, left join, and right join.
* Combining exactly two DataFrames.
* The join is done on columns or indexes.
* If joining columns on columns, the DataFrame indexes will be ignored.
* If joining indexes on indexes or indexes on a column, the index will be passed on.
 

Join – 

* The join() method is used to join a DataFrame with one or more other pandas DataFrames/Series horizontally.
* join() uses merge internally for the index-on-index (by default) and column(s)-on-index join.
* Aligns the calling DataFrame’s column(s) or index with the other objects’ index (and not the columns).
* Defaults to left join with options for right, inner and outer join


Concat – 

* concatenate two or more pandas DataFrames/Series vertically or horizontally.
* Aligns only on the index (never on column values); the axis parameter selects whether the objects are stacked vertically or horizontally.
* Defaults to outer join with the option for inner join

<div class="s-table-container">
<table class="s-table">
<thead>
<tr>
<th style="text-align: center;"></th>
<th style="text-align: left;">PROS</th>
<th style="text-align: left;">CONS</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: center;"><code>merge</code></td>
<td style="text-align: left;"><p>• supports inner/left/right/full <br>• supports column-column, index-column, index-index joins</p></td>
<td style="text-align: left;"><p> • can only join two frames at a time</p></td>
</tr>
<tr>
<td style="text-align: center;"><code>join</code></td>
<td style="text-align: left;"><p>• supports inner/left (default)/right/full <br>• can join multiple DataFrames at a time </p></td>
<td style="text-align: left;"><p> • always joins on the other frame's index (index-index, or column-index via <code>on</code>); no column-column joins</p></td>
</tr>
<tr>
<td style="text-align: center;"><code>concat</code></td>
<td style="text-align: left;"><p>•  specializes in joining multiple DataFrames at a time <br>• very fast (concatenation is linear time)</p></td>
<td style="text-align: left;"><p> • only supports inner/full (default) joins <br> • only supports index-index joins</p></td>
</tr>
</tbody>
</table>
</div>