Pandas Merge Tutorial
---------------------

Basic Merge Using a DataFrame Column

In [2]:
import pandas as pd

In [3]:
# Temperature table: one reading per city, written row by row so each
# city sits next to its value.
df1 = pd.DataFrame(
    data=[
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
    ],
    columns=["city", "temperature"],
)

df1

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35


In [4]:
# Humidity table for the same three cities, deliberately listed in a
# different row order than the temperature table.
df2 = pd.DataFrame(
    data=[
        ("chicago", 65),
        ("new york", 68),
        ("orlando", 75),
    ],
    columns=["city", "humidity"],
)

df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,orlando,75


In [5]:
# Match rows of the two tables on their shared "city" column. Row order
# in the inputs does not matter; rows are paired by key value.
df3 = df1.merge(df2, on="city")

df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


Types of Database Joins
-----------------------

In [6]:
# Temperature table again, now with "baltimore" added so that some
# cities appear in only one of the two tables.
df1 = pd.DataFrame(
    data=[
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
        ("baltimore", 38),
    ],
    columns=["city", "temperature"],
)

df1

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35
3,baltimore,38


In [7]:
# Humidity table containing "san diego", which has no temperature row,
# and lacking "orlando" and "baltimore".
df2 = pd.DataFrame(
    data=[
        ("chicago", 65),
        ("new york", 68),
        ("san diego", 71),
    ],
    columns=["city", "humidity"],
)

df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,san diego,71


In [8]:
# Inner join: keep only the cities that appear in both tables.
df3 = df1.merge(df2, how="inner", on="city")

df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65


In [9]:
# Outer join: keep every city from either table; a value that is missing
# on one side shows up as NaN.
df3 = df1.merge(df2, how="outer", on="city")

df3

Unnamed: 0,city,temperature,humidity
0,baltimore,38.0,
1,chicago,14.0,65.0
2,new york,21.0,68.0
3,orlando,35.0,
4,san diego,,71.0


In [10]:
# Left join: keep every row of df1; humidity is NaN for cities that have
# no match in df2.
df3 = df1.merge(df2, how="left", on="city")

df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68.0
1,chicago,14,65.0
2,orlando,35,
3,baltimore,38,


In [11]:
# Right join: keep every row of df2; temperature is NaN for cities that
# have no match in df1.
df3 = df1.merge(df2, how="right", on="city")

df3

Unnamed: 0,city,temperature,humidity
0,chicago,14.0,65
1,new york,21.0,68
2,san diego,,71


Indicator flag
--------------

In [12]:
# indicator=True appends a "_merge" column that records where each row
# came from: "left_only", "right_only", or "both".
df3 = df1.merge(
    df2,
    how="outer",
    on="city",
    indicator=True,
)

df3

Unnamed: 0,city,temperature,humidity,_merge
0,baltimore,38.0,,left_only
1,chicago,14.0,65.0,both
2,new york,21.0,68.0,both
3,orlando,35.0,,left_only
4,san diego,,71.0,right_only


Suffixes
--------

In [13]:
# First table for the suffix demo: it carries both measurement columns,
# so they will collide with the same-named columns of the second table.
df1 = pd.DataFrame(
    data=[
        ("new york", 21, 65),
        ("chicago", 14, 68),
        ("orlando", 35, 71),
        ("baltimore", 38, 75),
    ],
    columns=["city", "temperature", "humidity"],
)

df1

Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [14]:
# Second table for the suffix demo: same column names as the first table
# but different cities and values.
df2 = pd.DataFrame(
    data=[
        ("chicago", 21, 65),
        ("new york", 14, 68),
        ("san diego", 35, 71),
    ],
    columns=["city", "temperature", "humidity"],
)

df2

Unnamed: 0,city,temperature,humidity
0,chicago,21,65
1,new york,14,68
2,san diego,35,71


In [19]:
# Both tables have "temperature" and "humidity" columns. The suffixes
# tell the left-table columns ("_first") apart from the right-table
# columns ("_second") in the merged result.
df3 = df1.merge(
    df2,
    how="outer",
    on="city",
    suffixes=("_first", "_second"),
)

df3

Unnamed: 0,city,temperature_first,humidity_first,temperature_second,humidity_second
0,baltimore,38.0,75.0,,
1,chicago,14.0,68.0,21.0,65.0
2,new york,21.0,65.0,14.0,68.0
3,orlando,35.0,71.0,,
4,san diego,,,35.0,71.0


Index-Based Joins with DataFrame.join
-------------------------------------

In [20]:
# Temperature table keyed by city. `set_index` returns a new frame, so
# chaining it onto the constructor gives the city-indexed table directly
# without mutating an existing object via `inplace=True`.
df1 = pd.DataFrame({
    "city": ["new york", "chicago", "orlando"],
    "temperature": [21, 14, 35],
}).set_index("city")

df1

Unnamed: 0_level_0,temperature
city,Unnamed: 1_level_1
new york,21
chicago,14
orlando,35


In [21]:
# Humidity table keyed by city. `set_index` returns a new frame, so
# chaining it onto the constructor gives the city-indexed table directly
# without mutating an existing object via `inplace=True`.
df2 = pd.DataFrame({
    "city": ["chicago", "new york", "orlando"],
    "humidity": [65, 68, 75],
}).set_index("city")

df2

Unnamed: 0_level_0,humidity
city,Unnamed: 1_level_1
chicago,65
new york,68
orlando,75


In [22]:
# Left join aligned on the index: rows are matched on the shared "city"
# index rather than on a column.
# The suffixes are only applied to column names present in both frames.
# Here the columns ("temperature" vs. "humidity") do not overlap, so the
# result keeps the original names.
df1.join(other=df2, how="left", lsuffix="_l", rsuffix="_r")

Unnamed: 0_level_0,temperature,humidity
city,Unnamed: 1_level_1,Unnamed: 2_level_1
new york,21,68
chicago,14,65
orlando,35,75
