# Merge Data Frames

In [1]:
import pandas as pd
# Sample temperature readings, one row per city.
temperature_df = pd.DataFrame(
    data=dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
    )
)
temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [2]:
# Sample humidity readings, one row per city.
humidity_df = pd.DataFrame(
    data=dict(
        city=["delhi", "mumbai", "banglore"],
        humidity=[68, 65, 75],
    )
)
humidity_df

Unnamed: 0,city,humidity
0,delhi,68
1,mumbai,65
2,banglore,75


In [3]:
# Merge the two DataFrames on the shared 'city' column. Rows are paired by
# city value, not by index position.
df = temperature_df.merge(humidity_df, on="city")
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,65
1,delhi,45,68
2,banglore,30,75


In [4]:
# Temperature readings for four cities, now including 'chennai'.
temperature_df = pd.DataFrame(
    data=dict(
        city=["mumbai", "delhi", "banglore", "chennai"],
        temperature=[32, 45, 30, 43],
    )
)
temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30
3,chennai,43


In [5]:
# Humidity readings for delhi, mumbai and guntur.
humidity_df = pd.DataFrame(
    data=dict(
        city=["delhi", "mumbai", "guntur"],
        humidity=[68, 65, 75],
    )
)
humidity_df

Unnamed: 0,city,humidity
0,delhi,68
1,mumbai,65
2,guntur,75


In [6]:
# The default merge is an intersection (inner join): cities that appear in
# only one of the two frames are missing from the result.
df = temperature_df.merge(humidity_df, on="city")
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,65
1,delhi,45,68


In [7]:
# Outer join: the union of both frames. Every city from either side is kept,
# and values missing on one side are left empty (NaN).
df = pd.merge(
    left=temperature_df,
    right=humidity_df,
    how="outer",
    on="city",
)
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32.0,65.0
1,delhi,45.0,68.0
2,banglore,30.0,
3,chennai,43.0,
4,guntur,,75.0


In [8]:
# Inner join, spelled out explicitly (this is also what merge does by default).
df = temperature_df.merge(humidity_df, how="inner", on="city")
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,65
1,delhi,45,68


In [9]:
# Left join: keep every row of temperature_df; humidity stays empty (NaN)
# for cities that humidity_df does not contain.
df = temperature_df.merge(humidity_df, how="left", on="city")
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,65.0
1,delhi,45,68.0
2,banglore,30,
3,chennai,43,


In [10]:
# Right join: keep every row of humidity_df; temperature stays empty (NaN)
# for cities that temperature_df does not contain.
df = temperature_df.merge(humidity_df, how="right", on="city")
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32.0,65
1,delhi,45.0,68
2,guntur,,75


In [12]:
# Pass indicator=True to find out where each row came from: the extra
# `_merge` column reports 'left_only', 'right_only' or 'both'.
df = temperature_df.merge(
    humidity_df,
    how="outer",
    on="city",
    indicator=True,
)
df

Unnamed: 0,city,temperature,humidity,_merge
0,mumbai,32.0,65.0,both
1,delhi,45.0,68.0,both
2,banglore,30.0,,left_only
3,chennai,43.0,,left_only
4,guntur,,75.0,right_only


In [15]:
# First frame: temperature and humidity together, for four cities.
df1 = pd.DataFrame(
    data=dict(
        city=["mumbai", "delhi", "banglore", "chennai"],
        temperature=[32, 45, 30, 43],
        humidity=[80, 60, 78, 75],
    )
)
df1

Unnamed: 0,city,humidity,temperature
0,mumbai,80,32
1,delhi,60,45
2,banglore,78,30
3,chennai,75,43


In [16]:
# Second frame: the same column names as df1, for three cities.
df2 = pd.DataFrame(
    data=dict(
        city=["delhi", "banglore", "guntu"],
        temperature=[32, 45, 30],
        humidity=[80, 60, 78],
    )
)
df2

Unnamed: 0,city,humidity,temperature
0,delhi,80,32
1,banglore,60,45
2,guntu,78,30


In [17]:
# Both frames have 'temperature' and 'humidity' columns, so pandas
# automatically appends the suffixes _x (left) and _y (right) to tell
# them apart.
df = df1.merge(df2, on="city")
df

Unnamed: 0,city,humidity_x,temperature_x,humidity_y,temperature_y
0,delhi,60,45,80,32
1,banglore,78,30,60,45


In [18]:
# To choose the suffixes yourself instead of the default _x / _y, pass
# them explicitly via `suffixes`.
df = df1.merge(df2, on="city", suffixes=("_left", "_right"))
df

Unnamed: 0,city,humidity_left,temperature_left,humidity_right,temperature_right
0,delhi,60,45,80,32
1,banglore,78,30,60,45
