In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [30]:
# Load seaborn's "tips" example dataset and preview its first five rows.

tips = sns.load_dataset("tips")
tips.head(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [31]:
# Sum total_bill and tip for every (sex, smoker) group.
# The aggregation is computed once and then copied, so both frames start with
# the same values but can be modified independently of each other.
# observed=False pins the long-standing default for categorical group keys
# (only relevant if sex/smoker are categorical), so the result does not depend
# on the installed pandas version and no FutureWarning is emitted.

tips_bill = tips.groupby(['sex', 'smoker'], observed=False)[['total_bill', 'tip']].sum()
tips_tip = tips_bill.copy()

In [32]:
# Display the per-(sex, smoker) sums of total_bill and tip
tips_bill

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,183.07
Male,No,1919.75,302.0
Female,Yes,593.27,96.74
Female,No,977.68,149.77


In [33]:
# Reduce tips_bill to the total_bill column so that it can be used to practise
# merging with tips_tip.
# Selecting the column (instead of `del`) keeps this cell re-runnable: a second
# execution no longer fails with a KeyError for the already-removed column.

tips_bill = tips_bill[['total_bill']]

In [34]:
# Check that only the total_bill column is left
tips_bill

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
sex,smoker,Unnamed: 2_level_1
Male,Yes,1337.07
Male,No,1919.75
Female,Yes,593.27
Female,No,977.68


In [35]:
# Reduce tips_tip to the tip column.
# Selecting the column (instead of `del`) keeps this cell re-runnable: a second
# execution no longer fails with a KeyError for the already-removed column.
tips_tip = tips_tip[['tip']]
tips_tip

Unnamed: 0_level_0,Unnamed: 1_level_0,tip
sex,smoker,Unnamed: 2_level_1
Male,Yes,183.07
Male,No,302.0
Female,Yes,96.74
Female,No,149.77


In [21]:
# Join the two frames on their shared (sex, smoker) index
tips_bill.merge(tips_tip, left_index=True, right_index=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,183.07
Male,No,1919.75,302.0
Female,Yes,593.27,96.74
Female,No,977.68,149.77


In [36]:
# Resetting the indexes turns sex and smoker into ordinary columns,
# which are then used as the join keys
tips_bill.reset_index().merge(
    tips_tip.reset_index(),
    on=['sex', 'smoker'],
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [37]:
# The same join also works without `on`, but be careful: pandas then joins on
# every column name the two frames have in common (here: sex and smoker).

tips_bill.reset_index().merge(tips_tip.reset_index())

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [38]:
# Mixed keys: columns on the left side, index levels on the right side
tips_bill.reset_index().merge(
    tips_tip,
    left_on=['sex', 'smoker'],
    right_index=True,
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [42]:
# Combination of index and column keys: move only the inner index level
# (smoker) into a column, while sex stays as the index
tips_bill_s = tips_bill.reset_index(level='smoker')
tips_bill_s

Unnamed: 0_level_0,smoker,total_bill
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,Yes,1337.07
Male,No,1919.75
Female,Yes,593.27
Female,No,977.68


In [44]:
# The names given to `on` may refer to columns or to index levels:
# in tips_bill_s, sex is the index and smoker is a column.
tips_tip.reset_index().merge(
    tips_bill_s,
    on=['sex', 'smoker'],
)

Unnamed: 0,sex,smoker,tip,total_bill
0,Male,Yes,183.07,1337.07
1,Male,No,302.0,1919.75
2,Female,Yes,96.74,593.27
3,Female,No,149.77,977.68


In [46]:
# Left join: every row of the left frame is kept; rows without a partner in
# the two-row right frame get NaN in the tip column.
# The join keys are spelled out so the result cannot silently change if the
# two frames ever share an additional column name.
pd.merge(
    tips_bill.reset_index(),
    tips_tip.reset_index().head(2),
    on=['sex', 'smoker'],
    how='left',
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,
3,Female,No,977.68,


In [47]:
# Inner join: only key combinations present in both frames survive, so the
# two-row right frame limits the result to two rows.
# The join keys are spelled out so the result cannot silently change if the
# two frames ever share an additional column name.
pd.merge(
    tips_bill.reset_index(),
    tips_tip.reset_index().head(2),
    on=['sex', 'smoker'],
    how='inner',
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0


In [48]:
# Outer join + indicator: rows from both sides are kept, and the extra _merge
# column reports whether each row came from both frames, the left one only,
# or the right one only.
# The join keys are spelled out so the result cannot silently change if the
# two frames ever share an additional column name.
pd.merge(
    tips_bill.reset_index().tail(3),
    tips_tip.reset_index().head(3),
    on=['sex', 'smoker'],
    how='outer',
    indicator=True,
)

Unnamed: 0,sex,smoker,total_bill,tip,_merge
0,Male,No,1919.75,302.0,both
1,Female,Yes,593.27,96.74,both
2,Female,No,977.68,,left_only
3,Male,Yes,,183.07,right_only


In [49]:
# Columns with the same name on both sides: total_bill exists in each frame,
# so the suffixes tell the left and right copies apart in the result
tips_bill.merge(
    tips_bill,
    left_index=True,
    right_index=True,
    suffixes=('_left', '_right'),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill_left,total_bill_right
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,1337.07
Male,No,1919.75,1919.75
Female,Yes,593.27,593.27
Female,No,977.68,977.68


In [50]:
# Row-wise: stack the frames on top of each other; a column that a frame
# does not have is filled with NaN for that frame's rows
pd.concat(
    [tips_bill, tips_bill, tips_tip],
    axis=0,
    sort=False,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,
Male,No,1919.75,
Female,Yes,593.27,
Female,No,977.68,
Male,Yes,1337.07,
Male,No,1919.75,
Female,Yes,593.27,
Female,No,977.68,
Male,Yes,,183.07
Male,No,,302.0


In [51]:
# Column-wise: place the frames side by side, aligning rows on the index
pd.concat([tips_bill, tips_tip], axis='columns')

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,183.07
Male,No,1919.75,302.0
Female,Yes,593.27,96.74
Female,No,977.68,149.77


In [52]:
# Adding keys: each input frame is labelled, and the labels become the
# outermost level of the resulting index
pd.concat(
    [tips_bill, tips_tip],
    keys=['num0', 'num1'],
    sort=False,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_bill,tip
Unnamed: 0_level_1,sex,smoker,Unnamed: 3_level_1,Unnamed: 4_level_1
num0,Male,Yes,1337.07,
num0,Male,No,1919.75,
num0,Female,Yes,593.27,
num0,Female,No,977.68,
num1,Male,Yes,,183.07
num1,Male,No,,302.0
num1,Female,Yes,,96.74
num1,Female,No,,149.77
