# Merge and Join

In [1]:
import pandas as pd

In [2]:
# Three months of sales for the same three products.
# Each month gets its own row labels (a-c, d-f, g-i), so no index value
# is shared between the frames.
sales_jan = pd.DataFrame(
    data={
        'Product': ['Apple', 'Bananas', 'Cherries'],
        'Sales': [150, 200, 90],
    },
    index=list('abc'),
)

sales_feb = pd.DataFrame(
    data={
        'Product': ['Apple', 'Bananas', 'Cherries'],
        'Sales': [180, 210, 120],
    },
    index=list('def'),
)

sales_mar = pd.DataFrame(
    data={
        'Product': ['Apple', 'Bananas', 'Cherries'],
        'Sales': [170, 220, 110],
    },
    index=list('ghi'),
)


In [3]:
sales_jan

Unnamed: 0,Product,Sales
a,Apple,150
b,Bananas,200
c,Cherries,90


In [4]:
sales_feb

Unnamed: 0,Product,Sales
d,Apple,180
e,Bananas,210
f,Cherries,120


In [5]:
sales_mar

Unnamed: 0,Product,Sales
g,Apple,170
h,Bananas,220
i,Cherries,110


## Concatenate row wise

In [6]:
# Stack the monthly frames on top of each other (axis='index' is the default);
# the original row labels a-i are kept as-is.
pd.concat(objs=[sales_jan, sales_feb, sales_mar], axis='index')

Unnamed: 0,Product,Sales
a,Apple,150
b,Bananas,200
c,Cherries,90
d,Apple,180
e,Bananas,210
f,Cherries,120
g,Apple,170
h,Bananas,220
i,Cherries,110


## Concatenate column wise

In [7]:
# Place the monthly frames side by side. Rows are aligned on the index, and
# the three frames share no row labels, so every row is populated only in its
# own month's columns and is NaN everywhere else.
pd.concat(objs=[sales_jan, sales_feb, sales_mar], axis='columns')

Unnamed: 0,Product,Sales,Product.1,Sales.1,Product.2,Sales.2
a,Apple,150.0,,,,
b,Bananas,200.0,,,,
c,Cherries,90.0,,,,
d,,,Apple,180.0,,
e,,,Bananas,210.0,,
f,,,Cherries,120.0,,
g,,,,,Apple,170.0
h,,,,,Bananas,220.0
i,,,,,Cherries,110.0


## Merging (joining): combine two DataFrames by matching rows on one or more columns

In [8]:
# Customer list: one row per customer.
customers = pd.DataFrame(dict(
    CustomerID=[101, 102, 103, 104],
    Name=['Alice', 'Bob', 'Charlie', 'Diana'],
))

# Purchases keyed by CustomerID. Customer 102 has no purchase, and the IDs
# appear in a different order than in `customers`.
purchases = pd.DataFrame(dict(
    CustomerID=[104, 101, 103],
    PurchaseAmount=[250, 300, 150],
))

In [9]:
customers

Unnamed: 0,CustomerID,Name
0,101,Alice
1,102,Bob
2,103,Charlie
3,104,Diana


In [10]:
purchases

Unnamed: 0,CustomerID,PurchaseAmount
0,104,250
1,101,300
2,103,150


In [12]:
# Merge the two DataFrames on their shared 'CustomerID' column.
# No `how` is given, so pandas uses its default, an inner join.
pd.merge(
    left=customers,
    right=purchases,
    on='CustomerID',
)

Unnamed: 0,CustomerID,Name,PurchaseAmount
0,101,Alice,300
1,103,Charlie,150
2,104,Diana,250


In [13]:
# Inner join on 'CustomerID': only IDs present in BOTH frames are kept,
# so customer 102 (no purchase) is dropped.
pd.merge(
    left=customers,
    right=purchases,
    on='CustomerID',
    how='inner',
)

Unnamed: 0,CustomerID,Name,PurchaseAmount
0,101,Alice,300
1,103,Charlie,150
2,104,Diana,250


In [14]:
# Outer join on 'CustomerID': IDs from EITHER frame are kept.
# Customer 102 has no purchase, so its PurchaseAmount is NaN (which also
# turns the column into floats).
pd.merge(
    left=customers,
    right=purchases,
    on='CustomerID',
    how='outer',
)

Unnamed: 0,CustomerID,Name,PurchaseAmount
0,101,Alice,300.0
1,102,Bob,
2,103,Charlie,150.0
3,104,Diana,250.0


In [15]:
# Left join on 'CustomerID': every row of `customers` (the left frame) is kept;
# customers without a matching purchase get NaN in PurchaseAmount.
pd.merge(
    left=customers,
    right=purchases,
    on='CustomerID',
    how='left',
)

Unnamed: 0,CustomerID,Name,PurchaseAmount
0,101,Alice,300.0
1,102,Bob,
2,103,Charlie,150.0
3,104,Diana,250.0


In [16]:
# Right join on 'CustomerID': every row of `purchases` (the right frame) is
# kept, in the right frame's key order (104, 101, 103).
pd.merge(
    left=customers,
    right=purchases,
    on='CustomerID',
    how='right',
)

Unnamed: 0,CustomerID,Name,PurchaseAmount
0,104,Diana,250
1,101,Alice,300
2,103,Charlie,150


In [18]:
# Per-store sales figures.
sales_info = pd.DataFrame(dict(
    StoreID=['S1', 'S2', 'S3', 'S4'],
    Product=['Apples', 'Bananas', 'Cherries', 'Corn'],
    Sales=[120, 200, 90, 150],
))

# Per-store details. The StoreIDs are the same as in `sales_info`, but the
# Product for S4 differs ('Dates' here vs. 'Corn' there).
store_info = pd.DataFrame(dict(
    StoreID=['S1', 'S2', 'S3', 'S4'],
    Product=['Apples', 'Bananas', 'Cherries', 'Dates'],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston'],
    Manager=['John', 'Anna', 'Peter', 'Mike'],
))

In [19]:
sales_info

Unnamed: 0,StoreID,Product,Sales
0,S1,Apples,120
1,S2,Bananas,200
2,S3,Cherries,90
3,S4,Corn,150


In [20]:
store_info

Unnamed: 0,StoreID,Product,City,Manager
0,S1,Apples,New York,John
1,S2,Bananas,Los Angeles,Anna
2,S3,Cherries,Chicago,Peter
3,S4,Dates,Houston,Mike


In [22]:
# Merge on TWO key columns: a row matches only when both StoreID and Product
# agree. S4 is dropped because its Product differs between the frames
# ('Corn' vs. 'Dates') and the default join is inner.
pd.merge(
    left=sales_info,
    right=store_info,
    on=['StoreID', 'Product'],
)

Unnamed: 0,StoreID,Product,Sales,City,Manager
0,S1,Apples,120,New York,John
1,S2,Bananas,200,Los Angeles,Anna
2,S3,Cherries,90,Chicago,Peter


In [26]:
# Sales data, with custom row labels as the index.
sales_data = pd.DataFrame(
    dict(
        StoreID=['S1', 'S2', 'S3'],
        Sales=[300, 450, 150],
    ),
    index=['NY', 'LA', 'CH'],
)

# Revenue data, also indexed by row label. 'NY' and 'LA' appear in both
# frames; 'CH' exists only in sales_data and 'SF' only in revenue_data.
revenue_data = pd.DataFrame(
    dict(Revenue=[1200, 1500, 700]),
    index=['NY', 'LA', 'SF'],
)

In [27]:
sales_data

Unnamed: 0,StoreID,Sales
NY,S1,300
LA,S2,450
CH,S3,150


In [28]:
revenue_data

Unnamed: 0,Revenue
NY,1200
LA,1500
SF,700


In [29]:
# join() matches rows on the index rather than on a column. The default is a
# left join: all rows of sales_data are kept, and 'CH' (no revenue row) gets
# NaN in Revenue.
sales_data.join(other=revenue_data, how='left')

Unnamed: 0,StoreID,Sales,Revenue
NY,S1,300,1200.0
LA,S2,450,1500.0
CH,S3,150,


In [30]:
# Inner join on the index: only the labels present in both frames
# ('NY' and 'LA') are kept.
sales_data.join(other=revenue_data, how='inner')

Unnamed: 0,StoreID,Sales,Revenue
NY,S1,300,1200
LA,S2,450,1500


In [31]:
# Outer join on the index: labels from either frame are kept and the result
# is sorted by label. Values missing on one side ('CH' revenue, 'SF' sales)
# become NaN.
sales_data.join(other=revenue_data, how='outer')

Unnamed: 0,StoreID,Sales,Revenue
CH,S3,150.0,
LA,S2,450.0,1500.0
NY,S1,300.0,1200.0
SF,,,700.0
