# Explanation of joins
This notebook demonstrates the SQL-style join types (inner, left, right, full outer, and anti) and how to perform each of them in pandas.

In [3]:
import pandas as pd
import numpy as np

In [35]:
# Build the two demo tables used by every join below.
# The seed is fixed so the random values are the same on every run; the
# values for df1 are drawn before those for df2, so the order matters.
np.random.seed(10)

# Left-hand table: keys A-E, each with a random integer in [0, 10)
keys1 = list('ABCDE')
values1 = np.random.randint(0, 10, size=5)
df1 = pd.DataFrame({'key': keys1, 'value_1': values1})

# Right-hand table: shares only keys A and B with the left-hand table
keys2 = list('ABFGH')
values2 = np.random.randint(0, 10, size=5)
df2 = pd.DataFrame({'key': keys2, 'value_2': values2})

# Show both tables, left one first
for table in (df1, df2):
    print(table)


  key  value_1
0   A        9
1   B        4
2   C        0
3   D        1
4   E        9
  key  value_2
0   A        0
1   B        1
2   F        8
3   G        9
4   H        0


## 1. Inner Join
It returns only the rows whose key is present in both tables, combining the columns of the two. If a row has no match in the other table, it is not returned.

In [36]:
# Inner join: keep only the rows whose 'key' occurs in both df1 and df2.
df_inner = df1.merge(df2, how='inner', on='key')
print(df_inner)

  key  value_1  value_2
0   A        9        0
1   B        4        1


## 2. Left Join
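It returns all rows from the left table and the matching rows from the right table. If there is no match, the columns coming from the right table will contain null (shown as `NaN` in pandas). Because `NaN` is a floating-point value, `value_2` is displayed as floats below.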

In [37]:
# Left join: every row of df1 is kept; value_2 is filled in only where
# df2 has a row with the same 'key'.
df_left = df1.merge(df2, how='left', on='key')
print(df_left)

  key  value_1  value_2
0   A        9      0.0
1   B        4      1.0
2   C        0      NaN
3   D        1      NaN
4   E        9      NaN


## 3. Right Join
It returns all rows from the right table and the matching rows from the left table. If there is no match, the left side will contain null.

In [38]:
# Right join: every row of df2 is kept; value_1 is filled in only where
# df1 has a row with the same 'key'.
df_right = df1.merge(df2, how='right', on='key')
print(df_right)

  key  value_1  value_2
0   A      9.0        0
1   B      4.0        1
2   F      NaN        8
3   G      NaN        9
4   H      NaN        0


## 4. Full Outer Join
It returns all rows from both tables. If there is no match, the missing side will contain null.

In [39]:
# Full outer join: keep the rows of both df1 and df2, matched on 'key'
# where possible and left with missing values on the other side otherwise.
df_outer = df1.merge(df2, how='outer', on='key')
print(df_outer)

  key  value_1  value_2
0   A      9.0      0.0
1   B      4.0      1.0
2   C      0.0      NaN
3   D      1.0      NaN
4   E      9.0      NaN
5   F      NaN      8.0
6   G      NaN      9.0
7   H      NaN      0.0


## 5. Anti Join
It returns all rows from the left table where there is no match in the right table.

In [40]:
# Anti join: keep the rows of df1 whose 'key' does not occur in df2.
# Flag the df1 rows that do have a partner in df2, then select the rest.
has_match = df1['key'].isin(df2['key'])
df_anti = df1.loc[~has_match]
print(df_anti)

  key  value_1
2   C        0
3   D        1
4   E        9
