# Demo 5.1 Combining Dataframes: Left Joins with pandas  

- Data source:  w3schools_Data.xlsx  
- Tables of Data to Combine:  
  - Products  
  - Categories  

In [1]:
import pandas as pd
import plotly.express as px

# 1. Read one of the data sources to combine into a Dataframe 

In [2]:
# Load the "Products" worksheet of the workbook into its own DataFrame
# (skiprows=0: no leading rows are skipped).
df_products = pd.read_excel(
    io="Data/w3schools_Data.xlsx",
    sheet_name="Products",
    skiprows=0,
)

# Report (rows, columns), then show the first five rows as the cell output.
print(df_products.shape)
df_products.head(n=5)

(77, 6)


Unnamed: 0,Product_ID,Product_Name,SupplierID,CategoryID,ProductDescription,ProductPrice
0,1,Chais,1,1,10 boxes x 20 bags,18.0
1,2,Chang,1,1,24 - 12 oz bottles,19.0
2,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.0
3,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.0
4,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35


# 2. Read a second data source to combine into a Dataframe  

In [3]:
# Load the "Categories" worksheet of the same workbook into a second DataFrame
# (skiprows=0: no leading rows are skipped).
df_categories = pd.read_excel(
    io="Data/w3schools_Data.xlsx",
    sheet_name="Categories",
    skiprows=0,
)

# Report (rows, columns), then show the first three rows as the cell output.
print(df_categories.shape)
df_categories.head(n=3)

(8, 3)


Unnamed: 0,CategoryID,CategoryName,Description
0,1,Beverages,"Soft drinks, coffees, teas, beers, and ales"
1,2,Condiments,"Sweet and savory sauces, relishes, spreads, an..."
2,3,Confections,"Desserts, candies, and sweet breads"


# 3. Make Crucial Decisions  

### Decision 1: Choose the Left and Right Tables  

In [4]:
# Products goes on the left (a left join keeps every one of its rows);
# Categories goes on the right (its columns are attached where keys match).
left_table, right_table = df_products, df_categories

### Decision 2:  Choose the Join Fields     

In [5]:
# Show a single row of the left table to inspect its column names.
left_table.head(n=1)

Unnamed: 0,Product_ID,Product_Name,SupplierID,CategoryID,ProductDescription,ProductPrice
0,1,Chais,1,1,10 boxes x 20 bags,18.0


In [6]:
# Show a single row of the right table to inspect its column names.
right_table.head(n=1)

Unnamed: 0,CategoryID,CategoryName,Description
0,1,Beverages,"Soft drinks, coffees, teas, beers, and ales"


In [7]:
# The key column has the same name in both tables, so one literal serves
# as the join field for the left and the right side.
left_table_join_field = right_table_join_field = 'CategoryID'

# 4. Use pandas *merge()* to create a Left Join 

In [8]:
# Left join: keep every row of the left table and attach the right table's
# columns wherever the join fields are equal.
df_joined = pd.merge(
    left_table,
    right_table,
    left_on=left_table_join_field,
    right_on=right_table_join_field,
    how='left',              # Type of Join:  Left!
    validate='many_to_one',  # raise MergeError if a join key repeats in the right table
)

# Enforce the checks from the "Check the Left Join" section instead of
# relying on reading the printed shapes by eye.
# Rows: a many-to-one left join yields exactly one output row per left row.
assert df_joined.shape[0] == left_table.shape[0], (
    "Left join changed the number of rows"
)
# Columns: when both join fields share a name, pandas keeps a single key
# column, so the total is left + right - 1; otherwise both key columns remain.
shared_key_columns = 1 if left_table_join_field == right_table_join_field else 0
assert df_joined.shape[1] == (
    left_table.shape[1] + right_table.shape[1] - shared_key_columns
), "Unexpected number of columns after the join"

print()
print("5.1 Demo")
print("Left Table:  ", left_table.shape)
print("Right Table: ", right_table.shape)
print("Joined Dataframe: ", df_joined.shape)
print()

# Preview the first two joined rows as the cell output.
df_joined.head(2)


5.1 Demo
Left Table:   (77, 6)
Right Table:  (8, 3)
Joined Dataframe:  (77, 8)



Unnamed: 0,Product_ID,Product_Name,SupplierID,CategoryID,ProductDescription,ProductPrice,CategoryName,Description
0,1,Chais,1,1,10 boxes x 20 bags,18.0,Beverages,"Soft drinks, coffees, teas, beers, and ales"
1,2,Chang,1,1,24 - 12 oz bottles,19.0,Beverages,"Soft drinks, coffees, teas, beers, and ales"


# 5. Check the Left Join: Number of Rows and Columns  
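- **Number of Rows** - Should be the same number as the Left Table (77 here), provided each join value appears at most once in the Right Table  
- **Number of Columns** - Should be Left # Cols + Right # Cols - 1, because the shared join column (`CategoryID`) appears only once in the result: 6 + 3 - 1 = 8  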


# Optional:  Save the Joined Dataframe as a CSV file

In [None]:
# Disabled by default: uncomment the three code lines below to write
# df_joined to a CSV file inside the Data/ folder.
# csv_file_to_create = "Demo_5.1_Prod_Cat_Join.csv"

# The path is built by prefixing the file name with "Data/";
# index=False leaves the DataFrame's row index out of the CSV.
# filename_with_path = "Data/" + csv_file_to_create
# df_joined.to_csv(filename_with_path, index=False)