## Ensuring Consistency in Multi-source Data Integration

**Description**: Validate the integration of two datasets, `products_A.csv` and `products_B.csv`, by checking that each product's `category` value is consistent across both.

In [1]:
import pandas as pd

def validate_category_consistency(
    products_A_df: pd.DataFrame, products_B_df: pd.DataFrame
) -> "pd.DataFrame | None":
    """Report products whose 'category' differs between two product tables.

    Both frames are inner-joined on 'product_id', so only products present
    in BOTH inputs are compared; a product that appears in just one of them
    is not reported here. A category that is missing (NaN/None) on both
    sides counts as consistent; a category missing on only one side counts
    as an inconsistency.

    A summary is printed to stdout. Errors are printed rather than raised.

    Args:
        products_A_df: DataFrame with at least 'product_id' and 'category'.
        products_B_df: DataFrame with at least 'product_id' and 'category'.

    Returns:
        A DataFrame with columns 'product_id', 'category_A' and 'category_B'
        holding the inconsistent rows (empty when everything matches), or
        None when the inputs could not be validated or compared.
    """
    required_columns = ('product_id', 'category')
    try:
        # Step 1: Check if 'product_id' and 'category' columns exist in both datasets
        for label, frame in (('products_A', products_A_df),
                             ('products_B', products_B_df)):
            if not all(column in frame.columns for column in required_columns):
                raise ValueError(f"'product_id' or 'category' column missing in {label}")

        # Step 2: Merge datasets on 'product_id'
        merged_df = pd.merge(products_A_df[['product_id', 'category']],
                             products_B_df[['product_id', 'category']],
                             on='product_id',
                             suffixes=('_A', '_B'),
                             how='inner')

        # Step 3: Find inconsistencies in 'category' between the two datasets.
        # A plain `!=` reports NaN != NaN as True, which would flag products
        # whose category is missing in both sources; mask those rows out.
        category_a = merged_df['category_A']
        category_b = merged_df['category_B']
        missing_in_both = category_a.isna() & category_b.isna()
        mismatch = (category_a != category_b) & ~missing_in_both
        inconsistent_categories = merged_df.loc[
            mismatch, ['product_id', 'category_A', 'category_B']
        ]

        if inconsistent_categories.empty:
            print("✅ No inconsistencies found in category information.")
        else:
            print("⚠️ Inconsistencies found in category information:")
            print(inconsistent_categories)

        return inconsistent_categories

    except ValueError as ve:
        print(f"⚠️ Validation error: {ve}")
    except Exception as e:
        # Top-level boundary of this check: report the problem, never crash.
        print(f"🚨 Unexpected error: {str(e)}")

    return None


# Step 4: Build two small in-memory stand-ins for products_A and products_B.
# Both cover the same four product ids; they disagree only on the category
# of product 102 ('Clothing' in A, 'Fashion' in B).
products_A_data = dict(
    product_id=[101, 102, 103, 104],
    category=['Electronics', 'Clothing', 'Home Appliances', 'Electronics'],
)
products_B_data = dict(
    product_id=[101, 102, 103, 104],
    category=['Electronics', 'Fashion', 'Home Appliances', 'Electronics'],
)

# Step 5: Turn the simulated records into DataFrames
products_A_df = pd.DataFrame(data=products_A_data)
products_B_df = pd.DataFrame(data=products_B_data)

# Step 6: Run the consistency check on the two frames
validate_category_consistency(products_A_df, products_B_df)

⚠️ Inconsistencies found in category information:
   product_id category_A category_B
1         102   Clothing    Fashion
