## Find Conflicting Values Across Datasets

**Description**: You have two datasets, `crm_customers.csv` and `erp_customers.csv`. Find the customers whose "email" values conflict between the two datasets.

In [5]:
import pandas as pd

def find_conflicting_emails_from_dataframes(crm_df, erp_df):
    """Print the customers whose email differs between the CRM and ERP data.

    Both frames must contain ``customer_id`` and ``email`` columns. Customer
    ids are compared as strings; emails are compared after replacing missing
    values with ``''``, stripping surrounding whitespace and lower-casing.
    Only customers present in both frames are compared (inner merge).

    The input frames are not modified. Results are reported with ``print``
    and the function returns ``None``. Validation failures (missing columns,
    empty frame) and any other ``Exception`` are caught and printed rather
    than raised.

    Args:
        crm_df: DataFrame holding the CRM customer records.
        erp_df: DataFrame holding the ERP customer records.
    """
    try:
        # Step 1: Validation
        required_columns = {'customer_id', 'email'}
        if not required_columns.issubset(crm_df.columns):
            raise ValueError("Missing required columns in CRM dataset")
        if not required_columns.issubset(erp_df.columns):
            raise ValueError("Missing required columns in ERP dataset")

        if crm_df.empty or erp_df.empty:
            raise ValueError("One or both datasets are empty")

        # Step 2: Ensure customer_id is a string.
        # assign() returns new frames, so the caller's DataFrames keep their
        # original customer_id values and dtype instead of being rewritten.
        crm_keyed = crm_df.assign(customer_id=crm_df['customer_id'].astype(str))
        erp_keyed = erp_df.assign(customer_id=erp_df['customer_id'].astype(str))

        # Step 3: Merge (inner join: only ids present in both datasets)
        merged_df = pd.merge(
            crm_keyed, erp_keyed, on='customer_id', suffixes=('_crm', '_erp')
        )

        # Step 4: Normalize and compare emails
        merged_df['email_crm'] = merged_df['email_crm'].fillna('').str.strip().str.lower()
        merged_df['email_erp'] = merged_df['email_erp'].fillna('').str.strip().str.lower()

        conflicts = merged_df[merged_df['email_crm'] != merged_df['email_erp']]

        if conflicts.empty:
            print("✅ No conflicting emails found.")
        else:
            print("🔍 Conflicting Email Records:")
            print(conflicts[['customer_id', 'email_crm', 'email_erp']])

    except ValueError as ve:
        print(f"⚠️ Validation error: {ve}")
    except Exception as e:
        print(f"🚨 Unexpected error: {str(e)}")


# ✅ Simulated CRM and ERP extracts: same four customers, with customer 2
# carrying a different email address in the ERP data.
crm_data = dict(
    customer_id=[1, 2, 3, 4],
    email=[
        'alice@example.com',
        'bob@example.com',
        'charlie@example.com',
        'david@example.com',
    ],
)
erp_data = dict(
    customer_id=[1, 2, 3, 4],
    email=[
        'alice@example.com',
        'bobby@example.com',
        'charlie@example.com',
        'david@example.com',
    ],
)

crm_df, erp_df = pd.DataFrame(crm_data), pd.DataFrame(erp_data)

# Compare the two simulated datasets and print any email conflicts
find_conflicting_emails_from_dataframes(crm_df, erp_df)

🔍 Conflicting Email Records:
  customer_id        email_crm          email_erp
1           2  bob@example.com  bobby@example.com
