## Find Conflicting Values Across Datasets

**Description**: You have two datasets, `crm_customers.csv` and `erp_customers.csv`. Find the customers whose `email` values conflict between the two datasets.

In [3]:
import pandas as pd

def _normalize_text(column):
    """Return *column* as trimmed, lower-cased text ('' where missing)."""
    # astype(str) keeps the .str accessor usable when pandas inferred a
    # non-string dtype for the column (e.g. purely numeric values).
    return column.fillna('').astype(str).str.strip().str.lower()


def find_conflicting_emails(crm_path, erp_path):
    """Print the customers whose email differs between two CSV exports.

    Both files must contain ``customer_id`` and ``email`` columns. Rows are
    matched on ``customer_id`` (compared as trimmed text) with an inner join,
    so customers present in only one file are not reported. Emails are
    compared after trimming whitespace and lower-casing; a missing email is
    treated as an empty string.

    Args:
        crm_path: Path to the CRM customers CSV file.
        erp_path: Path to the ERP customers CSV file.

    Returns:
        None. The outcome, or a description of any error, is printed.
    """
    try:
        # Step 1: Read the CSV files
        crm_df = pd.read_csv(crm_path)
        erp_df = pd.read_csv(erp_path)

        # Step 2: Basic validation checks
        required_columns = {'customer_id', 'email'}
        if not required_columns.issubset(crm_df.columns):
            raise ValueError("Missing required columns in CRM dataset")
        if not required_columns.issubset(erp_df.columns):
            raise ValueError("Missing required columns in ERP dataset")

        # Step 3: Check for empty dataframes
        if crm_df.empty or erp_df.empty:
            raise ValueError("One or both datasets are empty")

        # Step 4: Compare customer_id as trimmed text in both datasets, so a
        # dtype difference or stray whitespace cannot silently drop a match.
        crm_df['customer_id'] = crm_df['customer_id'].astype(str).str.strip()
        erp_df['customer_id'] = erp_df['customer_id'].astype(str).str.strip()

        # Step 5: Merge datasets on customer_id (inner join: shared IDs only)
        merged_df = pd.merge(crm_df, erp_df, on='customer_id', suffixes=('_crm', '_erp'))

        # Step 6: Identify conflicts in emails after normalizing both sides
        merged_df['email_crm'] = _normalize_text(merged_df['email_crm'])
        merged_df['email_erp'] = _normalize_text(merged_df['email_erp'])
        conflicts = merged_df[merged_df['email_crm'] != merged_df['email_erp']]

        if conflicts.empty:
            print("✅ No conflicting emails found.")
        else:
            print("🔍 Conflicting Email Records:")
            print(conflicts[['customer_id', 'email_crm', 'email_erp']])

    except FileNotFoundError as e:
        print(f"❌ File not found: {e.filename}")
    except ValueError as ve:
        print(f"⚠️ Validation error: {ve}")
    except Exception as e:
        # Notebook-level boundary: report the problem instead of a traceback.
        print(f"🚨 Unexpected error: {e}")

# Demo invocation -- substitute the paths to your own CRM and ERP CSV exports.
find_conflicting_emails(crm_path='crm_customers.csv', erp_path='erp_customers.csv')

❌ File not found: crm_customers.csv


In [2]:
# Write your code from here
import pandas as pd

# Step 1: Build an in-memory stand-in for the CRM export
crm_data = dict(
    customer_id=[1, 2, 3, 4],
    name=['Alice', 'Bob', 'Charlie', 'David'],
    email=[
        'alice@example.com',
        'bob@example.com',
        'charlie@example.com',
        'david@example.com',
    ],
)
crm_df = pd.DataFrame(crm_data)

# Step 2: Build the ERP stand-in; customer 2 deliberately has a different email
erp_data = dict(
    customer_id=[1, 2, 3, 4],
    name=['Alice', 'Bob', 'Charlie', 'David'],
    email=[
        'alice@example.com',
        'bobby@example.com',
        'charlie@example.com',
        'david@example.com',
    ],
)
erp_df = pd.DataFrame(erp_data)

# Step 3: Join the two tables on customer_id; shared columns get a source suffix
merged_df = crm_df.merge(erp_df, on='customer_id', suffixes=('_crm', '_erp'))

# Step 4: Keep only the rows where the two email columns disagree
email_mismatch = merged_df['email_crm'].ne(merged_df['email_erp'])
conflicting_emails = merged_df.loc[email_mismatch]

# Step 5: Show the conflicting customers
report_columns = ['customer_id', 'name_crm', 'email_crm', 'email_erp']
print("🔍 Customers with Conflicting Email Addresses:")
print(conflicting_emails[report_columns])

🔍 Customers with Conflicting Email Addresses:
   customer_id name_crm        email_crm          email_erp
1            2      Bob  bob@example.com  bobby@example.com
