# Invalid Email Insights


In [1]:
from pathlib import Path
import pandas as pd
import yaml
from IPython.display import display

# Locate config.yaml by checking the working directory first and then each
# of its ancestors; the first directory that contains the file wins.
search_root = Path.cwd()
lookup_chain = (search_root, *search_root.parents)
config_candidates = (folder / 'config.yaml' for folder in lookup_chain)
# The generators are lazy, so the filesystem is probed only until the first hit.
config_path = next((path for path in config_candidates if path.exists()), None)
if config_path is None:
    raise FileNotFoundError("Unable to locate config.yaml from current working directory.")
# The directory holding config.yaml is used as the project root.
project_root = config_path.parent

# Parse the project configuration. An empty file makes safe_load return None,
# hence the `or {}` fallback. The file is decoded as UTF-8 explicitly instead
# of relying on the platform's default text encoding.
cfg = yaml.safe_load(config_path.read_text(encoding='utf-8')) or {}

# A bare `outputs:` key parses to None rather than a mapping, so normalise it
# before the nested lookup (previously this raised AttributeError).
outputs_cfg = cfg.get('outputs') or {}
outputs_dir = (project_root / outputs_cfg.get('dir', 'output')).resolve()
if not outputs_dir.exists():
    raise FileNotFoundError(f"Outputs directory not found: {outputs_dir}")

# CSV paths inside the resolved outputs directory.
contacts_path = outputs_dir / 'consolidated_contacts.csv'
confidence_path = outputs_dir / 'confidence_report.csv'
referral_path = outputs_dir / 'referral_targets.csv'
tagged_path = outputs_dir / 'tagged_contacts.csv'

# Load every column as a string and keep empty cells as '' (not NaN) so the
# blank-check below can use plain string comparison.
df = pd.read_csv(contacts_path, dtype=str, keep_default_na=False)
if 'invalid_emails' not in df.columns:
    raise ValueError("The consolidated contacts file does not contain an 'invalid_emails' column.")

# Keep only the rows whose invalid_emails cell is non-blank after trimming.
invalid = df[df['invalid_emails'].str.strip() != ''].copy()
if invalid.empty:
    print('No invalid emails recorded in the current dataset.')
else:
    # The cell is split on '|', then exploded so that each resulting entry
    # gets its own row.
    invalid['invalid_email_list'] = invalid['invalid_emails'].str.split('|')
    exploded = invalid.explode('invalid_email_list')
    # Drop the empty fragments left by stray or trailing separators.
    exploded['invalid_email_list'] = exploded['invalid_email_list'].fillna('').str.strip()
    exploded = exploded[exploded['invalid_email_list'] != '']
    display_columns = [
        'contact_id',
        'full_name',
        'company',
        'title',
        'invalid_email_list',
        'emails',
    ]
    # Show only the columns that actually exist in this export.
    available = [col for col in display_columns if col in exploded.columns]
    # Sort by the same "present columns" rule: a file without contact_id used
    # to raise KeyError here even though the display list tolerated its absence.
    # invalid_email_list is created above, so there is always at least one key.
    sort_keys = [col for col in ('contact_id', 'invalid_email_list') if col in available]
    result = exploded[available].sort_values(sort_keys).head(100)
    display(result)


No invalid emails recorded in the current dataset.
