# High-Confidence Contacts
This notebook lists the contacts whose confidence bucket is `very_high` or `high`, sorted by confidence score (highest first), and displays the first 25 so you can review the best outreach candidates first.

In [None]:
from pathlib import Path
import pandas as pd
import yaml
from IPython.display import display

# Walk upward from the current working directory until a config.yaml is found;
# the directory that contains it is treated as the project root.
search_root = Path.cwd()
config_path = None
for candidate in [search_root, *search_root.parents]:
    maybe = candidate / 'config.yaml'
    if maybe.exists():
        config_path = maybe
        project_root = candidate
        break
if config_path is None:
    raise FileNotFoundError("Unable to locate config.yaml from current working directory.")

# Read the config as UTF-8 explicitly so parsing does not depend on the
# platform's default encoding. An empty file parses to None, hence `or {}`.
cfg = yaml.safe_load(config_path.read_text(encoding='utf-8')) or {}

# A bare `outputs:` key (no value) parses to None rather than being absent, so
# `.get('outputs', {})` would hand back None; normalise it to an empty mapping.
outputs_cfg = cfg.get('outputs') or {}
outputs_subdir = outputs_cfg.get('dir', 'output')
if outputs_subdir is None:
    # `dir:` present but empty -> use the same default as a missing key.
    outputs_subdir = 'output'

# The configured directory is joined onto the project root, then resolved to an
# absolute path.
outputs_dir = (project_root / outputs_subdir).resolve()
if not outputs_dir.exists():
    raise FileNotFoundError(f"Outputs directory not found: {outputs_dir}")

# Paths of the CSV artefacts inside the outputs directory.
contacts_path, confidence_path, referral_path, tagged_path = (
    outputs_dir / filename
    for filename in (
        'consolidated_contacts.csv',
        'confidence_report.csv',
        'referral_targets.csv',
        'tagged_contacts.csv',
    )
)

# Load every column as text and do not apply pandas' default NaN markers.
csv_options = {'dtype': str, 'keep_default_na': False}
confidence_df = pd.read_csv(confidence_path, **csv_options)
contacts_df = pd.read_csv(contacts_path, **csv_options)

# Keep only rows in the two top buckets, then turn the score into a number
# (values that cannot be parsed become NaN).
top_buckets = ['very_high', 'high']
in_top_bucket = confidence_df['confidence_bucket'].isin(top_buckets)
high_confidence = confidence_df.loc[in_top_bucket].assign(
    confidence_score=lambda frame: pd.to_numeric(frame['confidence_score'], errors='coerce')
)

# Contact attributes to pull in next to each confidence row. Columns missing
# from the contacts file are skipped rather than raising.
contact_columns = [
    'contact_id',
    'full_name',
    'company',
    'title',
    'source_count',
    'source_row_count',
    'emails',
    'invalid_emails',
    'phones',
    'non_standard_phones',
]
contacts_subset = contacts_df[[col for col in contact_columns if col in contacts_df.columns]].copy()

# Left join keeps every high-confidence row even when no contact matches. When
# both frames carry the same column name, the confidence-side column keeps its
# name and the contacts-side copy gets the '_contact' suffix.
summary = high_confidence.merge(contacts_subset, on='contact_id', how='left', suffixes=('', '_contact'))

# Presentation order; columns that are not present after the merge are dropped.
ordered_columns = [
    'contact_id',
    'full_name',
    'company',
    'title',
    'confidence_score',
    'confidence_bucket',
    'source_count',
    'source_row_count',
    'emails',
    'invalid_emails',
    'phones',
    'non_standard_phones',
]
summary = summary[[col for col in ordered_columns if col in summary.columns]]

sort_columns = [col for col in ['confidence_score', 'source_count'] if col in summary.columns]
if sort_columns:
    # The CSVs are loaded as text, so `source_count` holds strings; sorting it
    # directly would be lexicographic ("9" ahead of "10"). Compare numerically
    # through `key` so the displayed values themselves are left untouched.
    # Unparseable or missing values become NaN and sort last.
    summary = summary.sort_values(
        sort_columns,
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'),
    )

# Show the top 25 rows.
display(summary.head(25))
