# High-Confidence Contacts
This notebook surfaces contacts whose confidence bucket is `very_high` or `high`, sorted by confidence score (highest first), and displays the top 25 so you can review the best outreach candidates first.

In [2]:
from pathlib import Path
import pandas as pd
import yaml
from IPython.display import display

# Locate the project root by walking upward from the current working directory
# until a directory containing config.yaml is found.
search_root = Path.cwd()
config_path = None
for candidate in [search_root, *search_root.parents]:
    maybe = candidate / 'config.yaml'
    if maybe.exists():
        config_path = maybe
        project_root = candidate
        break
if config_path is None:
    raise FileNotFoundError("Unable to locate config.yaml from current working directory.")

# Read the config explicitly as UTF-8 instead of relying on the platform's
# locale encoding. An empty file parses to None, hence the `or {}` fallback.
cfg = yaml.safe_load(config_path.read_text(encoding='utf-8')) or {}

# An `outputs:` key with no value (or a null `dir:`) parses to None, so a plain
# `.get('outputs', {})` would hand back None and crash on the next lookup.
# Treat every null/empty case the same as "not configured" and use 'output'.
outputs_cfg = cfg.get('outputs') or {}
outputs_dir = (project_root / (outputs_cfg.get('dir') or 'output')).resolve()
if not outputs_dir.exists():
    raise FileNotFoundError(f"Outputs directory not found: {outputs_dir}")

# CSV files expected inside the outputs directory.
contacts_path = outputs_dir / 'consolidated_contacts.csv'
confidence_path = outputs_dir / 'confidence_report.csv'
# NOTE(review): the next two paths are not read in this cell; they are kept
# because other cells may rely on them -- confirm before removing.
referral_path = outputs_dir / 'referral_targets.csv'
tagged_path = outputs_dir / 'tagged_contacts.csv'

# Load both exports as raw text: dtype=str disables type inference and
# keep_default_na=False leaves empty cells as '' rather than NaN.
csv_options = {'dtype': str, 'keep_default_na': False}
confidence_df = pd.read_csv(confidence_path, **csv_options)
contacts_df = pd.read_csv(contacts_path, **csv_options)

# Keep only rows in the 'very_high' and 'high' buckets, then make the score
# numeric (values that cannot be parsed become NaN).
in_top_buckets = confidence_df['confidence_bucket'].isin(['very_high', 'high'])
high_confidence = confidence_df.loc[in_top_buckets].assign(
    confidence_score=lambda frame: pd.to_numeric(frame['confidence_score'], errors='coerce')
)

# Contact attributes to pull from the consolidated contacts frame. Columns that
# are not present in the frame are skipped instead of raising.
contact_columns = [
    'contact_id',
    'full_name',
    'company',
    'title',
    'source_count',
    'source_row_count',
    'emails',
    'invalid_emails',
    'phones',
    'non_standard_phones',
]
contacts_subset = contacts_df[[col for col in contact_columns if col in contacts_df.columns]].copy()

# Left join keeps every high-confidence row even when no contact record
# matches. Overlapping non-key columns from the contacts side receive the
# `_contact` suffix, so the unsuffixed name is the confidence-report value.
summary = high_confidence.merge(contacts_subset, on='contact_id', how='left', suffixes=('', '_contact'))

# Presentation order for the review table; missing columns are dropped silently.
ordered_columns = [
    'contact_id',
    'full_name',
    'company',
    'title',
    'confidence_score',
    'confidence_bucket',
    'source_count',
    'source_row_count',
    'emails',
    'invalid_emails',
    'phones',
    'non_standard_phones',
]
summary = summary[[col for col in ordered_columns if col in summary.columns]]

# Highest score first, ties broken by the number of sources (also descending).
sort_columns = [col for col in ['confidence_score', 'source_count'] if col in summary.columns]
if sort_columns:
    # `source_count` is text when the CSVs are loaded with dtype=str, and a
    # plain descending sort on text is lexicographic ('9' would outrank '10').
    # The key compares each sort column numerically without altering the
    # values shown in the table; unparseable or missing values become NaN and
    # are placed last. It is a no-op for the already-numeric confidence_score.
    summary = summary.sort_values(
        sort_columns,
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'),
    )

# Show only the top 25 rows to keep the rendered output small.
display(summary.head(25))


Unnamed: 0,contact_id,full_name,company,title,confidence_score,confidence_bucket,source_count,source_row_count,emails,invalid_emails,phones,non_standard_phones
53,9aa1f347-1949-50e9-a828-ecc59d1e5669,Ben Bertka,LogicMonitor,"Global Success Manager, GSI & MSP Partner Alli...",100,very_high,3,4,bbertka@gmail.com::|ben@tetrate.io::,,+12062148602::other,
54,c49f1725-1bcb-50aa-974f-98ed0b8a0324,Chris Smith,Red Hat,Partner Alliance Manager,100,very_high,3,4,chris.smith@tetrate.io::|smith.chris7@pm.me::,,+19194142423::work,
57,7086ffff-6ed5-5326-b865-a42021484623,Sam Herren,SUSE,Lead for ISV Ecosystem & Channel Solutions - N...,100,very_high,3,4,herren.sam@gmail.com::|sam.herren@suse.com::|s...,,+14046414700::mobile,
59,2c4e9e43-1ee4-56a1-8066-8ab1621473ce,Prasad Radhakrishnan,Yugabyte,VP Field Engineering,100,very_high,3,4,prasad.may@gmail.com::|prasad@tetrate.io::,,+13125453727::mobile,
60,0a226778-976c-574a-95ee-b405fafc0c15,James Williams,Elastic,Senior Principal Solutions Architect,100,very_high,3,4,cloudnativeguy@gmail.com::|james@tetrate.io::,,+16782830633::mobile,
61,b46bcbbe-ff5f-5995-903e-70b90bf8097c,Sean McAdam,Core Scientific,Full Stack Engineer,100,very_high,3,4,mcadams@tessco.com::|sean@fredcom.com::,,+13013254615::|+13015914113::work,
63,a75908fa-8f35-51e2-8005-90a676db4367,Adam Zwickey,GitLab,Principal Solutions Architect,100,very_high,3,4,adam@tetrate.io::|azwickey@gmail.com::,,+17042584033::mobile,
64,253e7360-8439-5190-9f0e-67363dec32dd,Nick White,Arelion,Senior IP Core Engineer,100,very_high,3,4,nick.white@edgeiq.io::|nickdwhite@gmail.com::,,+17752304642::mobile,
65,9b62ad84-db1f-54b4-bdc8-6e5c92505bd4,Maurizio Carioli,Red Hat,Principal Solution Architect,100,very_high,3,4,maurizio.carioli@gmail.com::|mcarioli@redhat.c...,,+16178037028::mobile,
69,2310325d-6582-5511-8a6f-077df3ea13e7,Gennadiy Mark Borisov,"Klika Tech, Inc",President & Co-CEO,100,very_high,3,4,gborisov@klika-tech.com::,,+12128428187::work|+19177703133::mobile,
