# In [None]:  (Jupyter notebook cell marker left over from export)
import os
import re
import datetime
import pandas as pd
import requests
import time
import warnings
from urllib.parse import urlparse

warnings.filterwarnings("ignore")

# === Env Keys ===
# The Serper API key is read from the SERPER_API_KEY environment variable so
# the secret does not need to be stored in the source. Surrounding whitespace
# is stripped because it is not valid in an HTTP header value. To hard-code a
# key instead, replace the empty fallback string below.
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "").strip()

# === Search Function ===
def search_google(query, timeout=15):
    """Run a Google search through the Serper API and return the decoded JSON.

    Args:
        query: Search string sent as the ``q`` field of the request payload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body decoded to a dict, or an empty dict when the API
        key is missing, the request fails, the server answers with an HTTP
        error status, or the body is not a JSON object. Callers rely on
        ``.get('organic', [])`` working on the return value.
    """
    # A blank key can never authenticate; skip the network round trip.
    if not SERPER_API_KEY.strip():
        print("Search error: SERPER_API_KEY is not set")
        return {}

    headers = {
        'X-API-KEY': SERPER_API_KEY,
        'Content-Type': 'application/json'
    }
    payload = {
        'q': query,
        'gl': 'us',
        'num': 20
    }
    try:
        response = requests.post(
            'https://google.serper.dev/search',
            json=payload,
            headers=headers,
            timeout=timeout,  # without a timeout a stalled connection hangs the whole run
        )
        # Surface 4xx/5xx answers (bad key, quota exceeded) instead of
        # silently parsing an error body as if it were search results.
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Search error: {e}")
        return {}

    return data if isinstance(data, dict) else {}

# === Helper to Parse Entry ===
def parse_entry(entry):
    """Split a ``BIN~PCN~GROUP`` string into its three identifiers.

    Args:
        entry: Tilde-separated string. A field equal to ``NULL`` (or missing
            entirely) means the identifier is not available.

    Returns:
        Dict with keys ``"BIN"``, ``"PCN"`` and ``"GROUP"``; unavailable
        identifiers are returned as empty strings.
    """
    fields = [part.strip() for part in entry.split("~")]
    # Pad short entries so a missing trailing field behaves like NULL
    # instead of raising IndexError.
    fields += ["NULL"] * (3 - len(fields))
    bin_id, pcn, group_id = (
        "" if field == "NULL" else field for field in fields[:3]
    )
    return {"BIN": bin_id, "PCN": pcn, "GROUP": group_id}

# === Official Pharmacy Databases and Sources ===
# Domains whose search results are treated as trusted sources.
# extract_plan_from_official_results() examines results from these domains
# before any others, and verify_plan_info() looks for them when checking a
# plan name.
TRUSTED_DOMAINS = [
    # Government sites
    'cms.gov',
    'medicare.gov', 
    'medicaid.gov',
    # Pharmacy benefit managers and insurers
    'caremark.com',
    'express-scripts.com',
    'optum.com',
    'humana.com',
    'bcbs.com',
    'anthem.com',
    'aetna.com',
    'uhc.com',
    'primetherapeutics.com',
    'magellanrx.com',
    'esi.com'
]

def lookup_plan_info_targeted(bin_id, pcn, group_id):
    """Look up plan information, using the BIN as the primary identifier.

    When a BIN is given and search_bin_primary() resolves it, the result is
    refined with PCN and GroupID details. Otherwise the lookup falls back to
    known GroupID patterns, then a standalone PCN search, then a combined
    last-resort search.

    Args:
        bin_id: BIN string, or an empty string when unknown.
        pcn: PCN string, or an empty string when unknown.
        group_id: Group ID string, or an empty string when unknown.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; unresolved values are ``"Not Found"``.
    """
    not_found = "Not Found"

    print("    Starting BIN-focused lookup...")

    # PRIMARY STRATEGY: BIN is the master key - find the plan network first
    if bin_id:
        print(f"    🔑 BIN {bin_id} is PRIMARY - finding the plan network...")
        bin_result = search_bin_primary(bin_id)

        if bin_result["Plan Type"] != not_found:
            print(f"    ✅ Found plan network: {bin_result['Plan Type']}")

            # Work on a copy so the helper's result dict is left untouched.
            refined_result = bin_result.copy()

            if pcn:
                print(f"    🔍 Refining with PCN {pcn} for benefit details...")
                pcn_refinement = get_pcn_details_within_network(bin_result, pcn)
                # The helper signals failure with the string "Not Found",
                # which is truthy; it must be excluded explicitly or the plan
                # name would become "<network> - Not Found".
                if pcn_refinement and pcn_refinement not in (not_found, bin_result["Plan Type"]):
                    refined_result["Plan Type"] = f"{bin_result['Plan Type']} - {pcn_refinement}"
                    print(f"    ✅ Refined to: {refined_result['Plan Type']}")

            if group_id:
                print(f"    🎯 Further refining with GroupID {group_id} for specific coverage...")
                group_refinement = get_group_details_within_plan(refined_result, group_id)
                # Same "Not Found" sentinel handling as for the PCN refinement.
                if group_refinement and group_refinement not in (not_found, refined_result["Plan Type"]):
                    refined_result["Plan Type"] = f"{refined_result['Plan Type']} ({group_refinement})"
                    print(f"    ✅ Final refinement: {refined_result['Plan Type']}")

            return refined_result

        print(f"    ❌ BIN {bin_id} not found in primary databases")

    # FALLBACK STRATEGIES (only if BIN not found or not provided)
    print("    📋 Using fallback strategies...")

    # Try known group patterns first
    if group_id:
        print(f"    Checking known GroupID patterns for {group_id}...")
        known_group_result = search_known_group_patterns(group_id)
        if known_group_result["Plan Type"] != not_found:
            return known_group_result

    # Try PCN-based lookup (less reliable without BIN)
    if pcn:
        print(f"    Searching PCN {pcn} (without BIN context)...")
        pcn_result = search_pcn_standalone(pcn)
        if pcn_result["Plan Type"] != not_found:
            return pcn_result

    # Last resort: try any combination
    return search_any_available_info(bin_id, pcn, group_id)

def search_bin_primary(bin_id):
    """Resolve a BIN to a pharmacy network.

    Runs a fixed series of web searches and returns the first network that
    identify_network_from_results() recognises. If none of the searches
    succeed, the local table in check_known_bin_patterns() is consulted.

    Args:
        bin_id: BIN string to look up.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` on failure.
    """
    missing = {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

    # Queries go from generic phrasing to site-restricted searches.
    search_queries = (
        f'BIN {bin_id} pharmacy network database',
        f'BIN {bin_id} PBM pharmacy benefit manager',
        f'"{bin_id}" pharmacy benefits network',
        f'BIN {bin_id} formulary site:caremark.com OR site:express-scripts.com OR site:optum.com',
        f'BIN {bin_id} site:cms.gov OR site:medicare.gov OR site:medicaid.gov',
    )

    for search_query in search_queries:
        print(f"      Primary BIN search: {search_query}")
        candidate = identify_network_from_results(search_google(search_query), bin_id)
        if candidate["Plan Type"] != "Not Found":
            print(f"      🎯 Identified network: {candidate['Plan Type']}")
            return candidate

        # Pause between API calls.
        time.sleep(1)

    # Web searches were inconclusive; fall back to the local BIN table.
    table_hit = check_known_bin_patterns(bin_id)
    if table_hit["Plan Type"] == "Not Found":
        return missing
    return table_hit

def identify_network_from_results(results, bin_id):
    """Pick a network name out of search results by keyword matching.

    Each organic result's title, link and snippet are lower-cased and joined;
    the first keyword from the indicator table found in that text decides the
    network. Results are examined in order and the first hit wins.

    Args:
        results: Decoded search response; organic results are read from its
            ``'organic'`` key.
        bin_id: Accepted for interface symmetry; not used by the matching.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` when no keyword
        matches.
    """
    # Keyword -> display name. Dict order is the match priority.
    network_indicators = {
        'caremark': 'CVS Caremark',
        'cvs': 'CVS Caremark',
        'express-scripts': 'Express Scripts',
        'esi.com': 'Express Scripts',
        'optum': 'OptumRx',
        'uhc.com': 'OptumRx',
        'humana': 'Humana Pharmacy',
        'anthem': 'Anthem Pharmacy',
        'aetna': 'Aetna Better Health',
        'bcbs': 'Blue Cross Blue Shield',
        'prime': 'Prime Therapeutics',
        'magellan': 'Magellan Rx',
        'medicare.gov': 'Medicare Part D',
        'medicaid': 'Medicaid'
    }

    for hit in results.get('organic', []):
        haystack = " ".join(
            hit.get(field, '').lower() for field in ('title', 'link', 'snippet')
        )
        network_name = next(
            (name for keyword, name in network_indicators.items() if keyword in haystack),
            None,
        )
        if network_name is None:
            continue

        # Found the network - collect the PDF link and date from this result.
        return {
            "Plan Type": network_name,
            "PDF Link": find_pdf_in_result(hit),
            "Document Date": extract_date_from_result(hit)
        }

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def check_known_bin_patterns(bin_id):
    """Look a BIN up in the hard-coded table of known networks.

    Args:
        bin_id: BIN string; must match a table key exactly.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``. For a known BIN the PDF link is the marker
        ``"Known Pattern - Search Needed"``; for an unknown BIN all three
        values are ``"Not Found"``.
    """
    # Known BIN patterns (extend this table as more are discovered)
    known_bins = {
        '610144': 'AbsoluteCare Network',
        '004336': 'Express Scripts',
        '016820': 'Tennessee Medicaid (TennCare)',
        '006558': 'IBEW Union Benefits',
    }

    network = known_bins.get(bin_id)
    if network is None:
        return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

    return {
        "Plan Type": network,
        "PDF Link": "Known Pattern - Search Needed",
        "Document Date": "Not Found"
    }

def get_pcn_details_within_network(network_result, pcn):
    """Search for a PCN-specific plan variant inside an identified network.

    Args:
        network_result: Result dict whose ``"Plan Type"`` holds the network
            name used in the search queries.
        pcn: PCN string to search for.

    Returns:
        The first variant text found by extract_pcn_variant_from_results(),
        or the string ``"Not Found"`` when no query yields one.
    """
    network = network_result["Plan Type"]

    query_templates = (
        '"{network}" PCN {pcn} benefit design',
        'PCN {pcn} "{network}" formulary',
        'PCN {pcn} coverage details "{network}"',
    )

    for template in query_templates:
        search_query = template.format(network=network, pcn=pcn)
        print(f"        PCN refinement: {search_query}")

        variant = extract_pcn_variant_from_results(search_google(search_query), pcn)
        if variant != "Not Found":
            return variant

        # Short pause between API calls.
        time.sleep(0.5)

    return "Not Found"

def get_group_details_within_plan(plan_result, group_id):
    """Search for GroupID-specific details inside an identified plan.

    Args:
        plan_result: Result dict whose ``"Plan Type"`` holds the plan name
            used in the search queries.
        group_id: Group ID string to search for.

    Returns:
        The first detail text found by extract_group_variant_from_results(),
        or the string ``"Not Found"`` when no query yields one.
    """
    plan = plan_result["Plan Type"]

    query_templates = (
        '"{plan}" Group {group} coverage',
        'Group {group} "{plan}" benefits',
        'Group {group} employer plan "{plan}"',
    )

    for template in query_templates:
        search_query = template.format(plan=plan, group=group_id)
        print(f"          Group refinement: {search_query}")

        detail = extract_group_variant_from_results(search_google(search_query), group_id)
        if detail != "Not Found":
            return detail

        # Short pause between API calls.
        time.sleep(0.5)

    return "Not Found"

def extract_pcn_variant_from_results(results, pcn):
    """Find a benefit-design phrase in the titles and snippets of results.

    Results are scanned in order; within each result the patterns are tried
    in order and the first match (case-insensitive) is returned.

    Args:
        results: Decoded search response; organic results are read from its
            ``'organic'`` key.
        pcn: Accepted for interface symmetry; not used by the matching.

    Returns:
        The matched text with surrounding whitespace removed, or
        ``"Not Found"``.
    """
    # Benefit design indicators, in priority order.
    benefit_patterns = (
        r'(Standard|Premium|Basic|Enhanced|Plus|Select|Choice)\s*(Plan|Benefits?|Coverage)',
        r'(Tier\s*\d+)',
        r'(Low|High|Standard)\s*Deductible',
        r'(Commercial|Government|Federal|State)\s*(Plan|Benefits?)',
    )

    for hit in results.get('organic', []):
        text = hit.get('title', '') + " " + hit.get('snippet', '')
        found = next(
            (m for m in (re.search(p, text, re.IGNORECASE) for p in benefit_patterns) if m),
            None,
        )
        if found is not None:
            return found.group(0).strip()

    return "Not Found"

def extract_group_variant_from_results(results, group_id):
    """Find an employer- or group-specific phrase in search results.

    Results are scanned in order; within each result the patterns are tried
    in order and the first match (case-insensitive) is returned.

    Args:
        results: Decoded search response; organic results are read from its
            ``'organic'`` key.
        group_id: Group ID whose literal occurrences (optionally followed by
            letters and digits) are also searched for. May be empty, in
            which case only the generic patterns are used.

    Returns:
        The matched text with surrounding whitespace removed, or
        ``"Not Found"``.
    """
    # Employer or group-specific indicators, in priority order.
    group_patterns = [
        r'(Employee|Employer|Union|Government|Federal|State|Municipal)\s*(Plan|Benefits?|Coverage)',
        r'(Active|Retired|Retiree)\s*(Members?|Benefits?)',
        r'(Group\s*\w+)',
    ]
    if group_id:
        # Escape the ID so characters such as "+" or "(" are matched
        # literally instead of being interpreted as regex syntax. The
        # pattern is skipped for an empty ID because it would otherwise
        # match arbitrary text.
        group_patterns.append(rf'({re.escape(str(group_id))}[A-Z]*\d*)')

    for result in results.get('organic', []):
        combined_text = result.get('title', '') + " " + result.get('snippet', '')

        for pattern in group_patterns:
            match = re.search(pattern, combined_text, re.IGNORECASE)
            if match:
                return match.group(0).strip()

    return "Not Found"

def search_known_group_patterns(group_id):
    """Map a few recognised group IDs to fixed plan names.

    The comparison is case-insensitive: ``TENNCARE`` and ``WKLA`` must match
    the whole ID, while ``IBEW`` may appear anywhere in it. For a recognised
    ID the matching formulary search helper supplies the PDF link.

    Args:
        group_id: Group ID string.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` for an
        unrecognised ID.
    """
    normalized = group_id.upper()

    if normalized == "TENNCARE":
        plan_type, pdf_lookup = "TennCare Medicaid", search_for_tenncare_formulary
    elif "IBEW" in normalized:
        plan_type, pdf_lookup = "IBEW Union Health Plan", search_for_ibew_formulary
    elif normalized == "WKLA":
        plan_type, pdf_lookup = "Louisiana Workers Compensation", search_for_wkla_formulary
    else:
        return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

    return {
        "Plan Type": plan_type,
        "PDF Link": pdf_lookup(),
        "Document Date": "Not Found"
    }

def search_pcn_standalone(pcn):
    """Search for a PCN on its own, without BIN context (less reliable).

    Args:
        pcn: PCN string to search for.

    Returns:
        The first result dict from extract_plan_from_official_results()
        whose ``"Plan Type"`` is resolved, otherwise a dict with all three
        values set to ``"Not Found"``.
    """
    for suffix in ('pharmacy formulary', 'prescription benefits', 'PBM network'):
        search_query = f'PCN {pcn} {suffix}'
        print(f"      Standalone PCN search: {search_query}")

        candidate = extract_plan_from_official_results(search_google(search_query), pcn=pcn)
        if candidate["Plan Type"] != "Not Found":
            return candidate

        # Pause between API calls.
        time.sleep(1)

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def search_any_available_info(bin_id, pcn, group_id):
    """Run one combined search using whichever identifiers are present.

    Args:
        bin_id: BIN string, or a falsy value when unknown.
        pcn: PCN string, or a falsy value when unknown.
        group_id: Group ID string, or a falsy value when unknown.

    Returns:
        The dict produced by extract_plan_from_official_results() for the
        combined query, or a dict of ``"Not Found"`` values when no
        identifier was supplied (no search is made in that case).
    """
    labelled = (("BIN", bin_id), ("PCN", pcn), ("Group", group_id))
    search_terms = [f"{label} {value}" for label, value in labelled if value]

    if not search_terms:
        return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

    query = " ".join(search_terms) + " pharmacy benefits"
    print(f"      Last resort search: {query}")

    return extract_plan_from_official_results(
        search_google(query), bin_id=bin_id, pcn=pcn, group_id=group_id
    )

def search_pcn_formularies(pcn, bin_id=None, group_id=None):
    """Search formulary documents for a PCN.

    When a BIN is supplied, a combined BIN+PCN query is tried first.

    Args:
        pcn: PCN string to search for.
        bin_id: Optional BIN string used for the extra leading query.
        group_id: Accepted for interface symmetry; not used here.

    Returns:
        The first resolved dict from extract_plan_from_official_results(),
        otherwise a dict with all three values set to ``"Not Found"``.
    """
    bin_specific = [f'BIN {bin_id} PCN {pcn} formulary filetype:pdf'] if bin_id else []
    search_queries = bin_specific + [
        f'PCN {pcn} formulary filetype:pdf',
        f'PCN {pcn} pharmacy benefit plan',
        f'"{pcn}" prescription coverage formulary',
    ]

    for search_query in search_queries:
        print(f"      Trying: {search_query}")

        candidate = extract_plan_from_official_results(
            search_google(search_query), pcn=pcn, bin_id=bin_id
        )
        if candidate["Plan Type"] != "Not Found":
            print(f"      Found: {candidate['Plan Type']}")
            return candidate

        # Pause between API calls.
        time.sleep(1)

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def search_group_plans(group_id, bin_id=None, pcn=None):
    """Search for a plan by group ID.

    Recognised group IDs are resolved through search_known_group_patterns()
    so the table of special cases lives in one place; any other ID goes
    through a series of generic web searches.

    Args:
        group_id: Group ID string to search for.
        bin_id: Accepted for interface symmetry; not used here.
        pcn: Accepted for interface symmetry; not used here.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` when nothing
        is found.
    """
    # Handle known group patterns via the shared helper instead of keeping a
    # second copy of the TENNCARE / IBEW / WKLA table here.
    known_result = search_known_group_patterns(group_id)
    if known_result["Plan Type"] != "Not Found":
        return known_result

    # Generic group search
    queries = [
        f'Group {group_id} pharmacy formulary filetype:pdf',
        f'"{group_id}" prescription benefit plan',
        f'Group {group_id} pharmacy coverage'
    ]

    for query in queries:
        print(f"      Trying: {query}")
        results = search_google(query)

        plan_info = extract_plan_from_official_results(results, group_id=group_id)
        if plan_info["Plan Type"] != "Not Found":
            print(f"      Found: {plan_info['Plan Type']}")
            return plan_info

        # Pause between API calls.
        time.sleep(1)

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def search_pbm_sites(bin_id, pcn, group_id):
    """Search a fixed list of PBM sites for the given identifiers.

    One ``site:``-restricted query is issued per site, in list order, and
    the first resolved plan is returned.

    Args:
        bin_id: BIN string, or a falsy value when unknown.
        pcn: PCN string, or a falsy value when unknown.
        group_id: Group ID string, or a falsy value when unknown.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` when no
        identifier was supplied or no site yields a plan.
    """
    labelled = (("BIN", bin_id), ("PCN", pcn), ("Group", group_id))
    combined_terms = " ".join(f"{label} {value}" for label, value in labelled if value)

    # No identifiers at all: nothing to search for.
    if not combined_terms:
        return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

    for site in ('caremark.com', 'express-scripts.com', 'optum.com', 'primetherapeutics.com'):
        search_query = f'site:{site} {combined_terms} formulary'
        print(f"      Trying: {search_query}")

        candidate = extract_plan_from_official_results(
            search_google(search_query), bin_id=bin_id, pcn=pcn, group_id=group_id
        )
        if candidate["Plan Type"] != "Not Found":
            print(f"      Found: {candidate['Plan Type']}")
            return candidate

        # Pause between API calls.
        time.sleep(1)

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def extract_plan_from_official_results(results, bin_id=None, pcn=None, group_id=None):
    """Extract plan information from search results, trusted sources first.

    Results whose link host is one of TRUSTED_DOMAINS (or a subdomain of
    one) are examined before all others; within each group the original
    result order is kept. The first result whose title yields a plan name
    via extract_plan_name_from_title() is returned.

    Args:
        results: Decoded search response; organic results are read from its
            ``'organic'`` key.
        bin_id: Accepted for interface symmetry; not used by the extraction.
        pcn: Accepted for interface symmetry; not used by the extraction.
        group_id: Accepted for interface symmetry; not used by the extraction.

    Returns:
        Dict with keys ``"Plan Type"``, ``"PDF Link"`` and
        ``"Document Date"``; all three are ``"Not Found"`` when no title
        yields a plan name.
    """

    def is_trusted(link):
        """Return True when the link's host is a trusted domain or subdomain."""
        if '://' not in link:
            # urlparse only recognises a host after "//".
            link = '//' + link
        try:
            host = (urlparse(link).hostname or '').lower()
        except ValueError:
            # Malformed URL (e.g. broken IPv6 literal): treat as untrusted.
            return False
        # Compare against the host, not the whole URL, so that
        # "genesi.com" or ".../caremark.com-review" do not count as trusted.
        return any(host == domain or host.endswith('.' + domain) for domain in TRUSTED_DOMAINS)

    organic_results = results.get('organic', [])

    # sorted() is stable: trusted results (key False) move to the front while
    # the relative order inside each group is preserved.
    prioritized = sorted(
        organic_results,
        key=lambda result: not is_trusted(result.get('link', '')),
    )

    for result in prioritized:
        # Extract plan name from title (most reliable)
        plan_name = extract_plan_name_from_title(result.get('title', ''))
        if plan_name != "Not Found":
            return {
                "Plan Type": plan_name,
                # find_pdf_in_result() already prefers the result's own link
                # when it is a PDF.
                "PDF Link": find_pdf_in_result(result),
                "Document Date": extract_date_from_result(result)
            }

    return {"Plan Type": "Not Found", "PDF Link": "Not Found", "Document Date": "Not Found"}

def extract_plan_name_from_title(title):
    """Extract a plan name from a search-result title.

    Three strategies are tried in order: formulary-style title patterns,
    PBM brand names appearing in the title, and a "<Word> Medicaid" pattern.
    All matching is case-insensitive.

    Args:
        title: Title text of a search result.

    Returns:
        The extracted plan name, or ``"Not Found"``.
    """
    # Common formulary document patterns
    formulary_patterns = [
        r'^([A-Z][a-zA-Z\s&\-]+)\s+Formulary',
        r'^([A-Z][a-zA-Z\s&\-]+)\s+Preferred\s+Drug\s+List',
        r'^([A-Z][a-zA-Z\s&\-]+)\s+PDL',
        r'([A-Z][a-zA-Z\s&\-]+)\s+Medicaid\s+Formulary',
        r'([A-Z][a-zA-Z\s&\-]+)\s+Medicare\s+Formulary',
    ]

    for pattern in formulary_patterns:
        match = re.search(pattern, title, re.IGNORECASE)
        if match:
            plan_name = match.group(1).strip()
            # Reject very short captures and ones that look like file noise.
            if len(plan_name) > 2 and not any(word in plan_name.lower() for word in ['pdf', 'download', 'file']):
                return plan_name

    # PBM-specific patterns
    lowered = title.lower()
    if 'caremark' in lowered:
        return "CVS Caremark"
    # "esi" must be a whole word: a plain substring test also fires on
    # "design", "president", "residential", etc.
    if 'express scripts' in lowered or re.search(r'\besi\b', lowered):
        return "Express Scripts"
    if 'optum' in lowered:
        return "OptumRx"
    if 'prime therapeutics' in lowered:
        return "Prime Therapeutics"

    # State Medicaid patterns
    state_medicaid_match = re.search(r'([A-Z][a-z]+)\s+Medicaid', title, re.IGNORECASE)
    if state_medicaid_match:
        return f"{state_medicaid_match.group(1)} Medicaid"

    return "Not Found"

def find_pdf_in_result(result):
    """Return the best link for a search result, preferring PDFs.

    Args:
        result: One organic search result; its ``'link'`` and optional
            ``'sitelinks'`` (a list of dicts with a ``'link'``) are read.

    Returns:
        The result's own link if it points to a PDF, otherwise the first
        sitelink that does, otherwise the result's own link; ``"Not Found"``
        when the result has no link at all.
    """

    def is_pdf(url):
        """Return True when the URL's path ends in ``.pdf`` (any case)."""
        if not url:
            return False
        lowered = url.lower()
        if lowered.endswith('.pdf'):
            return True
        try:
            # Look at the path only, so "doc.pdf?v=2" and "doc.pdf#page=3"
            # are still recognised as PDFs.
            return urlparse(lowered).path.endswith('.pdf')
        except ValueError:
            # Malformed URL: the plain suffix test above is all we can do.
            return False

    # "or" guards against keys that are present but set to None.
    link = result.get('link') or ''
    if is_pdf(link):
        return link

    # Check sitelinks for PDFs
    for sitelink in result.get('sitelinks') or []:
        sitelink_url = sitelink.get('link') or ''
        if is_pdf(sitelink_url):
            return sitelink_url

    return link if link else "Not Found"

def extract_date_from_result(result):
    """Extract a document date from a search result's title and snippet.

    Four formats are tried in order: ``Month DD, YYYY``, ``DD Month YYYY``,
    ``MM/DD/YYYY`` and ``YYYY-MM-DD``. The first valid occurrence wins.

    Args:
        result: One organic search result; its ``'title'`` and ``'snippet'``
            are searched.

    Returns:
        The date formatted as ``"Month D, YYYY"`` (capitalised English month
        name, day without leading zero), or ``"Not Found"``.
    """
    month_names = ("", "January", "February", "March", "April", "May", "June",
                   "July", "August", "September", "October", "November", "December")
    month_alternatives = "|".join(month_names[1:])
    # Lower-cased spelling -> canonical spelling, so output casing does not
    # depend on how the month was written in the source text.
    canonical_month = {name.lower(): name for name in month_names[1:]}

    combined_text = f"{result.get('title') or ''} {result.get('snippet') or ''}"

    # Date patterns - word-bounded to avoid false matches inside longer tokens
    date_patterns = (
        rf'\b(?P<month>{month_alternatives})\s+(?P<day>\d{{1,2}}),?\s+(?P<year>\d{{4}})\b',
        rf'\b(?P<day>\d{{1,2}})\s+(?P<month>{month_alternatives})\s+(?P<year>\d{{4}})\b',
        r'\b(?P<month>\d{1,2})/(?P<day>\d{1,2})/(?P<year>\d{4})\b',
        r'\b(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})\b',
    )

    for pattern in date_patterns:
        for match in re.finditer(pattern, combined_text, re.IGNORECASE):
            month, day, year = match['month'], int(match['day']), match['year']

            if month.isdigit():
                # Numeric formats are ambiguous with other number groups, so
                # month and year must fall in a plausible range.
                if not (1 <= int(month) <= 12 and 1990 <= int(year) <= 2030):
                    continue
                month_name = month_names[int(month)]
            else:
                month_name = canonical_month[month.lower()]

            if not 1 <= day <= 31:
                continue

            return f"{month_name} {day}, {year}"

    return "Not Found"

# Helper functions for specific known plans
def search_for_tenncare_formulary():
    """Return the link of the top search hit for the TennCare formulary.

    Returns:
        The ``'link'`` of the first organic result for a tn.gov-restricted
        PDF query, or ``"Not Found"`` when there are no results or the first
        result has no link.
    """
    hits = search_google('site:tn.gov TennCare formulary filetype:pdf').get('organic', [])
    return hits[0].get('link', 'Not Found') if hits else "Not Found"

def search_for_ibew_formulary():
    """Return the first PDF link found for an IBEW formulary search.

    Returns:
        The link of the first organic result whose URL ends in ``.pdf``
        (case-insensitive), or ``"Not Found"``.
    """
    hits = search_google('IBEW prescription drug formulary filetype:pdf').get('organic', [])
    pdf_urls = (hit.get('link', '') for hit in hits)
    return next((url for url in pdf_urls if url.lower().endswith('.pdf')), "Not Found")

def search_for_wkla_formulary():
    """Return the first PDF link found for a Louisiana Workers Comp formulary.

    Returns:
        The link of the first organic result whose URL ends in ``.pdf``
        (case-insensitive), or ``"Not Found"``.
    """
    hits = search_google(
        'Louisiana workers compensation prescription drug formulary filetype:pdf'
    ).get('organic', [])
    pdf_urls = (hit.get('link', '') for hit in hits)
    return next((url for url in pdf_urls if url.lower().endswith('.pdf')), "Not Found")

def enhance_with_pcn_group(base_result, pcn, group_id):
    """Placeholder for enriching plan info with PCN/Group details.

    Currently a pass-through: ``pcn`` and ``group_id`` are ignored and
    ``base_result`` is returned unchanged (the same object, not a copy).
    """
    # This would search for more specific information
    # For now, return the base result
    return base_result

def verify_plan_info(plan_info):
    """Run a secondary search for the plan name and return the plan info.

    For a resolved plan this waits one second, searches for the plan name
    and looks for a result from a trusted domain whose title contains it.

    NOTE: the outcome of that check does not affect the return value; the
    input dict is returned unchanged on every path. Errors during the check
    are printed and otherwise ignored.

    Args:
        plan_info: Dict with at least a ``"Plan Type"`` key.

    Returns:
        The same ``plan_info`` object that was passed in.
    """
    plan_type = plan_info["Plan Type"]
    if plan_type == "Not Found":
        # Nothing to verify.
        return plan_info

    verification_query = f'"{plan_type}" pharmacy formulary official'

    try:
        time.sleep(1)
        hits = search_google(verification_query).get('organic', [])

        for hit in hits:
            url = hit.get('link', '').lower()
            heading = hit.get('title', '').lower()

            from_trusted_source = any(domain in url for domain in TRUSTED_DOMAINS)
            if from_trusted_source and plan_type.lower() in heading:
                # Confirmed - stop scanning.
                break

    except Exception as e:
        print(f"Verification error: {e}")

    return plan_info

# Input records to look up, one per string, in the form "BIN~PCN~GROUP".
# The literal NULL marks an identifier that is not available; parse_entry()
# turns each string into a dict and maps NULL to an empty string.
entries = [
    "610014~NULL~UHEALTH",
    "610141~NULL~NULL",
    "018902~P303018902~NULL",
    "610084~DRMSPROD~NULL",
    "014864~NULL~NULL",
    "610118~NULL~NULL",
    "016820~P086016820~NULL",
    "610144~CPRCASH~NULL",
    "NULL~NULL~RXAETD",
    "610014~NULL~BCTCOMM",
    "016499~HMOPOSNJ~NULL",
    "610480~5529~NULL",
    "610415~FEPRX~NULL",
    "NULL~NULL~ACUNY",
    "NULL~NULL~TXS000000287356",
    "610170~NULL~NULL",
    "610084~DRCOPROD~NULL",
    "610014~NULL~BCBSMAN",
    "601475~NULL~NULL",
    "610455~BHP~NULL",
    "016127~NULL~NULL",
    "003585~38151~NULL",
    "015905~PDPNCG~NULL",
    "610245~NULL~NULL",
    "004336~380~NULL",
    "017010~CIMCAID~NULL",
    "610342~BCAID~NULL",
    "017035~NULL~NULL",
    "610127~GASF~NULL",
    "018141~NULL~NULL",
    "610398~CV1~NULL",
    "011552~ESI011552~NULL",
    "011933~IAPOP~NULL",
    "610830~REALRX~NULL",
    "NULL~NULL~BCBSMRX1",
    "NULL~NULL~BCBSMAN",
    "019595~06280000~NULL",
    "018643~WAGACTIVES~NULL",
    "004336~NULL~SCB15",
    "610239~NULL~65006500",
    "610455~AHPPARTD~NULL",
    "800001~NULL~NULL",
    "610480~NULL~NULL",
    "020123~NULL~NULL",
    "610648~1890000~NULL",
    "600426~CN~NULL",
    "003585~40020~NULL",
    "610097~NULL~PDPIND",
    "600428~07130000~NULL",
    "610014~MIBCNRX~NULL",
    "610455~HORMEL~NULL"
]




# === Main Execution ===
# Look up every entry, print progress along the way, and collect one output
# row per entry for the Excel export below.
results = []

print("Starting targeted pharmacy plan lookup...")
print("Focusing on official sources and formulary documents...")

for idx, entry in enumerate(entries):
    print(f"\n{'='*60}")
    print(f"Processing entry {idx+1}/{len(entries)}: {entry}")
    parsed = parse_entry(entry)

    # Echo the parsed identifiers, showing NULL for missing ones.
    for label, key in (("BIN", "BIN"), ("PCN", "PCN"), ("Group ID", "GROUP")):
        print(f"  {label}: {parsed[key] or 'NULL'}")

    # Get plan info using targeted approach
    plan_info = lookup_plan_info_targeted(parsed['BIN'], parsed['PCN'], parsed['GROUP'])

    for field in ("Plan Type", "PDF Link", "Document Date"):
        print(f"  Initial {field}: {plan_info[field]}")

    # Verify the information
    print("  Verifying information...")
    verified_info = verify_plan_info(plan_info)

    for field in ("Plan Type", "PDF Link", "Document Date"):
        print(f"  Final {field}: {verified_info[field]}")

    # Store results (key order determines the spreadsheet column order)
    results.append({
        "BIN": parsed['BIN'],
        "PCN": parsed['PCN'],
        "GroupID": parsed['GROUP'],
        **{field: verified_info[field] for field in ("Plan Type", "PDF Link", "Document Date")},
    })

    # Rate limiting delay (skipped after the last entry)
    if idx < len(entries) - 1:
        time.sleep(3)

# === Export to Excel ===
# The timestamp in the file name keeps repeated runs from overwriting each other.
df = pd.DataFrame(results)
filename = f"accurate_plan_results_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
df.to_excel(filename, index=False)

print(f"\n✅ Excel file saved as: {filename}")
for summary_line in (
    "\n📋 Summary:",
    "- Focused search on official pharmacy databases",
    "- Prioritized trusted domains (CMS, PBMs, State Medicaid)",
    "- Targeted formulary documents and PDFs",
    "- Enhanced date extraction from document metadata",
):
    print(summary_line)