# ActiveCampaign Contacts - Read Only

This notebook fetches all contacts from the ActiveCampaign API (read-only — no modifications are made) and loads them into a pandas DataFrame with all available data points.

⚠️ **This notebook only reads data. No changes will be made to ActiveCampaign.**


In [None]:
import os
import pandas as pd
import requests
from dotenv import load_dotenv
import json
from typing import List, Dict, Any

# Load environment variables from a local .env file (if one exists) into os.environ
load_dotenv()

# ActiveCampaign configuration - read from the environment, never hardcoded
ACTIVE_CAMPAIGN_API_KEY = os.environ.get('ACTIVE_CAMPAIGN_API_KEY')
API_URL = os.environ.get('API_URL')

# Drop a trailing slash so f"{API_URL}/api/3/..." never produces "//api/3/..."
if API_URL:
    API_URL = API_URL.rstrip('/')

# Headers sent with every request; the token authenticates the call
headers = {
    "Api-Token": ACTIVE_CAMPAIGN_API_KEY,
    "Content-Type": "application/json",
}

print(f"API URL: {API_URL}")
print(f"API Key configured: {'Yes' if ACTIVE_CAMPAIGN_API_KEY else 'No'}")

# Fail fast: with a missing value the later cells would request "None/api/3/..."
# (or send an empty token) and only fail far from the real cause.
_missing_settings = [
    name
    for name, value in (
        ("ACTIVE_CAMPAIGN_API_KEY", ACTIVE_CAMPAIGN_API_KEY),
        ("API_URL", API_URL),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing ActiveCampaign configuration: " + ", ".join(_missing_settings)
        + ". Set these in the environment or in a .env file before running the notebook."
    )


API URL: https://ehb.api-us1.com
API Key configured: Yes


In [None]:
import time
from datetime import timedelta

def fetch_all_contacts(api_url: str, headers: Dict[str, str], limit: int = 100,
                       session: Any = None, timeout: float = 30.0) -> List[Dict[str, Any]]:
    """
    Fetch all contacts from ActiveCampaign API with pagination (READ ONLY).
    Includes custom field values (fieldValues) in the request.

    Only GET requests are issued. Pages are requested with increasing offsets
    until the API returns an empty page or a page shorter than ``limit``.
    If a request fails, the loop stops and the contacts collected so far are
    returned; a warning is printed so the partial result is not mistaken for
    the full data set.

    Args:
        api_url: Base API URL
        headers: Request headers with API token
        limit: Number of contacts per page (max 100)
        session: Optional object exposing a ``get(url, headers=, params=, timeout=)``
            method (for example a ``requests.Session``). When omitted, the
            module-level ``requests.get`` is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        List of all contact records with all available datapoints

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    # Use the injected client when given; otherwise fall back to the requests module
    http = session if session is not None else requests
    endpoint = f"{api_url}/api/3/contacts"

    all_contacts: List[Dict[str, Any]] = []
    offset = 0
    batch_num = 0
    fetch_failed = False
    start_time = time.time()

    print("=" * 80)
    print("FETCHING CONTACTS FROM ACTIVECAMPAIGN (READ ONLY)")
    print("=" * 80)

    while True:
        batch_num += 1
        batch_start = time.time()
        # Let the HTTP client build and encode the query string
        query = {"include": "fieldValues", "limit": limit, "offset": offset}

        try:
            # GET only - no modifications. The timeout keeps a stalled
            # connection from hanging the notebook indefinitely.
            response = http.get(endpoint, headers=headers, params=query, timeout=timeout)
            response.raise_for_status()
            batch_elapsed = time.time() - batch_start
            contacts = response.json().get('contacts', [])
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON
            print(f"\n❌ Error fetching contacts: {e}")
            fetch_failed = True
            break

        if not contacts:
            print("\n✓ No more contacts found. Fetch complete!")
            break

        all_contacts.extend(contacts)

        # Calculate metrics
        total_elapsed = time.time() - start_time
        contacts_per_second = len(all_contacts) / total_elapsed if total_elapsed > 0 else 0

        # Print detailed status
        print(f"\n📦 Batch #{batch_num}")
        print(f"   ├─ Retrieved: {len(contacts)} contacts in {batch_elapsed:.2f}s")
        print(f"   ├─ Offset: {offset}")
        print(f"   ├─ Total so far: {len(all_contacts):,} contacts")
        print(f"   ├─ Speed: {contacts_per_second:.1f} contacts/sec")
        print(f"   └─ Elapsed time: {timedelta(seconds=int(total_elapsed))}")

        # If we got fewer contacts than the limit, we've reached the end
        if len(contacts) < limit:
            print("\n✓ Reached end of contacts (last page).")
            break

        offset += limit

    if fetch_failed:
        print(f"⚠️  Fetch stopped early at offset {offset}; the returned list is INCOMPLETE.")

    # Final summary
    total_time = time.time() - start_time
    avg_speed = len(all_contacts) / total_time if total_time > 0 else 0

    print("\n" + "=" * 80)
    print("FETCH SUMMARY")
    print("=" * 80)
    print(f"✓ Total contacts retrieved: {len(all_contacts):,}")
    print(f"✓ Total batches: {batch_num}")
    print(f"✓ Total time: {timedelta(seconds=int(total_time))} ({total_time:.2f}s)")
    print(f"✓ Average speed: {avg_speed:.1f} contacts/sec")
    print(f"✓ Batch size: {limit}")
    print(f"✓ Includes: Custom field values (fieldValues)")
    print("=" * 80)

    return all_contacts

# Fetch all contacts (READ ONLY operation)
# Requests pages of 100 contacts; the resulting list of contact dicts is kept in
# `contacts`, which the next cell converts into a DataFrame.
contacts = fetch_all_contacts(API_URL, headers, limit=100)


FETCHING CONTACTS FROM ACTIVECAMPAIGN (READ ONLY)

📦 Batch #1
   ├─ Retrieved: 100 contacts in 1.08s
   ├─ Offset: 0
   ├─ Total so far: 100 contacts
   ├─ Speed: 92.1 contacts/sec
   └─ Elapsed time: 0:00:01

📦 Batch #2
   ├─ Retrieved: 100 contacts in 0.96s
   ├─ Offset: 100
   ├─ Total so far: 200 contacts
   ├─ Speed: 97.6 contacts/sec
   └─ Elapsed time: 0:00:02

📦 Batch #3
   ├─ Retrieved: 100 contacts in 0.94s
   ├─ Offset: 200
   ├─ Total so far: 300 contacts
   ├─ Speed: 100.1 contacts/sec
   └─ Elapsed time: 0:00:02

📦 Batch #4
   ├─ Retrieved: 100 contacts in 0.94s
   ├─ Offset: 300
   ├─ Total so far: 400 contacts
   ├─ Speed: 101.5 contacts/sec
   └─ Elapsed time: 0:00:03

📦 Batch #5
   ├─ Retrieved: 100 contacts in 0.93s
   ├─ Offset: 400
   ├─ Total so far: 500 contacts
   ├─ Speed: 102.6 contacts/sec
   └─ Elapsed time: 0:00:04

📦 Batch #6
   ├─ Retrieved: 100 contacts in 0.70s
   ├─ Offset: 500
   ├─ Total so far: 600 contacts
   ├─ Speed: 107.5 contacts/sec
   └─ Elap

In [None]:
# Convert to DataFrame
# `contacts` is the list of contact dicts returned by fetch_all_contacts();
# an empty list means nothing was fetched (or the fetch failed on the first page).
if contacts:
    df = pd.DataFrame(contacts)
    
    print(f"DataFrame shape: {df.shape}")
    print(f"\nColumns: {list(df.columns)}")
    print(f"\nFirst few rows:")
    display(df.head())
    
    # Show data types
    print(f"\nData types:")
    print(df.dtypes)
    
    # Show basic statistics
    print(f"\nBasic info:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() only appended a stray "None" line.
    df.info()
    
else:
    print("No contacts found or error occurred.")


DataFrame shape: (185775, 42)

Columns: ['cdate', 'email', 'phone', 'firstName', 'lastName', 'orgid', 'orgname', 'segmentio_id', 'bounced_hard', 'bounced_soft', 'bounced_date', 'ip', 'ua', 'hash', 'socialdata_lastcheck', 'email_local', 'email_domain', 'sentcnt', 'rating_tstamp', 'gravatar', 'deleted', 'anonymized', 'adate', 'udate', 'edate', 'deleted_at', 'created_utc_timestamp', 'updated_utc_timestamp', 'created_timestamp', 'updated_timestamp', 'created_by', 'updated_by', 'mpp_tracking', 'last_click_date', 'last_open_date', 'last_mpp_open_date', 'best_send_hour', 'scoreValues', 'accountContacts', 'links', 'id', 'organization']

First few rows:


Unnamed: 0,cdate,email,phone,firstName,lastName,orgid,orgname,segmentio_id,bounced_hard,bounced_soft,...,mpp_tracking,last_click_date,last_open_date,last_mpp_open_date,best_send_hour,scoreValues,accountContacts,links,id,organization
0,2024-12-17T09:10:40-06:00,,447971166835.0,Anthony,Kemp,0,,,0,0,...,0,,,,0.0,"[338356, 338387]",[],{'bounceLogs': 'https://ehb.api-us1.com/api/3/...,169196,
1,2022-06-09T12:55:11-05:00,0.hominy.module@icloud.com,,Simon,Little,0,,,0,0,...,1,2023-02-17 16:44:14,2023-02-17 16:44:14,2023-02-17 16:44:26,0.0,"[48566, 50532]",[],{'bounceLogs': 'https://ehb.api-us1.com/api/3/...,24656,
2,2022-06-13T17:26:28-05:00,0.xstriderx.0@gmail.com,,,,0,,,0,0,...,0,,2023-05-18 05:59:06,,0.0,"[188178, 188199]",[],{'bounceLogs': 'https://ehb.api-us1.com/api/3/...,94097,
3,2023-04-08T14:37:32-05:00,00.ogress-shreds@icloud.com,,,,0,,,0,0,...,0,2023-05-02 09:58:48,2023-05-23 04:13:14,2023-05-23 04:13:03,0.0,"[267408, 267509]",[],{'bounceLogs': 'https://ehb.api-us1.com/api/3/...,133722,
4,2024-01-09T21:21:17-06:00,0001dws@gmail.com,,David,Shirley,0,,,0,0,...,0,,,,,"[299054, 299083]",[],{'bounceLogs': 'https://ehb.api-us1.com/api/3/...,149539,



Data types:
cdate                    object
email                    object
phone                    object
firstName                object
lastName                 object
orgid                    object
orgname                  object
segmentio_id             object
bounced_hard             object
bounced_soft             object
bounced_date             object
ip                       object
ua                       object
hash                     object
socialdata_lastcheck     object
email_local              object
email_domain             object
sentcnt                  object
rating_tstamp            object
gravatar                 object
deleted                  object
anonymized               object
adate                    object
udate                    object
edate                    object
deleted_at               object
created_utc_timestamp    object
updated_utc_timestamp    object
created_timestamp        object
updated_timestamp        object
created_by               ob

In [None]:
# `df` now holds every fetched contact together with all of its datapoints.
# Everything from here on runs locally - nothing is sent to ActiveCampaign.

print("\n=== DataFrame is ready for analysis ===")

n_contacts, n_columns = len(df), len(df.columns)
print(
    f"Variable 'df' contains {n_contacts} contacts",
    f"All {n_columns} columns/datapoints are available",
    "\nYou can now analyze the data using pandas operations.",
    sep="\n",
)



=== DataFrame is ready for analysis ===
Variable 'df' contains 185775 contacts
All 42 columns/datapoints are available

You can now analyze the data using pandas operations.


## Filter: Contacts with Only Email (No Lists)


In [None]:
# Filter contacts: No lists + Only email data
# Uses df_enriched which has all the data we fetched

# df_enriched is built by the enrichment cell that appears LATER in this
# notebook, so on a fresh kernel run top-to-bottom it does not exist yet.
# Raise an actionable error instead of a bare NameError.
if 'df_enriched' not in globals():
    raise NameError(
        "'df_enriched' is not defined yet. Run the enrichment cell that builds "
        "df_enriched (it appears later in this notebook) before running this filter."
    )

print("\n" + "=" * 80)
print("FILTERING: Contacts with NO Lists + ONLY Email Data")
print("=" * 80)

# Filter 1: No list subscriptions (using the has_any_lists flag)
df_no_lists = df_enriched[~df_enriched['has_any_lists']].copy()
print(f"✓ Contacts with NO list subscriptions: {len(df_no_lists):,}")

# Filter 2: Only email is filled (all other meaningful fields are empty)
# Define meaningful data fields (excluding technical/system fields)
data_fields = ['firstName', 'lastName', 'phone', 'address1', 'address2', 'city', 
               'state', 'zip', 'country', 'organization']

# Find which fields exist in our DataFrame (not every candidate is returned by the API)
existing_data_fields = [f for f in data_fields if f in df_no_lists.columns]
print(f"✓ Checking these data fields: {existing_data_fields}")

# Function to check if only email has data
def has_only_email(row, fields=None):
    """Check if only email has data, all other fields are empty.

    Args:
        row: A mapping-like record exposing ``.get`` (a DataFrame row or a dict).
        fields: Names of the data fields that must all be empty. Defaults to the
            notebook-level ``existing_data_fields`` list.

    Returns:
        True when every checked field is null/blank and the contact has no
        tags, deals or automations; False otherwise.
    """
    if fields is None:
        fields = existing_data_fields

    for field in fields:
        value = row.get(field, None)
        # Check if field has meaningful data (not empty, not null, not just whitespace)
        if pd.notna(value) and str(value).strip() != '':
            return False

    # Also check if they have tags, deals, or automations.
    # automation_count was named in the original comment but never checked.
    # A missing/None/NaN count is treated as zero rather than raising.
    for count_field in ('tag_count', 'deal_count', 'automation_count'):
        count = row.get(count_field, 0)
        if pd.notna(count) and count > 0:
            return False

    return True

# Apply the filter
# Build the mask explicitly with a bool dtype: DataFrame.apply(axis=1) on a frame
# with zero rows does not produce a boolean Series, so using its result as a
# row mask is unreliable exactly when there are no list-less contacts.
only_email_mask = pd.Series(
    [has_only_email(row) for _, row in df_no_lists.iterrows()],
    index=df_no_lists.index,
    dtype=bool,
)
email_only_contacts = df_no_lists[only_email_mask].copy()

# Guard the percentage against an empty enriched frame (division by zero)
enriched_total = len(df_enriched)
email_only_pct = (len(email_only_contacts) / enriched_total * 100) if enriched_total else 0.0

print(f"\n" + "=" * 80)
print("FILTER RESULTS")
print("=" * 80)
print(f"✓ Contacts with NO list subscriptions: {len(df_no_lists):,}")
print(f"✓ Contacts with ONLY email (no other data): {len(email_only_contacts):,}")
print(f"✓ Percentage of total: {email_only_pct:.2f}%")
print("=" * 80)

# Show sample of results
if not email_only_contacts.empty:
    print(f"\n📋 Sample of email-only contacts (first 20):")
    display_cols = ['id', 'email', 'list_count', 'tag_count', 'deal_count'] + existing_data_fields
    # Keep only the columns that are actually present
    display_cols = [c for c in display_cols if c in email_only_contacts.columns]
    display(email_only_contacts[display_cols].head(20))
    
    # Create a simple list of emails
    email_only_list = email_only_contacts['email'].tolist()
    
    print(f"\n✓ Created 'email_only_contacts' DataFrame with {len(email_only_contacts):,} contacts")
    print(f"✓ Created 'email_only_list' with {len(email_only_list):,} email addresses")
    print(f"\nTo see all emails: print(email_only_list)")
else:
    print("\n⚠️  No contacts found matching criteria (no lists + only email)")


FILTERING CONTACTS: No Lists + Only Email

📊 DataFrame Overview:
Total contacts: 185,775
Total columns: 42

Column names:
  - cdate
  - email
  - phone
  - firstName
  - lastName
  - orgid
  - orgname
  - segmentio_id
  - bounced_hard
  - bounced_soft
  - bounced_date
  - ip
  - ua
  - hash
  - socialdata_lastcheck
  - email_local
  - email_domain
  - sentcnt
  - rating_tstamp
  - gravatar
  - deleted
  - anonymized
  - adate
  - udate
  - edate
  - deleted_at
  - created_utc_timestamp
  - updated_utc_timestamp
  - created_timestamp
  - updated_timestamp
  - created_by
  - updated_by
  - mpp_tracking
  - last_click_date
  - last_open_date
  - last_mpp_open_date
  - best_send_hour
  - scoreValues
  - accountContacts
  - links
  - id
  - organization

📋 List-related columns found: []

✓ Contacts with no list subscriptions: 0

📝 Checking these data fields: ['firstName', 'lastName', 'phone', 'organization']

RESULTS
✓ Contacts with NO list subscriptions: 0
✓ Contacts with ONLY email data: 

## All Columns Analysis


In [None]:
# Print a per-column profile of `df`: dtype, fill rate, cardinality and a few
# sample values, followed by a numbered list of the column names.
wide_rule = "=" * 100
row_total = len(df)

print(wide_rule)
print("ALL COLUMNS IN DATAFRAME")
print(wide_rule)

print(f"\nTotal columns: {len(df.columns)}")
print(f"Total rows: {row_total:,}")

print("\n" + wide_rule)
print("COLUMN DETAILS")
print(wide_rule)

for position, name in enumerate(df.columns, start=1):
    print(f"\n{position:2d}. Column: '{name}'")
    print(f"    Type: {df[name].dtype}")

    filled = df[name].notna().sum()
    print(f"    Non-null count: {filled:,} / {row_total:,} ({(filled/row_total*100):.1f}%)")

    # Cardinality - nunique() raises TypeError when cells hold unhashable objects
    try:
        distinct = df[name].nunique()
    except TypeError:
        print(f"    Unique values: Cannot calculate (contains unhashable types like lists)")
    else:
        print(f"    Unique values: {distinct:,}")

    # A handful of non-null examples
    present = df[name].dropna()
    present_total = len(present)
    if present_total == 0:
        print(f"    All values are null/empty")
        continue

    print(f"    Sample values:")
    for sample in present.head(5).tolist():
        if isinstance(sample, (list, dict)):
            # Containers: show the type name plus a clipped repr
            shown = f"{type(sample).__name__}: {str(sample)[:50]}..."
        else:
            # Scalars: clip long text to 47 characters plus an ellipsis
            shown = str(sample)
            if len(shown) > 50:
                shown = shown[:47] + "..."
        print(f"      - {shown}")

    if present_total > 5:
        print(f"      ... and {present_total - 5} more")

print("\n" + wide_rule)
print("QUICK REFERENCE - COLUMN NAMES")
print(wide_rule)

for position, name in enumerate(df.columns, start=1):
    print(f"{position:2d}. {name}")

print(f"\nTotal: {len(df.columns)} columns")


ALL COLUMNS IN DATAFRAME

Total columns: 42
Total rows: 185,775

COLUMN DETAILS

 1. Column: 'cdate'
    Type: object
    Non-null count: 185,775 / 185,775 (100.0%)
    Unique values: 84,152
    Sample values:
      - 2024-12-17T09:10:40-06:00
      - 2022-06-09T12:55:11-05:00
      - 2022-06-13T17:26:28-05:00
      - 2023-04-08T14:37:32-05:00
      - 2024-01-09T21:21:17-06:00
      ... and 185770 more

 2. Column: 'email'
    Type: object
    Non-null count: 185,774 / 185,775 (100.0%)
    Unique values: 185,774
    Sample values:
      - 0.hominy.module@icloud.com
      - 0.xstriderx.0@gmail.com
      - 00.ogress-shreds@icloud.com
      - 0001dws@gmail.com
      - 0002@imagens.cf
      ... and 185769 more

 3. Column: 'phone'
    Type: object
    Non-null count: 185,775 / 185,775 (100.0%)
    Unique values: 43,351
    Sample values:
      - +447971166835
      - 
      - 
      - 
      - 
      ... and 185770 more

 4. Column: 'firstName'
    Type: object
    Non-null count: 185,775 

## Get List Memberships for All Contacts


In [None]:
# Fetch ALL additional data for each contact
# This includes: list memberships, tags, deals, and any other available data

def get_contact_additional_data(contact_id: str, api_url: str, headers: Dict[str, str],
                                session: Any = None, timeout: float = 30.0) -> Dict[str, Any]:
    """
    Get all additional data for a contact (READ ONLY).

    Issues one GET request per sub-resource (contactLists, contactTags, deals,
    contactAutomations) under ``/api/3/contacts/{contact_id}/``.

    Args:
        contact_id: ID of the contact to look up.
        api_url: Base API URL.
        headers: Request headers with API token.
        session: Optional object exposing a ``get(url, headers=, timeout=)``
            method (for example a ``requests.Session``). When omitted, the
            module-level ``requests.get`` is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        dict with the keys 'lists', 'tags', 'deals' and 'contactAutomations'
        (each a list, empty when the lookup failed or returned nothing), plus
        'errors': a dict mapping each failed key to a short reason, so a failed
        lookup can be told apart from a contact that genuinely has no data.
    """
    # Use the injected client when given; otherwise fall back to the requests module
    http = session if session is not None else requests

    # result key -> sub-resource name (which is also the key in the JSON payload)
    resources = {
        'lists': 'contactLists',
        'tags': 'contactTags',
        'deals': 'deals',
        'contactAutomations': 'contactAutomations',
    }

    additional_data: Dict[str, Any] = {key: [] for key in resources}
    errors: Dict[str, str] = {}

    for key, resource in resources.items():
        url = f"{api_url}/api/3/contacts/{contact_id}/{resource}"
        try:
            response = http.get(url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                additional_data[key] = response.json().get(resource, [])
            else:
                # e.g. rate limiting or auth failure - record it instead of
                # silently reporting "no data" for this contact
                errors[key] = f"HTTP {response.status_code}"
        except (requests.exceptions.RequestException, ValueError) as exc:
            # Catch only network/JSON failures; a bare `except:` would also
            # swallow KeyboardInterrupt and make a long run impossible to stop.
            errors[key] = str(exc)

    additional_data['errors'] = errors
    return additional_data

# Fetch additional data for all contacts
# NOTE(review): get_contact_additional_data() issues four HTTP requests per
# contact, one after another, so a full run over a large account is very slow.
# Consider limiting the rows or caching df_enriched to disk before re-running.
print("\n" + "=" * 80)
print("FETCHING ADDITIONAL DATA FOR ALL CONTACTS (Lists, Tags, Deals, etc.)")
print("=" * 80)
print("⚠️  This may take a while depending on the number of contacts...")

PROGRESS_EVERY = 10  # print a progress line after this many contacts

start_time = time.time()
enriched_contacts = []
total_contacts = len(df)

# Count rows with enumerate() instead of deriving progress from the index label:
# `idx + 1` is only correct while df still has its default 0..n-1 index.
for processed, (_, contact_row) in enumerate(df.iterrows(), start=1):
    contact_id = contact_row['id']
    
    # Get base contact data
    contact_data = contact_row.to_dict()
    
    # Get additional data
    additional = get_contact_additional_data(contact_id, API_URL, headers)
    
    # Add additional data to contact
    contact_data['list_memberships'] = additional['lists']
    contact_data['tags'] = additional['tags']
    contact_data['deals'] = additional['deals']
    contact_data['automations'] = additional['contactAutomations']
    
    # Calculate derived fields
    contact_data['list_count'] = len(additional['lists'])
    contact_data['tag_count'] = len(additional['tags'])
    contact_data['deal_count'] = len(additional['deals'])
    contact_data['automation_count'] = len(additional['contactAutomations'])
    contact_data['has_any_lists'] = len(additional['lists']) > 0
    
    # Extract list IDs for easy filtering
    contact_data['list_ids'] = [membership.get('list') for membership in additional['lists']]
    contact_data['tag_ids'] = [tag_link.get('tag') for tag_link in additional['tags']]
    
    enriched_contacts.append(contact_data)
    
    # Progress updates
    if processed % PROGRESS_EVERY == 0:
        elapsed = time.time() - start_time
        rate = processed / elapsed if elapsed > 0 else 0
        remaining = (total_contacts - processed) / rate if rate > 0 else 0
        print(f"Progress: {processed}/{total_contacts} ({(processed/total_contacts*100):.1f}%) | "
              f"Speed: {rate:.1f} contacts/sec | "
              f"ETA: {timedelta(seconds=int(remaining))}")

# Create enriched DataFrame
df_enriched = pd.DataFrame(enriched_contacts)

total_time = time.time() - start_time
average_speed = len(df_enriched) / total_time if total_time > 0 else 0
print("\n" + "=" * 80)
print("ENRICHMENT COMPLETE")
print("=" * 80)
print(f"✓ Total contacts enriched: {len(df_enriched):,}")
print(f"✓ Total time: {timedelta(seconds=int(total_time))} ({total_time:.2f}s)")
print(f"✓ Average speed: {average_speed:.1f} contacts/sec")
# With no rows the derived columns do not exist, so the summary would raise KeyError
if not df_enriched.empty:
    print(f"\n📊 SUMMARY:")
    print(f"   ├─ Contacts with list memberships: {df_enriched['has_any_lists'].sum():,}")
    print(f"   ├─ Contacts without any lists: {(~df_enriched['has_any_lists']).sum():,}")
    print(f"   ├─ Contacts with tags: {(df_enriched['tag_count'] > 0).sum():,}")
    print(f"   ├─ Contacts with deals: {(df_enriched['deal_count'] > 0).sum():,}")
    print(f"   └─ Contacts in automations: {(df_enriched['automation_count'] > 0).sum():,}")
print("=" * 80)
print(f"\n✓ Created 'df_enriched' DataFrame with ALL contact data")



FETCHING ADDITIONAL DATA FOR ALL CONTACTS (Lists, Tags, Deals, etc.)
⚠️  This may take a while depending on the number of contacts...
Progress: 10/185775 (0.0%) | Speed: 0.6 contacts/sec | ETA: 3 days, 9:05:40
Progress: 20/185775 (0.0%) | Speed: 0.6 contacts/sec | ETA: 3 days, 11:25:37


## Filter: Contacts with Age Data


In [None]:
# Find contacts that have age data filled
print("\n" + "=" * 80)
print("FILTERING: Contacts with Age Data")
print("=" * 80)

# First, let's check what age-related columns exist
# NOTE(review): this is a substring match, so any column whose name merely
# contains "age" (e.g. "page", "message") is picked up too - confirm the result.
age_columns = [col for col in df_enriched.columns if 'age' in col.lower()]
print(f"✓ Age-related columns found: {age_columns}")

# Check if we have custom field values that might contain age
if 'fieldValues' in df_enriched.columns:
    print(f"\n📊 Checking fieldValues for age data...")
    # Look at sample fieldValues to understand structure
    sample_field_values = df_enriched['fieldValues'].dropna().head(5)
    for idx, fv in sample_field_values.items():
        print(f"Sample {idx}: {fv}")

# Look for age in custom fields (fieldValues)
contacts_with_age = df_enriched.copy()

# Method 1: Check if 'Age' appears in fieldValues
if 'fieldValues' in df_enriched.columns:
    def has_age_in_fields(field_values):
        """Return True when the text form of a fieldValues cell mentions age.

        The cell is converted to a lower-cased string and searched for "age"
        or one of the age-bracket labels. Missing cells (None/NaN) and empty
        containers count as "no age data".
        """
        if field_values is None:
            return False
        if isinstance(field_values, (list, tuple, dict)):
            # pd.isna() on a container returns an array, and `if array:` raises
            # "truth value is ambiguous" - so containers are tested by length.
            if len(field_values) == 0:
                return False
        elif pd.isna(field_values):
            return False
        # Convert to string and check for age-related content
        field_str = str(field_values).lower()
        age_terms = ['18-24', '25-34', '35-44', '45-54', '55-64', '65+']
        return 'age' in field_str or any(age_term in field_str for age_term in age_terms)
    
    contacts_with_age['has_age_field'] = contacts_with_age['fieldValues'].apply(has_age_in_fields)
    age_from_fields = contacts_with_age['has_age_field'].sum()
    print(f"✓ Contacts with age in fieldValues: {age_from_fields:,}")
else:
    contacts_with_age['has_age_field'] = False
    age_from_fields = 0

# Method 2: Check direct age columns
age_direct = 0
for col in age_columns:
    if col in df_enriched.columns:
        non_null_count = df_enriched[col].notna().sum()
        print(f"✓ Column '{col}' has {non_null_count:,} non-null values")
        age_direct += non_null_count
# Method 3: Check if any contact has age data in any form
def _has_value(value):
    """Return True when value is not None/NaN, not an empty container and not blank text."""
    if value is None:
        return False
    if hasattr(value, '__len__') and not isinstance(value, (str, bytes)):
        # Containers (lists, dicts, arrays): pd.isna() would return an array
        # here, which cannot be used in a boolean test - so go by length.
        return len(value) > 0
    if pd.isna(value):
        return False
    return str(value).strip() != ''


def has_any_age_data(row, columns=None, check_field_values=None):
    """Return True when a contact row carries age information in any form.

    Args:
        row: A mapping-like record exposing ``.get`` (a DataFrame row or a dict).
        columns: Names of direct age columns to inspect. Defaults to the
            notebook-level ``age_columns`` that exist in ``df_enriched``.
        check_field_values: Whether to inspect the row's 'fieldValues' entry.
            Defaults to True when ``df_enriched`` has a 'fieldValues' column.

    Returns:
        True if any inspected age column holds a non-blank value, or the text
        form of 'fieldValues' contains "age" or an age-bracket label.
    """
    if columns is None:
        columns = [col for col in age_columns if col in df_enriched.columns]
    if check_field_values is None:
        check_field_values = 'fieldValues' in df_enriched.columns

    # Check direct age columns
    for col in columns:
        if _has_value(row.get(col)):
            return True

    # Check fieldValues
    if check_field_values and _has_value(row.get('fieldValues')):
        field_str = str(row.get('fieldValues')).lower()
        age_terms = ['18-24', '25-34', '35-44', '45-54', '55-64', '65+']
        if 'age' in field_str or any(age_term in field_str for age_term in age_terms):
            return True

    return False

# Flag each row, then count how many contacts carry any age information
age_flags = contacts_with_age.apply(has_any_age_data, axis=1)
contacts_with_age['has_any_age'] = age_flags
total_with_age = contacts_with_age['has_any_age'].sum()
contact_total = len(df_enriched)

rule = "=" * 80
print("\n" + rule)
print("AGE DATA SUMMARY")
print(rule)
print(f"✓ Total contacts: {contact_total:,}")
print(f"✓ Contacts with age data: {total_with_age:,}")
print(f"✓ Percentage with age: {(total_with_age/contact_total*100):.2f}%")
print(f"✓ Contacts without age data: {contact_total - total_with_age:,}")
print(rule)

# Keep only the flagged contacts in their own frame
flagged_rows = contacts_with_age['has_any_age']
age_contacts_df = contacts_with_age[flagged_rows].copy()

if age_contacts_df.empty:
    print("\n⚠️  No contacts found with age data")
else:
    print(f"\n📋 Sample of contacts with age data (first 20):")
    candidate_cols = ['id', 'email', 'firstName', 'lastName'] + age_columns
    display_cols = [name for name in candidate_cols if name in age_contacts_df.columns]
    display(age_contacts_df[display_cols].head(20))

    # Top values per age column, skipping columns that are absent or all-null
    print(f"\n📊 Age Data Analysis:")
    for age_col in age_columns:
        if age_col not in age_contacts_df.columns:
            continue
        observed = age_contacts_df[age_col].dropna()
        if observed.empty:
            continue
        print(f"\nAge values in '{age_col}':")
        print(observed.value_counts().head(10))

    print(f"\n✓ Created 'age_contacts_df' with {len(age_contacts_df):,} contacts that have age data")
    print(f"✓ Created 'contacts_with_age' DataFrame with age flags")
