In [1]:
## Cell 1: Install required packages
!pip3 install folium geopy pandas numpy



In [2]:
## Cell 2: Import libraries and load data
import pandas as pd
import numpy as np
import folium
from geopy.geocoders import Nominatim
import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Load the Bexar County PPP data
df = pd.read_csv("bexar_county_frequent_addresses_3plus.csv")

print(f"Loaded {len(df):,} PPP loan records")
print(f"Data columns: {list(df.columns)}")
print(f"Date range: {df['DateApproved'].min() if 'DateApproved' in df.columns else 'N/A'} to {df['DateApproved'].max() if 'DateApproved' in df.columns else 'N/A'}")

# Display basic info
df.info()

Loaded 4,875 PPP loan records
Data columns: ['LoanNumber', 'DateApproved', 'SBAOfficeCode', 'ProcessingMethod', 'BorrowerName', 'BorrowerAddress', 'BorrowerCity', 'BorrowerState', 'BorrowerZip', 'LoanStatusDate', 'LoanStatus', 'Term', 'SBAGuarantyPercentage', 'InitialApprovalAmount', 'CurrentApprovalAmount', 'UndisbursedAmount', 'FranchiseName', 'ServicingLenderLocationID', 'ServicingLenderName', 'ServicingLenderAddress', 'ServicingLenderCity', 'ServicingLenderState', 'ServicingLenderZip', 'RuralUrbanIndicator', 'HubzoneIndicator', 'LMIIndicator', 'BusinessAgeDescription', 'ProjectCity', 'ProjectCountyName', 'ProjectState', 'ProjectZip', 'CD', 'JobsReported', 'NAICSCode', 'Race', 'Ethnicity', 'UTILITIES_PROCEED', 'PAYROLL_PROCEED', 'MORTGAGE_INTEREST_PROCEED', 'RENT_PROCEED', 'REFINANCE_EIDL_PROCEED', 'HEALTH_CARE_PROCEED', 'DEBT_INTEREST_PROCEED', 'BusinessType', 'OriginatingLenderLocationID', 'OriginatingLender', 'OriginatingLenderCity', 'OriginatingLenderState', 'Gender', 'Veteran',

In [3]:
## Cell 3: Data preprocessing and aggregation by address
# Aggregate data by address to get summary statistics per location
address_cols = ['BorrowerAddress', 'BorrowerCity', 'BorrowerZip', 'BorrowerState']
available_address_cols = [col for col in address_cols if col in df.columns]

# Group by address and aggregate key metrics
agg_dict = {
    'BorrowerName': ['count', lambda x: ', '.join(x[:5].astype(str)) + ('...' if len(x) > 5 else '')],  # Count and sample names
    'InitialApprovalAmount': ['sum', 'mean', 'max', 'min'],
    'address_loan_count': 'first'  # This should be the same for all records at an address
}

# Add forgiveness columns if available
if 'ForgivenessAmount' in df.columns:
    agg_dict['ForgivenessAmount'] = ['sum', 'mean', 'count']

# Create aggregated dataframe
df_agg = df.groupby(available_address_cols).agg(agg_dict).reset_index()

# Flatten column names
df_agg.columns = [
    col[0] if col[1] == '' else f"{col[0]}_{col[1]}" 
    for col in df_agg.columns
]

# Rename for clarity
rename_dict = {
    'BorrowerName_count': 'total_loans',
    'BorrowerName_<lambda>': 'sample_borrowers',
    'InitialApprovalAmount_sum': 'total_loan_amount',
    'InitialApprovalAmount_mean': 'avg_loan_amount',
    'InitialApprovalAmount_max': 'max_loan_amount',
    'InitialApprovalAmount_min': 'min_loan_amount'
}

if 'ForgivenessAmount_sum' in df_agg.columns:
    rename_dict.update({
        'ForgivenessAmount_sum': 'total_forgiven',
        'ForgivenessAmount_mean': 'avg_forgiven',
        'ForgivenessAmount_count': 'loans_with_forgiveness'
    })

df_agg = df_agg.rename(columns=rename_dict)

print(f"Aggregated to {len(df_agg):,} unique addresses")
print(f"Address loan counts range: {df_agg['total_loans'].min()} to {df_agg['total_loans'].max()}")
print(f"Total loan amounts range: ${df_agg['total_loan_amount'].min():,.2f} to ${df_agg['total_loan_amount'].max():,.2f}")

# Display sample of aggregated data
df_agg.head()

Aggregated to 2,186 unique addresses
Address loan counts range: 1 to 21
Total loan amounts range: $370.00 to $1,129,500.00


Unnamed: 0,BorrowerAddress,BorrowerCity,BorrowerZip,BorrowerState,total_loans,BorrowerName_<lambda_0>,total_loan_amount,avg_loan_amount,max_loan_amount,min_loan_amount,address_loan_count_first,total_forgiven,avg_forgiven,loans_with_forgiveness
0,1 HAVEN FOR HOPE WAY,SAN ANTONIO,78207-1108,TX,2,"I CARE SAN ANTONIO, INC., PAY IT FORWARD - CLE...",125600.0,62800.0,85900.0,39700.0,5,126588.4,63294.2,2
1,1 Haven For Hope Way,San Antonio,78207-1108,TX,1,ROSARIO CONTRERAS,20833.0,20833.0,20833.0,20833.0,5,0.0,,0
2,1 Haven For Hope Way,San Antonio,78207-1266,TX,1,I CARE SAN ANTONIO,85900.0,85900.0,85900.0,85900.0,5,86361.27,86361.27,1
3,1 Haven for Hope Way,San Antonio,78207-1108,TX,1,STREET2FEET,11900.0,11900.0,11900.0,11900.0,5,11993.57,11993.57,1
4,100 NE LOOP 410 STE 615,SAN ANTONIO,78216,TX,1,ELIZABETH C DAVIDSON,20832.0,20832.0,20832.0,20832.0,3,21052.03,21052.03,1


In [4]:
## Cell 4: Setup local Nominatim geocoder and geocoding function
# Configure your local Nominatim server
# Replace 'localhost:8080' with your actual Nominatim server address
LOCAL_NOMINATIM_URL = "http://localhost:8080"  # Adjust this to your server

# Initialize geocoder with local Nominatim server
geolocator = Nominatim(
    user_agent="bexar_ppp_mapping", 
    domain=LOCAL_NOMINATIM_URL.replace('http://', '').replace('https://', ''),
    scheme='http'  # or 'https' if your server uses SSL
)

def get_coordinates_local(row, delay=0.1):
    """
    Get coordinates using local Nominatim server with enhanced address formatting
    """
    # Build full address string
    address_parts = []
    
    if 'BorrowerAddress' in row and pd.notna(row['BorrowerAddress']):
        address_parts.append(str(row['BorrowerAddress']).strip())
    
    if 'BorrowerCity' in row and pd.notna(row['BorrowerCity']):
        address_parts.append(str(row['BorrowerCity']).strip())
    
    # Always add Texas and country for better geocoding
    address_parts.extend(['TX', 'USA'])
    
    # Add ZIP code if available
    if 'BorrowerZip' in row and pd.notna(row['BorrowerZip']):
        zip_code = str(row['BorrowerZip']).strip()
        if len(zip_code) >= 5:
            address_parts.append(zip_code[:5])  # Use only 5-digit ZIP
    
    full_address = ', '.join(address_parts)
    
    try:
        print(f"Geocoding: {full_address[:80]}{'...' if len(full_address) > 80 else ''}")
        location = geolocator.geocode(full_address, timeout=10)
        
        if location:
            print(f"  ✓ Found: {location.latitude:.6f}, {location.longitude:.6f}")
            return pd.Series([location.latitude, location.longitude, full_address])
        else:
            # Try simplified address (just street and city)
            if len(address_parts) >= 2:
                simple_address = f"{address_parts[0]}, {address_parts[1]}, TX, USA"
                print(f"  Retrying with: {simple_address}")
                location = geolocator.geocode(simple_address, timeout=10)
                if location:
                    print(f"  ✓ Found (simplified): {location.latitude:.6f}, {location.longitude:.6f}")
                    return pd.Series([location.latitude, location.longitude, simple_address])
            
            print(f"  ✗ No coordinates found")
            return pd.Series([None, None, full_address])
    
    except Exception as e:
        print(f"  ✗ Error geocoding: {e}")
        return pd.Series([None, None, full_address])
    
    finally:
        if delay > 0:
            time.sleep(delay)  # Be respectful to your local server

print(f"Geocoder configured for: {LOCAL_NOMINATIM_URL}")
print("Ready to geocode addresses...")

Geocoder configured for: http://localhost:8080
Ready to geocode addresses...


In [8]:
# Cell: Complete fixed geocoding with proper success tracking

import re
import time
import pandas as pd

def normalize_address(address_str):
    """Normalize address string for better geocoding success"""
    if pd.isna(address_str):
        return ""
    
    addr = str(address_str).strip()
    
    # Common abbreviations that cause geocoding failures
    abbreviation_fixes = {
        # Suite variations
        r'\bSte\b\.?': 'Suite',
        r'\bSTE\b\.?': 'Suite', 
        r'\b#': 'Suite ',
        
        # Street type abbreviations
        r'\bBlvd\b\.?': 'Boulevard',
        r'\bBLVD\b\.?': 'Boulevard',
        r'\bAve\b\.?': 'Avenue',
        r'\bAVE\b\.?': 'Avenue',
        r'\bSt\b\.?$': 'Street',
        r'\bST\b\.?$': 'Street',
        r'\bRd\b\.?': 'Road',
        r'\bRD\b\.?': 'Road',
        r'\bDr\b\.?': 'Drive',
        r'\bDR\b\.?': 'Drive',
        r'\bLn\b\.?': 'Lane',
        r'\bLN\b\.?': 'Lane',
        r'\bCt\b\.?': 'Court',
        r'\bCT\b\.?': 'Court',
        r'\bPl\b\.?': 'Place',
        r'\bPL\b\.?': 'Place',
        r'\bPkwy\b\.?': 'Parkway',
        r'\bPKWY\b\.?': 'Parkway',
        r'\bCir\b\.?': 'Circle',
        r'\bCIR\b\.?': 'Circle',
        
        # Direction abbreviations
        r'\bN\b\.?': 'North',
        r'\bS\b\.?': 'South',
        r'\bE\b\.?': 'East',
        r'\bW\b\.?': 'West',
        r'\bNE\b\.?': 'Northeast',
        r'\bNW\b\.?': 'Northwest',
        r'\bSE\b\.?': 'Southeast',
        r'\bSW\b\.?': 'Southwest',
        
        # Common abbreviations
        r'\bBnd\b\.?': 'Bend',
        r'\bLk\b\.?': 'Lake',
        r'\bMt\b\.?': 'Mount',
        r'\bFt\b\.?': 'Fort',
    }
    
    for pattern, replacement in abbreviation_fixes.items():
        addr = re.sub(pattern, replacement, addr, flags=re.IGNORECASE)
    
    # Clean up extra spaces
    addr = re.sub(r'\s+', ' ', addr).strip()
    return addr

def get_coordinates_local_fixed(row, delay=0.1):
    """Enhanced geocoding with proper return value handling"""
    original_address = row.get('BorrowerAddress', '')
    city = row.get('BorrowerCity', '')
    zip_code = str(row.get('BorrowerZip', '')).strip()[:5] if pd.notna(row.get('BorrowerZip')) else ''
    
    # Strategy 1: Full normalized address with ZIP
    normalized_addr = normalize_address(original_address)
    full_address = f"{normalized_addr}, {city}, TX"
    if zip_code and len(zip_code) == 5:
        full_address += f", {zip_code}"
    full_address += ", USA"
    
    strategies = [
        ("Full normalized", full_address),
        ("Without ZIP", f"{normalized_addr}, {city}, TX, USA"),
        ("Without suite", re.sub(r'\bSuite\s+\w+', '', normalized_addr).strip() + f", {city}, TX, USA"),
        ("Street only", re.sub(r'\bSuite.*$', '', normalized_addr).strip() + f", {city}, TX, USA"),
        ("Original format", f"{original_address}, {city}, TX, USA"),
        ("Just street number and name", re.sub(r'\b(Suite|#).*$', '', original_address).strip() + f", {city}, TX, USA")
    ]
    
    # Remove empty strategies
    strategies = [(name, addr) for name, addr in strategies if addr.strip() != f", {city}, TX, USA"]
    
    try:
        for strategy_name, address in strategies:
            if not address.strip() or address.count(',') < 2:
                continue
                
            if strategy_name == "Full normalized":
                print(f"Geocoding: {address[:80]}{'...' if len(address) > 80 else ''}")
            
            try:
                location = geolocator.geocode(address, timeout=10)
                if location:
                    if strategy_name != "Full normalized":
                        print(f"  ✓ Found with {strategy_name}: {location.latitude:.6f}, {location.longitude:.6f}")
                    else:
                        print(f"  ✓ Found: {location.latitude:.6f}, {location.longitude:.6f}")
                    # Return as individual values, not Series
                    return location.latitude, location.longitude, address
            except Exception as e:
                if strategy_name == "Full normalized":
                    print(f"  ⚠ Error with {strategy_name}: {e}")
                continue
        
        print(f"  ✗ No coordinates found after trying {len(strategies)} strategies")
        return None, None, strategies[0][1] if strategies else full_address
    
    except Exception as e:
        print(f"  ✗ Critical error geocoding: {e}")
        return None, None, full_address
    
    finally:
        if delay > 0:
            time.sleep(delay)

# Fixed geocoding execution with proper progress tracking
print(f"Starting geocoding of {len(df_agg)} addresses...")
print("This may take several minutes depending on your server performance.")

# Reset coordinates columns
df_agg['Latitude'] = None
df_agg['Longitude'] = None 
df_agg['geocoded_address'] = None

# Track progress properly
total_addresses = len(df_agg)
processed_count = 0
successful_count = 0
start_time = time.time()

for idx, row in df_agg.iterrows():
    # Get coordinates using fixed function
    lat, lon, geocoded_addr = get_coordinates_local_fixed(row, delay=0.1)
    
    # Assign to dataframe
    df_agg.loc[idx, 'Latitude'] = lat
    df_agg.loc[idx, 'Longitude'] = lon
    df_agg.loc[idx, 'geocoded_address'] = geocoded_addr
    
    # Count success
    if lat is not None and lon is not None:
        successful_count += 1
    
    processed_count += 1
    
    # Progress update every 25 addresses
    if processed_count % 25 == 0:
        elapsed = time.time() - start_time
        rate = processed_count / (elapsed / 60) if elapsed > 0 else 0  # addresses per minute
        remaining_time = (total_addresses - processed_count) / rate if rate > 0 else 0
        success_rate = successful_count / processed_count * 100 if processed_count > 0 else 0
        
        print(f"Progress: {processed_count}/{total_addresses} ({processed_count/total_addresses*100:.1f}%) - "
              f"Success: {successful_count} ({success_rate:.1f}%) - "
              f"Rate: {rate:.1f}/min - ETA: {remaining_time:.1f} min")

# Final summary with accurate counts
final_successful = df_agg['Latitude'].notna().sum()
final_failed = len(df_agg) - final_successful

print(f"\n" + "="*60)
print("GEOCODING COMPLETE!")
print("="*60)
print(f"Addresses processed: {processed_count:,}")
print(f"Coordinates found during processing: {successful_count:,}")
print(f"Final successful geocodes in dataframe: {final_successful:,} ({final_successful/len(df_agg)*100:.1f}%)")
print(f"Final failed geocodes: {final_failed:,} ({final_failed/len(df_agg)*100:.1f}%)")

if successful_count != final_successful:
    print(f"\n⚠ Warning: Mismatch between processing count ({successful_count}) and final count ({final_successful})")
    print("This indicates potential issues with coordinate assignment.")

# Save results
df_agg.to_csv("bexar_ppp_geocoded_corrected.csv", index=False)
print(f"\nCorrected geocoded data saved to: bexar_ppp_geocoded_corrected.csv")

# Show sample of successful geocodes
successful_addresses = df_agg.dropna(subset=['Latitude', 'Longitude'])
if len(successful_addresses) > 0:
    print(f"\nSample of successful geocodes:")
    sample_cols = ['BorrowerAddress', 'BorrowerCity', 'Latitude', 'Longitude']
    print(successful_addresses[sample_cols].head(10).to_string(index=False))
else:
    print(f"\nNo successful geocodes found. Check your Nominatim server configuration.")

Starting geocoding of 2186 addresses...
This may take several minutes depending on your server performance.
Geocoding: 1 HAVEN FOR HOPE WAY, SAN ANTONIO, TX, 78207, USA
  ✓ Found: 29.432919, -98.506190
Geocoding: 1 Haven For Hope Way, San Antonio, TX, 78207, USA
  ✓ Found: 29.432919, -98.506190
Geocoding: 1 Haven For Hope Way, San Antonio, TX, 78207, USA
  ✓ Found: 29.432919, -98.506190
Geocoding: 1 Haven for Hope Way, San Antonio, TX, 78207, USA
  ✓ Found: 29.432919, -98.506190
Geocoding: 100 Northeast LOOP 410 Suite 615, SAN ANTONIO, TX, 78216, USA
  ✓ Found with Without suite: 29.517760, -98.469213
Geocoding: 100 Northeast Loop 410 Suite 615, San Antonio, TX, 78216, USA
  ✓ Found with Without suite: 29.517760, -98.469213
Geocoding: 100 West HOUSTON Street, SAN ANTONIO, TX, 78205, USA
  ✓ Found: 29.426482, -98.499061
Geocoding: 100 West HOUSTON Street, SAN ANTONIO, TX, 78205, USA
  ✓ Found: 29.426482, -98.499061
Geocoding: 100 West Houston Street, San Antonio, TX, 78205, USA
  ✓ Foun

In [18]:
## Cell 6: Create interactive map with detailed markers (matching hotel example style)
import folium

# Filter for successfully geocoded addresses
df_map = df_agg.dropna(subset=['Latitude', 'Longitude']).copy()

print(f"Creating map with {len(df_map):,} geocoded addresses")

# Create base map centered on San Antonio (like hotel example)
ppp_map = folium.Map(
    location=[29.5187, -98.6047], 
    zoom_start=11,
    tiles='OpenStreetMap'
)

# Calculate scaling for marker sizes based on total loan amounts (like hotel receipts)
min_amount = df_map['total_loan_amount'].min()
max_amount = df_map['total_loan_amount'].max()
amount_range = max_amount - min_amount

print(f"Loan amount range: ${min_amount:,.2f} to ${max_amount:,.2f}")

# Color scheme based on loan count per address (similar to hotel receipt ranges)
def get_marker_color(loan_count):
    if loan_count >= 20:
        return 'darkblue'
    elif loan_count >= 10:
        return 'blue' 
    elif loan_count >= 7:
        return 'lightblue'
    elif loan_count >= 5:
        return 'green'
    else:
        return 'lightgreen'

def get_marker_size(total_amount, min_amt, max_amt, range_amt):
    """Scale marker size between 4 and 12 based on total loan amount (smaller dots)"""
    if range_amt == 0:
        return 8
    normalized = (total_amount - min_amt) / range_amt
    return 4 + (8 * normalized)  # 4 to 12 range for smaller markers

# Add scaled circle markers for each address (matching hotel approach)
for idx, row in df_map.iterrows():
    # Calculate marker properties
    marker_size = get_marker_size(row['total_loan_amount'], min_amount, max_amount, amount_range)
    marker_color = get_marker_color(row['total_loans'])
    
    # Create detailed popup content (similar to hotel popup style)
    popup_content = f"""
    <b>{row['BorrowerAddress']}</b><br>
    Location: {row['BorrowerCity']}, {row['BorrowerState']} {row.get('BorrowerZip', 'N/A')}<br>
    Total Loans: {row['total_loans']}<br>
    Total Amount: ${row['total_loan_amount']:,.2f}<br>
    Avg Loan: ${row['avg_loan_amount']:,.2f}
    """
    
    # Add forgiveness info if available
    if 'total_forgiven' in row and pd.notna(row['total_forgiven']):
        forgiveness_rate = (row['total_forgiven'] / row['total_loan_amount']) * 100 if row['total_loan_amount'] > 0 else 0
        popup_content += f"<br>Total Forgiven: ${row['total_forgiven']:,.2f}<br>Forgiveness Rate: {forgiveness_rate:.1f}%"
    
    # Create circle marker (like hotel example)
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=marker_size,
        color=marker_color,
        fill=True,
        fillColor=marker_color,
        fillOpacity=0.7,
        popup=folium.Popup(popup_content, max_width=300)
    ).add_to(ppp_map)

# Add a title to the map
title_html = """
<div style="position: fixed; 
    top: 10px; left: 50px; width: 400px; height: 80px; 
    background-color: white; border:2px solid grey; z-index:9999; 
    font-size:16px; font-family: Arial; font-weight: bold;
    text-align: center; padding: 15px; display: flex; align-items: center; justify-content: center;">
    <h3 style="margin: 0; color: #2E86AB;">Bexar County PPP Loans - Addresses with 3+ Loans</h3>
</div>
"""
ppp_map.get_root().html.add_child(folium.Element(title_html))

# Add a legend
legend_html = """
<div style="position: fixed; 
    top: 10px; right: 10px; width: 220px; height: 200px; 
    background-color: white; border:2px solid grey; z-index:9999; 
    font-size:12px; font-family: Arial;
    padding: 10px">
    <h4 style="margin-top: 0; color: #2E86AB;">PPP Loans per Address</h4>
    <p><i class="fa fa-circle" style="color:lightgreen; font-size:16px;"></i> 3-4 loans</p>
    <p><i class="fa fa-circle" style="color:green; font-size:16px;"></i> 5-6 loans</p>
    <p><i class="fa fa-circle" style="color:lightblue; font-size:16px;"></i> 7-9 loans</p>
    <p><i class="fa fa-circle" style="color:blue; font-size:16px;"></i> 10-19 loans</p>
    <p><i class="fa fa-circle" style="color:darkblue; font-size:16px;"></i> 20+ loans</p>
    <hr style="margin: 10px 0;">
    <p style="margin: 5px 0;"><strong>Marker size:</strong> Total loan amount</p>
    <p style="margin: 5px 0; font-size:10px; color:grey;">Larger dots = Higher total $ amount</p>
</div>
"""
ppp_map.get_root().html.add_child(folium.Element(legend_html))

print("Map created successfully!")

# Save the interactive map
map_filename = "bexar_county_ppp_loans_map.html"
ppp_map.save(map_filename)

print(f"Interactive map saved as: {map_filename}")
print(f"Map includes {len(df_map):,} locations with PPP loans")
print(f"Total PPP loans mapped: {df_map['total_loans'].sum():,}")
print(f"Total PPP amount mapped: ${df_map['total_loan_amount'].sum():,.2f}")

# Display the map in Jupyter (if running in notebook)
ppp_map

Creating map with 1,806 geocoded addresses
Loan amount range: $520.00 to $936,300.00
Map created successfully!
Interactive map saved as: bexar_county_ppp_loans_map.html
Map includes 1,806 locations with PPP loans
Total PPP loans mapped: 4,019
Total PPP amount mapped: $109,723,875.30


In [19]:
## Cell 7: Save and display map
# Save the interactive map
map_filename = "bexar_county_ppp_loans_map.html"
ppp_map.save(map_filename)

print(f"Interactive map saved as: {map_filename}")
print(f"Map includes {len(df_map):,} locations with PPP loans")
print(f"Total PPP loans mapped: {df_map['total_loans'].sum():,}")
print(f"Total PPP amount mapped: ${df_map['total_loan_amount'].sum():,.2f}")

# Display the map in Jupyter (if running in notebook)
ppp_map

Interactive map saved as: bexar_county_ppp_loans_map.html
Map includes 1,806 locations with PPP loans
Total PPP loans mapped: 4,019
Total PPP amount mapped: $109,723,875.30


In [20]:
## Cell 8: Generate summary statistics and export options
# Generate comprehensive summary
print("="*60)
print("BEXAR COUNTY PPP LOANS - MAPPING SUMMARY")
print("="*60)

print(f"\n📊 DATA OVERVIEW:")
print(f"   • Total unique addresses: {len(df_agg):,}")
print(f"   • Successfully geocoded: {len(df_map):,} ({len(df_map)/len(df_agg)*100:.1f}%)")
print(f"   • Total loans represented: {df_map['total_loans'].sum():,}")
print(f"   • Total loan amount: ${df_map['total_loan_amount'].sum():,.2f}")

print(f"\n📍 GEOGRAPHIC DISTRIBUTION:")
print(f"   • Latitude range: {df_map['Latitude'].min():.4f} to {df_map['Latitude'].max():.4f}")
print(f"   • Longitude range: {df_map['Longitude'].min():.4f} to {df_map['Longitude'].max():.4f}")

print(f"\n🏢 LOAN CONCENTRATION:")
loan_count_dist = df_map['total_loans'].value_counts().sort_index()
for loans, addresses in loan_count_dist.items():
    total_loans_at_count = loans * addresses
    print(f"   • {loans:2d} loans per address: {addresses:3d} addresses ({total_loans_at_count:,} total loans)")

print(f"\n💰 TOP 10 ADDRESSES BY TOTAL LOAN AMOUNT:")
top_addresses = df_map.nlargest(10, 'total_loan_amount')
for idx, (_, row) in enumerate(top_addresses.iterrows(), 1):
    address = f"{row['BorrowerAddress']}, {row['BorrowerCity']}"
    print(f"   {idx:2d}. {address[:50]:<50} ${row['total_loan_amount']:>12,.2f} ({row['total_loans']:2d} loans)")

print(f"\n🎯 TOP 10 ADDRESSES BY LOAN COUNT:")
top_by_count = df_map.nlargest(10, 'total_loans')
for idx, (_, row) in enumerate(top_by_count.iterrows(), 1):
    address = f"{row['BorrowerAddress']}, {row['BorrowerCity']}"
    print(f"   {idx:2d}. {address[:50]:<50} {row['total_loans']:2d} loans (${row['total_loan_amount']:,.2f})")

# Additional export options
print(f"\n💾 EXPORT OPTIONS:")
print(f"   • Full geocoded data: bexar_ppp_geocoded.csv")
print(f"   • Interactive map: {map_filename}")

# Option to export specific subsets
export_high_concentration = df_map[df_map['total_loans'] >= 10]
if len(export_high_concentration) > 0:
    export_high_concentration.to_csv("high_concentration_addresses_10plus.csv", index=False)
    print(f"   • High concentration addresses (10+ loans): high_concentration_addresses_10plus.csv ({len(export_high_concentration)} addresses)")

print(f"\n🗺️  Map Features:")
print(f"   • Interactive markers with detailed loan information")
print(f"   • Color coding by loan count per address")
print(f"   • Marker size scaled by total loan amount")
print(f"   • Multiple map tile options (OpenStreetMap, CartoDB)")
print(f"   • Popup windows with financial summaries and borrower samples")

print(f"\nMap ready! Open '{map_filename}' in your web browser to explore the interactive map.")

BEXAR COUNTY PPP LOANS - MAPPING SUMMARY

📊 DATA OVERVIEW:
   • Total unique addresses: 2,186
   • Successfully geocoded: 1,806 (82.6%)
   • Total loans represented: 4,019
   • Total loan amount: $109,723,875.30

📍 GEOGRAPHIC DISTRIBUTION:
   • Latitude range: 29.2504 to 29.6840
   • Longitude range: -98.7469 to -98.3011

🏢 LOAN CONCENTRATION:
   •  1 loans per address: 793 addresses (793 total loans)
   •  2 loans per address: 446 addresses (892 total loans)
   •  3 loans per address: 303 addresses (909 total loans)
   •  4 loans per address: 148 addresses (592 total loans)
   •  5 loans per address:  38 addresses (190 total loans)
   •  6 loans per address:  23 addresses (138 total loans)
   •  7 loans per address:  26 addresses (182 total loans)
   •  8 loans per address:   7 addresses (56 total loans)
   •  9 loans per address:   6 addresses (54 total loans)
   • 10 loans per address:   3 addresses (30 total loans)
   • 11 loans per address:   3 addresses (33 total loans)
   • 12 l