# Edges CSV Visualization Demo

This notebook visualizes the edges.csv file using the same Plotly scattergeo format as the frontend ConnectionsMap component. It shows connections between player home locations and colleges, including both new recruits and transfers.

## Features
- Blue markers for home locations (recruits)
- Green markers for colleges
- Red lines for recruit pathways (home → college)
- Orange/dashed lines for transfer pathways (prev_team → new_team)
- Interactive controls for filtering by college and year range
- Uses city and state information directly from the CSV data


In [1]:
# Import required libraries
import pandas as pd
import plotly.graph_objects as go
import json
import os
import re
import ast
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")


Libraries imported successfully!


## Interactive Controls

Modify these variables to change what's displayed in the visualization:


In [2]:
# ===== CONFIGURATION VARIABLES =====
# Modify these to change the visualization

SELECTED_COLLEGE = 'BYU'  # College name to filter by (set to None to show all)
YEAR_MIN = 2004  # Minimum year to include
YEAR_MAX = 2025  # Maximum year to include
SHOW_TRANSFERS = True  # Whether to show transfer pathways
SHOW_ALL_COLLEGES = False  # If True, ignore SELECTED_COLLEGE and show all colleges
SHOW_RECRUITS = True  # Whether to show recruit pathways (home → college)

# ====================================

print(f"Configuration:")
print(f"  Selected College: {SELECTED_COLLEGE if not SHOW_ALL_COLLEGES else 'ALL'}")
print(f"  Year Range: {YEAR_MIN} - {YEAR_MAX}")
print(f"  Show Recruits: {SHOW_RECRUITS}")
print(f"  Show Transfers: {SHOW_TRANSFERS}")


Configuration:
  Selected College: BYU
  Year Range: 2004 - 2025
  Show Recruits: True
  Show Transfers: True


## 1. Data Loading and Parsing


In [3]:
def parse_coordinates(coord_str):
    """Extract lat/lon from numpy array string format like '[np.float64(41.5051613)]'"""
    if pd.isna(coord_str) or coord_str == '0' or coord_str == 0:
        return None
    
    coord_str = str(coord_str)
    
    # Try to extract the number from parentheses first (more accurate)
    # Pattern: [np.float64(number)] or [number] or (number)
    paren_match = re.search(r'\(([-+]?\d*\.?\d+)\)', coord_str)
    if paren_match:
        try:
            return float(paren_match.group(1))
        except ValueError:
            pass
    
    # Fallback: extract any decimal number (but prefer negative numbers for longitude)
    # Match numbers with optional decimal point and sign
    matches = re.findall(r'[-+]?\d+\.?\d*', coord_str)
    if matches:
        # Filter out '64' from 'float64' and other non-coordinate numbers
        # Coordinates should be reasonable: lat between -90 and 90, lon between -180 and 180
        for match in matches:
            try:
                value = float(match)
                # Reasonable coordinate range
                if -180 <= value <= 180 and abs(value) > 1:  # Exclude small integers like '64'
                    return value
            except ValueError:
                continue
    
    return None

def parse_list_string(list_str):
    """Extract first element from a string representation of a list like "['Cleveland']" """
    if pd.isna(list_str):
        return None
    
    try:
        # Try to parse as a Python list
        parsed = ast.literal_eval(str(list_str))
        if isinstance(parsed, list) and len(parsed) > 0:
            return parsed[0]
        return None
    except (ValueError, SyntaxError):
        # Fallback: try regex extraction
        match = re.search(r"'([^']+)'", str(list_str))
        if match:
            return match.group(1)
        return None

# Load the edges data
df = pd.read_csv('edges.csv')
print(f"Loaded {len(df):,} edges records")
print(f"Columns: {list(df.columns)}")
print(f"Year range: {df['year'].min()} - {df['year'].max()}")
print(f"Unique teams: {df['team'].nunique()}")

# Parse coordinates
print("\nParsing coordinates...")
df['home_lat'] = df['homeLat'].apply(parse_coordinates)
df['home_lon'] = df['homeLong'].apply(parse_coordinates)

# Parse city, state, and country from list strings
print("Parsing city, state, and country information...")
df['home_city'] = df['homeTown'].apply(parse_list_string)
df['home_state'] = df['homeState'].apply(parse_list_string)
df['home_country'] = df['homeCountry'].apply(parse_list_string)

# Check new_recruit column type and convert if needed
if df['new_recruit'].dtype == 'object':
    df['new_recruit'] = df['new_recruit'].astype(bool)

# Convert prev_team to string if it's numeric (0 means no previous team)
# Also handle string '0' values
if df['prev_team'].dtype in ['int64', 'float64']:
    df['prev_team'] = df['prev_team'].astype(str)
df['prev_team'] = df['prev_team'].replace(['0', '0.0', 'nan', 'None'], 'None')

# Filter by year range
df = df[(df['year'] >= YEAR_MIN) & (df['year'] <= YEAR_MAX)]
print(f"After year filtering ({YEAR_MIN}-{YEAR_MAX}): {len(df):,} records")

# Separate recruits and transfers
recruits = df[df['new_recruit'] == True].copy()
transfers = df[df['new_recruit'] == False].copy()

print(f"\nRecruits: {len(recruits):,} records")
print(f"Transfers: {len(transfers):,} records")
print(f"Recruits with coordinates: {recruits['home_lat'].notna().sum():,}")
print(f"Recruits with city info: {recruits['home_city'].notna().sum():,}")

# Show sample data
print("\nSample recruit data:")
print(recruits[['name', 'team', 'year', 'home_lat', 'home_lon', 'home_city', 'home_state']].head())
print("\nSample transfer data:")
print(transfers[['name', 'team', 'prev_team', 'year']].head())


Loaded 130,400 edges records
Columns: ['name', 'new_recruit', 'team', 'year', 'prev_team', 'homeLat', 'homeLong', 'homeTown', 'homeState', 'homeCountry']
Year range: 2004 - 2025
Unique teams: 337

Parsing coordinates...
Parsing city, state, and country information...
After year filtering (2004-2025): 130,400 records

Recruits: 113,713 records
Transfers: 16,687 records
Recruits with coordinates: 113,713
Recruits with city info: 113,713

Sample recruit data:
             name        team  year   home_lat    home_lon   home_city  \
0  Bristol Olomua  Texas Tech  2004  41.505161  -81.693445   Cleveland   
1      J.J. Jones    NC State  2004  33.712331  -84.105194    Lithonia   
2   Justin Miller     Clemson  2004  34.807330  -82.820698    Six Mile   
3      Josh Smith       Akron  2004  39.101454  -84.512460  Cincinnati   
4    Dennis Pitta         BYU  2004  34.285558 -118.882041    Moorpark   

  home_state  
0         OH  
1         GA  
2         SC  
3         OH  
4         CA  

Sam

## 2. College Location Encoding


In [4]:
def load_college_geocodes():
    """Load college coordinates from geocode cache"""
    cache_path = '../frontend/public/data/geocode_cache.json'
    
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as f:
            cache = json.load(f)
        
        # Convert to simple (lat, lon) dictionary
        college_coords = {}
        for college, data in cache.items():
            if data and 'latitude' in data and 'longitude' in data:
                college_coords[college] = {
                    'lat': data['latitude'],
                    'lon': data['longitude']
                }
        
        print(f"Loaded {len(college_coords)} college coordinates from cache")
        return college_coords
    else:
        print(f"Warning: Geocode cache not found at {cache_path}")
        return {}

def geocode_college(college_name, cache, geolocator):
    """Geocode a college with caching"""
    if college_name in cache:
        return cache[college_name]
    
    try:
        location = geolocator.geocode(f"{college_name}, USA", timeout=10)
        if location:
            result = {
                'lat': location.latitude,
                'lon': location.longitude
            }
            cache[college_name] = result
            return result
        else:
            cache[college_name] = None
            return None
    except (GeocoderTimedOut, GeocoderServiceError) as e:
        print(f"Geocoding error for {college_name}: {e}")
        cache[college_name] = None
        return None

# Load college coordinates
college_coords = load_college_geocodes()

# Get unique teams from data
unique_teams = set(df['team'].unique())
if SHOW_TRANSFERS:
    # Add previous teams from transfers
    unique_teams.update(df[df['prev_team'] != 'None']['prev_team'].unique())

# Geocode any missing colleges
missing_colleges = [team for team in unique_teams if team not in college_coords or college_coords.get(team) is None]
if missing_colleges:
    print(f"\nGeocoding {len(missing_colleges)} missing colleges...")
    geolocator = Nominatim(user_agent="football_recruiting_map")
    
    for i, college in enumerate(missing_colleges):
        if i % 10 == 0:
            print(f"Progress: {i}/{len(missing_colleges)}")
        result = geocode_college(college, college_coords, geolocator)
        if result:
            print(f"  Geocoded: {college}")
        else:
            print(f"  Failed: {college}")
        time.sleep(1)  # Be respectful to the geocoding service
    
    # Save updated cache (optional - you may want to update the main cache file)
    print(f"\nTotal colleges with coordinates: {len([c for c in college_coords.values() if c is not None])}")

print(f"\nColleges with coordinates: {len([c for c in college_coords.values() if c is not None])}")
print(f"Colleges in data: {len(unique_teams)}")
print(f"Missing coordinates: {len([t for t in unique_teams if t not in college_coords or college_coords.get(t) is None])}")


Loaded 255 college coordinates from cache

Geocoding 90 missing colleges...
Progress: 0/90
  Geocoded: St. Thomas University (Fl)
  Geocoded: North Alabama
  Geocoded: New Haven
  Geocoded: Chadron St
  Geocoded: Keiser University
  Geocoded: Georgetown College Kentucky
  Geocoded: Texas Wesleyan
  Geocoded: Lincoln (PA)
  Geocoded: St. Francis (PA)
  Geocoded: Bowie State
Progress: 10/90
  Geocoded: North Central College
  Geocoded: West Virginia State
  Geocoded: Edinboro University
  Geocoded: Tarleton State
  Geocoded: Frostburg State
  Geocoded: Miles College
  Geocoded: Bloomsburg
  Geocoded: Northeastern
  Geocoded: Thomas More College
  Geocoded: Mercyhurst
Progress: 20/90
  Geocoded: Virginia Union
  Geocoded: Allen
  Geocoded: Franklin Pierce
  Geocoded: Indiana Wesleyan
  Geocoded: Colorado School Of Mines
  Geocoded: Cumberland (TN)
  Geocoded: Shorter
  Geocoded: Virginia St
  Geocoded: McKendree
  Geocoded: Catawba
Progress: 30/90
  Geocoded: Central Washington
  Geocoded

## 3. City and State Information

City and state information is now included directly in the edges.csv file, so no reverse geocoding is needed.


In [5]:
# City and state information is now included in the CSV data
# No reverse geocoding needed!

print("City and state information loaded from CSV:")
print(f"  Recruits with city info: {recruits['home_city'].notna().sum():,}")
print(f"  Recruits with state info: {recruits['home_state'].notna().sum():,}")
print(f"  Unique cities: {recruits['home_city'].nunique():,}")
print(f"  Unique states: {recruits['home_state'].nunique():,}")

# Show sample of city/state data
print("\nSample city/state information:")
sample_cities = recruits[recruits['home_city'].notna() & recruits['home_state'].notna()][
    ['home_city', 'home_state', 'home_country']
].drop_duplicates().head(10)
print(sample_cities)


City and state information loaded from CSV:
  Recruits with city info: 113,713
  Recruits with state info: 113,713
  Unique cities: 7,009
  Unique states: 51

Sample city/state information:
    home_city home_state home_country
0   Cleveland         OH          USA
1    Lithonia         GA          USA
2    Six Mile         SC          USA
3  Cincinnati         OH          USA
4    Moorpark         CA          USA
5  Branchburg         NJ          USA
6  Cedar Park         TX          USA
7   Bradenton         FL          USA
8     Oxnard          CA          USA
9     Memphis         TN          USA


## 4. Data Processing


In [6]:
def process_edges_data(recruits_df, transfers_df, college_coords, filters):
    """Process edges data to create pathways for visualization"""
    
    selected_college = filters.get('selected_college')
    show_all_colleges = filters.get('show_all_colleges', False)
    show_recruits = filters.get('show_recruits', True)
    show_transfers = filters.get('show_transfers', True)
    
    recruit_pathways = []
    transfer_pathways = []
    
    # Process recruits: home location → college
    if show_recruits:
        recruit_data = recruits_df[recruits_df['home_lat'].notna() & recruits_df['home_lon'].notna()].copy()
        
        # Filter by college if specified
        if not show_all_colleges and selected_college:
            recruit_data = recruit_data[recruit_data['team'] == selected_college]
        
        # Filter to only include colleges with coordinates
        recruit_data = recruit_data[
            recruit_data['team'].isin(college_coords.keys()) &
            recruit_data['team'].apply(lambda x: college_coords.get(x) is not None)
        ]
        
        # Aggregate pathways
        pathway_counts = defaultdict(int)
        pathway_details = defaultdict(list)
        pathway_city_state = {}  # Store city/state for each pathway key
        
        for _, row in recruit_data.iterrows():
            team = row['team']
            if team not in college_coords or college_coords[team] is None:
                continue
            
            college_coord = college_coords[team]
            key = (row['home_lat'], row['home_lon'], team)
            
            pathway_counts[key] += 1
            pathway_details[key].append({
                'name': row['name'],
                'year': row['year']
            })
            
            # Store city/state (will be overwritten if multiple, but should be same)
            if key not in pathway_city_state:
                pathway_city_state[key] = {
                    'city': row.get('home_city') if pd.notna(row.get('home_city')) else 'Unknown',
                    'state': row.get('home_state') if pd.notna(row.get('home_state')) else 'Unknown'
                }
        
        # Create pathway records with city/state info
        for (home_lat, home_lon, team), count in pathway_counts.items():
            if team in college_coords and college_coords[team]:
                city_state = pathway_city_state.get((home_lat, home_lon, team), {'city': 'Unknown', 'state': 'Unknown'})
                
                recruit_pathways.append({
                    'home_lat': home_lat,
                    'home_lon': home_lon,
                    'home_city': city_state['city'],
                    'home_state': city_state['state'],
                    'college': team,
                    'college_lat': college_coords[team]['lat'],
                    'college_lon': college_coords[team]['lon'],
                    'count': count,
                    'players': pathway_details[(home_lat, home_lon, team)]
                })
    
    # Process transfers: prev_team → new_team
    if show_transfers:
        transfer_data = transfers_df.copy()
        
        # Filter out transfers with no previous team (shouldn't happen, but be safe)
        transfer_data = transfer_data[transfer_data['prev_team'] != 'None']
        
        # Filter by college if specified (show transfers involving the selected college)
        if not show_all_colleges and selected_college:
            # Include transfers where either source or destination is the selected college
            transfer_data = transfer_data[
                (transfer_data['team'] == selected_college) | 
                (transfer_data['prev_team'] == selected_college)
            ]
        
        # Filter to only include teams with coordinates
        transfer_data = transfer_data[
            transfer_data['team'].isin(college_coords.keys()) &
            transfer_data['prev_team'].isin(college_coords.keys())
        ]
        transfer_data = transfer_data[
            (transfer_data['team'].apply(lambda x: college_coords.get(x) is not None)) &
            (transfer_data['prev_team'].apply(lambda x: college_coords.get(x) is not None))
        ]
        
        # Aggregate transfer pathways
        transfer_counts = defaultdict(int)
        transfer_details = defaultdict(list)
        
        for _, row in transfer_data.iterrows():
            prev_team = row['prev_team']
            new_team = row['team']
            
            if prev_team not in college_coords or new_team not in college_coords:
                continue
            if college_coords[prev_team] is None or college_coords[new_team] is None:
                continue
            
            key = (prev_team, new_team)
            transfer_counts[key] += 1
            transfer_details[key].append({
                'name': row['name'],
                'year': row['year']
            })
        
        # Create transfer pathway records
        for (prev_team, new_team), count in transfer_counts.items():
            transfer_pathways.append({
                'prev_team': prev_team,
                'prev_team_lat': college_coords[prev_team]['lat'],
                'prev_team_lon': college_coords[prev_team]['lon'],
                'new_team': new_team,
                'new_team_lat': college_coords[new_team]['lat'],
                'new_team_lon': college_coords[new_team]['lon'],
                'count': count,
                'players': transfer_details[(prev_team, new_team)]
            })
    
    return recruit_pathways, transfer_pathways

# Process the data
filters = {
    'selected_college': SELECTED_COLLEGE if not SHOW_ALL_COLLEGES else None,
    'show_all_colleges': SHOW_ALL_COLLEGES,
    'show_recruits': SHOW_RECRUITS,
    'show_transfers': SHOW_TRANSFERS
}

recruit_pathways, transfer_pathways = process_edges_data(recruits, transfers, college_coords, filters)

print(f"Processed pathways:")
print(f"  Recruit pathways: {len(recruit_pathways)}")
print(f"  Transfer pathways: {len(transfer_pathways)}")

if recruit_pathways:
    total_recruits = sum(p['count'] for p in recruit_pathways)
    print(f"  Total recruits: {total_recruits}")
    print(f"  Top recruit pathway: {max(recruit_pathways, key=lambda x: x['count'])}")

if transfer_pathways:
    total_transfers = sum(p['count'] for p in transfer_pathways)
    print(f"  Total transfers: {total_transfers}")
    print(f"  Top transfer pathway: {max(transfer_pathways, key=lambda x: x['count'])}")


Processed pathways:
  Recruit pathways: 327
  Transfer pathways: 88
  Total recruits: 744
  Top recruit pathway: {'home_lat': 40.2338438, 'home_lon': -111.6585337, 'home_city': 'Provo', 'home_state': 'UT', 'college': 'BYU', 'college_lat': 40.2554083, 'college_lon': -111.6496832, 'count': 33, 'players': [{'name': 'Luke Ashworth', 'year': 2005}, {'name': 'Harvey Unga', 'year': 2006}, {'name': 'Blake McKenzie', 'year': 2009}, {'name': 'Stephen Covey', 'year': 2009}, {'name': 'Matt Reynolds', 'year': 2009}, {'name': 'Houston Reynolds', 'year': 2009}, {'name': 'Scott Johnson', 'year': 2009}, {'name': 'Craig Bills', 'year': 2009}, {'name': 'Andrew Cusick', 'year': 2010}, {'name': 'Famika Anae', 'year': 2010}, {'name': 'Trevor Brown', 'year': 2011}, {'name': 'Kevan Bills', 'year': 2011}, {'name': 'Colby Jorgensen', 'year': 2011}, {'name': 'Bronson Kaufusi', 'year': 2012}, {'name': 'Chris Badger', 'year': 2013}, {'name': 'Creed Richardson', 'year': 2014}, {'name': 'Figgs Hofheins', 'year': 201

## 5. Visualization Creation


In [7]:
def create_edges_visualization(recruit_pathways, transfer_pathways, college_coords, filters):
    """Create Plotly scattergeo visualization matching ConnectionsMap.jsx format"""
    
    fig = go.Figure()
    
    # Add home location markers (blue) for recruits
    if recruit_pathways:
        home_lats = [p['home_lat'] for p in recruit_pathways]
        home_lons = [p['home_lon'] for p in recruit_pathways]
        
        # Get city/state names from pathway data
        home_labels = []
        for p in recruit_pathways:
            city_name = p.get('home_city', 'Unknown')
            state_name = p.get('home_state', 'Unknown')
            home_labels.append(f"{city_name}, {state_name}")
        
        # Create hover text with player counts
        hover_texts = []
        for i, p in enumerate(recruit_pathways):
            players_str = ', '.join([pl['name'] for pl in p['players'][:5]])
            if len(p['players']) > 5:
                players_str += f" (+{len(p['players']) - 5} more)"
            hover_texts.append(
                f"<b>{home_labels[i]}</b><br>" +
                f"Recruits: {p['count']}<br>" +
                f"To: {p['college']}<br>" +
                f"Players: {players_str}"
            )
        
        home_trace = go.Scattergeo(
            lat=home_lats,
            lon=home_lons,
            mode='markers',
            marker=dict(
                size=8,
                color='blue',
                opacity=0.7,
                symbol='circle'
            ),
            name='Home Locations',
            hovertemplate='%{text}<extra></extra>',
            text=hover_texts
        )
        fig.add_trace(home_trace)
    
    # Add college markers (green)
    # Collect all unique colleges from pathways
    all_colleges = set()
    if recruit_pathways:
        all_colleges.update([p['college'] for p in recruit_pathways])
    if transfer_pathways:
        all_colleges.update([p['prev_team'] for p in transfer_pathways])
        all_colleges.update([p['new_team'] for p in transfer_pathways])
    
    if all_colleges:
        college_lats = [college_coords[c]['lat'] for c in all_colleges if c in college_coords and college_coords[c]]
        college_lons = [college_coords[c]['lon'] for c in all_colleges if c in college_coords and college_coords[c]]
        college_names = [c for c in all_colleges if c in college_coords and college_coords[c]]
        
        # Count recruits per college
        college_counts = defaultdict(int)
        if recruit_pathways:
            for p in recruit_pathways:
                college_counts[p['college']] += p['count']
        if transfer_pathways:
            for p in transfer_pathways:
                college_counts[p['new_team']] += p['count']
        
        college_hover_texts = [
            f"<b>{name}</b><br>Total Recruits/Transfers: {college_counts.get(name, 0)}"
            for name in college_names
        ]
        
        college_trace = go.Scattergeo(
            lat=college_lats,
            lon=college_lons,
            mode='markers',
            marker=dict(
                size=10,
                color='green',
                opacity=0.8,
                symbol='diamond'
            ),
            name='Colleges',
            hovertemplate='%{text}<extra></extra>',
            text=college_hover_texts
        )
        fig.add_trace(college_trace)
    
    # Add connecting lines for recruits (red)
    if recruit_pathways:
        max_count = max(p['count'] for p in recruit_pathways) if recruit_pathways else 1
        
        for pathway in recruit_pathways:
            line_width = max(1, min(8, pathway['count']))
            line_opacity = max(0.3, min(0.9, 0.3 + (pathway['count'] / max_count) * 0.6))
            
            fig.add_trace(go.Scattergeo(
                lat=[pathway['home_lat'], pathway['college_lat']],
                lon=[pathway['home_lon'], pathway['college_lon']],
                mode='lines',
                line=dict(
                    color=f'rgba(255, 0, 0, {line_opacity})',
                    width=line_width
                ),
                showlegend=False,
                hoverinfo='skip'
            ))
    
    # Add connecting lines for transfers (orange, dashed)
    if transfer_pathways:
        max_transfer_count = max(p['count'] for p in transfer_pathways) if transfer_pathways else 1
        
        for pathway in transfer_pathways:
            line_width = max(2, min(10, pathway['count']))
            line_opacity = max(0.4, min(0.9, 0.4 + (pathway['count'] / max_transfer_count) * 0.5))
            
            fig.add_trace(go.Scattergeo(
                lat=[pathway['prev_team_lat'], pathway['new_team_lat']],
                lon=[pathway['prev_team_lon'], pathway['new_team_lon']],
                mode='lines',
                line=dict(
                    color=f'rgba(255, 165, 0, {line_opacity})',
                    width=line_width,
                    dash='dash'
                ),
                showlegend=False,
                hoverinfo='skip'
            ))
    
    # Update layout to match ConnectionsMap.jsx
    title_text = "Football Recruiting Connections"
    if not filters.get('show_all_colleges') and filters.get('selected_college'):
        title_text += f" - {filters['selected_college']}"
    
    fig.update_layout(
        title=dict(
            text=title_text,
            x=0.5,
            font=dict(size=20)
        ),
        geo=dict(
            projection_type='albers usa',
            showland=True,
            landcolor='lightgray',
            showocean=True,
            oceancolor='lightblue',
            showlakes=True,
            lakecolor='lightblue',
            showrivers=True,
            rivercolor='lightblue',
            scope='usa',
            center=dict(lat=39.8283, lon=-98.5795),
            lonaxis_range=[-125, -66],
            lataxis_range=[24, 50]
        ),
        width=1200,
        height=800,
        showlegend=True,
        legend=dict(
            x=0.02,
            y=0.98,
            bgcolor='rgba(255,255,255,0.8)'
        ),
        margin=dict(l=50, r=50, t=50, b=50)
    )
    
    return fig

# Create the visualization
print("Creating visualization...")
fig = create_edges_visualization(recruit_pathways, transfer_pathways, college_coords, filters)
fig.show()


Creating visualization...


In [8]:
# Display statistics
print("=== STATISTICS ===")
print(f"\nYear Range: {YEAR_MIN} - {YEAR_MAX}")
print(f"Selected College: {SELECTED_COLLEGE if not SHOW_ALL_COLLEGES else 'ALL'}")

if recruit_pathways:
    print(f"\nRecruit Pathways: {len(recruit_pathways)}")
    total_recruits = sum(p['count'] for p in recruit_pathways)
    print(f"Total Recruits: {total_recruits}")
    
    # Top recruit pathways
    top_recruit_pathways = sorted(recruit_pathways, key=lambda x: x['count'], reverse=True)[:10]
    print("\nTop 10 Recruit Pathways:")
    for i, p in enumerate(top_recruit_pathways, 1):
        city_state = f"{p.get('home_city', 'Unknown')}, {p.get('home_state', 'Unknown')}"
        print(f"  {i}. {p['college']}: {p['count']} recruits from {city_state}")

if transfer_pathways:
    print(f"\nTransfer Pathways: {len(transfer_pathways)}")
    total_transfers = sum(p['count'] for p in transfer_pathways)
    print(f"Total Transfers: {total_transfers}")
    
    # Top transfer pathways
    top_transfer_pathways = sorted(transfer_pathways, key=lambda x: x['count'], reverse=True)[:10]
    print("\nTop 10 Transfer Pathways:")
    for i, p in enumerate(top_transfer_pathways, 1):
        print(f"  {i}. {p['prev_team']} → {p['new_team']}: {p['count']} transfers")

# College statistics
if recruit_pathways or transfer_pathways:
    college_stats = defaultdict(int)
    if recruit_pathways:
        for p in recruit_pathways:
            college_stats[p['college']] += p['count']
    if transfer_pathways:
        for p in transfer_pathways:
            college_stats[p['new_team']] += p['count']
    
    print(f"\nTop 10 Colleges by Total Activity:")
    top_colleges = sorted(college_stats.items(), key=lambda x: x[1], reverse=True)[:10]
    for i, (college, count) in enumerate(top_colleges, 1):
        print(f"  {i}. {college}: {count}")


=== STATISTICS ===

Year Range: 2004 - 2025
Selected College: BYU

Recruit Pathways: 327
Total Recruits: 744

Top 10 Recruit Pathways:
  1. BYU: 33 recruits from Provo, UT
  2. BYU: 24 recruits from Salt Lake City, UT
  3. BYU: 23 recruits from Orem, UT
  4. BYU: 21 recruits from South Jordan, UT
  5. BYU: 16 recruits from Sandy, UT
  6. BYU: 16 recruits from Draper, UT
  7. BYU: 15 recruits from St. George, UT
  8. BYU: 12 recruits from Alpine, UT
  9. BYU: 11 recruits from American Fork, UT
  10. BYU: 11 recruits from Pleasant Grove, UT

Transfer Pathways: 88
Total Transfers: 172

Top 10 Transfer Pathways:
  1. BYU → Utah State: 12 transfers
  2. Utah → BYU: 9 transfers
  3. Weber State → BYU: 9 transfers
  4. Utah State → BYU: 9 transfers
  5. Southern Utah → BYU: 7 transfers
  6. BYU → Southern Utah: 6 transfers
  7. BYU → Hawai'i: 6 transfers
  8. BYU → Utah: 5 transfers
  9. Hawai'i → BYU: 4 transfers
  10. Boise State → BYU: 4 transfers

Top 10 Colleges by Total Activity:
  1. B