In [1]:
import pandas as pd
import requests
import os
from dotenv import load_dotenv

# Pull any settings defined in a local .env file into the process environment.
load_dotenv()

# CFBD API key taken from the environment; this is None when CFBD_API_KEY is unset.
API_KEY = os.environ.get('CFBD_API_KEY')

In [None]:
def get_all_recruiting_players(start_year: int = 2000, end_year: int = 2025,
                               timeout: float = 30.0) -> pd.DataFrame:
    """
    Download recruiting records from the CFBD API and combine them.

    One request is issued per class year in ``[start_year, end_year]``
    (inclusive). A year whose request fails, or that returns no rows, is
    reported with ``print`` and skipped so that a single bad year does not
    abort the whole download.

    Parameters
    ----------
    start_year : int
        First class year to request (inclusive).
    end_year : int
        Last class year to request (inclusive).
    timeout : float
        Seconds to wait on each HTTP request before giving up on that year.

    Returns
    -------
    pandas.DataFrame
        All retrieved records concatenated under a fresh integer index, with
        an added ``class_year`` column holding the year that was requested.

    Raises
    ------
    ValueError
        If the module-level ``API_KEY`` is empty, or if no year returned data.
    """
    # Fail fast: without a key every request would be sent as "Bearer None".
    if not API_KEY:
        raise ValueError(
            "CFBD API key is missing; set CFBD_API_KEY in the environment or .env file."
        )

    base_url = "https://api.collegefootballdata.com/recruiting/players"
    all_dfs = []

    # A single session reuses the HTTP connection across the per-year requests.
    with requests.Session() as session:
        session.headers.update({"Authorization": f"Bearer {API_KEY}"})

        for yr in range(start_year, end_year + 1):
            try:
                # The timeout keeps one stalled connection from hanging the loop.
                response = session.get(base_url, params={"year": yr}, timeout=timeout)
                response.raise_for_status()

                data = response.json()
                if data:
                    df = pd.DataFrame(data)
                    df["class_year"] = yr
                    all_dfs.append(df)
                else:
                    print(f"No data for {yr}")
            except (requests.RequestException, ValueError) as e:
                # Network/HTTP failures and undecodable or malformed payloads are
                # logged and skipped; other exception types propagate.
                print(f"Error fetching {yr}: {e}")

    if not all_dfs:
        raise ValueError("No data retrieved! Check API access or network.")

    return pd.concat(all_dfs, ignore_index=True)

In [None]:
# Fetch the 2000 through 2025 recruiting classes, then preview the first five rows.
df = get_all_recruiting_players(start_year=2000, end_year=2025)
print(df.head(n=5))

       id athleteId recruitType  year  ranking            name  \
0  173252      None  HighSchool  2000        1   D.J. Williams   
1  173253      None  HighSchool  2000        2    Brock Berlin   
2  173254      None  HighSchool  2000        3  Charles Rogers   
3  173255      None  HighSchool  2000        4  Travis Johnson   
4  173256      None  HighSchool  2000        5  Marcus Houston   

                      school     committedTo position  height  weight  stars  \
0                De La Salle           Miami      ILB    74.0   235.0      5   
1  Evangel Christian Academy         Florida      PRO    74.0   190.0      5   
2                    Saginaw  Michigan State       WR    76.0   195.0      5   
3                 Notre Dame   Florida State      SDE    76.0   265.0      5   
4           Thomas Jefferson        Colorado       RB    72.0   208.0      5   

   rating          city stateProvince country  \
0  0.9998       Concord            CA     USA   
1  0.9998    Shreveport 

In [None]:
# Remove the id, athleteId and recruitType columns; any that are absent are
# skipped instead of raising.
df = df.drop(["id", "athleteId", "recruitType"], axis="columns", errors="ignore")

# Parse latitude/longitude from "hometownInfo" (if present) to new columns and then drop "hometownInfo"
def extract_lat_lon(hometown_info):
    """
    Return the ``(latitude, longitude)`` pair held in a hometown record.

    ``hometown_info`` is expected to be a dict; each coordinate is looked up
    with ``dict.get``, so a missing key yields ``None`` for that coordinate.
    Any non-dict input yields ``(None, None)``.
    """
    if not isinstance(hometown_info, dict):
        return None, None
    return hometown_info.get('latitude'), hometown_info.get('longitude')

if 'hometownInfo' in df.columns:
    # Expand each (lat, lon) pair into a two-column frame aligned on df's index.
    # Unlike unpacking zip(*...), this also works when df has no rows.
    lat_lon = pd.DataFrame(
        df['hometownInfo'].map(extract_lat_lon).tolist(),
        index=df.index,
        columns=['latitude', 'longitude'],
    )
    df['latitude'] = lat_lon['latitude']
    df['longitude'] = lat_lon['longitude']
    df = df.drop(columns=["hometownInfo"])


# to_csv does not create missing parent directories, so make sure data/ exists.
os.makedirs('data', exist_ok=True)
df.to_csv('data/recruiting_data.csv', index=False)
