# CBBD API Endpoint Tests
## ShotsDashboard Pipeline - Raw Data Validation

This notebook validates the 4 raw data sources for the pipeline:
1. `get_teams()` - Team reference data
2. `get_team_roster()` - Player info
3. `get_games()` - Game metadata
4. `get_plays_by_team()` - Play-by-play (**critical: check for `on_floor[]`**)

## Setup

In [None]:
import cbbd
import pandas as pd
import getpass
from cbbd.rest import ApiException

# Run parameters shared by every cell below
SEASON: int = 2026
TEST_TEAM: str = "Duke"  # a single team keeps the number of API calls small

print(f"Test parameters: season={SEASON}, team='{TEST_TEAM}'")

In [None]:
# API Authentication
# The key is read interactively so it never ends up stored in the notebook source.
api_key = getpass.getpass("Enter your CBBD API key: ")

# Client configuration reused by every ApiClient context in the cells below.
configuration = cbbd.Configuration(
    access_token=api_key,
    host="https://api.collegebasketballdata.com",
)

print("✓ API configured")

---
## Test 1: get_teams()
Team reference data (id, name, conference, venue)

In [None]:
# Fetch the team reference list for SEASON; the client is only needed for the request itself.
with cbbd.ApiClient(configuration) as api_client:
    teams_api = cbbd.TeamsApi(api_client)
    teams = teams_api.get_teams(season=SEASON)

# One row per team, one column per key of the model's dict form.
teams_df = pd.DataFrame([team.to_dict() for team in teams])

print(f"✓ Retrieved {len(teams_df)} teams")
print(f"\nColumns ({len(teams_df.columns)}):")
print(teams_df.columns.tolist())

In [None]:
# Sample teams data
# Keep only the columns that are actually present, so a column missing from the
# API response narrows the preview instead of raising KeyError (same approach
# as the games and plays sample cells).
team_cols = ['id', 'school', 'conference', 'mascot', 'abbreviation']
team_cols = [c for c in team_cols if c in teams_df.columns]
teams_df[team_cols].head(10)

---
## Test 2: get_team_roster()
Player info (id, name, height, position)

In [None]:
# Fetch the roster for the single test team.
with cbbd.ApiClient(configuration) as api_client:
    teams_api = cbbd.TeamsApi(api_client)
    rosters = teams_api.get_team_roster(season=SEASON, team=TEST_TEAM)

# An empty response would otherwise surface as a bare IndexError on rosters[0];
# fail with a message that names the query that returned nothing.
if not rosters:
    raise ValueError(f"No roster returned for team='{TEST_TEAM}', season={SEASON}")

# Only the first entry is inspected; the request was filtered to one team.
roster = rosters[0].to_dict()
print(f"✓ Retrieved roster for {roster['team']}")
print(f"\nRoster keys: {list(roster.keys())}")

In [None]:
# Player details
# Expand the roster's 'players' entry into one row per player.
players_df = pd.DataFrame(roster['players'])

print(
    f"Players: {len(players_df)}",
    f"\nPlayer columns: {list(players_df.columns)}",
    sep="\n",
)
players_df.head(10)

---
## Test 3: get_games()
Game metadata (id, date, teams, scores, venue)

In [None]:
# Fetch game metadata for the test team; the client is only needed for the request itself.
with cbbd.ApiClient(configuration) as api_client:
    games_api = cbbd.GamesApi(api_client)
    games = games_api.get_games(season=SEASON, team=TEST_TEAM)

# One row per game, one column per key of the model's dict form.
games_df = pd.DataFrame([game.to_dict() for game in games])

print(f"✓ Retrieved {len(games_df)} games for {TEST_TEAM}")
print(f"\nColumns ({len(games_df.columns)}):")
print(games_df.columns.tolist())

In [None]:
# Sample games
# Column names may come back as snake_case or camelCase (the plays cells in this
# notebook already allow for both). For each wanted field take whichever
# spelling is present, preferring snake_case, and skip fields that are absent.
game_col_candidates = [
    ('id',),
    ('start_date', 'startDate'),
    ('home_team', 'homeTeam'),
    ('away_team', 'awayTeam'),
    ('home_points', 'homePoints'),
    ('away_points', 'awayPoints'),
    ('venue',),
]
display_cols = []
for spellings in game_col_candidates:
    present = next((c for c in spellings if c in games_df.columns), None)
    if present is not None:
        display_cols.append(present)
games_df[display_cols].head(10)

---
## Test 4: get_plays_by_team() ⚠️ CRITICAL
Play-by-play data - **must verify `on_floor[]` exists for lineup tracking**

In [None]:
# Fetch play-by-play for the test team, restricted to shooting plays to limit API usage.
with cbbd.ApiClient(configuration) as api_client:
    plays_api = cbbd.PlaysApi(api_client)
    plays = plays_api.get_plays_by_team(
        season=SEASON,
        team=TEST_TEAM,
        shooting_plays_only=True,
    )

# One row per play, one column per key of the model's dict form.
plays_df = pd.DataFrame([play.to_dict() for play in plays])

print(f"✓ Retrieved {len(plays_df)} shooting plays for {TEST_TEAM}")
print(f"\nColumns ({len(plays_df.columns)}):")
# Numbered listing, starting at 1, one column per line.
for position, column_name in enumerate(plays_df.columns, start=1):
    print(f"  {position:2}. {column_name}")

In [None]:
# CRITICAL FIELD CHECKS


def _first_present(candidates):
    """Return the first name in `candidates` that is a column of plays_df, or None."""
    return next((name for name in candidates if name in plays_df.columns), None)


def _print_sample(column):
    """Print the first non-null value of plays_df[column]; print nothing if every value is null."""
    non_null = plays_df[column].dropna()
    if len(non_null) > 0:
        print(f"  Sample: {non_null.iloc[0]}")


banner = "=" * 50
print(banner)
print("CRITICAL FIELD VALIDATION")
print(banner)

# on_floor: the column may be spelled in snake_case or camelCase
on_floor_col = _first_present(['on_floor', 'onFloor'])
if on_floor_col:
    print(f"\n✓ on_floor: FOUND (column: '{on_floor_col}')")
    _print_sample(on_floor_col)
else:
    print(f"\n✗ on_floor: NOT FOUND")
    print("  WARNING: Lineup tracking from PBP will not work!")

# shot_info: same two spellings
shot_col = _first_present(['shot_info', 'shotInfo'])
if shot_col:
    print(f"\n✓ shot_info: FOUND (column: '{shot_col}')")
    _print_sample(shot_col)
else:
    print(f"\n✗ shot_info: NOT FOUND")

# participants: single spelling
if 'participants' in plays_df.columns:
    print(f"\n✓ participants: FOUND")
    _print_sample('participants')
else:
    print(f"\n✗ participants: NOT FOUND")

In [None]:
# Sample play data
# Column names may come back as camelCase or snake_case (the critical-field
# checks in this notebook already allow for both). For each wanted field take
# whichever spelling is present, preferring camelCase, and skip absent fields.
play_col_candidates = [
    ('id',),
    ('gameId', 'game_id'),
    ('period',),
    ('clock',),
    ('playType', 'play_type'),
    ('playText', 'play_text'),
    ('team',),
]
sample_cols = []
for spellings in play_col_candidates:
    present = next((c for c in spellings if c in plays_df.columns), None)
    if present is not None:
        sample_cols.append(present)
plays_df[sample_cols].head(10)

In [None]:
# Inspect on_floor structure (if it exists)
if on_floor_col and on_floor_col in plays_df.columns:
    print("on_floor[] structure inspection:")
    print("=" * 50)

    # Show the first value that is a non-empty sized container.
    # Missing cells in a DataFrame are NaN (a float), not None, so a plain
    # `is not None` check lets them through and len() then raises TypeError.
    # Anything without a length (None, NaN) is skipped instead.
    for idx, val in plays_df[on_floor_col].items():
        try:
            size = len(val)
        except TypeError:
            continue
        if size == 0:
            continue

        print(f"\nPlay {idx}:")
        print(f"  Type: {type(val)}")
        print(f"  Length: {size}")
        print(f"  Content:")
        for i, player in enumerate(val):
            print(f"    {i+1}. {player}")
        break
    else:
        # Column exists but holds nothing usable; say so rather than print nothing.
        print("\nNo non-empty on_floor values found")
else:
    print("on_floor column not found - skipping inspection")

In [None]:
# Inspect shot_info structure
if shot_col and shot_col in plays_df.columns:
    print("shot_info structure inspection:")
    print("=" * 50)

    # Show the first non-null value. dropna() is used because missing cells in a
    # DataFrame are NaN rather than None, so an `is not None` test would report
    # NaN as the first "value".
    for idx, val in plays_df[shot_col].dropna().items():
        print(f"\nPlay {idx}:")
        print(f"  Type: {type(val)}")
        if isinstance(val, dict):
            for k, v in val.items():
                print(f"  {k}: {v}")
        else:
            print(f"  Content: {val}")
        break
else:
    # Mirrors the on_floor inspection cell so a missing column is reported, not silent.
    print("shot_info column not found - skipping inspection")

---
## Summary

In [None]:
# Final report: row counts from the four endpoint tests plus presence of the critical play fields.


def _status(found):
    """Map a truthy/falsy presence flag to the label printed in the summary."""
    return '✓ FOUND' if found else '✗ MISSING'


rule = "=" * 50

print(rule)
print("TEST SUMMARY")
print(rule)

print(f"\n1. Teams:    {len(teams_df)} teams retrieved")
print(f"2. Roster:   {len(players_df)} players for {TEST_TEAM}")
print(f"3. Games:    {len(games_df)} games for {TEST_TEAM}")
print(f"4. Plays:    {len(plays_df)} shooting plays for {TEST_TEAM}")

print(f"\nCritical fields:")
print(f"  on_floor[]:   {_status(on_floor_col)}")
print(f"  shot_info:    {_status(shot_col)}")
print(f"  participants: {_status('participants' in plays_df.columns)}")

# The overall verdict depends only on on_floor.
print("\n" + rule)
verdict = (
    "✓ Raw data layer validated - ready for transformations!"
    if on_floor_col
    else "⚠ on_floor missing - may need alternative lineup strategy"
)
print(verdict)
print(rule)