
# 🏀 UCSD Basketball XML → Smartabase CSV

This notebook processes raw NCAA box score XML files and converts **UCSD player data** into a **Smartabase-compatible CSV** file.

✅ Automatically maps to Smartabase field keys  
✅ Only includes UCSD players  
✅ Keeps every UCSD `<player>` entry (stat columns are left blank when an entry has no `<stats>` data)  
✅ Interactive file selection and clean output


In [None]:

import glob
import ipywidgets as widgets
from IPython.display import display

# List available XML files in the current directory.
# The extension is compared case-insensitively: glob patterns are matched
# case-sensitively outside Windows, so "*.XML" alone would miss "game.xml".
# Sorting gives the dropdown a stable order (glob's order is arbitrary).
xml_files = sorted(
    candidate for candidate in glob.glob("*")
    if candidate.lower().endswith(".xml")
)

if not xml_files:
    print("⚠️ No XML files found. Please upload one via the JupyterLite file browser.")
else:
    # `file_dropdown` is only created when at least one file was found;
    # its `.value` holds the selected filename.
    file_dropdown = widgets.Dropdown(
        options=xml_files,
        description='Select XML:',
        disabled=False,
    )
    display(file_dropdown)


In [None]:

def process_ucsd_xml(xml_path, csv_filename='ucsd_smartabase_upload.csv', team_id='UCSD'):
    """Export one team's player rows from a box-score XML file to a CSV.

    Reads ``xml_path``, takes the ``gameid`` attribute of the root's
    ``<venue>`` child (blank when absent), and writes one CSV row per
    ``<player>`` element found under every ``<team>`` whose ``id`` attribute
    equals ``team_id``. Players without a ``<stats>`` child are still written,
    with every stat column left blank.

    Args:
        xml_path: Path of the XML file to parse.
        csv_filename: Path of the CSV to write; an existing file is
            overwritten. Defaults to ``'ucsd_smartabase_upload.csv'``.
        team_id: Value of the ``<team id="...">`` attribute selecting which
            team's players are exported. Defaults to ``'UCSD'``.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the XML cannot be read or the CSV cannot be written.
    """
    import csv
    import xml.etree.ElementTree as ET

    # CSV column (Smartabase field key) -> attribute name on the <stats> element.
    # Built once here rather than once per player.
    stat_map = {
        'fgm6': 'fgm', 'fga7': 'fga',
        'fgm38': 'fgm3', 'fga39': 'fga3',
        'ftm10': 'ftm', 'fta11': 'fta',
        'tp12': 'tp', 'blk13': 'blk',
        'stl14': 'stl', 'ast15': 'ast',
        'min16': 'min', 'oreb17': 'oreb',
        'dreb18': 'dreb', 'to22': 'to', 'pf': 'pf'
    }
    # Identity columns first, then the stat columns in stat_map order, so the
    # header can never drift out of sync with the mapping.
    csv_headers = ['first_name', 'last_name', 'gameid', 'gs', *stat_map]

    root = ET.parse(xml_path).getroot()

    venue = root.find('venue')
    game_id = venue.attrib.get('gameid', '') if venue is not None else ''

    rows = []
    for team in root.findall('team'):
        if team.attrib.get('id') != team_id:
            continue
        for player in team.findall('player'):
            name = player.attrib.get('name', '')
            # Names are "Last, First". Split on the FIRST comma only so a name
            # containing extra commas (e.g. "Smith, Jr., John") no longer
            # raises ValueError during unpacking. A name with no comma is
            # treated as a first name with an empty last name.
            last, comma, first = name.partition(',')
            if not comma:
                last, first = '', name

            stats = player.find('stats')
            stat_attrs = stats.attrib if stats is not None else {}

            row = {
                'first_name': first.strip(),
                'last_name': last.strip(),
                'gameid': game_id,
                'gs': player.attrib.get('gs', ''),
            }
            for csv_key, xml_key in stat_map.items():
                row[csv_key] = stat_attrs.get(xml_key, '')
            rows.append(row)

    # Explicit UTF-8 so non-ASCII player names do not depend on (or fail under)
    # the platform's default text encoding.
    with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=csv_headers)
        writer.writeheader()
        writer.writerows(rows)

    print(f"✅ CSV file '{csv_filename}' generated.")


In [None]:

# Convert whichever file is currently selected in the dropdown.
# The membership check keeps this cell from raising NameError when the
# selection cell found no XML files and therefore never created `file_dropdown`.
if 'file_dropdown' in globals():
    chosen_xml_path = file_dropdown.value
    process_ucsd_xml(chosen_xml_path)


In [None]:

from IPython.display import FileLink
# Link to the CSV written by process_ucsd_xml under its default filename.
# Kept as the cell's last expression so the notebook renders the link.
FileLink(path="ucsd_smartabase_upload.csv")
