In [90]:
import os
import time
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from urllib.parse import urlparse
import re



In [91]:
def get_rendered_html(url: str, wait_secs: float = 5.0) -> str:
    """
    Launches headless Chrome (via webdriver-manager), navigates to `url`,
    waits `wait_secs` seconds for JS to populate the tables, then returns HTML.
    """
    chrome_opts = Options()
    chrome_opts.add_argument("--headless")
    chrome_opts.add_argument("--disable-gpu")
    chrome_opts.add_argument("--no-sandbox")
    chrome_opts.add_argument("--disable-dev-shm-usage")
    chrome_opts.add_argument("--window-size=1920,1080")
    
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_opts)
    
    driver.get(url)
    time.sleep(wait_secs)  # allow JS to finish
    html = driver.page_source
    driver.quit()
    return html


In [92]:
def parse_and_save(html: str, output_dir: str = "data/output_csvs") -> None:
    """
    Simplified parsing using pandas.read_html:
      - tables[0] → Team Stats
      - tables[1] → Overall Individual Stats
    Each DataFrame is saved as CSV in `output_dir`.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Read all <table> elements from the rendered HTML
    tables = pd.read_html(html)

    # # ── 4.1 Save Team Stats (first table) ─────────────────────────────────────────
    # df_team = tables[0]
    # team_csv_path = os.path.join(output_dir, "team_stats.csv")
    # df_team.to_csv(team_csv_path, index=False)
    # print(f"→ Saved Team Stats → {team_csv_path}")

    # Save Overall Individual Stats (second table)
    df_indiv = tables[1]
    indiv_csv_path = os.path.join(output_dir, "individual_stats_overall.csv")
    df_indiv.to_csv(indiv_csv_path, index=False)
    print(f"→ Saved Overall Individual Stats → {indiv_csv_path}")


In [None]:
base_urls = [
    "https://gogriffs.com/sports/womens-basketball/stats",
    "https://fairfieldstags.com/sports/womens-basketball/stats",
    "https://ionagaels.com/sports/womens-basketball/stats",
    "https://gojaspers.com/sports/womens-basketball/stats",
    "https://goredfoxes.com/sports/womens-basketball/stats",
    "https://merrimackathletics.com/sports/womens-basketball/stats",
    "https://mountathletics.com/sports/womens-basketball/stats",
    "https://purpleeagles.com/sports/womens-basketball/stats",
    "https://gobobcats.com/sports/womens-basketball/stats",
    "https://gobroncs.com/sports/womens-basketball/stats",
    "https://sacredheartpioneers.com/sports/womens-basketball/stats",
    "https://saintpeterspeacocks.com/sports/womens-basketball/stats",
    "https://sienasaints.com/sports/womens-basketball/stats",
]

start_year = 2002
end_year   = 2024
seasons    = [f"{y}-{str(y+1)[-2:]}" for y in range(start_year, end_year+1)]

# Scraping loop 
for base_url in base_urls:
    college_folder = urlparse(base_url).netloc.replace(".", "_")

    for season in seasons:
        url = f"{base_url}/{season}"
        print(f"\nScraping {college_folder} for season {season} …")

        # Launch headless Chrome
        chrome_opts = Options()
        chrome_opts.add_argument("--headless")
        chrome_opts.add_argument("--disable-gpu")
        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=chrome_opts
        )

        # Navigate and wait for JS
        try:
            driver.get(url)
            time.sleep(5)  # adjust if needed
        except Exception as e:
            print(f"Error loading page: {e}")
            driver.quit()
            continue

        # Check final URL for silent redirects
        final = driver.current_url.rstrip("/")
        if final != url.rstrip("/"):
            print(f"Redirected to {final!r}; skipping.")
            driver.quit()
            continue

        # Grab rendered HTML
        html = driver.page_source
        driver.quit()

        # Verify there's at least one stats table
        soup = BeautifulSoup(html, "html.parser")
        if not soup.find("table", class_="sidearm-table"):
            print("No stats table found; skipping.")
            continue

        # Parse & save CSVs
        out_dir = os.path.join("data/output_by_college", college_folder, season)
        print(f"-> Parsing & saving into '{out_dir}' …", end=" ")
        try:
            parse_and_save(html, out_dir)
            print("DONE")
        except Exception as e:
            print(f"parse_and_save error: {e}")

print("\n All done. CSVs saved to 'output_by_college'.")  


Scraping gogriffs_com for season 2002-03 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2003-04 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2004-05 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2005-06 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2006-07 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2007-08 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2008-09 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; skipping.

Scraping gogriffs_com for season 2009-10 …
Redirected to 'https://gogriffs.com/sports/womens-basketball/schedule'; sk

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2015-16/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2016-17/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2017-18/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2018-19/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2019-20/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2020-21/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2021-22/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2022-23/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2023-24/individual_stats_overall.csv
DONE

Scraping gogriffs_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gogriffs_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/gogriffs_com/2024-25/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2003-04 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2004-05 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2005-06 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2006-07 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2007-08 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2008-09 …
Redirected to 'https://fairfieldstags.com/sports/womens-basketball/schedule'; skipping.

Scraping fairfieldstags_com for season 2009-10 …
Redirected to 'https://fairfieldst

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2015-16/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2016-17/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2017-18/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2018-19/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2019-20/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2020-21/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2021-22/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2022-23/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2023-24/individual_stats_overall.csv
DONE

Scraping fairfieldstags_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/fairfieldstags_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/fairfieldstags_com/2024-25/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2003-04 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2004-05 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2005-06 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2006-07 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2007-08 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2008-09 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_com for season 2009-10 …
Redirected to 'https://ionagaels.com/sports/womens-basketball/schedule'; skipping.

Scraping ionagaels_c

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2015-16' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2015-16/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2016-17 …
-> Parsing & saving into 'data/output_by_college/ionagaels_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2016-17/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2017-18' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2017-18/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2018-19 …
-> Parsing & saving into 'data/output_by_college/ionagaels_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2018-19/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2019-20/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2020-21/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2021-22/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2022-23' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2022-23/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2023-24 …
-> Parsing & saving into 'data/output_by_college/ionagaels_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2023-24/individual_stats_overall.csv
DONE

Scraping ionagaels_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/ionagaels_com/2024-25' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/ionagaels_com/2024-25/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2002-03 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2003-04 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2004-05 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2005-06 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2006-07 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2007-08 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; skipping.

Scraping gojaspers_com for season 2008-09 …
Redirected to 'https://gojaspers.com/sports/womens-basketball/schedule'; 

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2014-15' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2014-15/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2015-16 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2015-16/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2016-17/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2017-18/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2018-19/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2019-20/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2020-21/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2021-22' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2021-22/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2022-23 …
-> Parsing & saving into 'data/output_by_college/gojaspers_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2022-23/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gojaspers_com/2023-24' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2023-24/individual_stats_overall.csv
DONE

Scraping gojaspers_com for season 2024-25 …
-> Parsing & saving into 'data/output_by_college/gojaspers_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/gojaspers_com/2024-25/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2003-04 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2004-05 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2005-06 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2006-07 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2007-08 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2008-09 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scraping goredfoxes_com for season 2009-10 …
Redirected to 'https://goredfoxes.com/sports/womens-basketball/schedule'; skipping.

Scrap

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2014-15/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2015-16 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2015-16' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2015-16/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2016-17 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2016-17/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2017-18' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2017-18/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2018-19 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2018-19' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2018-19/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2019-20 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2019-20' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2019-20/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2020-21 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2020-21' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2020-21/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2021-22 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2021-22' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2021-22/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2022-23 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2022-23' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2022-23/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2023-24 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2023-24' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2023-24/individual_stats_overall.csv
DONE

Scraping goredfoxes_com for season 2024-25 …
-> Parsing & saving into 'data/output_by_college/goredfoxes_com/2024-25' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/goredfoxes_com/2024-25/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2002-03 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping merrimackathletics_com for season 2003-04 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping merrimackathletics_com for season 2004-05 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping merrimackathletics_com for season 2005-06 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping merrimackathletics_com for season 2006-07 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping merrimackathletics_com for season 2007-08 …
Redirected to 'https://merrimackathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2018-19/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2019-20/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2020-21/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2021-22/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2022-23/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2023-24/individual_stats_overall.csv
DONE

Scraping merrimackathletics_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/merrimackathletics_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/merrimackathletics_com/2024-25/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2003-04 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2004-05 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2005-06 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2006-07 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2007-08 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2008-09 …
Redirected to 'https://mountathletics.com/sports/womens-basketball/schedule'; skipping.

Scraping mountathletics_com for season 2009-10 …
Redirected to 'https://mountathlet

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2011-12' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2011-12/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2012-13 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2012-13' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2012-13/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2013-14 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2013-14' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2013-14/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2014-15 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2014-15' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2014-15/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2015-16 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2015-16' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2015-16/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2016-17 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2016-17' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2016-17/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2017-18 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2017-18/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2018-19/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2019-20' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2019-20/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2020-21 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2020-21/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2021-22/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/mountathletics_com/2022-23' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2022-23/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2023-24 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2023-24' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2023-24/individual_stats_overall.csv
DONE

Scraping mountathletics_com for season 2024-25 …
-> Parsing & saving into 'data/output_by_college/mountathletics_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/mountathletics_com/2024-25/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2003-04 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2004-05 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2005-06 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2006-07 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2007-08 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2008-09 …
Redirected to 'https://purpleeagles.com/sports/womens-basketball/schedule'; skipping.

Scraping purpleeagles_com for season 2009-10 …
Redirected to 'https://purpleeagles.com/sports/womens-basketba

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2013-14' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2013-14/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2014-15 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2014-15' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2014-15/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2015-16 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2015-16/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2016-17/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2017-18/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2018-19/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2019-20/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2020-21/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2021-22/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2022-23/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2023-24/individual_stats_overall.csv
DONE

Scraping purpleeagles_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/purpleeagles_com/2024-25' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/purpleeagles_com/2024-25/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2002-03 …
-> Parsing & saving into 'data/output_by_college/gobobcats_com/2002-03' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2002-03/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2003-04 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2003-04' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2003-04/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2004-05 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2004-05' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2004-05/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2005-06 …


  tables = pd.read_html(html)


Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2006-07 …
-> Parsing & saving into 'data/output_by_college/gobobcats_com/2006-07' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2006-07/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2007-08 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2007-08' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2007-08/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2008-09 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2008-09' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2008-09/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2009-10 …


  tables = pd.read_html(html)


Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2010-11 …
-> Parsing & saving into 'data/output_by_college/gobobcats_com/2010-11' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2010-11/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2011-12 …


  tables = pd.read_html(html)


Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2012-13 …
Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2013-14 …
Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2014-15 …
Redirected to 'https://gobobcats.com/sports/womens-basketball/schedule'; skipping.

Scraping gobobcats_com for season 2015-16 …
-> Parsing & saving into 'data/output_by_college/gobobcats_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2015-16/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2016-17/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2017-18/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2018-19/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2019-20/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2020-21/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2021-22/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2022-23/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2023-24/individual_stats_overall.csv
DONE

Scraping gobobcats_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobobcats_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/gobobcats_com/2024-25/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2003-04 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2004-05 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2005-06 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2006-07 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2007-08 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2008-09 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 2009-10 …
Redirected to 'https://gobroncs.com/sports/womens-basketball/schedule'; skipping.

Scraping gobroncs_com for season 20

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2014-15' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2014-15/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2015-16 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2015-16' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2015-16/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2016-17 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2016-17/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2017-18/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2018-19' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2018-19/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2019-20 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2019-20/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2020-21/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2021-22/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2022-23/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2023-24/individual_stats_overall.csv
DONE

Scraping gobroncs_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/gobroncs_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/gobroncs_com/2024-25/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2003-04 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2004-05 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2005-06 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2006-07 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2007-08 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredheartpioneers_com for season 2008-09 …
Redirected to 'https://sacredheartpioneers.com/sports/womens-basketball/schedule'; skipping.

Scraping sacredhea

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2017-18' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2017-18/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2018-19 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2018-19' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2018-19/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2019-20 …
-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2019-20/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2020-21/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2021-22/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2022-23/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2023-24/individual_stats_overall.csv
DONE

Scraping sacredheartpioneers_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sacredheartpioneers_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/sacredheartpioneers_com/2024-25/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2002-03 …


  tables = pd.read_html(html)


Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2003-04 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2004-05 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2005-06 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2006-07 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2007-08 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpeterspeacocks_com for season 2008-09 …
Redirected to 'https://saintpeterspeacocks.com/sports/womens-basketball/schedule'; skipping.

Scraping saintpete

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2012-13' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2012-13/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2013-14 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2013-14' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2013-14/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2014-15 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2014-15' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2014-15/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2015-16 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2015-16' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2015-16/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2016-17 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2016-17' … → Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2016-17/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2017-18 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2017-18' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2017-18/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2018-19 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2018-19' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2018-19/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2019-20 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2019-20/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2020-21' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2020-21/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2021-22 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2021-22' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2021-22/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2022-23 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2022-23' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2022-23/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2023-24 …
-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2023-24/individual_stats_overall.csv
DONE

Scraping saintpeterspeacocks_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/saintpeterspeacocks_com/2024-25' … 

  tables = pd.read_html(html)


→ Saved Overall Individual Stats → data/output_by_college/saintpeterspeacocks_com/2024-25/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2002-03 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2003-04 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2004-05 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2005-06 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2006-07 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2007-08 …
Redirected to 'https://sienasaints.com/sports/womens-basketball/schedule'; skipping.

Scraping sienasaints_com for season 2008-09 …
Redirected to 'https://sienasaints.co

  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2019-20' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2019-20/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2020-21 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2020-21' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2020-21/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2021-22 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2021-22' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2021-22/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2022-23 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2022-23' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2022-23/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2023-24 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2023-24' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2023-24/individual_stats_overall.csv
DONE

Scraping sienasaints_com for season 2024-25 …


  tables = pd.read_html(html)


-> Parsing & saving into 'data/output_by_college/sienasaints_com/2024-25' … → Saved Overall Individual Stats → data/output_by_college/sienasaints_com/2024-25/individual_stats_overall.csv
DONE

 All done. CSVs saved to 'output_by_college'.


  tables = pd.read_html(html)


In [102]:

def get_all_csv_paths(root_folder):
    team_paths = []
    individual_paths = []
    for dirpath, dirnames, filenames in os.walk(root_folder):
        for file in filenames:
            if file.endswith('.csv') and 'individual_stats' in file.lower(): 
                    individual_paths.append(os.path.join(dirpath, file))
    return individual_paths

folder = "data/output_by_college"
individual_paths = get_all_csv_paths(folder)
print(individual_paths)

['data/output_by_college/gojaspers_com/2017-18/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2016-17/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2019-20/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2021-22/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2018-19/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2024-25/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2014-15/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2023-24/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2013-14/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2022-23/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2015-16/individual_stats_overall.csv', 'data/output_by_college/gojaspers_com/2020-21/individual_stats_overall.csv', 'data/output_by_college/mountathletics_com/2017-18/individual_stats_overall

In [103]:
new_root = "data/output_by_college_clean"
original_root = "data/output_by_college"

In [104]:
# helper to clean "Last, First 15 Last, First" → "First Last"
def normalize_name(val):
    # look for “NN  Last, First”
    m = re.search(r'\d+\s*(?P<last>[^,]+),\s*(?P<first>\S+)', val)
    if m:
        return f"{m.group('first')} {m.group('last')}"
    # fallback: just “Last, First”
    if ',' in val:
        last, first = [x.strip() for x in val.split(',', 1)]
        return f"{first} {last}"
    return val

In [105]:
# Individual Stats Processing
for path in individual_paths:
    print(f"Processing {path} ...")
    # Read in your CSV without parsing any header rows:
    df_ind = pd.read_csv(path, header=None)

    # Grab the first two rows as strings:
    row0 = df_ind.iloc[0].astype(str)
    row1 = df_ind.iloc[1].astype(str)

    # Build a new list of column names:
    new_cols = []
    for a, b in zip(row0, row1):
        a, b = a.strip(), b.strip()
        if not b or a.lower() == b.lower():
            new_cols.append(a)          
        else:
            new_cols.append(f"{a} {b}") 

    # Assign and drop the old header‐rows:
    df_ind.columns = new_cols
    df_ind = df_ind.drop([0, 1]).reset_index(drop=True)

    # Drop the last column
    df_ind = df_ind.iloc[:, :-1]

    # Rename the first column to "Player Number"
    first = df_ind.columns[0]
    df_ind = df_ind.rename(columns={first: "Player Number"})

    # Normalize the player name column
    name_col = df_ind.columns[1]     # second column
    df_ind[name_col] = df_ind[name_col].apply(normalize_name)

    # Create new path with same directory structure under new_root
    relative_path = os.path.relpath(path, original_root)
    output_path_ind = os.path.join(new_root, relative_path)
    

    # Team Stats Processing
    df_team = df_ind.tail(2)
    rel_dir  = os.path.dirname(relative_path)
    base     = os.path.basename(relative_path)
    _, ext = os.path.splitext(base)
    team_name = f"team_stats{ext}"

    # Drop the last rows from df_ind that corresponmd to team stats
    df_ind = df_ind[df_ind["Player Number"] != 'TM']
    df_ind = df_ind[df_ind["Player"] != 'Total']
    df_ind = df_ind[df_ind["Player"] != 'Opponents']

    # Join it back under new_root
    output_path_team = os.path.join(new_root, rel_dir, team_name)
    
    # Create parent directories if they don't exist
    os.makedirs(os.path.dirname(output_path_ind), exist_ok=True)
    os.makedirs(os.path.dirname(output_path_team), exist_ok=True)
    
    # Save processed Individual DataFrame
    df_ind.to_csv(output_path_ind, index=False)
    print(f"Individual Player Statistics Saved to: {output_path_ind}\n") 

    # Save processed Team DataFrame
    df_team.to_csv(output_path_team, index=False)
    print(f"Team Statistics Saved to: {output_path_team}\n")

    # Check output
    print(df_ind.head())
    print(df_team.head())

Processing data/output_by_college/gojaspers_com/2017-18/individual_stats_overall.csv ...
Individual Player Statistics Saved to: data/output_by_college_clean/gojaspers_com/2017-18/individual_stats_overall.csv

Team Statistics Saved to: data/output_by_college_clean/gojaspers_com/2017-18/team_stats.csv

  Player Number           Player  GP    GS Minutes TOT Minutes AVG FG FGM  \
0            35     Kayla Grimme  31  30.0         951        30.7    179   
1            15      Amani Tatum  31  31.0        1041        33.6    133   
2            02      Gabby Cajou  31   1.0         915        29.5    100   
3            30     Mikki Guiton  30  11.0         478        15.9     62   
4            14  Taylor Williams  30  23.0         664        22.1     42   

  FG FGA FG FG% 3PT  ... Scoring AVG Rebounds OFF Rebounds DEF Rebounds TOT  \
0    420  0.426  26  ...        14.8          110          153          263   
1    334  0.398  71  ...        12.9           26           99          125  