In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

teams = [
    "illinois", "indiana", "iowa", "maryland", "michigan", "michigan-state", "minnesota", "nebraska",
    "northwestern", "ohio-state", "oregon", "penn-state", "purdue", "rutgers", "ucla", "usc", "washington", "wisconsin"
]

options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

for team in teams:
    url = f"https://fbschedules.com/2025-{team}-football-schedule/"
    print(f"Loading {url}")
    driver.get(url)
    time.sleep(5)  # Adjust if needed for slower JS
    with open(f"{team}_2025.html", "w", encoding="utf-8") as f:
        f.write(driver.page_source)

driver.quit()


Loading https://fbschedules.com/2025-illinois-football-schedule/
Loading https://fbschedules.com/2025-indiana-football-schedule/
Loading https://fbschedules.com/2025-iowa-football-schedule/
Loading https://fbschedules.com/2025-maryland-football-schedule/
Loading https://fbschedules.com/2025-michigan-football-schedule/
Loading https://fbschedules.com/2025-michigan-state-football-schedule/
Loading https://fbschedules.com/2025-minnesota-football-schedule/
Loading https://fbschedules.com/2025-nebraska-football-schedule/
Loading https://fbschedules.com/2025-northwestern-football-schedule/
Loading https://fbschedules.com/2025-ohio-state-football-schedule/
Loading https://fbschedules.com/2025-oregon-football-schedule/
Loading https://fbschedules.com/2025-penn-state-football-schedule/
Loading https://fbschedules.com/2025-purdue-football-schedule/
Loading https://fbschedules.com/2025-rutgers-football-schedule/
Loading https://fbschedules.com/2025-ucla-football-schedule/
Loading https://fbschedu

In [2]:
import pandas as pd
import re
from bs4 import BeautifulSoup

def split_opponent_venue(raw):
    m = re.match(r'^(at|vs)\s*(\d+)?\s*(.*)', raw)
    if m:
        location_type = m.group(1)
        raw2 = m.group(3)
    else:
        location_type = ""
        raw2 = re.sub(r'^\d+\s*', '', raw)
    raw2 = re.sub(r'\([^)]*\)', '', raw2).strip()
    m2 = re.search(r'([a-z])([A-Z])', raw2)
    if m2:
        idx = m2.start(2)
        opponent = raw2[:idx].strip()
        venue_city = raw2[idx:].strip()
    else:
        opponent = raw2.strip()
        venue_city = ""
    if "," in venue_city:
        idx = venue_city.find(",")
        venue = venue_city[:idx].strip()
        city_state = venue_city[idx+1:].strip()
    else:
        venue = venue_city
        city_state = ""
    return location_type, opponent, venue, city_state

all_games = []
for team in teams:
    with open(f"{team}_2025.html", "r", encoding="utf-8") as f:
        html = f.read()
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", class_="cfb-sch")
    if not table:
        print(f"No table found for {team}")
        continue
    rows = table.find_all("tr", class_=re.compile(r"^\d{6}$"))
    for row in rows:
        cols = [td.get_text(strip=True) for td in row.find_all("td")]
        if len(cols) < 5:
            continue
        date = cols[0]
        raw = cols[2]
        time_tv = cols[3]
        result = cols[4]
        if raw == "OFF":
            all_games.append({
                "Team": team.title().replace('-', ' '),
                "Date": date,
                "Opponent": "BYE",
                "Location_Type": "",
                "Venue": "",
                "City_State": "",
                "Time": "",
                "TV": "",
                "Result": result
            })
            continue
        location_type, opponent, venue, city_state = split_opponent_venue(raw)
        tv_match = re.match(r'^(.+?[EC]T)(.*)$', time_tv)
        if tv_match:
            time = tv_match.group(1).strip()
            tv = tv_match.group(2).strip()
        else:
            time, tv = time_tv, ''
        all_games.append({
            "Team": team.title().replace('-', ' '),
            "Date": date,
            "Opponent": opponent,
            "Location_Type": location_type,
            "Venue": venue,
            "City_State": city_state,
            "Time": time,
            "TV": tv,
            "Result": result
        })

df = pd.DataFrame(all_games)
df.to_csv("bigten_2025_schedule.csv", index=False)
print(df.head(20))


        Team             Date                       Opponent Location_Type  \
0   Illinois    FridayAug. 29  Western Illinois Leathernecks                 
1   Illinois   SaturdaySep. 6               Duke Blue Devils            at   
2   Illinois  SaturdaySep. 13       Western Michigan Broncos                 
3   Illinois  SaturdaySep. 20               Indiana Hoosiers            at   
4   Illinois  SaturdaySep. 27                    USC Trojans                 
5   Illinois   SaturdayOct. 4            Purdue Boilermakers            at   
6   Illinois  SaturdayOct. 11            Ohio State Buckeyes                 
7   Illinois  SaturdayOct. 18                            BYE                 
8   Illinois  SaturdayOct. 25             Washington Huskies            at   
9   Illinois   SaturdayNov. 1        Rutgers Scarlet Knights                 
10  Illinois   SaturdayNov. 8                            BYE                 
11  Illinois  SaturdayNov. 15             Maryland Terrapins    

In [3]:
df.to_excel("bigten_2025_schedule.xlsx", index=False)
