# In [3]:
from bs4 import BeautifulSoup 
import requests 
import pandas as pd
import os 
import time

def _parse_roster_rows(html, team_name):
    """Return one ``{'Team', 'Name', 'Hometown'}`` dict per roster row in *html*.

    An empty list is returned when the page has no ``toccolours`` table
    containing a ``sortable`` table, so the caller can report the problem
    instead of crashing on a missing element.
    """
    soup = BeautifulSoup(html, 'lxml')
    outer = soup.find('table', class_='toccolours')
    table = outer.find('table', class_='sortable') if outer is not None else None
    if table is None:
        return []

    players = []
    # The first <tr> is skipped (presumably the column-header row).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # The name is read from column 2 and the hometown from column 7;
        # rows with fewer cells cannot hold both, so skip them.
        if len(cells) < 8:
            continue
        # Strip the trailing "\xa0(W)" marker from the name
        # (presumably a walk-on flag -- TODO confirm).
        name = cells[2].text.strip().replace('\xa0(W)', '')
        hometown = cells[7].text.strip()
        players.append({'Team': team_name, 'Name': name, 'Hometown': hometown})
    return players


def wikiscraper(team_name, year_int):
    """Scrape a team's season roster from Wikipedia and save it as a CSV.

    The roster is read from the English Wikipedia article
    ``<season>_<team>_men's_basketball_team`` and written to
    ``csv_files/<season>/<team>_<season>.csv`` with the columns ``Team``,
    ``Name`` and ``Hometown``. If that CSV already exists, nothing is fetched.

    Args:
        team_name: Team name as used in the article title, e.g.
            "Illinois Fighting Illini" (spaces are converted to underscores).
        year_int: Calendar year in which the season ends, e.g. 2024 for the
            "2023-24" season.

    Returns:
        None in every case; progress and failures are reported via print().
    """
    team = team_name.replace(" ", "_")
    # Zero-pad the two-digit end year so 2005 yields "2004-05", not "2004-5".
    year = f"{year_int - 1}-{year_int % 100:02d}"
    csv_folder = "csv_files"
    year_path = os.path.join(csv_folder, year)
    csv_file_path = os.path.join(year_path, f'{team}_{year}.csv')

    # Already scraped on an earlier run: skip the network request entirely.
    if os.path.exists(csv_file_path):
        return None

    roster = f"https://en.wikipedia.org/wiki/{year}_{team}_men%27s_basketball_team"
    try:
        # A timeout keeps one stalled connection from hanging the whole batch.
        result = requests.get(roster, timeout=30)
    except requests.RequestException as exc:
        print(f"{team_name} request failed: {exc}")
        return None

    try:
        if result.status_code != 200:
            # For a missing page this prints "<team> error 404", as before.
            print(f"{team_name} error {result.status_code}")
            return None

        player_data = _parse_roster_rows(result.text, team_name)
        if not player_data:
            # Writing an empty CSV would make the cache check above skip
            # this team forever, so report the problem and write nothing.
            print(f"{team_name} {year}: no roster rows found")
            return None

        os.makedirs(year_path, exist_ok=True)
        pd.DataFrame(player_data).to_csv(csv_file_path, index=False)
        print(f'{team_name} CSV uploaded!')
    finally:
        # Pause after every completed request, successful or not, so that
        # back-to-back calls do not hit the server without a delay.
        time.sleep(4)
    return None

# In [4]:
# Big Ten programs whose rosters are scraped. "Penn State Nittany Lions" and
# "Iowa Hawkeyes" are left out because scraping them did not work.
bigten = {
    "Illinois Fighting Illini",
    "Ohio State Buckeyes",
    "Wisconsin Badgers",
    "Purdue Boilermakers",
    "Indiana Hoosiers",
    "Michigan Wolverines",
    "Michigan State Spartans",
    "Nebraska Cornhuskers",
    "Northwestern Wildcats",
    "Minnesota Golden Gophers",
    "Maryland Terrapins",
    "Rutgers Scarlet Knights",
}

# For every team, scrape the seasons ending 2024, 2023, 2022 and 2021,
# newest first.
for team in bigten:
    for i in reversed(range(2021, 2025)):
        wikiscraper(team, i)