In [8]:
from bs4 import BeautifulSoup 
import requests 
import pandas as pd
import os 
import time

def wikiscraper(team_name, year_int):
    # team ex: "Illinois_Fighting_Illini"
    # year_int ex: 2024
    team = team_name.replace(" ", "_")
    year = str(year_int-1) + "-" + str(year_int%2000)
    csv_folder = "csv_files"
    year_path = os.path.join(csv_folder, year)
    csv_file_path = os.path.join(year_path, f'{team}_{year}.csv')

    if os.path.exists(csv_file_path):
        return None
    try: 
        roster = f"https://en.wikipedia.org/wiki/{year}_{team}_men%27s_basketball_team"
        result = requests.get(roster)
        if(result.status_code == 404):
            print(team_name + " error 404")
            return
        content = result.text

        soup = BeautifulSoup(content, 'lxml')
        table = soup.find('table', class_= 'toccolours')
        table = table.find('table', class_='sortable')

        player_data = []

        rows = table.find_all('tr')
        for row in rows[1:]:
            cells = row.find_all('td')
            name = cells[2].text.strip().replace('\xa0(W)', '')
            hometown = cells[7].text.strip()
            player_data.append({'Team': team_name, 'Name': name, 'Hometown': hometown})
    except:
        print("error thrown for " + team_name + " " + year)
        return
    df = pd.DataFrame(player_data)

    if not os.path.exists(csv_folder):
        os.makedirs(csv_folder)

    if not os.path.exists(year_path):
        os.makedirs(year_path)

    df.to_csv(csv_file_path, index=False)
    print(f'{team_name} CSV uploaded!')
    time.sleep(4)

In [9]:
bigten = {"Illinois Fighting Illini", "Ohio State Buckeyes", "Wisconsin Badgers", "Purdue Boilermakers", "Indiana Hoosiers", "Michigan Wolverines", "Michigan State Spartans", "Nebraska Cornhuskers", "Northwestern Wildcats", "Minnesota Golden Gophers",  "Maryland Terrapins", "Rutgers Scarlet Knights"}
# Ones that didnt work altogether "Penn State Nittany Lions", "Iowa Hawkeyes"
for team in bigten:
    print("uploading " + team)
    for year in range (2024,2014,-1):
        wikiscraper(team, year)

uploading Northwestern Wildcats
uploading Nebraska Cornhuskers
error thrown for Nebraska Cornhuskers 2015-16
error thrown for Nebraska Cornhuskers 2014-15
uploading Illinois Fighting Illini
uploading Minnesota Golden Gophers
uploading Purdue Boilermakers
error thrown for Purdue Boilermakers 2015-16
error thrown for Purdue Boilermakers 2014-15
uploading Rutgers Scarlet Knights
uploading Ohio State Buckeyes
error thrown for Ohio State Buckeyes 2015-16
error thrown for Ohio State Buckeyes 2014-15
uploading Maryland Terrapins
error thrown for Maryland Terrapins 2015-16
error thrown for Maryland Terrapins 2014-15
uploading Wisconsin Badgers
error thrown for Wisconsin Badgers 2015-16
error thrown for Wisconsin Badgers 2014-15
uploading Michigan Wolverines
error thrown for Michigan Wolverines 2019-20
error thrown for Michigan Wolverines 2018-19
error thrown for Michigan Wolverines 2017-18
error thrown for Michigan Wolverines 2015-16
error thrown for Michigan Wolverines 2014-15
uploading Michi