In [6]:
import urllib3
import pandas as pd
import string
from datetime import datetime
from bs4 import BeautifulSoup

# Show every column when a wide DataFrame is rendered.
pd.set_option('display.max_columns', None)

# `display` and `HTML` are part of the public IPython.display API; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
# Inject CSS that stretches `.container` elements to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [7]:
def get_season_schools(year):
    """Scrape the men's school-stats table for one season from sports-reference.

    The ``basic_school_stats`` table is downloaded, saved as a dated
    ``data_file_YYYY_MM_DD.txt`` CSV snapshot in the working directory, and
    read back with pandas so that column dtypes are inferred by ``read_csv``.

    Parameters
    ----------
    year : int or str
        Season identifier interpolated into the page URL (e.g. ``2025``).

    Returns
    -------
    df : pandas.DataFrame
        One row per table row, with the spacer columns removed and the
        columns renamed according to ``column_names`` below.
    school_dict : dict
        Maps each school name to a slug: lower-cased, ASCII punctuation
        removed, spaces replaced by hyphens.

    Raises
    ------
    RuntimeError
        If the page request does not return HTTP 200.
    ValueError
        If the stats table header cannot be found in the returned page.
    """
    http = urllib3.PoolManager()
    url = f'https://www.sports-reference.com/cbb/seasons/men/{year}-school-stats.html'
    r = http.request('GET', url)
    # Fail loudly on an error page instead of parsing it as if it were data.
    if r.status != 200:
        raise RuntimeError(f'GET {url} returned HTTP status {r.status}')
    soup = BeautifulSoup(r.data, 'html.parser')

    header_rows = soup.select("table[id='basic_school_stats'] > thead > tr")
    if not header_rows:
        raise ValueError(f"table 'basic_school_stats' not found at {url}")
    # The last header row carries the per-column labels (earlier rows are
    # grouping headers). Its first label belongs to the row-number column,
    # which body rows render as <th> rather than <td>, so it is dropped to
    # keep the header aligned with the <td>-only data rows.
    header = [cell.get_text() for cell in header_rows[-1].select("th")][1:]

    rows = []
    for tr in soup.select("table[id='basic_school_stats'] > tbody > tr"):
        cells = [td.get_text() for td in tr.select("td")]
        # Rows without any <td> (e.g. repeated in-table headers) hold no data.
        if cells:
            rows.append(cells)

    # Round-trip through a CSV snapshot so read_csv infers numeric dtypes.
    # to_csv quotes cells that contain commas, and the explicit encoding keeps
    # non-ASCII text (e.g. the '\xa0' spacer labels) readable on any platform.
    filename = 'data_file_' + datetime.today().strftime('%Y_%m_%d') + ".txt"
    pd.DataFrame(rows, columns=header).to_csv(filename, index=False, encoding='utf-8')
    df = pd.read_csv(filename, encoding='utf-8')

    # Repeated labels (W, L, and the blank spacers) are de-duplicated by
    # read_csv with '.1', '.2', ... suffixes; the spacer columns are empty.
    spacer_columns = ['\xa0', '\xa0.1', '\xa0.2', '\xa0.3', '\xa0.4']

    column_names = {
        'School': 'school',
        'G': 'games',
        'W': 'wins',
        'L': 'losses',
        'W-L%': 'win_loss_percent',
        'SRS': 'simple_rating_system',
        'SOS': 'strength_of_schedule',
        'W.1': 'conference_wins',
        'L.1': 'conference_losses',
        'W.2': 'home_wins',
        'L.2': 'home_losses',
        'W.3': 'away_wins',
        'L.3': 'away_losses',
        'Tm.': 'points_for',
        'Opp.': 'points_against',
        'MP': 'minutes_played',
        'FG': 'field_goals',
        'FGA': 'field_goals_attempted',
        'FG%': 'field_goal_percent',
        '3P': 'three_pointers',
        '3PA': 'three_pointers_attempted',
        '3P%': 'three_pointer_percentage',
        'FT': 'free_throws',
        'FTA': 'free_throws_attempted',
        'FT%': 'free_throw_percentage',
        'ORB': 'offensive_rebounds',
        'TRB': 'total_rebounds',
        'AST': 'assists',
        'STL': 'steals',
        'BLK': 'blocks',
        'TOV': 'turnovers',
        'PF': 'personal_fouls'
        }

    # rename() leaves any label missing from the mapping untouched, whereas
    # Index.map(dict) would silently turn it into NaN.
    df = df.drop(columns=spacer_columns).rename(columns=column_names)

    # Build the deletion table once instead of once per school.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    school_dict = {
        school: school.lower().translate(strip_punctuation).replace(' ', '-')
        for school in df['school'].dropna().unique()
    }

    return df, school_dict

In [8]:
# Scrape the 2025 season table; keep both the stats frame and the
# school-name -> slug lookup returned alongside it.
df, school_dict = get_season_schools(year=2025)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,school,games,wins,losses,win_loss_percent,simple_rating_system,strength_of_schedule,conference_wins,conference_losses,home_wins,home_losses,away_wins,away_losses,points_for,points_against,minutes_played,field_goals,field_goals_attempted,field_goal_percent,three_pointers,three_pointers_attempted,three_pointer_percentage,free_throws,free_throws_attempted,free_throw_percentage,offensive_rebounds,total_rebounds,assists,steals,blocks,turnovers,personal_fouls
0,Abilene Christian,21,9,12,0.429,-7.41,-0.58,1,5,6,4,2,8,1473,1486,845,533,1200,0.444,100,332,0.301,307,441,0.696,235,714,278,210,61,317,427
1,Air Force,21,3,18,0.143,-6.69,1.45,0,10,2,9,0,9,1330,1501,845,455,1043,0.436,169,497,0.340,251,400,0.628,157,625,280,123,64,279,363
2,Akron,20,15,5,0.750,3.76,-3.80,8,0,10,0,4,3,1680,1460,805,606,1328,0.456,230,636,0.362,238,314,0.758,259,823,367,158,74,253,367
3,Alabama,21,18,3,0.857,25.00,13.52,7,1,9,1,6,1,1895,1654,845,639,1348,0.474,213,641,0.332,404,574,0.704,298,933,358,134,105,271,387
4,Alabama A&M,20,7,13,0.350,-19.11,-7.52,3,4,7,6,0,7,1540,1614,820,531,1298,0.409,167,523,0.319,311,467,0.666,310,784,295,178,88,339,436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,Wright State,22,11,11,0.500,-2.53,-2.68,5,6,7,3,3,6,1694,1626,890,630,1301,0.484,183,485,0.377,251,352,0.713,206,783,327,129,65,251,376
360,Wyoming,21,11,10,0.524,0.02,2.92,4,6,7,3,3,5,1453,1488,845,519,1166,0.445,168,467,0.360,247,368,0.671,225,743,250,89,59,275,387
361,Xavier,22,13,9,0.591,14.18,7.41,5,6,10,2,2,6,1693,1544,890,569,1257,0.453,180,470,0.383,375,482,0.778,195,753,362,165,66,250,359
362,Yale,17,11,6,0.647,7.47,-2.91,4,0,7,0,2,5,1400,1180,680,504,1051,0.480,133,346,0.384,259,349,0.742,213,690,302,101,61,186,298


In [9]:
def players_per_game(year, school='abilene-christian'):
    """Scrape the ``players_per_game`` table from one school's season page.

    The table is saved as a dated ``players_per_gameYYYY_MM_DD.txt`` CSV
    snapshot in the working directory and read back with pandas so that
    column dtypes are inferred by ``read_csv``.

    Parameters
    ----------
    year : int or str
        Season identifier interpolated into the page URL (e.g. ``2025``).
    school : str, optional
        School slug interpolated into the page URL. Defaults to
        ``'abilene-christian'``, the school this function was originally
        hard-coded to.

    Returns
    -------
    pandas.DataFrame
        One row per table row, with columns labelled by the table header
        (minus the leading row-number column).

    Raises
    ------
    RuntimeError
        If the page request does not return HTTP 200.
    ValueError
        If the table header cannot be found in the returned page.
    """
    http = urllib3.PoolManager()
    url = f'https://www.sports-reference.com/cbb/schools/{school}/men/{year}.html'
    r = http.request('GET', url)
    # Fail loudly on an error page instead of parsing it as if it were data.
    if r.status != 200:
        raise RuntimeError(f'GET {url} returned HTTP status {r.status}')
    soup = BeautifulSoup(r.data, 'html.parser')

    header_cells = soup.select("table[id='players_per_game'] > thead > tr > th")
    if not header_cells:
        raise ValueError(f"table 'players_per_game' not found at {url}")
    # The first header label belongs to the row-number column, which body
    # rows render as <th> rather than <td>; drop it so the header lines up
    # with the <td>-only data rows.
    header = [cell.get_text() for cell in header_cells][1:]

    rows = []
    for tr in soup.select("table[id='players_per_game'] > tbody > tr"):
        cells = [td.get_text() for td in tr.select("td")]
        # Rows without any <td> hold no data.
        if cells:
            rows.append(cells)

    # Round-trip through a CSV snapshot so read_csv infers numeric dtypes.
    # to_csv quotes cells that contain commas, and the explicit encoding keeps
    # non-ASCII names readable on any platform.
    filename = 'players_per_game' + datetime.today().strftime('%Y_%m_%d') + ".txt"
    pd.DataFrame(rows, columns=header).to_csv(filename, index=False, encoding='utf-8')
    df = pd.read_csv(filename, encoding='utf-8')

    return df

In [10]:
# Per-game player stats for the 2025 season.
player_df = players_per_game(year=2025)

In [117]:
# Opponent view of the season table: same data, every column prefixed 'opp_'.
opp_df = df.rename(columns=lambda name: 'opp_' + name)

# Place one school's row next to one opponent's row. Both selections are
# re-indexed from 0 so the column-wise concat aligns them on the same row.
pd.concat(
    [
        df.loc[df['school'].eq('Abilene Christian')].reset_index(drop=True),
        opp_df.loc[opp_df['opp_school'].eq('Akron')].reset_index(drop=True),
    ],
    axis=1,
)

Unnamed: 0,school,games,wins,losses,win_loss_percent,simple_rating_system,strength_of_schedule,conference_wins,conference_losses,home_wins,home_losses,away_wins,away_losses,points_for,points_against,minutes_played,field_goals,field_goals_attempted,field_goal_percent,three_pointers,three_pointers_attempted,three_pointer_percentage,free_throws,free_throws_attempted,free_throw_percentage,offensive_rebounds,total_rebounds,assists,steals,blocks,turnovers,personal_fouls,opp_school,opp_games,opp_wins,opp_losses,opp_win_loss_percent,opp_simple_rating_system,opp_strength_of_schedule,opp_conference_wins,opp_conference_losses,opp_home_wins,opp_home_losses,opp_away_wins,opp_away_losses,opp_points_for,opp_points_against,opp_minutes_played,opp_field_goals,opp_field_goals_attempted,opp_field_goal_percent,opp_three_pointers,opp_three_pointers_attempted,opp_three_pointer_percentage,opp_free_throws,opp_free_throws_attempted,opp_free_throw_percentage,opp_offensive_rebounds,opp_total_rebounds,opp_assists,opp_steals,opp_blocks,opp_turnovers,opp_personal_fouls
0,Abilene Christian,21,9,12,0.429,-7.34,-0.51,1,5,6,4,2,8,1473,1486,845,533,1200,0.444,100,332,0.301,307,441,0.696,235,714,278,210,61,317,427,Akron,20,15,5,0.75,3.8,-3.76,8,0,10,0,4,3,1680,1460,805,606,1328,0.456,230,636,0.362,238,314,0.758,259,823,367,158,74,253,367
