In [41]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [58]:
# URL of the webpage you want to scrape
url = 'https://umsltritons.com/sports/mens-basketball/stats/2023-24'

# Fetch the content of the page
response = requests.get(url)
html_content = response.text

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the section with id 'individual-overall'
section = soup.find('section', {'id': 'individual-overall'})

# Define the DataFrame with column names if known
columns = [
    'Number', 'Name', 'GP', 'GS', 'MIN', 'Min Per Game', 'FGM', 'FGA',
    'FG%', '3PTM', '3PTA', '3PT%',
    'FTM', 'FTA', 'FT%', 'PTS', 'PPG', 'OREB',
    'DREB', 'REB', 'AVG REB', 'PF', 'A', 'TO', 'STL', 'BLK', 'View Bio'
]

# Initialize an empty DataFrame
df = pd.DataFrame(columns=columns)

# If the section is found, locate the table within it
if section:
    table = section.find('table', {'class': 'sidearm-table'})

    # If the table is found, extract the data
    if table:
        for row in table.find_all('tr'):
            columns_data = [col.get_text(strip=True) for col in row.find_all('td')]
            if columns_data:  # Append non-empty rows
                df = pd.concat([df, pd.DataFrame([columns_data], columns=columns)], ignore_index=True)

# Display the DataFrame
df


Unnamed: 0,Number,Name,GP,GS,MIN,Min Per Game,FGM,FGA,FG%,3PTM,...,OREB,DREB,REB,AVG REB,PF,A,TO,STL,BLK,View Bio
0,03,"Enright, Matt03Enright, Matt",29,29.0,1037,35.8,149,334,0.446,57,...,17,94,111,3.8,56,94,50,35,4,View Bio
1,15,"Quartlebaum, Mayson15Quartlebaum, Mayson",27,24.0,776,28.7,114,219,0.521,26,...,29,95,124,4.6,46,36,41,16,10,View Bio
2,12,"Prospere II, Emanuel12Prospere II, Emanuel",28,14.0,790,28.2,127,238,0.534,13,...,29,72,101,3.6,87,66,84,30,8,View Bio
3,35,"Glover II, Troy35Glover II, Troy",29,21.0,781,26.9,123,221,0.557,0,...,73,163,236,8.1,58,37,42,25,49,View Bio
4,20,"Wykle, Savon20Wykle, Savon",25,21.0,767,30.7,82,184,0.446,42,...,16,72,88,3.5,50,34,37,13,16,View Bio
5,10,"Harris, Janeir10Harris, Janeir",4,4.0,100,25.0,13,34,0.382,3,...,4,13,17,4.3,9,7,9,1,3,View Bio
6,21,"Kabala, Terrell21Kabala, Terrell",29,24.0,796,27.4,81,184,0.44,36,...,5,67,72,2.5,71,42,41,24,2,View Bio
7,05,"O'Neal II, Kris05O'Neal II, Kris",29,7.0,540,18.6,33,72,0.458,9,...,6,50,56,1.9,42,35,27,18,2,View Bio
8,04,"Bledsoe, Sam04Bledsoe, Sam",20,1.0,151,7.6,8,36,0.222,8,...,1,8,9,0.5,14,3,6,0,1,View Bio
9,13,"Rogers, Georden13Rogers, Georden",12,0.0,63,5.3,5,21,0.238,3,...,4,7,11,0.9,4,0,6,2,1,View Bio


In [60]:
# Clean the Player Stats tables for both teams
def process_stats(df):
    # Create an explicit copy of the DataFrame
    df = df.copy()

 
    # # Convert rest of relevant columns to the correct type
    df['FGM'] = df['FGM'].astype(float)
    df['3PTM'] = df['3PTM'].astype(float)
    df['FGA'] = df['FGA'].astype(float)
    df['PTS'] = df['PTS'].astype(float)
    df['FTA'] = df['FTA'].astype(float)
    df['A'] = df['A'].astype(float)
    df['TO'] = df['TO'].astype(float)
    df['REB'] = df['REB'].astype(float)
    df['STL'] = df['STL'].astype(float)
    df['BLK'] = df['BLK'].astype(float)
    df['PF'] = df['PF'].astype(float)
    df['MIN'] = df['MIN'].astype(float)

    # Calculate Advanced Statistics
    # eFG% (Effective Field Goal Percentage): Measures shooting efficiency, taking into account 3-pointers. Formula: (FGM + 0.5*3PM) / FGA
    df['eFG%'] = (df['FGM'] + (0.5 * df['3PTM'])) / df['FGA']
    df['eFG%'] = df['eFG%'].fillna(0.0).round(2)

    # TS% (True Shooting Percentage): Measures shooting efficiency, taking into account 3-pointers and free throws. Formula: PTS / (2*(FGA + 0.44*FTA))
    df['TS%'] = df['PTS'] / (2 * (df['FGA'] + (0.44 * df['FTA'])))
    df['TS%'] = df['TS%'].fillna(0.0).round(2)

    # Assist-to-Turnover Ratio: Measures the number of assists per turnover
    df['A/TO'] = np.where(df['TO'] == 0, 0, df['A'] / df['TO']) # Numpy handles div by 0 cases were 0 Turnovers(TO) are committed
    df['A/TO'] = round(df['A/TO'], 2)

    # Usage Rate: Measures how often a player is involved in team plays
    df['Usage Rate'] = (df['FGA'] + df['FTA'] + df['A'] + df['TO']) / (df['MIN'] / 40)  # 40 minutes per game
    df['Usage Rate'] = round(df['Usage Rate'], 2)

    # Box Plus/Minus: Measures a player's overall contribution
    df['Box +/-'] = np.where(df['MIN'] == 0, 0, ((df['PTS'] + df['REB'] + df['A'] + df['STL'] + df['BLK']) - (df['FGA'] + df['FTA'] + df['TO'] + df['PF'])) / df['MIN']) # Numpy handles div by 0 cases were 0 Minutes(MIN) are recorded
    df['Box +/-'] = round(df['Box +/-'], 2)

    # Place columns in a specific order
    df = df[
         ['Number', 'Name', 'GP', 'GS', 'MIN', 'Min Per Game', 'FGM', 'FGA',
    'FG%', '3PTM', '3PTA', '3PT%',
    'FTM', 'FTA', 'FT%', 'PTS', 'PPG', 'OREB',
    'DREB', 'REB', 'AVG REB', 'PF', 'A', 'TO', 'STL', 'BLK', 'eFG%', 'TS%', 'A/TO', 'Usage Rate', 'Box +/-'
    ]]

    return df

# Process the score and stats
seasonStats = process_stats(df)
seasonStats

Unnamed: 0,Number,Name,GP,GS,MIN,Min Per Game,FGM,FGA,FG%,3PTM,...,PF,A,TO,STL,BLK,eFG%,TS%,A/TO,Usage Rate,Box +/-
0,03,"Enright, Matt03Enright, Matt",29,29.0,1037.0,35.8,149.0,334.0,0.446,57.0,...,56.0,94.0,50.0,35.0,4.0,0.53,0.56,1.88,20.68,0.14
1,15,"Quartlebaum, Mayson15Quartlebaum, Mayson",27,24.0,776.0,28.7,114.0,219.0,0.521,26.0,...,46.0,36.0,41.0,16.0,10.0,0.58,0.61,0.88,19.79,0.14
2,12,"Prospere II, Emanuel12Prospere II, Emanuel",28,14.0,790.0,28.2,127.0,238.0,0.534,13.0,...,87.0,66.0,84.0,30.0,8.0,0.56,0.58,0.79,23.19,0.05
3,35,"Glover II, Troy35Glover II, Troy",29,21.0,781.0,26.9,123.0,221.0,0.557,0.0,...,58.0,37.0,42.0,25.0,49.0,0.56,0.58,0.88,19.31,0.31
4,20,"Wykle, Savon20Wykle, Savon",25,21.0,767.0,30.7,82.0,184.0,0.446,42.0,...,50.0,34.0,37.0,13.0,16.0,0.56,0.59,0.92,15.75,0.1
5,10,"Harris, Janeir10Harris, Janeir",4,4.0,100.0,25.0,13.0,34.0,0.382,3.0,...,9.0,7.0,9.0,1.0,3.0,0.43,0.48,0.78,23.2,0.04
6,21,"Kabala, Terrell21Kabala, Terrell",29,24.0,796.0,27.4,81.0,184.0,0.44,36.0,...,71.0,42.0,41.0,24.0,2.0,0.54,0.56,1.02,15.73,0.03
7,05,"O'Neal II, Kris05O'Neal II, Kris",29,7.0,540.0,18.6,33.0,72.0,0.458,9.0,...,42.0,35.0,27.0,18.0,2.0,0.52,0.56,1.3,11.85,0.07
8,04,"Bledsoe, Sam04Bledsoe, Sam",20,1.0,151.0,7.6,8.0,36.0,0.222,8.0,...,14.0,3.0,6.0,0.0,1.0,0.33,0.33,0.5,11.92,-0.13
9,13,"Rogers, Georden13Rogers, Georden",12,0.0,63.0,5.3,5.0,21.0,0.238,3.0,...,4.0,0.0,6.0,2.0,1.0,0.31,0.34,0.0,18.41,-0.06


In [None]:
# Optionally, save to CSV
df.to_csv('scraped_table.csv', index=False)
