In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" style globally so figures drawn later in the
# notebook pick it up.
sns.set(style="whitegrid")

# Download the UMSL men's basketball 2023-24 stats page and load the first
# table inside <section id="team"> into `df`, one DataFrame row per <tr>.
url = "https://umsltritons.com/sports/mens-basketball/stats/2023-24"

# A timeout keeps a stalled server from hanging the kernel indefinitely.
response = requests.get(url, timeout=30)

# Fail loudly at the point of failure. Merely printing here would leave `df`
# unassigned, so the cleaning code further down would die with a NameError on
# a fresh kernel -- or, worse, silently reuse a stale `df` from an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve the page. Status code: {response.status_code}")

# Parse the HTML content of the page
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the section with id 'team'
team_section = soup.find('section', id='team')
if team_section is None:
    raise RuntimeError("Section with id 'team' not found. Check the id attribute.")

# Locate the stats table within the 'team' section
team_stats_table = team_section.find('table')
if team_stats_table is None:
    raise RuntimeError("Table not found within the 'team' section.")

# One list of stripped cell texts per table row. Only the outer whitespace is
# stripped: newlines inside a cell are kept on purpose. Rows that contain no
# <td> cells yield empty lists, which become all-None rows in the DataFrame.
team_stats = team_stats_table.find_all('tr')
data = [
    [td.text.strip() for td in stat.find_all('td')]
    for stat in team_stats
]

# Create a Pandas DataFrame from the list of rows (columns are 0, 1, 2, ...)
df = pd.DataFrame(data)

# Clean the data: name the columns, drop placeholder/summary rows, and strip
# the long-form label prefixes so only the short stat code remains in 'Stat'.
new_column_names = {0: 'Stat', 1: 'UMSL', 2: 'Opponent'}
df.columns = [new_column_names[col] for col in df.columns]

# Rows whose 'Stat' text matches any of these (regex alternation) are removed.
exclude_values = ['\nTOT', 'Player_Total', 'Per Game\nAVG']

# Label prefixes removed from 'Stat', applied as regex patterns in list order.
unwanted_characters = ['Points Per Game\n', 'FG: Made-Attempted\n', 'FG: Percentage\n', 'FG: Per Game\n', '3PT: Made-Attempted\n', '3PT: Percentage\n', '3PT: Per Game\n', 'FT: Made-Attempted\n', 'FT: Percentage\n', 'FT: Per Game\n', 'Per Game\n', 'Assist/Turnover Ratio\n', 'Points Off Turnovers\n']

# Built as a single non-mutating chain. In particular the prefix removal goes
# through `.assign` rather than `df_clean['Stat'].replace(..., inplace=True)`:
# an in-place call on a column selection is chained assignment, which warns on
# recent pandas and does not update the parent frame under Copy-on-Write.
df_clean = (
    df[df != '--']       # '--' cells become NaN ...
    .dropna()            # ... and any row containing a NaN/None is dropped
    .loc[lambda frame: ~frame['Stat'].str.contains('|'.join(exclude_values))]
    .assign(Stat=lambda frame: frame['Stat'].replace(unwanted_characters, '', regex=True))
    .reset_index(drop=True)
)

df_clean

Unnamed: 0,Stat,UMSL,Opponent
0,PPG,67.6,72.6
1,FGM-FGA,451-969,465-1002
2,FG%,.465,.464
3,FGM/G,25.1,25.8
4,3PT-3PA,129-363,174-428
5,3PT%,.355,.407
6,3PT/G,7.2,9.7
7,FTM-FTA,185-265,202-258
8,FT%,.698,.783
9,FT/G,10.3,11.2


In [2]:
# Reshape wide -> long: one row per (Stat, Team) pair, with the former
# 'UMSL' / 'Opponent' column names moved into 'Team' and their cells into 'Value'.
melted_df = df_clean.melt(
    id_vars=['Stat'],
    var_name='Team',
    value_name='Value',
)
melted_df

Unnamed: 0,Stat,Team,Value
0,PPG,UMSL,67.6
1,FGM-FGA,UMSL,451-969
2,FG%,UMSL,.465
3,FGM/G,UMSL,25.1
4,3PT-3PA,UMSL,129-363
5,3PT%,UMSL,.355
6,3PT/G,UMSL,7.2
7,FTM-FTA,UMSL,185-265
8,FT%,UMSL,.698
9,FT/G,UMSL,10.3


In [None]:
# Keep only the points-per-game rows and convert their values from text to float.
ppg_df = (
    melted_df
    .loc[melted_df['Stat'] == 'PPG']
    .assign(Value=lambda frame: frame['Value'].astype(float))
)

# Horizontal bar chart: one bar per team, bar length = PPG.
fig, ax = plt.subplots(figsize=(8, 6))
ax.barh(ppg_df['Team'], ppg_df['Value'], color=['red', 'gray'])
ax.set_xlabel('Points Per Game (PPG)')
ax.set_ylabel('Team')
ax.set_title('UMSL PPG vs Opponent PPG')

plt.show()