In [1]:
import pandas as pd
import pickle
import requests
import time

from bs4 import BeautifulSoup
from datetime import date
from urllib.error import HTTPError

In [32]:
pd.set_option('display.max_columns', None)

In [12]:
def player_per100_scraper_old(seasons, source_dir, output_dir, teams=None):
    '''
    Legacy scraper: pull each team's per-100-possession player table out of
    the markup that follows the ``div.placeholder`` elements on its
    sports-reference season page.

    For every season, each team page is fetched with ``requests``; the table
    with id ``per_poss`` is parsed, tagged with Team/Season columns, and the
    combined season frame is pickled to
    ``{source_dir}/player_per100_{season}_data.pkl``. Teams whose page
    cannot be fetched or has no such table are reported and skipped.

    Inputs:
        seasons = iterable of season years (e.g. [2021, 2022])
        source_dir = directory the per-season pickle files are written to
        output_dir = accepted for interface compatibility; not used here
        teams = optional iterable of school slugs as used in the page URL;
            defaults to the short hard-coded list this notebook tests with

    Output: None (results are only written to disk)
    '''
    # Local import: pd.read_html expects literal HTML wrapped in a file-like object
    from io import StringIO

    if teams is None:
        teams = ['abilene-christian', 'air-force', 'akron', 'alabama-am', 'uab']

    # Columns kept in the saved frame (everything else is filtered out)
    cols = ['Player', 'G', 'GS', 'MP',
            'FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT',
            'FTA', 'FT%', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
            'ORtg', 'DRtg', 'Team', 'Season']

    for season in seasons:
        team_frames = []
        season_filename = f"player_per100_{season}_data.pkl"

        for team in teams:
            # Print for progress update
            print('per100_scraper, team: {}, season: {}'.format(team, season))

            # URL for data pull
            url = 'https://www.sports-reference.com/cbb/schools/{}/{}.html#per_poss'.format(team, season)

            # Reset per team so a page without the table can never reuse the
            # previous team's frame
            table = None
            try:
                response = requests.get(url, timeout=30)
                # requests does not raise on 4xx/5xx by itself
                response.raise_for_status()

                soup = BeautifulSoup(response.text, 'html.parser')

                for placeholder in soup.find_all('div', {'class': 'placeholder'}):
                    # Combine everything after the placeholder into one string;
                    # str() keeps this working if a sibling is a Tag
                    comment = ''.join(str(sibling) for sibling in placeholder.next_siblings)

                    # Parse the combined markup back into a soup object
                    soup_comment = BeautifulSoup(comment, 'html.parser')

                    # Extract the table by its 'id' attribute (last match wins)
                    for tag in soup_comment.find_all('table', attrs={"id": "per_poss"}):
                        table = pd.read_html(StringIO(tag.prettify()))[0]
                        table['Team'] = team
                        table['Season'] = season

            except (HTTPError, requests.exceptions.RequestException, ValueError) as error:
                print(error)
                print(url)
                print(f"skip {season} {team}")
                continue

            if table is None:
                print('no per_poss table found')
                print(url)
                print(f"skip {season} {team}")
                continue

            team_frames.append(table)

        if team_frames:
            # Single concat instead of repeated DataFrame.append (removed in pandas 2.0)
            player_per100_df = pd.concat(team_frames, ignore_index=True)[cols]

            print(f"Saving {season_filename}")
            player_per100_df.to_pickle(f'{source_dir}/{season_filename}')
        else:
            # Nothing scraped: do not write (or overwrite) a pickle for this season
            print(f"No data scraped for {season}; nothing saved")

        # Pause between seasons (kept from the original; presumably to go easy on the site)
        time.sleep(30)


In [38]:
def player_per100_scraper(seasons, source_dir, output_dir, teams=None):
    '''
    Scrape per-100-possession player stats from sports-reference team pages.

    For each season, every team page is read with ``pd.read_html``; the
    per-100-possession table is tagged with Team/Season columns and the
    combined season frame is pickled to
    ``{source_dir}/player_per100_{season}_data.pkl``. Teams whose page
    cannot be read or parsed are reported and skipped.

    Inputs:
        seasons = iterable of season years (e.g. [2021, 2022])
        source_dir = directory the per-season pickle files are written to
        output_dir = accepted for interface compatibility; not used here
        teams = optional iterable of school slugs as used in the page URL;
            defaults to the short hard-coded list this notebook tests with

    Output: DataFrame for the last season processed (an empty frame with
        the expected columns when nothing was scraped or `seasons` is empty)
    '''
    if teams is None:
        teams = ['abilene-christian', 'air-force', 'akron', 'alabama-am', 'uab']

    # Columns kept in the saved frame (everything else is filtered out)
    cols = ['Player', 'G', 'GS', 'MP',
            'FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT',
            'FTA', 'FT%', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
            'ORtg', 'DRtg', 'Team', 'Season']

    # Position of the per-100-possession table in the list pd.read_html
    # returns for a team page (found by inspection in this notebook)
    per_poss_index = 11

    # Defined up front so the final return also works when `seasons` is empty
    player_per100_df = pd.DataFrame(columns=cols)

    for season in seasons:
        team_frames = []
        season_filename = f"player_per100_{season}_data.pkl"

        for team in teams:
            # Print for progress update
            print('per100_scraper, team: {}, season: {}'.format(team, season))

            # URL for data pull
            url = 'https://www.sports-reference.com/cbb/schools/{}/{}.html#per_poss'.format(team, season)

            try:
                df = pd.read_html(url)[per_poss_index]
            except (HTTPError, ValueError, IndexError) as error:
                # IndexError: the page parsed but has fewer tables than expected
                print(error)
                print(url)
                print(f"skip {season} {team}")
                continue

            # Drop unneeded columns; tolerate their absence because the final
            # `cols` selection is what defines the saved schema
            df = df.drop(columns=['Rk', 'Unnamed: 24'], errors='ignore')

            # Add Team and Season columns
            df['Team'] = team
            df['Season'] = season

            team_frames.append(df)

        if team_frames:
            # Single concat instead of repeated DataFrame.append (removed in pandas 2.0)
            player_per100_df = pd.concat(team_frames, ignore_index=True)[cols]

            print(f"Saving {season_filename}")
            player_per100_df.to_pickle(f'{source_dir}/{season_filename}')
        else:
            # Nothing scraped: do not write (or overwrite) a pickle for this season
            player_per100_df = pd.DataFrame(columns=cols)
            print(f"No data scraped for {season}; nothing saved")

        # Pause between seasons (kept from the original; presumably to go easy on the site)
        time.sleep(30)

    return player_per100_df


In [40]:
pp_df = player_per100_scraper(seasons=[2022], source_dir="0_scraped_data", output_dir="0_scraped_data")

per100_scraper, team: abilene-christian, season: 2022
per100_scraper, team: air-force, season: 2022
per100_scraper, team: akron, season: 2022
per100_scraper, team: alabama-am, season: 2022
per100_scraper, team: uab, season: 2022
HTTP Error 404: Not Found
https://www.sports-reference.com/cbb/schools/uab/2022.html#per_poss
skip 2022 uab
Saving player_per100_2022_data.pkl


In [41]:
pp_df

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
0,Airion Simmons,31,31,653,11.5,23.8,0.484,8.7,15.5,0.56,2.8,8.3,0.34,5.5,8.4,0.653,14.9,4.9,4.2,1.5,5.0,8.1,31.3,108.0,85.1,abilene-christian,2022
1,Yuot Gai,29,0,178,11.0,21.6,0.507,10.3,19.1,0.541,0.6,2.5,0.25,4.4,6.3,0.7,12.8,0.9,0.3,1.3,6.3,7.8,26.9,93.9,96.1,abilene-christian,2022
2,Mahki Morris,33,33,805,8.4,18.5,0.453,6.0,10.4,0.573,2.4,8.1,0.299,6.0,7.1,0.843,6.6,3.5,3.3,0.1,2.6,6.3,25.1,116.0,92.9,abilene-christian,2022
3,Coryon Mason,32,29,850,7.7,17.6,0.437,5.4,11.5,0.472,2.2,6.0,0.37,6.7,8.4,0.797,4.6,3.3,2.2,0.3,2.7,5.8,24.3,113.2,96.7,abilene-christian,2022
4,Immanuel Allen,33,2,449,7.7,17.1,0.449,5.1,10.5,0.482,2.6,6.6,0.396,6.2,8.3,0.746,7.9,3.2,2.4,0.6,3.5,9.2,24.2,112.4,94.7,abilene-christian,2022
5,Logan McLaughlin,13,0,121,7.8,18.0,0.436,3.7,6.0,0.615,4.1,12.0,0.346,2.8,3.2,0.857,8.3,3.2,3.2,0.0,0.9,6.0,22.6,123.0,91.1,abilene-christian,2022
6,Cameron Steele,33,19,742,8.1,17.6,0.46,5.9,10.4,0.572,2.2,7.3,0.299,2.7,3.7,0.735,9.4,2.9,1.7,1.2,2.2,4.2,21.1,111.4,94.4,abilene-christian,2022
7,Furaha Cadeaux de Dieu,21,0,125,7.6,18.3,0.415,7.6,17.8,0.425,0.0,0.4,0.0,5.8,10.3,0.565,14.7,0.9,2.7,0.0,4.0,10.7,20.9,94.9,92.6,abilene-christian,2022
8,Stevie Smith,16,0,111,7.5,18.1,0.417,6.5,13.6,0.481,1.0,4.5,0.222,4.5,8.0,0.563,8.0,2.5,1.0,1.0,5.5,7.5,20.6,85.4,96.9,abilene-christian,2022
9,Tobias Cameron,33,14,632,6.7,14.0,0.478,5.1,9.0,0.569,1.6,5.0,0.316,5.3,6.8,0.779,10.1,4.9,2.5,0.4,3.5,6.3,20.3,115.8,93.6,abilene-christian,2022


In [42]:
# Load the 2021 per-100 player pickle from 0_scraped_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
pp_2021_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/0_scraped_data/player_per100_2021_data.pkl')

In [44]:
pp_2021_df.head(50)

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
0,Kolton Kohl,27,27,511,13.8,25.5,0.542,13.4,23.9,0.561,0.4,1.6,0.267,7.7,10.8,0.71,13.9,4.3,1.2,2.9,4.0,4.2,35.7,117.1,84.8,abilene-christian,2021
1,Clay Gayman,24,1,386,9.1,21.3,0.43,5.0,9.1,0.547,4.1,12.1,0.341,6.9,8.0,0.857,12.0,3.9,2.7,1.7,5.3,4.9,29.3,108.1,81.9,abilene-christian,2021
2,Cameron Steele,22,0,134,9.1,18.9,0.478,5.8,10.3,0.56,3.3,8.6,0.381,5.4,8.6,0.619,11.9,4.1,0.4,0.8,2.9,8.2,26.8,116.0,87.2,abilene-christian,2021
3,Jameson Richardson,18,0,79,11.2,23.0,0.485,9.1,16.8,0.542,2.1,6.3,0.333,1.4,3.5,0.4,12.6,1.4,0.0,1.4,6.3,14.0,25.8,88.6,88.4,abilene-christian,2021
4,Joe Pleasant,26,24,605,9.7,20.4,0.473,7.9,16.8,0.473,1.7,3.6,0.475,4.6,7.8,0.588,12.9,2.9,1.6,0.6,3.3,3.9,25.6,109.3,87.0,abilene-christian,2021
5,Airion Simmons,27,2,407,8.8,17.6,0.5,6.6,12.1,0.551,2.2,5.6,0.39,3.8,5.6,0.683,15.0,4.1,4.2,1.1,4.5,8.4,23.6,107.0,76.7,abilene-christian,2021
6,Coryon Mason,23,20,592,8.1,17.2,0.47,5.7,11.0,0.517,2.4,6.2,0.388,4.3,6.5,0.657,4.8,5.3,2.2,0.5,2.9,4.6,22.9,113.4,88.6,abilene-christian,2021
7,Immanuel Allen,14,0,197,8.4,16.2,0.517,7.0,12.6,0.556,1.4,3.6,0.385,3.6,5.3,0.684,11.5,3.4,1.4,0.6,3.6,7.8,21.8,110.5,86.8,abilene-christian,2021
8,Mahki Morris,27,7,482,6.6,15.2,0.436,3.7,7.0,0.525,3.0,8.2,0.361,3.5,5.3,0.674,5.8,4.0,2.5,0.3,3.8,6.5,19.8,103.6,87.0,abilene-christian,2021
9,Logan McLaughlin,25,0,242,6.6,19.2,0.345,1.4,4.1,0.333,5.2,15.0,0.348,1.1,2.1,0.556,8.7,3.4,1.8,0.2,5.0,4.6,19.6,88.8,87.3,abilene-christian,2021


In [16]:
# Build the URL of a single team/season page to inspect what pandas can parse from it
url = 'https://www.sports-reference.com/cbb/schools/{team}/{season}.html#per_poss'.format(team='abilene-christian', season=2022)

# NOTE: pd.read_html returns a list of DataFrames (one per parsed table),
# so `df` here is a list that gets indexed by position, not a single frame
df = pd.read_html(url)

In [18]:
len(df)

15

In [33]:
## this table
df[11]

Unnamed: 0,Rk,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,Unnamed: 24,ORtg,DRtg
0,1,Airion Simmons,31,31,653,11.5,23.8,0.484,8.7,15.5,0.56,2.8,8.3,0.34,5.5,8.4,0.653,14.9,4.9,4.2,1.5,5.0,8.1,31.3,,108.0,85.1
1,2,Yuot Gai,29,0,178,11.0,21.6,0.507,10.3,19.1,0.541,0.6,2.5,0.25,4.4,6.3,0.7,12.8,0.9,0.3,1.3,6.3,7.8,26.9,,93.9,96.1
2,3,Mahki Morris,33,33,805,8.4,18.5,0.453,6.0,10.4,0.573,2.4,8.1,0.299,6.0,7.1,0.843,6.6,3.5,3.3,0.1,2.6,6.3,25.1,,116.0,92.9
3,4,Coryon Mason,32,29,850,7.7,17.6,0.437,5.4,11.5,0.472,2.2,6.0,0.37,6.7,8.4,0.797,4.6,3.3,2.2,0.3,2.7,5.8,24.3,,113.2,96.7
4,5,Immanuel Allen,33,2,449,7.7,17.1,0.449,5.1,10.5,0.482,2.6,6.6,0.396,6.2,8.3,0.746,7.9,3.2,2.4,0.6,3.5,9.2,24.2,,112.4,94.7
5,6,Logan McLaughlin,13,0,121,7.8,18.0,0.436,3.7,6.0,0.615,4.1,12.0,0.346,2.8,3.2,0.857,8.3,3.2,3.2,0.0,0.9,6.0,22.6,,123.0,91.1
6,7,Cameron Steele,33,19,742,8.1,17.6,0.46,5.9,10.4,0.572,2.2,7.3,0.299,2.7,3.7,0.735,9.4,2.9,1.7,1.2,2.2,4.2,21.1,,111.4,94.4
7,8,Furaha Cadeaux de Dieu,21,0,125,7.6,18.3,0.415,7.6,17.8,0.425,0.0,0.4,0.0,5.8,10.3,0.565,14.7,0.9,2.7,0.0,4.0,10.7,20.9,,94.9,92.6
8,9,Stevie Smith,16,0,111,7.5,18.1,0.417,6.5,13.6,0.481,1.0,4.5,0.222,4.5,8.0,0.563,8.0,2.5,1.0,1.0,5.5,7.5,20.6,,85.4,96.9
9,10,Tobias Cameron,33,14,632,6.7,14.0,0.478,5.1,9.0,0.569,1.6,5.0,0.316,5.3,6.8,0.779,10.1,4.9,2.5,0.4,3.5,6.3,20.3,,115.8,93.6


In [34]:
df[11].columns

Index(['Rk', 'Player', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '2P', '2PA', '2P%',
       '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'Unnamed: 24', 'ORtg', 'DRtg'],
      dtype='object')

In [35]:
df[11].drop(['Rk', 'Unnamed: 24'], axis=1)

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg
0,Airion Simmons,31,31,653,11.5,23.8,0.484,8.7,15.5,0.56,2.8,8.3,0.34,5.5,8.4,0.653,14.9,4.9,4.2,1.5,5.0,8.1,31.3,108.0,85.1
1,Yuot Gai,29,0,178,11.0,21.6,0.507,10.3,19.1,0.541,0.6,2.5,0.25,4.4,6.3,0.7,12.8,0.9,0.3,1.3,6.3,7.8,26.9,93.9,96.1
2,Mahki Morris,33,33,805,8.4,18.5,0.453,6.0,10.4,0.573,2.4,8.1,0.299,6.0,7.1,0.843,6.6,3.5,3.3,0.1,2.6,6.3,25.1,116.0,92.9
3,Coryon Mason,32,29,850,7.7,17.6,0.437,5.4,11.5,0.472,2.2,6.0,0.37,6.7,8.4,0.797,4.6,3.3,2.2,0.3,2.7,5.8,24.3,113.2,96.7
4,Immanuel Allen,33,2,449,7.7,17.1,0.449,5.1,10.5,0.482,2.6,6.6,0.396,6.2,8.3,0.746,7.9,3.2,2.4,0.6,3.5,9.2,24.2,112.4,94.7
5,Logan McLaughlin,13,0,121,7.8,18.0,0.436,3.7,6.0,0.615,4.1,12.0,0.346,2.8,3.2,0.857,8.3,3.2,3.2,0.0,0.9,6.0,22.6,123.0,91.1
6,Cameron Steele,33,19,742,8.1,17.6,0.46,5.9,10.4,0.572,2.2,7.3,0.299,2.7,3.7,0.735,9.4,2.9,1.7,1.2,2.2,4.2,21.1,111.4,94.4
7,Furaha Cadeaux de Dieu,21,0,125,7.6,18.3,0.415,7.6,17.8,0.425,0.0,0.4,0.0,5.8,10.3,0.565,14.7,0.9,2.7,0.0,4.0,10.7,20.9,94.9,92.6
8,Stevie Smith,16,0,111,7.5,18.1,0.417,6.5,13.6,0.481,1.0,4.5,0.222,4.5,8.0,0.563,8.0,2.5,1.0,1.0,5.5,7.5,20.6,85.4,96.9
9,Tobias Cameron,33,14,632,6.7,14.0,0.478,5.1,9.0,0.569,1.6,5.0,0.316,5.3,6.8,0.779,10.1,4.9,2.5,0.4,3.5,6.3,20.3,115.8,93.6


---
#### Bracket import test

In [45]:
from bracket import bracket

In [46]:
bracket

['team1', 'team2']

In [80]:
with open('bracket-2021.txt', 'r') as f:
    b = f.read()

In [81]:
b

"# conference\n'team1'\n'team2'"

In [82]:
b.split(',')

["# conference\n'team1'\n'team2'"]

In [85]:
for t in b.split('\n'):
    print(t.strip().strip("'"))

# conference
team1
team2


In [92]:
# Split the raw bracket text into lines, strip whitespace and single quotes,
# and keep only lines that do not contain '#' (str.find returns -1 when absent)
[t.strip().strip("'") for t in b.split('\n') if t.strip().strip("'").find('#') <0]

['team1', 'team2']

In [89]:
'# conference'.find('#')

0

In [90]:
'# conference'.find('P')

-1

In [100]:
def read_bracket(season):
    '''
    Read the team list for a season from ``bracket-{season}.txt`` in the
    current working directory.

    The file is expected to hold one team per line, optionally wrapped in
    single quotes. Blank lines and any line containing ``#`` (used for
    headings such as ``# conference``) are skipped.

    Inputs:
        season = season year used to build the file name

    Output: list of team names, in file order

    Raises: FileNotFoundError if the bracket file does not exist (a short
        message is printed before the error is re-raised)
    '''
    try:
        with open(f'bracket-{season}.txt', 'r') as f:
            raw_lines = f.read().splitlines()
    except FileNotFoundError:
        print(f'bracket-{season}.txt does not exist' )
        raise

    teams = []
    for line in raw_lines:
        name = line.strip().strip("'")
        # Skip blank lines (e.g. from a trailing newline) and '#' heading lines
        if name and '#' not in name:
            teams.append(name)

    return teams

In [101]:
read_bracket(season=2021)

['team1', 'team2', 'team3']

In [99]:
read_bracket(season=2022)

bracket-2022.txt does not exist


FileNotFoundError: [Errno 2] No such file or directory: 'bracket-2022.txt'

---
## Why is alabama-birmingham still missing team experience features?

### 3_Model_Data

- gamelog_exp_clust looks ok
- season2022_final_stats is missing team composition features

In [143]:
# Load the 2022 final-stats pickle from 3_model_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
final_stats_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/3_model_data/season2022_final_stats.pkl')

In [144]:
final_stats_df[final_stats_df['Tm'] == 'alabama-birmingham']

Unnamed: 0,3Pp,ASTpg,BLKpg,Date,FGp,FTp,GameType,Home,ORBpg,Opp,PFpg,RBpg,STLpg,TOpg,Tm,W,Wp,Ws,pApg,ppg,sos,ID,exp_factor,C0,C1,C2,F0,F1,F2,G0,G1,G2,G3
349,0.4632,13.0,4.2,2022-03-12,0.4744,0.72,season2022,N,9.6,louisiana-tech,19.0,38.0,7.2,14.2,alabama-birmingham,1.0,0.787879,26.0,74.4,83.8,-0.71,"alabama-birmingham,2022",,,,,,,,,,,


In [145]:
# Load the gamelog_exp_clust pickle from 3_model_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
gamelog_exp_clust_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/3_model_data/gamelog_exp_clust.pkl')

In [146]:
gamelog_exp_clust_df[gamelog_exp_clust_df['Tm'] == 'alabama-birmingham'].head()

Unnamed: 0,Date,Home,W,sos,GameType,Wp,ppg,pApg,FGp,3Pp,FTp,ORBpg,RBpg,ASTpg,STLpg,BLKpg,TOpg,PFpg,Tm,exp_factor,C0,C1,C2,F0,F1,F2,G0,G1,G2,G3,OPHome,OPsos,OPWp,OPppg,OPpApg,OPFGp,OP3Pp,OPFTp,OPORBpg,OPRBpg,OPASTpg,OPSTLpg,OPBLKpg,OPTOpg,OPPFpg,OPTm,OPexp_factor,OPC0,OPC1,OPC2,OPF0,OPF1,OPF2,OPG0,OPG1,OPG2,OPG3
102,2013-11-24,N,0,-0.77,season2014,0.8,83.4,74.4,0.4554,0.3156,0.75,16.8,47.4,14.6,3.2,3.8,13.6,18.4,alabama-birmingham,3.208891,0.0,0.122212,0.0,0.267686,0.061663,0.135915,0.011791,0.174474,0.226259,0.0,N,6.88,0.4,73.0,76.4,0.4196,0.3126,0.7322,10.8,34.4,12.4,2.8,5.8,10.0,19.6,temple,2.521739,0.0,0.0,0.0,0.137549,0.201581,0.003478,0.363478,0.189881,0.104032,0.0
103,2013-11-27,H,1,-0.77,season2014,0.666667,81.4,80.6,0.4422,0.3246,0.77,15.4,44.6,12.8,2.4,3.8,14.2,18.0,alabama-birmingham,3.208891,0.0,0.122212,0.0,0.267686,0.061663,0.135915,0.011791,0.174474,0.226259,0.0,A,-7.58,0.2,67.6,74.8,0.4096,0.3236,0.6542,11.0,30.8,8.4,8.8,4.0,14.8,23.0,florida-am,3.156124,0.0,0.034884,0.0,0.114109,0.133488,0.035194,0.389767,0.051318,0.24124,0.0
104,2013-12-01,H,1,-0.77,season2014,0.714286,82.6,80.0,0.4474,0.3532,0.7532,14.6,43.0,13.6,3.0,4.6,15.4,16.8,alabama-birmingham,3.208891,0.0,0.122212,0.0,0.267686,0.061663,0.135915,0.011791,0.174474,0.226259,0.0,A,8.42,0.8,80.2,70.8,0.496,0.3942,0.6434,11.8,38.2,16.4,8.0,3.6,13.0,21.2,north-carolina,2.214567,0.0,0.0,0.0,0.468983,0.127281,0.001898,0.27558,0.107284,0.018975,0.0
105,2013-12-07,A,1,-0.77,season2014,0.75,79.0,78.0,0.4086,0.3262,0.7222,15.2,42.4,11.4,3.2,4.0,13.8,16.6,alabama-birmingham,3.208891,0.0,0.122212,0.0,0.267686,0.061663,0.135915,0.011791,0.174474,0.226259,0.0,H,-0.3,0.25,66.0,71.0,0.4642,0.2732,0.6308,9.6,31.0,9.4,4.8,3.4,13.6,19.8,northeastern,2.398764,0.0,0.0,0.0,0.276911,0.009575,0.008185,0.339151,0.109961,0.256216,0.0
106,2013-12-15,H,1,-0.77,season2014,0.777778,75.0,72.4,0.4286,0.3472,0.6722,13.2,38.2,11.0,3.4,4.4,12.8,14.6,alabama-birmingham,3.208891,0.0,0.122212,0.0,0.267686,0.061663,0.135915,0.011791,0.174474,0.226259,0.0,A,-8.6,0.5,73.6,67.0,0.4368,0.3488,0.6978,10.6,32.8,10.2,7.6,0.8,12.2,13.8,georgia-southern,3.223822,0.0,0.0,0.0,0.122309,0.270797,0.0,0.577225,0.0,0.029668,0.0


In [115]:
gamelog_exp_clust_df[gamelog_exp_clust_df['Tm'] == 'houston'].head()

Unnamed: 0,Date,Home,W,sos,GameType,Wp,ppg,pApg,FGp,3Pp,FTp,ORBpg,RBpg,ASTpg,STLpg,BLKpg,TOpg,PFpg,Tm,exp_factor,C0,C1,C2,F0,F1,F2,G0,G1,G2,G3,OPHome,OPsos,OPWp,OPppg,OPpApg,OPFGp,OP3Pp,OPFTp,OPORBpg,OPRBpg,OPASTpg,OPSTLpg,OPBLKpg,OPTOpg,OPPFpg,OPTm,OPexp_factor,OPC0,OPC1,OPC2,OPF0,OPF1,OPF2,OPG0,OPG1,OPG2,OPG3
2725,2013-11-25,N,0,4.62,season2014,1.0,77.6,65.0,0.4976,0.3494,0.6632,11.0,36.0,14.6,5.8,7.8,12.6,17.2,houston,2.646943,0.0,0.0,0.003774,0.294038,0.047849,0.0,0.321509,0.266868,0.065509,0.000453,N,9.0,0.8,81.8,73.2,0.4738,0.419,0.7028,11.0,36.0,13.2,5.6,5.6,12.0,18.8,stanford,3.325353,0.0,0.098118,0.034736,0.336701,0.000969,0.053972,0.341683,0.021312,0.11251,0.0
2726,2013-11-26,N,0,4.62,season2014,0.833333,77.6,68.2,0.4762,0.3268,0.6632,12.8,36.6,14.2,6.2,7.6,12.0,17.6,houston,2.646943,0.0,0.0,0.003774,0.294038,0.047849,0.0,0.321509,0.266868,0.065509,0.000453,N,8.61,0.666667,70.6,69.0,0.4842,0.3926,0.6792,9.2,28.2,11.4,5.8,5.4,10.6,17.6,texas-tech,2.821379,0.0,0.0,0.0,0.386028,0.066283,0.044033,0.112027,0.258752,0.132877,0.0
2727,2013-11-30,H,1,4.62,season2014,0.714286,75.0,70.4,0.443,0.3414,0.6934,12.2,33.8,13.4,5.8,8.2,10.6,17.6,houston,2.646943,0.0,0.0,0.003774,0.294038,0.047849,0.0,0.321509,0.266868,0.065509,0.000453,A,-6.69,0.375,66.6,68.2,0.449,0.3904,0.6966,8.0,26.6,12.2,6.2,3.0,9.8,22.8,texas-am-corpus-christi,2.772003,0.0,0.0,0.0,0.237564,0.028221,0.0,0.144133,0.317443,0.27264,0.0
2728,2013-12-04,A,0,4.62,season2014,0.75,74.6,71.4,0.4296,0.3118,0.7444,12.2,33.0,13.0,7.2,8.4,10.0,18.0,houston,2.646943,0.0,0.0,0.003774,0.294038,0.047849,0.0,0.321509,0.266868,0.065509,0.000453,H,3.62,0.75,72.2,62.0,0.5014,0.3158,0.6878,8.4,32.8,11.4,6.6,6.0,14.6,18.2,texas-am,2.409242,0.0,0.0,0.031047,0.125343,0.158123,0.05444,0.150325,0.167509,0.313213,0.0
2729,2013-12-07,H,0,4.62,season2014,0.666667,70.0,73.0,0.3974,0.2892,0.7514,12.4,31.8,13.8,7.0,6.6,9.8,17.4,houston,2.646943,0.0,0.0,0.003774,0.294038,0.047849,0.0,0.321509,0.266868,0.065509,0.000453,A,3.05,0.25,70.8,74.6,0.3686,0.3644,0.7374,10.6,35.0,10.8,5.4,3.0,9.6,21.6,san-jose-state,2.125378,0.0,0.0,0.043173,0.132388,0.058308,0.142743,0.290266,0.333121,0.0,0.0


---
### 0_scraped_data

- everything looks ok here

In [116]:
# Load the 2022 per-100 player pickle from 0_scraped_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
pp100_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/0_scraped_data/player_per100_2022_data.pkl')

In [117]:
pp100_df.head()

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
0,Airion Simmons,31,31,653,11.5,23.8,0.484,8.7,15.5,0.56,2.8,8.3,0.34,5.5,8.4,0.653,14.9,4.9,4.2,1.5,5.0,8.1,31.3,108.0,85.1,abilene-christian,2022
1,Yuot Gai,29,0,178,11.0,21.6,0.507,10.3,19.1,0.541,0.6,2.5,0.25,4.4,6.3,0.7,12.8,0.9,0.3,1.3,6.3,7.8,26.9,93.9,96.1,abilene-christian,2022
2,Mahki Morris,33,33,805,8.4,18.5,0.453,6.0,10.4,0.573,2.4,8.1,0.299,6.0,7.1,0.843,6.6,3.5,3.3,0.1,2.6,6.3,25.1,116.0,92.9,abilene-christian,2022
3,Coryon Mason,32,29,850,7.7,17.6,0.437,5.4,11.5,0.472,2.2,6.0,0.37,6.7,8.4,0.797,4.6,3.3,2.2,0.3,2.7,5.8,24.3,113.2,96.7,abilene-christian,2022
4,Immanuel Allen,33,2,449,7.7,17.1,0.449,5.1,10.5,0.482,2.6,6.6,0.396,6.2,8.3,0.746,7.9,3.2,2.4,0.6,3.5,9.2,24.2,112.4,94.7,abilene-christian,2022


In [118]:
pp100_df[pp100_df['Team'] == 'alabama-birmingham']

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
56,Jon Coleman,4,0,10,22.7,34.0,0.667,11.3,17.0,0.667,11.3,17.0,0.667,0.0,5.7,0.0,0.0,5.7,0.0,0.0,0.0,0.0,56.7,156.8,107.5,alabama-birmingham,2022
57,Elijah Tate,10,1,27,14.7,31.5,0.467,4.2,8.4,0.5,10.5,23.1,0.455,2.1,2.1,1.0,16.8,6.3,2.1,2.1,0.0,8.4,42.0,158.1,96.6,alabama-birmingham,2022
58,Jordan Walker,33,33,1044,11.7,29.0,0.403,5.4,13.6,0.398,6.2,15.4,0.406,7.0,7.9,0.883,5.1,8.7,2.7,0.1,6.3,3.4,36.5,111.5,97.2,alabama-birmingham,2022
59,Quan Jackson,34,33,980,9.1,18.7,0.485,8.3,15.6,0.535,0.8,3.2,0.236,4.7,6.7,0.698,11.7,3.9,4.6,0.9,3.6,3.3,23.6,109.5,88.5,alabama-birmingham,2022
60,Michael Ertel,33,11,832,8.4,19.6,0.431,4.2,8.7,0.477,4.3,10.9,0.394,2.0,2.7,0.769,2.9,3.4,3.1,0.0,1.6,3.8,23.2,120.4,98.3,alabama-birmingham,2022
61,Jamal Johnson,27,3,299,7.2,18.2,0.396,1.5,3.0,0.5,5.7,15.2,0.375,2.3,3.2,0.706,5.3,1.7,1.7,1.1,2.7,4.4,22.4,112.9,98.7,alabama-birmingham,2022
62,KJ Buffen,34,32,911,8.0,15.5,0.518,7.8,14.7,0.53,0.2,0.8,0.308,4.9,6.4,0.767,14.1,2.0,2.1,1.4,3.4,6.2,21.2,115.5,93.9,alabama-birmingham,2022
63,Tyreke Locure,8,1,87,7.8,19.5,0.4,4.6,8.5,0.538,3.3,11.1,0.294,2.0,3.9,0.5,6.5,5.9,5.2,0.0,5.2,3.9,20.9,94.6,90.9,alabama-birmingham,2022
64,Tavin Lovan,32,19,713,8.6,17.7,0.486,8.5,17.1,0.498,0.1,0.6,0.143,3.3,5.5,0.594,8.7,3.3,2.9,0.6,1.7,3.4,20.5,113.4,95.5,alabama-birmingham,2022
65,Justin Brown,30,1,275,6.2,15.7,0.395,1.4,2.9,0.5,4.7,12.8,0.371,2.1,2.7,0.769,9.7,1.4,1.4,0.2,1.2,3.3,19.2,126.2,98.8,alabama-birmingham,2022


---
### 1_transformed_data

- everything looks ok

In [120]:
# Load the 2022 gamelog final-stats pickle from 1_transformed_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
fs_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/1_transformed_data/season_2022_gamelog_final_stats_data.pkl')

In [121]:
fs_df.head()

Unnamed: 0,3Pp,ASTpg,BLKpg,Date,FGp,FTp,GameType,Home,ORBpg,Opp,PFpg,RBpg,STLpg,TOpg,Tm,W,Wp,Ws,pApg,ppg,sos
0,0.2642,9.2,3.6,2022-03-05,0.4042,0.7196,season2022,A,8.2,harvard,13.6,32.6,7.6,11.8,dartmouth,1.0,0.333333,8.0,66.0,67.0,-2.5
1,0.3284,13.2,2.8,2022-03-02,0.3736,0.728,season2022,N,10.2,tennessee-tech,18.4,28.4,6.2,13.6,austin-peay,0.0,0.428571,12.0,66.8,63.8,-3.13
2,0.3656,12.6,2.2,2022-03-08,0.4588,0.7282,season2022,N,11.4,wright-state,15.6,35.2,9.0,12.0,northern-kentucky,0.0,0.645161,20.0,58.6,71.6,-5.84
3,0.3426,12.6,2.0,2022-03-06,0.4706,0.7916,season2022,N,6.6,chattanooga,13.2,29.2,7.8,10.6,wofford,0.0,0.612903,19.0,66.6,71.8,-1.43
4,0.3696,12.2,1.0,2022-03-09,0.4574,0.7688,season2022,N,5.6,sacramento-state,18.4,28.8,2.4,12.0,idaho,0.0,0.3,9.0,81.8,74.8,-4.45


In [122]:
fs_df[fs_df['Tm'] == 'alabama-birmingham']

Unnamed: 0,3Pp,ASTpg,BLKpg,Date,FGp,FTp,GameType,Home,ORBpg,Opp,PFpg,RBpg,STLpg,TOpg,Tm,W,Wp,Ws,pApg,ppg,sos
349,0.4632,13.0,4.2,2022-03-12,0.4744,0.72,season2022,N,9.6,louisiana-tech,19.0,38.0,7.2,14.2,alabama-birmingham,1.0,0.787879,26.0,74.4,83.8,-0.71


In [163]:
# Load the 2022 gamelog stats pickle from 1_transformed_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
gs_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/1_transformed_data/season_2022_gamelog_stats_data.pkl')

In [164]:
gs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9381 entries, 0 to 9380
Data columns (total 21 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      9381 non-null   object 
 1   Home      9381 non-null   object 
 2   Opp       9381 non-null   object 
 3   W         9381 non-null   int64  
 4   sos       9381 non-null   float64
 5   GameType  9381 non-null   object 
 6   Ws        9381 non-null   float64
 7   Wp        9381 non-null   float64
 8   ppg       9381 non-null   float64
 9   pApg      9381 non-null   float64
 10  FGp       9381 non-null   float64
 11  3Pp       9381 non-null   float64
 12  FTp       9381 non-null   float64
 13  ORBpg     9381 non-null   float64
 14  RBpg      9381 non-null   float64
 15  ASTpg     9381 non-null   float64
 16  STLpg     9381 non-null   float64
 17  BLKpg     9381 non-null   float64
 18  TOpg      9381 non-null   float64
 19  PFpg      9381 non-null   float64
 20  Tm        9381 non-null   obje

In [124]:
gs_df[gs_df['Tm'] == 'alabama-birmingham']

Unnamed: 0,Date,Home,Opp,W,sos,GameType,Ws,Wp,ppg,pApg,FGp,3Pp,FTp,ORBpg,RBpg,ASTpg,STLpg,BLKpg,TOpg,PFpg,Tm
105,2021-11-25,N,new-mexico,1,-0.71,season2022,4.0,0.8,86.8,63.2,0.4838,0.3702,0.6498,10.4,33.8,15.8,14.6,3.8,11.0,16.6,alabama-birmingham
106,2021-11-26,N,san-francisco,0,-0.71,season2022,5.0,0.833333,83.6,62.4,0.4696,0.3672,0.684,11.0,34.6,16.0,15.2,4.0,13.8,17.4,alabama-birmingham
107,2021-12-01,H,east-tennessee-state,1,-0.71,season2022,5.0,0.714286,78.8,60.8,0.438,0.33,0.6966,11.6,35.8,13.4,14.2,4.2,13.6,17.2,alabama-birmingham
108,2021-12-04,A,saint-louis,1,-0.71,season2022,6.0,0.75,73.2,59.8,0.4216,0.293,0.7424,11.0,34.6,13.0,11.6,3.6,13.0,17.2,alabama-birmingham
109,2021-12-12,H,millsaps,1,-0.71,season2022,7.0,0.777778,76.0,61.0,0.4496,0.3166,0.7264,11.6,35.4,14.4,10.6,4.0,12.2,17.0,alabama-birmingham
110,2021-12-14,H,grambling,1,-0.71,season2022,8.0,0.8,79.4,58.6,0.4518,0.29,0.755,13.0,37.0,13.0,11.6,4.8,11.0,17.2,alabama-birmingham
111,2021-12-18,H,west-virginia,0,-0.71,season2022,9.0,0.818182,78.0,56.2,0.4416,0.3346,0.6958,12.2,36.8,12.4,11.2,6.0,9.2,16.0,alabama-birmingham
112,2021-12-22,H,mississippi-valley-state,1,-0.71,season2022,9.0,0.75,77.6,56.6,0.445,0.3298,0.6622,13.8,38.6,13.4,11.2,5.4,10.6,15.8,alabama-birmingham
113,2021-12-30,H,texas-el-paso,1,-0.71,season2022,10.0,0.769231,83.6,57.0,0.458,0.4262,0.6678,13.4,40.6,13.6,10.8,5.0,12.2,17.4,alabama-birmingham
114,2022-01-01,H,texas-san-antonio,1,-0.71,season2022,11.0,0.785714,83.2,55.0,0.4612,0.412,0.6754,12.0,40.4,12.6,10.0,5.2,12.0,15.4,alabama-birmingham


---
## 2_full_season_data



In [126]:
# Load the combined per-100 player pickle from 2_full_season_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
ppfd_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/2_full_season_data/player_per100_full_data.pkl')

In [128]:
ppfd_df.head()

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
0,Julian Edmonson,30,5,568,10.5,26.2,0.401,6.2,15.2,0.411,4.3,11.0,0.387,9.3,12.5,0.742,5.7,3.0,2.3,0.2,5.6,6.6,34.5,102.3,107.9,abilene-christian,2014
1,Jacob Lancaster,31,11,434,12.0,21.7,0.553,11.8,21.5,0.551,0.1,0.1,1.0,5.0,7.9,0.638,19.6,1.0,1.0,5.0,5.6,6.7,29.1,102.9,99.3,abilene-christian,2014
2,Parker Wentz,31,29,1040,7.7,15.7,0.491,3.2,5.6,0.576,4.5,10.1,0.444,4.5,5.2,0.87,3.9,4.9,2.8,0.1,3.4,4.0,24.5,124.7,107.5,abilene-christian,2014
3,LaDarrien Williams,16,14,474,8.1,21.8,0.371,6.4,16.3,0.389,1.7,5.5,0.318,6.5,9.1,0.712,10.5,7.7,2.4,0.5,4.2,5.2,24.4,101.2,104.1,abilene-christian,2014
4,Harrison Hawkins,16,16,481,6.8,18.1,0.374,3.7,10.4,0.353,3.1,7.6,0.403,7.5,8.8,0.847,5.0,6.3,2.1,0.0,6.0,5.8,24.1,100.9,108.5,abilene-christian,2014


In [135]:
ppfd_df[(ppfd_df['Team'] == 'alabama-birmingham') & (ppfd_df['Season'] == 2022)]

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season
38653,Jon Coleman,4,0,10,22.7,34.0,0.667,11.3,17.0,0.667,11.3,17.0,0.667,0.0,5.7,0.0,0.0,5.7,0.0,0.0,0.0,0.0,56.7,156.8,107.5,alabama-birmingham,2022
38654,Elijah Tate,10,1,27,14.7,31.5,0.467,4.2,8.4,0.5,10.5,23.1,0.455,2.1,2.1,1.0,16.8,6.3,2.1,2.1,0.0,8.4,42.0,158.1,96.6,alabama-birmingham,2022
38655,Jordan Walker,33,33,1044,11.7,29.0,0.403,5.4,13.6,0.398,6.2,15.4,0.406,7.0,7.9,0.883,5.1,8.7,2.7,0.1,6.3,3.4,36.5,111.5,97.2,alabama-birmingham,2022
38656,Quan Jackson,34,33,980,9.1,18.7,0.485,8.3,15.6,0.535,0.8,3.2,0.236,4.7,6.7,0.698,11.7,3.9,4.6,0.9,3.6,3.3,23.6,109.5,88.5,alabama-birmingham,2022
38657,Michael Ertel,33,11,832,8.4,19.6,0.431,4.2,8.7,0.477,4.3,10.9,0.394,2.0,2.7,0.769,2.9,3.4,3.1,0.0,1.6,3.8,23.2,120.4,98.3,alabama-birmingham,2022
38658,Jamal Johnson,27,3,299,7.2,18.2,0.396,1.5,3.0,0.5,5.7,15.2,0.375,2.3,3.2,0.706,5.3,1.7,1.7,1.1,2.7,4.4,22.4,112.9,98.7,alabama-birmingham,2022
38659,KJ Buffen,34,32,911,8.0,15.5,0.518,7.8,14.7,0.53,0.2,0.8,0.308,4.9,6.4,0.767,14.1,2.0,2.1,1.4,3.4,6.2,21.2,115.5,93.9,alabama-birmingham,2022
38660,Tyreke Locure,8,1,87,7.8,19.5,0.4,4.6,8.5,0.538,3.3,11.1,0.294,2.0,3.9,0.5,6.5,5.9,5.2,0.0,5.2,3.9,20.9,94.6,90.9,alabama-birmingham,2022
38661,Tavin Lovan,32,19,713,8.6,17.7,0.486,8.5,17.1,0.498,0.1,0.6,0.143,3.3,5.5,0.594,8.7,3.3,2.9,0.6,1.7,3.4,20.5,113.4,95.5,alabama-birmingham,2022
38662,Justin Brown,30,1,275,6.2,15.7,0.395,1.4,2.9,0.5,4.7,12.8,0.371,2.1,2.7,0.769,9.7,1.4,1.4,0.2,1.2,3.3,19.2,126.2,98.8,alabama-birmingham,2022


In [152]:
# Load the full player-stats pickle from 2_full_season_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
psfd_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/2_full_season_data/player_stats_full.pkl')

In [153]:
psfd_df[(psfd_df['Team'] == 'alabama-birmingham') & (psfd_df['Season'] == 2022)]

Unnamed: 0,Player,G,GS,MP,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Team,Season,Class,Pos,Height
38655,Jon Coleman,4,0,10,22.7,34.0,0.667,11.3,17.0,0.667,11.3,17.0,0.667,0.0,5.7,0.0,0.0,5.7,0.0,0.0,0.0,0.0,56.7,156.8,107.5,alabama-birmingham,2022,2.0,G,70.0
38656,Elijah Tate,10,1,27,14.7,31.5,0.467,4.2,8.4,0.5,10.5,23.1,0.455,2.1,2.1,1.0,16.8,6.3,2.1,2.1,0.0,8.4,42.0,158.1,96.6,alabama-birmingham,2022,1.0,G,75.0
38657,Jordan Walker,33,33,1044,11.7,29.0,0.403,5.4,13.6,0.398,6.2,15.4,0.406,7.0,7.9,0.883,5.1,8.7,2.7,0.1,6.3,3.4,36.5,111.5,97.2,alabama-birmingham,2022,3.0,G,71.0
38658,Quan Jackson,34,33,980,9.1,18.7,0.485,8.3,15.6,0.535,0.8,3.2,0.236,4.7,6.7,0.698,11.7,3.9,4.6,0.9,3.6,3.3,23.6,109.5,88.5,alabama-birmingham,2022,4.0,G,76.0
38659,Michael Ertel,33,11,832,8.4,19.6,0.431,4.2,8.7,0.477,4.3,10.9,0.394,2.0,2.7,0.769,2.9,3.4,3.1,0.0,1.6,3.8,23.2,120.4,98.3,alabama-birmingham,2022,4.0,G,74.0
38660,Jamal Johnson,27,3,299,7.2,18.2,0.396,1.5,3.0,0.5,5.7,15.2,0.375,2.3,3.2,0.706,5.3,1.7,1.7,1.1,2.7,4.4,22.4,112.9,98.7,alabama-birmingham,2022,3.0,G,76.0
38661,KJ Buffen,34,32,911,8.0,15.5,0.518,7.8,14.7,0.53,0.2,0.8,0.308,4.9,6.4,0.767,14.1,2.0,2.1,1.4,3.4,6.2,21.2,115.5,93.9,alabama-birmingham,2022,3.0,F,79.0
38662,Tyreke Locure,8,1,87,7.8,19.5,0.4,4.6,8.5,0.538,3.3,11.1,0.294,2.0,3.9,0.5,6.5,5.9,5.2,0.0,5.2,3.9,20.9,94.6,90.9,alabama-birmingham,2022,2.0,G,72.0
38663,Tavin Lovan,32,19,713,8.6,17.7,0.486,8.5,17.1,0.498,0.1,0.6,0.143,3.3,5.5,0.594,8.7,3.3,2.9,0.6,1.7,3.4,20.5,113.4,95.5,alabama-birmingham,2022,3.0,G,76.0
38664,Justin Brown,30,1,275,6.2,15.7,0.395,1.4,2.9,0.5,4.7,12.8,0.371,2.1,2.7,0.769,9.7,1.4,1.4,0.2,1.2,3.3,19.2,126.2,98.8,alabama-birmingham,2022,4.0,G,78.0


In [154]:
for x, y in [('a', 'b'), ('c', 'd')]:
    print(x, y)

a b
c d


---
## Final Scores

In [176]:
# Load the 2022 season gamelog pickle from 0_scraped_data.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
final_scores_df = pd.read_pickle('/Users/sean/Documents/bracket_buster/bracket_buster_v2.0/0_scraped_data/season_2022_gamelog_data.pkl')

In [177]:
final_scores_df.head()

Unnamed: 0,G,Date,Home,Opp,W,Pts,PtsA,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,RB,AST,STL,BLK,TO,PF,Tm,sos,GameType
0,1,2021-11-09,A,utah,0,56,70,22,57,0.386,4,21,0.19,8,17,0.471,7,27,11,9,6,10,24,abilene-christian,-2.1,season2022
1,2,2021-11-12,A,texas-am,0,80,81,32,73,0.438,10,26,0.385,6,13,0.462,14,35,22,13,0,16,23,abilene-christian,-2.1,season2022
2,3,2021-11-16,A,texas-arlington,1,80,71,22,67,0.328,7,20,0.35,29,38,0.763,17,40,13,15,0,14,25,abilene-christian,-2.1,season2022
3,4,2021-11-27,H,mcmurry,1,89,54,34,64,0.531,5,20,0.25,16,24,0.667,11,36,24,17,2,14,18,abilene-christian,-2.1,season2022
4,5,2021-11-29,H,schreiner,1,88,47,34,67,0.507,6,21,0.286,14,16,0.875,15,33,19,18,5,16,21,abilene-christian,-2.1,season2022


In [178]:
# Cast the Pts / PtsA score columns to float before aggregating them.
# NOTE(review): presumably they are stored as object dtype in the pickle, like
# the other stat columns — confirm. This overwrites the columns in place.
final_scores_df[['Pts', 'PtsA']] = final_scores_df[['Pts', 'PtsA']].astype('float')

In [179]:
final_scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11204 entries, 0 to 11203
Data columns (total 26 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   G         11204 non-null  object 
 1   Date      11204 non-null  object 
 2   Home      11204 non-null  object 
 3   Opp       11204 non-null  object 
 4   W         11204 non-null  int64  
 5   Pts       11204 non-null  float64
 6   PtsA      11204 non-null  float64
 7   FG        11204 non-null  object 
 8   FGA       11204 non-null  object 
 9   FG%       11204 non-null  object 
 10  3P        11204 non-null  object 
 11  3PA       11204 non-null  object 
 12  3P%       11204 non-null  object 
 13  FT        11204 non-null  object 
 14  FTA       11204 non-null  object 
 15  FT%       11202 non-null  object 
 16  ORB       11204 non-null  object 
 17  RB        11204 non-null  object 
 18  AST       11204 non-null  object 
 19  STL       11204 non-null  object 
 20  BLK       11204 non-null  ob

In [180]:
final_scores_df[['Tm', 'Pts', 'PtsA']].head()

Unnamed: 0,Tm,Pts,PtsA
0,abilene-christian,56.0,70.0
1,abilene-christian,80.0,81.0
2,abilene-christian,80.0,71.0
3,abilene-christian,89.0,54.0
4,abilene-christian,88.0,47.0


In [183]:
# Mean Pts / PtsA per game for a handful of teams of interest
teams = ['gonzaga', 'arizona', 'auburn', 'alabama', 'kansas', 'duke', 'houston']
(
    final_scores_df
    .loc[final_scores_df['Tm'].isin(teams), ['Tm', 'Pts', 'PtsA']]
    .groupby('Tm')
    .mean()
)

Unnamed: 0_level_0,Pts,PtsA
Tm,Unnamed: 1_level_1,Unnamed: 2_level_1
alabama,79.96875,76.40625
arizona,84.558824,67.529412
auburn,78.71875,67.03125
duke,80.176471,67.147059
gonzaga,87.827586,65.344828
houston,75.823529,58.882353
kansas,78.588235,68.147059
