## Bracket Challenge Predictor

Reading data

In [393]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
import re

from functools import reduce
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

from xgboost import XGBRegressor

## Data Preprocessing

In [511]:
team_matchups = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Tournament Matchups.csv")

away_stats = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Barttorvik Away.csv")
home_stats = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Barttorvik Home.csv")
neutral_stats = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Barttorvik Neutral.csv")

conf_perf = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Conference Results.csv")

team_res = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Team Results.csv")
team_hist = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Resumes.csv")
team_v_ranked = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/TeamRankings.csv")

ken_eff = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/INT _ KenPom _ Efficiency.csv")
ken_height = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/INT _ KenPom _ Height.csv")
ken_conf = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/DEV _ March Madness.csv")

cur_coaches = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/REF _ Current NCAAM Coaches.csv")
coach_perf = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Coach Results.csv")

seed_res = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Seed Results.csv")
upset_count = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Upset Count.csv")
upset_inf = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/Upset Seed Info.csv")

  ken_conf = pd.read_csv("/Users/nicholaskim/Documents/Repositories/bracket challenge/data/DEV _ March Madness.csv")


since ken_ datasets don't have TEAMNO var will first create seperate lookup table to create a column for its respective id

In [512]:
team_ids = team_matchups[['YEAR','TEAM','TEAM NO']].drop_duplicates()

team_res.rename(columns={'TEAM ID':'TEAM NO'},inplace=True)

ken_eff.rename(columns={'Season':'YEAR','Team':'TEAM'},inplace=True)
ken_height.rename(columns={'Season':'YEAR','TeamName':'TEAM'},inplace=True)

ken_eff = ken_eff.merge(team_ids,on=['YEAR','TEAM'],how='left')
ken_height = ken_height.merge(team_ids,on=['YEAR','TEAM'],how='left')

In [513]:
temp_away = away_stats.loc[:,['YEAR','TEAM NO','BADJ O','BADJ D','WIN%','EFG%','EFG%D','PPPO','PPPD']]
temp_home = home_stats.loc[:,['YEAR','TEAM NO','BADJ O','BADJ D','WIN%','EFG%','EFG%D','PPPO','PPPD']]
temp_neutral = neutral_stats.loc[:,['YEAR','TEAM NO','BADJ O','BADJ D','WIN%','EFG%','EFG%D','PPPO','PPPD']]

#adds _A to all columns to represent Away
temp_away.columns = list(temp_away.columns[:2]) + [col + "_A" for col in temp_away.columns[2:]]

#_H for Home
temp_home.columns = list(temp_home.columns[:2]) + [col + "_H" for col in temp_home.columns[2:]]

#_N for Neutral
temp_neutral.columns = list(temp_neutral.columns[:2]) + [col + "_N" for col in temp_neutral.columns[2:]]


#combined stats as they are highly correlated away, home, and neutral
#function to create new combined dataframe easily
def var_mean(dfs, vars):
    combined_df = [dfs[0].loc[:,vars],dfs[1].loc[:,vars],dfs[2].loc[:,vars]]
    combined_df = reduce(lambda left, right: pd.merge(left, right, on=['YEAR','TEAM NO']),combined_df)
    for var in vars[2:]:
        combined_df[var] = combined_df[[var+'_x',var+'_y',var]].mean(axis=1)
        combined_df.drop([var+'_x',var+'_y'],axis=1,inplace=True)
    return combined_df

vars = ['YEAR','TEAM NO','BADJ EM','FTR','FTRD',
        'BADJ T','TOV%','TOV%D','OREB%','OP OREB%','RAW T','WAB',
        '2PT%', '2PT%D', '3PT%', '3PT%D', 'BLK%', 'BLKED%',
        'AST%', 'OP AST%', '2PTR', '3PTR', '2PTRD', '3PTRD','ELITE SOS']
dfs = [away_stats,home_stats,neutral_stats]

team_stats = var_mean(dfs,vars)

temp_res = team_res.loc[:,['TEAM','PAKE','PASE','R64','R32','S16','E8','F4',
                           'F2','CHAMP','TOP2']]

temp_his = team_hist.drop(['TEAM','SEED','ROUND','Q1 PLUS Q2 W','B POWER','BID TYPE'],axis=1)

temp_v_ranked = team_v_ranked.loc[:,['YEAR','TEAM NO','TR RATING','V 1-25 WINS','V 1-25 LOSS',
                                     'V 26-50 WINS','V 26-50 LOSS','LUCK RATING','CONSISTENCY TR RATING']]

temp_ken_eff = ken_eff.loc[:,['YEAR','TEAM NO','Adjusted Offensive Efficiency','Adjusted Defensive Efficiency',
                              'Avg Possession Length (Offense)','Avg Possession Length (Defense)']]

temp_height = ken_height.loc[:,['YEAR','TEAM NO','AvgHeight','EffectiveHeight','Experience','Bench']]


Creating supplmental dataset that aren't dependent on team metrics (i.e: conference, coach, upsets)

For conference we'll just use the conf_perf dataset with selected variables

In [415]:
conf_perf = conf_perf.drop(['GAMES','W','L'],axis=1)

In [472]:
team_ids.columns

Index(['YEAR', 'TEAM', 'TEAM NO'], dtype='object')

For coaches will create a column in coach_perf on the years and Teams they've coaches respectively

In [517]:
ken_conf[['YEAR','TEAM','COACH','Since']]

Unnamed: 0,YEAR,TEAM,COACH,Since
2,2015,Kentucky Wildcats,Mark Pope,202425.0
3,2021,Gonzaga Bulldogs,Mark Few,199900.0
4,2024,UConn Huskies,Dan Hurley,201819.0
6,2008,Kansas Jayhawks,Bill Self,200304.0
8,2019,Virginia Cavaliers,Ron Sanchez,202425.0
...,...,...,...,...
8302,2008,NJIT Highlanders,Grant Billmeier,202324.0
8304,2009,NJIT Highlanders,Grant Billmeier,202324.0
8308,2015,Grambling State Tigers,Donte Jackson,201718.0
8311,2021,Mississippi Valley State Delta Devils,George Ivory,202223.0


In [520]:
ken_conf.rename(columns={'Season':'YEAR','Full Team Name':'TEAM'},inplace=True)

#filtering data to match years in other datasets
coach_stats = ken_conf[(ken_conf['YEAR']<=2024) & (ken_conf['YEAR']>=2008) & (ken_conf['YEAR'] != 2020)]

#dictionary to remap Team Name is consistent form
team_name_mapping = {
    'Abilene Christian Wildcats': 'Abilene Christian',
    'Akron Zips': 'Akron',
    'Alabama Crimson Tide': 'Alabama',
    'Albany Great Danes': 'Albany',
    'American Eagles': 'American',
    'Arizona Wildcats': 'Arizona',
    'Arizona State Sun Devils': 'Arizona St.',
    'Arkansas Razorbacks': 'Arkansas',
    'Arkansas–Pine Bluff Golden Lions': 'Arkansas Pine Bluff',
    'Auburn Tigers': 'Auburn',
    'Austin Peay Governors': 'Austin Peay',
    'BYU Cougars': 'BYU',
    'Baylor Bears': 'Baylor',
    'Belmont Bruins': 'Belmont',
    'Binghamton Bearcats': 'Binghamton',
    'Boise State Broncos': 'Boise St.',
    'Boston College Eagles': 'Boston College',
    'Boston University Terriers': 'Boston University',
    'Bradley Braves': 'Bradley',
    'Bucknell Bison': 'Bucknell',
    'Buffalo Bulls': 'Buffalo',
    'Butler Bulldogs': 'Butler',
    'Cal Poly Mustangs': 'Cal Poly',
    'Cal State Bakersfield Roadrunners': 'Cal St. Bakersfield',
    'Cal State Fullerton Titans': 'Cal St. Fullerton',
    'Cal State Northridge Matadors': 'Cal St. Northridge',
    'California Golden Bears': 'California',
    'Chattanooga Mocs': 'Chattanooga',
    'Cincinnati Bearcats': 'Cincinnati',
    'Clemson Tigers': 'Clemson',
    'Cleveland State Vikings': 'Cleveland St.',
    'Coastal Carolina Chanticleers': 'Coastal Carolina',
    'Colgate Raiders': 'Colgate',
    'Charleston Cougars': 'College of Charleston',
    'Colorado Buffaloes': 'Colorado',
    'Colorado State Rams': 'Colorado St.',
    'UConn Huskies': 'Connecticut',
    'Cornell Big Red': 'Cornell',
    'Creighton Bluejays': 'Creighton',
    'Davidson Wildcats': 'Davidson',
    'Dayton Flyers': 'Dayton',
    "Delaware Fightin' Blue Hens": 'Delaware',
    'Detroit Mercy Titans': 'Detroit',
    'Drake Bulldogs': 'Drake',
    'Drexel Dragons': 'Drexel',
    'Duke Blue Devils': 'Duke',
    'Duquesne Dukes': 'Duquesne',
    'East Tennessee State Buccaneers': 'East Tennessee St.',
    'Eastern Kentucky Colonels': 'Eastern Kentucky',
    'Eastern Washington Wagles': 'Eastern Washington',
    'Fairleigh Dickinson Knights': 'Fairleigh Dickinson',
    'Florida Gators': 'Florida',
    'Florida Atlantic Owls': 'Florida Atlantic',
    'Florida Gulf Coast Eagles': 'Florida Gulf Coast',
    'Florida State Seminoles': 'Florida St.',
    'Fresno Stata Bulldogs': 'Fresno St.',
    'Furman Paladins': 'Furman',
    "Gardner–Webb Runnin' Bulldogs": 'Gardner Webb',
    'George Mason Patriots': 'George Mason',
    'George Washington Revolutionaries': 'George Washington',
    'Georgetown Hoyas': 'Georgetown',
    'Georgia Bulldogs': 'Georgia',
    'Georgia State Panthers': 'Georgia St.',
    'Georgia Tech Yellow Jackets': 'Georgia Tech',
    'Gonzaga Bulldogs': 'Gonzaga',
    'Grambling State Tigers': 'Grambling St.',
    'Grand Canyon Antelopes': 'Grand Canyon',
    'Green Bay Phoenix': 'Green Bay',
    'Hampton Pirates': 'Hampton',
    'Harvard Crimson': 'Harvard',
    'Hawaii Rainbow Warriors': 'Hawaii',
    'Holy Cross Crusaders': 'Holy Cross',
    'Houston Cougars': 'Houston',
    'Howard Bison': 'Howard',
    'Illinois Fighting Illini': 'Illinois',
    'Indiana Hoosiers': 'Indiana',
    'Indiana State Sycamores': 'Indiana St.',
    'Iona Gaels': 'Iona',
    'Iowa Hawkeyes': 'Iowa',
    'Iowa State Cyclones': 'Iowa St.',
    'Jacksonville State Tigers': 'Jacksonville St.',
    'James Madison Dukes': 'James Madison',
    'Kansas Jayhawks': 'Kansas',
    'Kansas State Wildcats': 'Kansas St.',
    'Kennesaw State Owls': 'Kennesaw St.',
    'Kent State Golden Flashes': 'Kent St.',
    'Kentucky Wildcats': 'Kentucky',
    'LIU Sharks': 'LIU Brooklyn',
    'LSU Tigers': 'LSU',
    'La Salle Explorers': 'La Salle',
    'Lafayette Leopards': 'Lafayette',
    'Lehigh Mountain Hawks': 'Lehigh',
    'Liberty Flames': 'Liberty',
    'Lipscomb Bisons': 'Lipscomb',
    'Little Rock Trojans': 'Little Rock',
    'Long Beach State Beach': 'Long Beach St.',
    'Longwood Lancers': 'Longwood',
    "Louisiana Ragin' Cajuns": 'Louisiana Lafayette',
    'Louisville Cardinals': 'Louisville',
    'Loyola Chicago Ramblers': 'Loyola Chicago',
    'Loyola (Maryland) Greyhounds': 'Loyola MD',
    'Manhattan Jaspers': 'Manhattan',
    'Marquette Golden Eagles': 'Marquette',
    'Marshall Thundering Herd': 'Marshall',
    'Maryland Terrapins': 'Maryland',
    'UMass Minutemen': 'Massachusetts',
    'McNeese Cowboys': 'McNeese St.',
    'Memphis Tigers': 'Memphis',
    'Mercer Bears': 'Mercer',
    'Miami Hurricanes': 'Miami FL',
    'Michigan Wolverines': 'Michigan',
    'Michigan State Spartans': 'Michigan St.',
    'Middle Tennessee Blue Raiders': 'Middle Tennessee',
    'Milwaukee Panthers': 'Milwaukee',
    'Minnesota Golden Gophers': 'Minnesota',
    'Ole Miss Rebels': 'Mississippi',
    'Mississippi State Bulldogs': 'Mississippi St.',
    'Mississippi Valley State Delta Devils': 'Mississippi Valley St.',
    'Missouri Tigers': 'Missouri',
    'Montana Grizzlies': 'Montana',
    'Montana State Bobcats': 'Montana St.',
    'Morehead State Eagles': 'Morehead St.',
    'Morgan State Bears': 'Morgan St.',
    "Mount St. Mary's Mountaineers": "Mount St. Mary's",
    'Murray State Racers': 'Murray St.',
    'Nebraska Cornhuskers': 'Nebraska',
    'Nevada Wolf Pack': 'Nevada',
    'New Mexico Lobos': 'New Mexico',
    'New Mexico State Aggies': 'New Mexico St.',
    'Norfolk State Spartans': 'Norfolk St.',
    'North Carolina Tar Heels': 'North Carolina',
    'North Carolina A&T Aggies': 'North Carolina A&T',
    'North Carolina Central Eagles': 'North Carolina Central',
    'NC State Wolfpack': 'North Carolina St.',
    'North Dakota Fighting Hawks': 'North Dakota',
    'North Dakota State Bison': 'North Dakota St.',
    'North Texas Mean Green': 'North Texas',
    'Northeastern Huskies': 'Northeastern',
    'Northern Colorado Bears': 'Northern Colorado',
    'Northern Iowa Panthers': 'Northern Iowa',
    'Northern Kentucky Norse': 'Northern Kentucky',
    'Northwestern Wildcats': 'Northwestern',
    'Northwestern State Demons': 'Northwestern St.',
    'Notre Dame Fighting Irish': 'Notre Dame',
    'Oakland Golden Grizzlies': 'Oakland',
    'Ohio Bobcats': 'Ohio',
    'Ohio State Buckeyes': 'Ohio St.',
    'Oklahoma Sooners': 'Oklahoma',
    'Oklahoma State Cowboys': 'Oklahoma St.',
    'Old Dominion Monarchs': 'Old Dominion',
    'Oral Roberts Golden Eagles': 'Oral Roberts',
    'Oregon Ducks': 'Oregon',
    'Oregon State Beavers': 'Oregon St.',
    'Pacific Tigers': 'Pacific',
    'Penn Quakers': 'Penn',
    'Penn State Nittany Lions': 'Penn St.',
    'Pittsburgh Panthers': 'Pittsburgh',
    'Portland State Vikings': 'Portland St.',
    'Princeton Tigers': 'Princeton',
    'Providence Friars': 'Providence',
    'Purdue Boilermakers': 'Purdue',
    'Radford Highlanders': 'Radford',
    'Rhode Island Rams': 'Rhode Island',
    'Richmond Spiders': 'Richmond',
    'Robert Morris Colonials': 'Robert Morris',
    'Rutgers Scarlet Knights': 'Rutgers',
    'SMU Mustangs': 'SMU',
    "Saint Joseph's Hawks": "Saint Joseph's",
    'Saint Louis Billikens': 'Saint Louis',
    "Saint Mary's Gaels": "Saint Mary's",
    "Saint Peter's Peacocks": "Saint Peter's",
    'Sam Houston Bearkats': 'Sam Houston St.',
    'Samford Bulldogs': 'Samford',
    'San Diego Toreros': 'San Diego',
    'San Diego State Aztecs': 'San Diego St.',
    'San Francisco Dons': 'San Francisco',
    'Seton Hall Pirates': 'Seton Hall',
    'Siena Saints': 'Siena',
    'South Alabama Jaguars': 'South Alabama',
    'South Carolina Gamecocks': 'South Carolina',
    'South Dakota State Bulldogs': 'South Dakota St.',
    'South Florida Bulls': 'South Florida',
    'Southern Jaguars': 'Southern',
    'Southern Miss Golden Eagles': 'Southern Miss',
    'St. Bonaventure Bonnies': 'St. Bonaventure',
    "St. John's Red Storm": "St. John's",
    'Stanford Cardinal': 'Stanford',
    'Stephen F. Austin Lumberjacks': 'Stephen F. Austin',
    'Stetson Hatters': 'Stetson',
    'Stony Brook Seawolves': 'Stony Brook',
    'Syracuse Orange': 'Syracuse',
    'TCU Horned Frogs': 'TCU',
    'Temple Owls': 'Temple',
    'Tennessee Volunteers': 'Tennessee',
    'Texas Longhorns': 'Texas',
    'Texas A&M Aggies': 'Texas A&M',
    'Texas A&M-Corpus Chris Islanders': 'Texas A&M Corpus Chris',
    'Texas Southern Tigers': 'Texas Southern',
    'Texas Tech Red Raiders': 'Texas Tech',
    'Troy Trojans': 'Troy',
    'Tulsa Golden Hurricane': 'Tulsa',
    'UAB Blazers': 'UAB',
    'UC Davis Aggies': 'UC Davis',
    'UC Irvine Anteaters': 'UC Irvine',
    'UC Santa Barbara Gauchos': 'UC Santa Barbara',
    'UCF Knights': 'UCF',
    'UCLA Bruins': 'UCLA',
    'UMBC Retrievers': 'UMBC',
    'UNC Asheville Bulldogs': 'UNC Asheville',
    'UNC Greensboro Spartans': 'UNC Greensboro',
    'UNC Wilmington Seahawks': 'UNC Wilmington',
    "UNLV Runnin' Rebels": 'UNLV',
    'USC Trojans': 'USC',
    'UT Arlington Mavericks': 'UT Arlington',
    'UTEP Miners': 'UTEP',
    'UTSA Roadrunners': 'UTSA',
    'Utah Utes': 'Utah',
    'Utah State Aggies': 'Utah St.',
    'VCU Rams': 'VCU',
    'Valparaiso Beacons': 'Valparaiso',
    'Vanderbilt Commodores': 'Vanderbilt',
    'Vermont Catamounts': 'Vermont',
    'Villanova Wildcats': 'Villanova',
    'Virginia Cavaliers': 'Virginia',
    'Virginia Tech Hokies': 'Virginia Tech',
    'Wagner Seahawks': 'Wagner',
    'Wake Forest Demon Deacons': 'Wake Forest',
    'Washington Huskies': 'Washington',
    'Washington State Cougars': 'Washington St.',
    'Weber State Wildcats': 'Weber St.',
    'West Virginia Mountaineers': 'West Virginia',
    'Western Kentucky Hilltoppers': 'Western Kentucky',
    'Western Michigan Broncos': 'Western Michigan',
    'Wichita State Shockers': 'Wichita St.',
    'Winthrop Eagles': 'Winthrop',
    'Wisconsin Badgers': 'Wisconsin',
    'Wofford Terriers': 'Wofford',
    'Wright State Raiders': 'Wright St.',
    'Wyoming Cowboys': 'Wyoming',
    'Xavier Musketeers': 'Xavier',
    'Yale Bulldogs': 'Yale'
}

#need to manually add Hartford Hawks as not in dataset
hart = {'YEAR': 2021,
        'TEAM': 'Hartford'}

coach_stats['TEAM'] = coach_stats['TEAM'].replace(team_name_mapping)

coach_stats = pd.concat([coach_stats,pd.DataFrame([hart])],ignore_index=True)

#converting 'Active Coaching Length' to a integer value

coach_stats['Active Coaching Length'] = (
    ken_conf['Active Coaching Length']
    .astype(str)  # Convert everything to string to avoid NaN issues
    .str.extract(r'(\d+)')  # Extract the first number
    .astype(float)  # Convert to float (or use .astype(int) if no NaNs)
)

#now merging the TEAM NO to this dataset
coach_stats = coach_stats.loc[:,['YEAR','TEAM']].merge(team_ids,on=['YEAR','TEAM'],how='left')

#removing nan on TEAM NO as that means they weren't in the tournament that year
coach_stats = coach_stats.dropna(subset=['TEAM NO'])

coach_stats.head()

Unnamed: 0,YEAR,TEAM,TEAM NO
0,2015,Kentucky,510.0
1,2021,Gonzaga,856.0
2,2024,Connecticut,1067.0
3,2008,Kansas,43.0
4,2019,Virginia,745.0


Now need to add columns on years coached, for what team to then merge on 'coach_per' statisics which are of interest

In [None]:
coach_perf['TEAM'] = [['North Carolina'],['Michigan St.'],['Butler'],['Memphis','Kentucky'],['Creighton','Oregon'],['Syracuse'],['Michigan'],
                      ['Villanova'],['Nevada','Arkansas'],['Connecticut'],['Loyola Chicago','Oklahoma'],['Kansas St.','South Carolina','Massachusetts'],
                      ['Wagner','Rhode Island','Connecticut'],['Kansas'],['George Mason','Miami FL'],['North Carolina'],['Gonzaga'],['Texas Tech','Texas'],
                      ['Florida'],['Connecticut',"Saint Joseph's"],["Saint Peter's",'Seton Hall'],['Florida Atlantic'],['Louisville','Iona'],['Duke'],
                      ['Xavier','Arizona','Xavier'],['Wisconsin'],['Ohio','Illinois','Akron'],['Dayton','Indiana','Rhode Island'],['San Diego St.'],
                      ['Oklahoma','Pittsburgh'],[]]
coach_perf['YRS'] = [['2008-2021'],['2008-2024'],['2008-2013'],['2008-2009','2010-2024'],['2008-2010','2011-2024'],['2008-2024'],['2008-2019'],
                     ['2008-2024'],['2015-2019','2021-2024'],['2012-2018'],['2011-2021','2021-2024'],['2008-2012','2013-2022','2023-2024'],
                     ['2010-2012','2013-2018','2019-2024'],['2008-2024'],['2008-2011','2012-2024'],['2022-2024'],['2008-2024'],['2016-2021','2022-2023'],
                     ['2008-2015'],['2008-2012','2018-2021'],['2018-2022','2023-2024'],['2018-2024'],['2008-2017','2021-2023'],['2008-2022'],
                     ['2008-2009','2010-2021','2023-2024'],['2008-2015'],['2009-2012','2013-2017','2018-2024'],['2012-2017','2018-2021','2023-2024'],['2018-2024'],
                     ['2008-2011','2019-2024'],[]]

Unnamed: 0,Year,TEAM NOS
0,2024,"[1044, 133]"
1,2025,100


Created a column on who won each matchup for every game in the dataset. Will now add columns of interest to this dataset and will try to do feature engineering as to model who'd win each matchup.

In [367]:
wins = []
for i in range(len(team_matchups)//2):
    if team_matchups.iloc[i,8] > team_matchups.iloc[i+1,8]:
        wins.append(1)
        wins.append(0)
    else:
        wins.append(0)
        wins.append(1)
    i += 2

comp_stats = team_matchups.iloc[:,[0,3,4,5,6,7,8]]
comp_stats['WIN'] = wins
        
comp_stats.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comp_stats['WIN'] = wins


Unnamed: 0,YEAR,TEAM NO,TEAM,SEED,ROUND,CURRENT ROUND,SCORE,WIN
0,2024,1067,Connecticut,1,1,64,91,1
1,2024,1026,Stetson,16,64,64,52,0
2,2024,1060,Florida Atlantic,8,64,64,65,0
3,2024,1036,Northwestern,9,32,64,77,1
4,2024,1029,San Diego St.,5,16,64,69,0


In [368]:
dfs = [comp_stats.drop('SCORE',axis=1),temp_away,temp_home,temp_neutral,temp_v_ranked,temp_height,temp_ken_eff,temp_his,team_stats]

comp_team_stats = reduce(lambda left, right: pd.merge(left, right, on=['YEAR','TEAM NO']),dfs)

comp_team_stats = comp_team_stats.merge(temp_res,on='TEAM',how='left')

comp_team_stats.head()

Unnamed: 0,YEAR,TEAM NO,TEAM,SEED,ROUND,CURRENT ROUND,WIN,BADJ O_A,BADJ D_A,WIN%_A,EFG%_A,EFG%D_A,PPPO_A,PPPD_A,BADJ O_H,BADJ D_H,WIN%_H,EFG%_H,EFG%D_H,PPPO_H,PPPD_H,BADJ O_N,BADJ D_N,WIN%_N,EFG%_N,EFG%D_N,PPPO_N,PPPD_N,TR RATING,V 1-25 WINS,V 1-25 LOSS,V 26-50 WINS,V 26-50 LOSS,LUCK RATING,CONSISTENCY TR RATING,AvgHeight,EffectiveHeight,Experience,Bench,Adjusted Offensive Efficiency,Adjusted Defensive Efficiency,Avg Possession Length (Offense),Avg Possession Length (Defense),NET RPI,RESUME,WAB RANK,ELO,Q1 W,Q2 W,Q3 Q4 L,PLUS 500,R SCORE,BADJ EM,FTR,FTRD,BADJ T,TOV%,TOV%D,OREB%,OP OREB%,RAW T,WAB,2PT%,2PT%D,3PT%,3PT%D,BLK%,BLKED%,AST%,OP AST%,2PTR,3PTR,2PTRD,3PTRD,ELITE SOS,PAKE,PASE,R64,R32,S16,E8,F4,F2,CHAMP,TOP2
0,2024,1067,Connecticut,1,1,64,1,129.5,101.7,75.0,56.6,50.6,1.206,1.109,123.7,89.4,100.0,57.5,41.4,1.261,0.881,126.7,88.5,100.0,57.0,44.2,1.195,0.954,22.3,10,2,4,0,2.4,10.8,78.42,1.53,2.43106,28.14,127.5,91.1,18.6,17.5,2,2,1,1,13,8,0,29,99.98,33.433333,32.6,32.6,65.433333,14.733333,15.533333,35.2,26.1,66.266667,3.766667,58.933333,44.266667,36.033333,31.666667,13.633333,8.233333,65.2,46.866667,59.4,40.6,67.666667,32.333333,37.840667,10.8,13.3,10,6,5,5,5,4,4,2
1,2024,1026,Stetson,16,64,64,0,106.0,115.4,37.5,49.9,54.9,1.047,1.152,110.0,113.4,84.615385,56.6,50.2,1.171,1.097,120.9,109.0,100.0,61.5,42.5,1.253,1.028,-2.0,0,1,0,1,2.7,12.7,77.68,1.34,1.78601,22.03,109.8,114.3,18.4,17.5,209,212,137,157,1,0,8,9,0.0,-0.3,28.166667,22.6,65.6,16.3,12.566667,29.966667,32.633333,65.5,-2.28,55.8,50.3,37.566667,31.9,7.933333,8.8,52.3,48.1,58.066667,41.933333,60.233333,39.766667,9.814,0.0,0.0,1,0,0,0,0,0,0,0
2,2024,1060,Florida Atlantic,8,64,64,0,114.1,108.2,50.0,51.7,52.3,1.144,1.111,118.1,101.6,92.857143,56.5,47.2,1.213,0.987,120.9,97.9,77.777778,56.6,50.6,1.147,1.045,12.7,1,1,1,0,-0.1,9.9,76.55,-0.32,2.57785,32.11,117.8,102.1,16.5,18.3,38,63,40,37,2,8,3,17,62.2,15.133333,35.166667,28.166667,69.066667,15.966667,16.533333,33.166667,29.5,69.1,0.4,55.966667,49.4,35.466667,34.3,7.6,6.8,50.466667,38.4,61.433333,38.566667,64.133333,35.866667,22.516333,2.8,2.7,2,1,1,1,1,0,0,0
3,2024,1036,Northwestern,9,32,64,1,112.4,97.9,36.363636,49.5,51.8,1.027,1.088,121.5,103.7,88.235294,56.6,52.3,1.212,1.05,107.6,87.6,50.0,48.3,48.0,0.999,0.952,11.7,3,5,3,3,1.5,10.6,77.59,0.43,2.51412,24.66,115.9,99.5,18.9,18.1,52,43,33,54,4,5,1,10,76.47,17.433333,27.4,39.633333,64.433333,14.4,18.666667,25.933333,27.233333,64.4,0.503333,48.166667,49.1,37.933333,35.4,10.5,9.9,56.7,56.533333,62.266667,37.733333,62.533333,37.466667,32.679333,0.9,0.8,3,3,0,0,0,0,0,0
4,2024,1029,San Diego St.,5,16,64,0,110.8,96.2,42.857143,48.1,49.4,1.051,1.044,110.1,90.7,92.307692,52.4,46.2,1.095,0.911,119.3,97.3,80.0,48.0,44.5,1.155,1.023,13.1,2,1,2,5,0.7,10.2,77.56,0.6,2.56787,29.61,113.4,94.0,17.7,18.3,20,33,24,29,4,6,0,15,97.98,18.666667,40.3,33.133333,66.7,14.866667,17.2,32.633333,28.933333,66.9,0.866667,50.966667,48.133333,31.566667,29.633333,12.2,8.533333,52.0,49.066667,63.2,36.8,59.766667,40.233333,31.405667,0.8,1.1,11,6,4,1,1,1,0,1


In [399]:
comp_team_stats.to_csv("teamstats")

## Model Testing