In [2]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy
import time
import os
import numpy as np
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
import random


pd.option_context('display.max_rows', None, 'display.max_columns', None)

<pandas._config.config.option_context at 0x28e3d5640>

In [3]:
def getSeasonOffStats(year):
  response = requests.get("https://www.sports-reference.com/cbb/seasons/men/"+ str(year) + "-advanced-school-stats.html")
  time.sleep(2.1)
  soup = bs(response.text, "html.parser")
  bsProduct = soup.find_all("table", attrs={"class": "sortable stats_table"})
  bsTable = bsProduct[0]
  bsTableRows = bsTable.find_all("tr")

  schools = []
  for row in bsTableRows:
    if row.find("th", attrs={"class": "right"}):
      schools.append(row)

  schoolStats = []
  for i in schools:

    dataDict = {}
    for j in i.find_all(["th", "td"]):
      attrName = j.get("data-stat")
      attrValue = j.contents

      if attrName == 'school_name':
        dataDict["school_name"] = attrValue[0].contents[0]
        dataDict["link"] = attrValue[0].get("href")
        if len(attrValue) > 1:
          dataDict["Tourney"] = True
        else:
          dataDict["Tourney"] = False
      elif attrName == 'ranker':
        pass
      elif len(attrValue) > 0:
        dataDict[attrName] = attrValue[0]

    schoolStats.append(dataDict)

  df = pd.DataFrame(schoolStats)
  return df


In [4]:
def getSeasonDefStats(year):
  response = requests.get("https://www.sports-reference.com/cbb/seasons/men/"+ str(year) + "-advanced-opponent-stats.html")
  time.sleep(2.1)
  soup = bs(response.text, "html.parser")
  bsProduct = soup.find_all("table", attrs={"class": "sortable stats_table"})
  bsTable = bsProduct[0]
  bsTableRows = bsTable.find_all("tr")

  schools = []
  for row in bsTableRows:
    if row.find("th", attrs={"class": "right"}):
      schools.append(row)


  schoolStats = []
  for i in schools:

    dataDict = {}
    for j in i.find_all(["th", "td"]):
      attrName = j.get("data-stat")
      attrValue = j.contents

      if attrName == 'school_name':
        dataDict["school_name"] = attrValue[0].contents[0]
        dataDict["link"] = attrValue[0].get("href")
        if len(attrValue) > 1:
          dataDict["Tourney"] = True
        else:
          dataDict["Tourney"] = False
      elif attrName == 'ranker':
        pass
      elif len(attrValue) > 0:
        dataDict[attrName] = attrValue[0]

    schoolStats.append(dataDict)

  df = pd.DataFrame(schoolStats)
  return df


In [5]:
def getSeasonStatsCSV(year):

  stats = pd.read_csv(str(year)+ "/SeasonStats.csv", index_col=0)
  return stats

def saveSeasonStats(year, df):
  try: 
    os.mkdir(str(year))
  except FileExistsError:
    pass
  df.to_csv(str(year) + "/SeasonStats.csv")


In [6]:
def getSeasonStats(year):

  try: 
    return getSeasonStatsCSV(year)
  except FileNotFoundError:
    pass 

  df1 = getSeasonOffStats(year)
  df2 = getSeasonDefStats(year)
  dfNew = df1.join(df2, rsuffix="DROPPY").filter(regex="^(?!.*DROPPY)")
  saveSeasonStats(year, dfNew)
  return dfNew

print(getSeasonStats(2024).to_string())

                   school_name                                                       link  Tourney   g  wins  losses  win_loss_pct    srs    sos  wins_conf  losses_conf  wins_home  losses_home  wins_visitor  losses_visitor   pts  opp_pts  pace  off_rtg  fta_per_fga_pct  fg3a_per_fga_pct  ts_pct  trb_pct  ast_pct  stl_pct  blk_pct  efg_pct  tov_pct  orb_pct  ft_rate  opp_pace  opp_off_rtg  opp_fta_per_fga_pct  opp_fg3a_per_fga_pct  opp_ts_pct  opp_trb_pct  opp_ast_pct  opp_stl_pct  opp_blk_pct  opp_efg_pct  opp_tov_pct  opp_orb_pct  opp_ft_rate
0            Abilene Christian               /cbb/schools/abilene-christian/men/2024.html    False  33    16      17         0.485  -3.47  -1.22       10.0         10.0          8            6             5               9  2393     2414  71.2    100.7            0.386             0.263   0.523     47.7     50.2     10.9      5.5    0.479     15.5     26.7    0.281      71.2        101.6                0.405                 0.317       0.545     

In [7]:
def getTeamScheduleCSV(link):

  name = link
  name  = name.replace("/", "|")
  year = link.split("/")[5][0:4]
  team = pd.read_csv(filepath_or_buffer=str(year) + "/" + name + ".csv", index_col=0)
  return team


In [8]:
def saveTeamSchedule(link, df):
  
  name = link
  name  = name.replace("/", "|")
  year = link.split("/")[5][0:4]
  try: 
    os.mkdir(str(year))
  except FileExistsError:
    pass
  df.to_csv(path_or_buf=str(year) + "/" + name + ".csv")
  


In [9]:
def getTeamSchedule(link):

  try:
    return getTeamScheduleCSV(link)
  except FileNotFoundError:
    pass

  scheduleLink = "https://www.sports-reference.com" + link[0:-5] + "-schedule.html"
  response = requests.get(scheduleLink)
  time.sleep(2.1)
  soup = bs(response.text, "html.parser")
  bsProduct = soup.find_all("table", attrs={"class": "sortable stats_table"})
  bsTable = bsProduct[1]
  bsTableRows = bsTable.find_all("tr")

  games = []
  for row in bsTableRows:

    if row.find("td", attrs={"class": "left"}):
        games.append(row)
        
  gameStats = []
  for game in games:
    
    dataDict = {}
    for j in game.find_all(["th", "td"]):
      
      attrName = j.get("data-stat")
      attrValue = j.contents

      if attrName == 'date_game' and j.find("a"):
        dataDict["boxScorelink"] = attrValue[0].get("href")
        dataDict["date"] = attrValue[0].contents[0]
      elif attrName == 'date_game':
        dataDict["date"] = attrValue
      elif attrName == 'opp_name' and j.find("a"):
        dataDict["oppLink"] = attrValue[0].get("href")
        dataDict["opp"] = attrValue[0].contents[0]
      elif attrName == 'opp_name':
        dataDict["opp"] = attrValue[0]
      elif attrName == 'conf_abbr' and j.find("a"):
        dataDict["confLink"] = attrValue[0].get("href")
        dataDict["conf"] = attrValue[0].contents[0]
      elif attrName == 'conf_abbr':
        dataDict["conf"] = numpy.nan
      elif attrName == 'g':
        pass
      elif len(attrValue) > 0:
        dataDict[attrName] = attrValue[0]

    dataDict["homeLink"] = link
    gameStats.append(dataDict)
    
  gameDf = pd.DataFrame(gameStats)
  saveTeamSchedule(link, gameDf)
  return gameDf


In [10]:
def getTourneyScheduleCSV(year):

  schedule = pd.read_csv(str(year) + "/SeasonSchedule.csv", index_col=0)
  return schedule

def saveTourneySchedule(year, df):
  try: 
    os.mkdir(str(year))
  except FileExistsError:
    pass
  df.to_csv(str(year) + "/SeasonSchedule.csv")
  


In [11]:
def getTourneySchedule(year):

  #try:
  #  return getTourneyScheduleCSV(year)
  #except FileNotFoundError:
  #  pass

  teams = getSeasonStats(year)
  teamsTourney = teams[teams["Tourney"] == True]
  teamsTourney = teamsTourney.reset_index()
  ncaaMatches = []
  for index, team in teamsTourney.iterrows():
    #print(team["school_name"])
    name = team["link"]
    name  = name.replace("/", "|")
    teamTourneyMatches = getTeamSchedule(team["link"])
    teamTourneyMatches = teamTourneyMatches[teamTourneyMatches["game_type"] == "NCAA"]
    ncaaMatches.append(teamTourneyMatches)

  ncaaMatches = pd.concat(ncaaMatches)
  ncaaMatches = ncaaMatches.drop_duplicates(subset='boxScorelink', keep="first")
  ncaaMatches = ncaaMatches.reset_index()
  ncaaMatches = ncaaMatches.drop(axis=1, columns="index")

  saveTourneySchedule(year, ncaaMatches)

  return ncaaMatches

print(getTourneySchedule(2018))

                                         boxScorelink               date  \
0     /cbb/boxscores/2018-03-15-21-virginia-tech.html  Thu, Mar 15, 2018   
1         /cbb/boxscores/2018-03-17-12-villanova.html  Sat, Mar 17, 2018   
2           /cbb/boxscores/2018-03-15-21-arizona.html  Thu, Mar 15, 2018   
3     /cbb/boxscores/2018-03-14-21-arizona-state.html  Wed, Mar 14, 2018   
4          /cbb/boxscores/2018-03-16-15-arkansas.html  Fri, Mar 16, 2018   
..                                                ...                ...   
62  /cbb/boxscores/2018-03-16-21-texas-christian.html  Fri, Mar 16, 2018   
63        /cbb/boxscores/2018-03-15-12-tennessee.html  Thu, Mar 15, 2018   
64           /cbb/boxscores/2018-03-16-19-xavier.html  Fri, Mar 16, 2018   
65        /cbb/boxscores/2018-03-25-14-villanova.html  Sun, Mar 25, 2018   
66        /cbb/boxscores/2018-03-23-19-villanova.html  Fri, Mar 23, 2018   

   time_game game_type game_location  \
0      9:20p      NCAA             N   
1     1

In [12]:
def getTeamData(link):

  year = link.split("/")[5][0:4]
  seasonStats = getSeasonStats(year)
  team = seasonStats[seasonStats["link"]==link].iloc[0]
  return pd.concat([team.iloc[7:9],team.iloc[17:]])


#/cbb/schools/villanova/men/2018.html  
#getTeamData("/cbb/schools/villanova/men/2018.html")

In [13]:
def getFinalTableCSV(year):

  df = pd.read_csv(filepath_or_buffer=str(year) + "/FinalTable.csv", index_col=0)
  return df

def saveFinalTable(year, df):

  try: 
    os.mkdir(str(year))
  except FileExistsError:
    pass
  df.to_csv(str(year) + "/FinalTable.csv")
    

In [14]:
def getFinalTable(year):

  try:
    #return getFinalTableCSV(year)
    pass
  except FileNotFoundError:
    pass

  schedule = getTourneySchedule(year)
  
  schedule = schedule.drop(columns=["boxScorelink",
                                    "date",
                                    "game_type",
                                    "game_location",
                                    "opp",
                                    "confLink",
                                    "conf",
                                    "srs",
                                    "wins",
                                    "losses",
                                    "game_streak",
                                    "arena",
                                    "overtimes",
                                    "game_result"])
  if "time_game" in schedule.columns:
    schedule = schedule.drop(columns=["time_game",])

  finalTable = []

  for index, row in schedule.iterrows():

    team1 = getTeamData(row["homeLink"])
    team2 = getTeamData(row["oppLink"])
    team1 = team1.add_prefix("team1_")
    team2 = team2.add_prefix("team2_")
    teamsStats = pd.concat([row, team1, team2])
    finalTable.append(teamsStats)

  finalTable = pd.DataFrame(finalTable)
  finalTable["pts_diff"] = finalTable["pts"] - schedule["opp_pts"]
  saveFinalTable(year, finalTable)
  return finalTable

print(getFinalTable(2019).to_string())



    pts  opp_pts                                           homeLink                                           oppLink  team1_srs  team1_sos  team1_pace  team1_off_rtg  team1_fta_per_fga_pct  team1_fg3a_per_fga_pct  team1_ts_pct  team1_trb_pct  team1_ast_pct  team1_stl_pct  team1_blk_pct  team1_efg_pct  team1_tov_pct  team1_orb_pct  team1_ft_rate  team1_opp_pace  team1_opp_off_rtg  team1_opp_fta_per_fga_pct  team1_opp_fg3a_per_fga_pct  team1_opp_ts_pct  team1_opp_trb_pct  team1_opp_ast_pct  team1_opp_stl_pct  team1_opp_blk_pct  team1_opp_efg_pct  team1_opp_tov_pct  team1_opp_orb_pct  team1_opp_ft_rate  team2_srs  team2_sos  team2_pace  team2_off_rtg  team2_fta_per_fga_pct  team2_fg3a_per_fga_pct  team2_ts_pct  team2_trb_pct  team2_ast_pct  team2_stl_pct  team2_blk_pct  team2_efg_pct  team2_tov_pct  team2_orb_pct  team2_ft_rate  team2_opp_pace  team2_opp_off_rtg  team2_opp_fta_per_fga_pct  team2_opp_fg3a_per_fga_pct  team2_opp_ts_pct  team2_opp_trb_pct  team2_opp_ast_pct  team2_opp_stl_p

In [15]:
#compile final table with all years
def getFinalTableAllYears():
  finalTable = []

  for i in range(2010, 2024):

    if i != 2020:
      finalTable.append(getFinalTable(i))

  df = pd.concat(finalTable)
  df = df.reset_index()
  df = df.drop(axis=1, columns="index")

  df = df.drop(columns= ["opp_pts", "pts", "oppLink", "homeLink"])
  return df

df = getFinalTableAllYears()
df.to_csv("FinalTable.csv")
print(df.to_string())

     team1_srs  team1_sos  team1_pace  team1_off_rtg  team1_fta_per_fga_pct  team1_fg3a_per_fga_pct  team1_ts_pct  team1_trb_pct  team1_ast_pct  team1_stl_pct  team1_blk_pct  team1_efg_pct  team1_tov_pct  team1_orb_pct  team1_ft_rate  team1_opp_pace  team1_opp_off_rtg  team1_opp_fta_per_fga_pct  team1_opp_fg3a_per_fga_pct  team1_opp_ts_pct  team1_opp_trb_pct  team1_opp_ast_pct  team1_opp_stl_pct  team1_opp_blk_pct  team1_opp_efg_pct  team1_opp_tov_pct  team1_opp_orb_pct  team1_opp_ft_rate  team2_srs  team2_sos  team2_pace  team2_off_rtg  team2_fta_per_fga_pct  team2_fg3a_per_fga_pct  team2_ts_pct  team2_trb_pct  team2_ast_pct  team2_stl_pct  team2_blk_pct  team2_efg_pct  team2_tov_pct  team2_orb_pct  team2_ft_rate  team2_opp_pace  team2_opp_off_rtg  team2_opp_fta_per_fga_pct  team2_opp_fg3a_per_fga_pct  team2_opp_ts_pct  team2_opp_trb_pct  team2_opp_ast_pct  team2_opp_stl_pct  team2_opp_blk_pct  team2_opp_efg_pct  team2_opp_tov_pct  team2_opp_orb_pct  team2_opp_ft_rate  pts_diff
0     

In [16]:
df

Unnamed: 0,team1_srs,team1_sos,team1_pace,team1_off_rtg,team1_fta_per_fga_pct,team1_fg3a_per_fga_pct,team1_ts_pct,team1_trb_pct,team1_ast_pct,team1_stl_pct,...,team2_opp_ts_pct,team2_opp_trb_pct,team2_opp_ast_pct,team2_opp_stl_pct,team2_opp_blk_pct,team2_opp_efg_pct,team2_opp_tov_pct,team2_opp_orb_pct,team2_opp_ft_rate,pts_diff
0,-8.29,-7.68,67.4,93.1,0.484,0.262,0.495,54.2,58.1,9.8,...,0.489,49.0,56.4,9.3,8.5,0.449,19.7,30.2,0.260,17.0
1,-8.29,-7.68,67.4,93.1,0.484,0.262,0.495,54.2,58.1,9.8,...,0.477,45.6,49.0,7.9,9.7,0.437,18.2,32.1,0.234,-29.0
2,17.97,8.03,67.2,113.7,0.358,0.325,0.580,54.4,51.2,9.7,...,0.523,47.0,68.9,9.0,7.9,0.486,18.9,29.0,0.274,9.0
3,17.97,8.03,67.2,113.7,0.358,0.325,0.580,54.4,51.2,9.7,...,0.486,44.2,53.5,10.2,8.4,0.455,19.3,30.2,0.198,8.0
4,17.97,8.03,67.2,113.7,0.358,0.325,0.580,54.4,51.2,9.7,...,0.489,46.8,38.8,8.4,6.3,0.459,13.8,30.9,0.199,23.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863,10.59,5.20,68.1,110.4,0.329,0.434,0.562,51.1,54.4,7.6,...,0.532,46.1,53.6,9.7,9.2,0.503,14.4,25.0,0.194,-11.0
864,17.41,7.18,63.8,109.6,0.320,0.369,0.550,55.3,49.1,10.6,...,0.504,49.9,51.3,9.6,9.9,0.469,20.8,30.0,0.219,12.0
865,-7.40,-6.81,73.8,103.2,0.378,0.399,0.540,49.2,51.9,9.3,...,0.548,47.4,57.7,8.9,8.4,0.520,18.5,27.8,0.244,-4.0
866,20.56,10.43,70.1,110.1,0.305,0.345,0.565,49.8,56.7,11.1,...,0.532,46.1,53.6,9.7,9.2,0.503,14.4,25.0,0.194,12.0


In [65]:
df = pd.read_csv("FinalTable.csv", index_col=0)
col = df.columns
trainCol = col[0:-1]

colMax = {}
colMin = {}
def normalise(series):
    return (series - series.min())/(series.max() - series.min())

for column in trainCol:
    colMax[column] = df[column].max()
    colMin[column] = df[column].min()
    df[column] = normalise(df[column])

#print(df.to_string())
'''data = {
    "training data": np.array(df[trainCol]),
    "labels": np.array(df[col[-1]])
}'''
dropList = []
for index, row in df.iterrows():

    for col in df.columns:

        if not np.isfinite(row[col]):
            dropList.append(index)

df = df.drop(axis=0, index=dropList)

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(X[0][31] == 0)
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,
                                               random_state=42)



tf.random.set_seed(43)
# Standardize features
model=keras.Sequential([
    keras.layers.Dense(100),
    keras.layers.Dense(200),
    keras.layers.Dense(210),
    keras.layers.Dense(1)
])

opt = keras.optimizers.Adam(learning_rate=0.01)

loss_fn = keras.losses.MeanSquaredError(reduction="sum")
model.compile(optimizer=opt,
              loss=loss_fn,
              metrics=['accuracy'])

model.fit(X_train,y_train,epochs=20)#check the results with insurnce model test data
model.evaluate(X_test, y_test, verbose=2)

predictions = model(X_test[:10]).numpy()
print(predictions)
print(y_test[:10])


True
Epoch 1/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0090 - loss: 7699.1816       
Epoch 2/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 998us/step - accuracy: 0.0088 - loss: 5516.2144   
Epoch 3/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0069 - loss: 4556.9531     
Epoch 4/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0086 - loss: 4021.5532     
Epoch 5/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0080 - loss: 3818.2510     
Epoch 6/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0085 - loss: 3821.4685     
Epoch 7/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0072 - loss: 3813.6367     
Epoch 8/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0054 - loss: 

In [53]:
correct = 0
total = 0
for i in range(0, len(X_test)):
    pred = numpy.sign(model(np.array([X_test[i]])).numpy())[0]
    print(str(model(np.array([X_test[i]])).numpy()[0][0]) + " --- " + str(y_test[i]))
    if pred == numpy.sign([y_test[i]])[0]:
        
        correct += 1

    total += 1

print(correct/total)
print(total)

-4.2309246 --- -8.0
-27.981812 --- -36.0
-0.15205717 --- 1.0
-5.944214 --- -32.0
-15.072867 --- -7.0
2.9125197 --- 17.0
2.6061807 --- 6.0
5.1427636 --- 14.0
6.6380253 --- -7.0
2.4490345 --- -9.0
10.3095875 --- 2.0
-4.860182 --- -4.0
0.28997776 --- 1.0
4.58279 --- -4.0
23.287489 --- 13.0
-7.105894 --- -6.0
-0.8654245 --- 1.0
-2.507753 --- -22.0
8.375305 --- 20.0
12.857143 --- 19.0
19.52808 --- 8.0
-6.166216 --- 2.0
6.5859504 --- 3.0
0.023679381 --- 7.0
12.168804 --- 2.0
5.574097 --- 16.0
-1.0692512 --- -4.0
-10.762886 --- -17.0
-4.1977353 --- 3.0
-1.035471 --- 11.0
11.492343 --- 10.0
-19.868158 --- -20.0
-8.8652725 --- 2.0
0.23588693 --- -7.0
-4.7815523 --- -14.0
26.327362 --- 12.0
-3.4986513 --- 10.0
10.978747 --- 16.0
0.13791233 --- -29.0
6.347298 --- 7.0
21.62588 --- 22.0
2.730916 --- 25.0
5.230283 --- -11.0
3.7518384 --- -27.0
11.225267 --- 3.0
-2.1199646 --- -12.0
-18.922447 --- -20.0
-2.966844 --- -17.0
6.589069 --- 14.0
2.1953611 --- 5.0
-4.7411275 --- -4.0
-8.352843 --- -17.0
5.

In [19]:
teamIDs = pd.read_csv("data/MTeams.csv", index_col=0)
teamIDs
seeds = pd.read_csv("data/MNCAATourneySeeds.csv" , index_col=0)
seeds = seeds.iloc[-68:]
name = []
for index, row in seeds.iterrows():
  name.append(teamIDs.loc[row["TeamID"]]["TeamName"])

seeds["TeamName"] = name
seeds.to_csv("TeamIDs.csv")

In [96]:
slots = pd.read_csv("data/MNCAATourneySlots.csv")
Teams = pd.read_csv("TeamIDs.csv")
slots = slots[slots["Season"] == 2024]
slots = slots.iloc[:-4]
slots["Winner"] = np.NAN
Teams = Teams.set_index("Seed")
slots = slots.set_index("Slot")
stats = pd.read_csv("2024/SeasonStats.csv", index_col =0)
stats = stats.set_index("link")
print(slots)

for i in range(0,8):

  for index, row in slots.iterrows():
    #print(row["Winner"])
    if pd.isnull(row["Winner"]):

      if len(row["StrongSeed"]) == 3 and len(index) == 4:
        f = row["StrongSeed"]
        team1 = Teams.loc[row["StrongSeed"]]
        team2 = Teams.loc[row["WeakSeed"]]
        team1 = getTeamData(team1["Link"])
        team2 = getTeamData(team2["Link"])
        teams = []
        if random.randint(0,1) == 0:
          team1 = team1.add_prefix("team1_")
          team2 = team2.add_prefix("team2_")
          teams = [row["StrongSeed"], row["WeakSeed"]]
          comb = pd.concat([team1,  team2])
        else:
          team1 = team1.add_prefix("team2_")
          team2 = team2.add_prefix("team1_")
          teams = [row["WeakSeed"], row["StrongSeed"]]
          comb = pd.concat([team2,  team1])

        for column in trainCol:
          comb[column] = (comb[column] - colMin[column])/(colMax[column] - colMin[column])
        
        result = (model(np.array([comb])).numpy())[0][0]
        if result >= 0:
          slots.at[index, "Winner"] = teams[0]
        else:
          slots.at[index, "Winner"] = teams[1]

      elif not (pd.isnull(slots.loc[row["StrongSeed"]]["Winner"]) or pd.isnull(slots.loc[row["WeakSeed"]]["Winner"])):

        team1 = Teams.loc[slots.loc[row["StrongSeed"]]["Winner"]]
        team2 = Teams.loc[slots.loc[row["WeakSeed"]]["Winner"]]
        team1 = getTeamData(team1["Link"])
        team2 = getTeamData(team2["Link"])
        teams = []
        if random.randint(0,1) == 0:
          team1 = team1.add_prefix("team1_")
          team2 = team2.add_prefix("team2_")
          teams = [slots.loc[row["StrongSeed"]]["Winner"], slots.loc[row["WeakSeed"]]["Winner"]]
          comb = pd.concat([team1,  team2])
        else:
          team1 = team1.add_prefix("team2_")
          team2 = team2.add_prefix("team1_")
          teams = [slots.loc[row["WeakSeed"]]["Winner"], slots.loc[row["StrongSeed"]]["Winner"]]
          comb = pd.concat([team2,  team1])

        for column in trainCol:
          comb[column] = (comb[column] - colMin[column])/(colMax[column] - colMin[column])
        
        result = (model(np.array([comb])).numpy())[0][0]
        if result >= 0:
          slots.at[index, "Winner"] = teams[0]
        else:
          slots.at[index, "Winner"] = teams[1]


print(slots.to_string)
slots.to_csv("submission.csv")
      
    

      Season StrongSeed WeakSeed  Winner
Slot                                    
R1W1    2024        W01      W16     NaN
R1W2    2024        W02      W15     NaN
R1W3    2024        W03      W14     NaN
R1W4    2024        W04      W13     NaN
R1W5    2024        W05      W12     NaN
...      ...        ...      ...     ...
R4Y1    2024       R3Y1     R3Y2     NaN
R4Z1    2024       R3Z1     R3Z2     NaN
R5WX    2024       R4W1     R4X1     NaN
R5YZ    2024       R4Y1     R4Z1     NaN
R6CH    2024       R5WX     R5YZ     NaN

[63 rows x 4 columns]


  slots.at[index, "Winner"] = teams[0]


<bound method DataFrame.to_string of       Season StrongSeed WeakSeed Winner
Slot                                   
R1W1    2024        W01      W16    W01
R1W2    2024        W02      W15    W02
R1W3    2024        W03      W14    W03
R1W4    2024        W04      W13    W04
R1W5    2024        W05      W12    W05
...      ...        ...      ...    ...
R4Y1    2024       R3Y1     R3Y2    Y02
R4Z1    2024       R3Z1     R3Z2    Z01
R5WX    2024       R4W1     R4X1    W01
R5YZ    2024       R4Y1     R4Z1    Z01
R6CH    2024       R5WX     R5YZ    Z01

[63 rows x 4 columns]>
