<a href="https://www.kaggle.com/code/miltiadesgeneral/marchmadness-xgboostkfolds?scriptVersionId=124989783" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import glob
import pandas as pd
import numpy as np
import math
import random
import matplotlib.pyplot as plt
import re

from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.linear_model import LinearRegression, ElasticNet
import seaborn as sns

%matplotlib inline

In [None]:
class Config:
    """Locate the competition CSV files and index them by bare file name."""

    # Every CSV file found in the competition input directory.
    paths = glob.glob("/kaggle/input/march-machine-learning-mania-2023/*.csv")
    # Maps "<file name without the 4-character '.csv' suffix>" -> full path.
    # A comprehension is used so that no loop variable ends up as a class attribute.
    paths_dct = {p.split("/")[-1][:-4]: p for p in paths}

    # The available dataset names, in the order glob returned the files.
    paths_keys = list(paths_dct)

In [None]:
# Module-level shorthand for the dataset-name -> file-path lookup built by Config.
paths = Config.paths_dct

## Constructing the Dataframe
- Get the team seeds
- Get the team rankings
- Engineer regular season results for a team during a season
- Engineer tourney results for a team during a season

### Include Features
- W/L percentage
- Point Differential
- Tournament Seed

### Team Seeds

In [None]:
# Paths of the men's and women's tournament seed files.
Mseeds = Config.paths_dct["MNCAATourneySeeds"]
Wseeds = Config.paths_dct["WNCAATourneySeeds"]

# Read both files and stack them into one frame with a fresh 0..n-1 index.
_seed_frames = [pd.read_csv(seed_path) for seed_path in (Mseeds, Wseeds)]
seeds_df = pd.concat(_seed_frames, ignore_index=True)

seeds_df.head()

### Regular Season Results

In [None]:
# Build a single frame with every detailed game result (men and women):
# regular-season games first, tournament games appended below.
MRegularSeason = Config.paths_dct["MRegularSeasonDetailedResults"]
WRegularSeason = Config.paths_dct["WRegularSeasonDetailedResults"]
_season_frames = [pd.read_csv(MRegularSeason), pd.read_csv(WRegularSeason)]
regular_season_df = pd.concat(_season_frames, ignore_index=True)
regular_season_df = regular_season_df.drop(["WLoc"], axis=1)

# Regular-season rows are flagged with Tourney = 0.
regular_season_df["Tourney"] = 0

# Tournament results, loaded the same way (NumOT is dropped here as well).
MTourney = Config.paths_dct["MNCAATourneyDetailedResults"]
WTourney = Config.paths_dct["WNCAATourneyDetailedResults"]
_tourney_frames = [pd.read_csv(MTourney), pd.read_csv(WTourney)]
tourney_df = pd.concat(_tourney_frames, ignore_index=True)
tourney_df = tourney_df.drop(["WLoc", "NumOT"], axis=1)

# tourney_df = tourney_df[tourney_df["Season"] < 2017]

# Tournament rows are flagged with Tourney = 1.
tourney_df["Tourney"] = 1

# From here on regular_season_df holds both kinds of games.
regular_season_df = pd.concat([regular_season_df, tourney_df], ignore_index=True)

### Flatten the Dataframe so each team has its own row 

In [None]:
# Separate and reconnect the winning and losing teams.
# The rename maps strip the leading "W" / "L" so both views end up with the same column names.
rename_winner_cols = {col: col[1:] for col in regular_season_df.columns if col[0] == "W"}
rename_loser_cols = {col: col[1:] for col in regular_season_df.columns if col[0] == "L"}

teams_df = pd.concat([
    pd.read_csv(paths["MTeams"]),
    pd.read_csv(paths["WTeams"])
], ignore_index=True).drop(["FirstD1Season", "LastD1Season"], axis=1)


# Winner's view of every game: the opponent is the losing side, so everything
# "allowed" comes from the L-prefixed columns.
_winning_df = regular_season_df.copy()
_winning_df["PtDiff"] = _winning_df["WScore"] - _winning_df["LScore"]
_winning_df["PtsAllowed"] = _winning_df["LScore"]
_winning_df["FGM/FGA_Allowed"] = _winning_df["LFGM"] / _winning_df["LFGA"]
_winning_df["Win"] = 1
_winning_df = _winning_df.rename(rename_winner_cols, axis=1).drop(rename_loser_cols.keys(), axis=1)

winning_teams_df = pd.merge(
    left=teams_df,
    right=_winning_df,
    on="TeamID"
)

# Loser's view of every game: the opponent is the winning side, so the points and the
# field-goal percentage allowed must both come from the W-prefixed columns.
_losing_df = regular_season_df.copy()
_losing_df["PtDiff"] = _losing_df["LScore"] - _losing_df["WScore"]
_losing_df["PtsAllowed"] = _losing_df["WScore"]
_losing_df["FGM/FGA_Allowed"] = _losing_df["WFGM"] / _losing_df["WFGA"]
_losing_df["Win"] = 0
_losing_df = _losing_df.rename(rename_loser_cols, axis=1).drop(rename_winner_cols.keys(), axis=1)

losing_teams_df = pd.merge(
    left=teams_df,
    right=_losing_df,
    on="TeamID"
)

# One row per (team, game), ordered chronologically within each team's season.
teams_df = pd.concat([
    winning_teams_df,
    losing_teams_df
], ignore_index=True).sort_values(["Season", "TeamID", "DayNum"]).reset_index(drop=True)

### Calculate cumulative season statistics for each team going into each game

In [None]:
# Save win, tourney and score column to reattach
# (they are overwritten by running totals below).
win_col = teams_df["Win"]
score_col = teams_df["Score"]
tourney_col = teams_df["Tourney"]

cumulative_cols = ['Win', 'Score', 'PtDiff', 'FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR',
                   'Ast', 'TO', 'Stl', 'Blk', 'PF', 'PtsAllowed', 'FGM/FGA_Allowed']

# Running totals within each (Season, TeamID) group, in the frame's current row order.
cum_sum = teams_df.groupby(["Season", "TeamID"])[cumulative_cols].cumsum()
# Explicit copy: the column assignments below must write to an independent frame,
# not to a slice of the previous one (avoids pandas' SettingWithCopyWarning).
teams_df = teams_df[["TeamID", "TeamName", "Season", "DayNum"]].copy()
teams_df[cumulative_cols] = cum_sum
teams_df = teams_df.rename({ "Win": "CumWins", "Score": "CumScore" }, axis=1)

# reattach cols
teams_df["Win"] = win_col
teams_df["FinalScore"] = score_col
teams_df["Tourney"] = tourney_col

# Number of games played so far (1-based) within each (Season, TeamID) group.
count = teams_df.groupby(["Season", "TeamID"]).cumcount() + 1
teams_df["count"] = count

def calculatePerGameStatistics(df, category):
    """Add a per-game average of ``category`` to ``df`` in place.

    The column ``df[category]`` is divided by ``df["count"]`` and stored as
    ``"<category>/g"``. For the categories "CumScore" and "CumWins" the new
    column is then renamed to "Pts/g" and "WPct" respectively.

    Returns None; ``df`` itself is modified.
    """
    friendly_names = {"CumScore": "Pts/g", "CumWins": "WPct"}
    per_game_name = f"{category}/g"
    df[per_game_name] = df[category] / df["count"]
    if category in friendly_names:
        df.rename({per_game_name: friendly_names[category]}, axis=1, inplace=True)

# Create per game statistics for the following categories
# (each call adds a "<category>/g" column to teams_df; the CumWins and CumScore
# averages are stored as "WPct" and "Pts/g" instead).
for category in ["CumWins", "PtDiff", "PtsAllowed", "CumScore", "OR", "DR", "TO", "Stl", "Blk", "Ast", "PF", "FGM/FGA_Allowed"]:
    calculatePerGameStatistics(teams_df, category)


def calculatePercentageStatistics(df, cat1, cat2):
    """Replace two columns of ``df`` with their ratio, in place.

    Adds the column ``"<cat1>/<cat2>"`` holding ``df[cat1] / df[cat2]`` and
    removes the two source columns from ``df``.

    Returns None; ``df`` itself is modified.
    """
    df[f"{cat1}/{cat2}"] = df[cat1] / df[cat2]
    # inplace=True so the caller's frame really loses the source columns;
    # assigning the result of drop() to the local name would leave the caller untouched.
    df.drop([cat1, cat2], axis=1, inplace=True)

# Create percentage statistics (made / attempted) for the following category pairs
for cat1, cat2 in [("FGM", "FGA"), ("FGM3", "FGA3"), ("FTM", "FTA")]:
    calculatePercentageStatistics(teams_df, cat1, cat2)

def treatSeed(seed):
    """Return the digits contained in the seed label ``seed`` as an int (e.g. 'X17' -> 17)."""
    # Splitting on every non-digit character leaves only the digit runs.
    digit_runs = re.split('[^0-9]', seed)
    return int("".join(digit_runs))

# Attach the seed label of each (Season, TeamID); rows without a match keep NaN for now.
teams_df = teams_df.merge(seeds_df, how='left', on=["Season", "TeamID"])

# Rows without a seed get the placeholder label 'X17'; every label is then reduced to its number.
teams_df["Seed"] = teams_df["Seed"].fillna('X17').apply(treatSeed)
teams_df = teams_df.drop_duplicates()

In [None]:
# Save the season averages for each team
# seeds = teams_df["Seed"]
# cumulative_cols = ['Win', "WPct", "PtDiff/g", "Pts/g", 'PtDiff', 'FGM/FGA', 'FGM3/FGA3', 'FTM/FTA', 'OR', 'DR', 
#                    'Ast', 'TO', 'Stl', 'Blk', 'PF', 'PtAllowed/g', 'FGM/FGA_Allowed/g']
# season_avg = teams_df.groupby(["Season", "TeamID"])[cumulative_cols].mean()
# season_avg = season_avg.reset_index()
# season_avg["Seed"] = seeds

In [None]:
# Shift the game-level columns up by one row within each (Season, TeamID) group so that every
# row pairs the statistics accumulated *before* a game with that game's day, result and
# tourney flag. Shifting DayNum alone would leave Win / FinalScore / Tourney describing the
# previous game, which mislabels any later selection such as "tournament games only".
game_level_cols = ["DayNum", "Win", "FinalScore", "Tourney"]
shifted_games = teams_df.groupby(["Season", "TeamID"])[game_level_cols].shift(-1)
shifted_day = shifted_games["DayNum"]
teams_df[game_level_cols] = shifted_games
# The last row of every group has nothing to shift in (NaN) and is removed here,
# together with any other row that contains a missing value.
teams_df = teams_df.dropna().copy()
# The shift produced floats; plain column assignment (rather than .loc[:, ...]) replaces the
# columns outright, so the integer dtype is actually applied.
teams_df[game_level_cols] = teams_df[game_level_cols].astype("int")

In [None]:
# Rank the candidate columns by how strongly they correlate with the Win column
corr_cols = ['CumWins', 'CumScore', 'PtDiff',
       'FGM/FGA', 'FGM3/FGA3', 'FTM/FTA', 'OR', 'DR', 'Ast', 'TO',
       'Stl', 'Blk', 'PF', 'Win', 'FinalScore', 'count', 'WPct',
       'PtDiff/g', 'Pts/g']

# Pairwise correlation matrix, reduced to the "Win" column, strongest positive first.
win_correlations = teams_df[corr_cols].corr()["Win"]
win_correlations.sort_values(ascending=False)

In [None]:
def findComplimentaryIndices(arr, end, start=0):
    """Return the integers in ``start..end`` (both inclusive) that are not in ``arr``.

    The result is a list in ascending order, so repeated calls with the same
    arguments always give the same list.
    """
    excluded = set(arr)
    return [value for value in range(start, end + 1) if value not in excluded]

def swapIDs(df, col1, col2, col3, col4):
    """Randomly reassign the winner/loser columns of ``df`` to "A"/"B" columns, in place.

    For a randomly chosen half of the rows (``len(df) // 2`` of them) the values
    of ``col1`` and ``col3`` go to the "A" columns and the values of ``col2``
    and ``col4`` to the "B" columns; for the remaining rows the roles are
    swapped. The same rows are swapped for both column pairs, so values that
    belong together stay together.

    The new columns are named ``"A" + col1[1:]``, ``"B" + col2[1:]``,
    ``"A" + col3[1:]`` and ``"B" + col4[1:]`` (the first character of each
    original name is replaced), and the four original columns are dropped.

    Rows are addressed by position, so any index is accepted. The choice is
    made with the ``random`` module and can be made reproducible with
    ``random.seed``.

    Returns None; ``df`` itself is modified.
    """
    n_rows = len(df)

    # Every row, including the last one, is a candidate for keeping the winner on side A.
    # random.sample draws without replacement in a single call.
    chosen = np.array(random.sample(range(n_rows), n_rows // 2), dtype=int)
    winner_to_a = np.zeros(n_rows, dtype=bool)
    winner_to_a[chosen] = True

    for win_col, lose_col in ((col1, col2), (col3, col4)):
        win_vals = df[win_col].to_numpy()
        lose_vals = df[lose_col].to_numpy()
        df[f"A{win_col[1:]}"] = np.where(winner_to_a, win_vals, lose_vals)
        df[f"B{lose_col[1:]}"] = np.where(winner_to_a, lose_vals, win_vals)

    df.drop([col1, col2, col3, col4], axis=1, inplace=True)

### Used on the first run to build the full_games with randomized A and B teams
* After the first run, load the saved .csv file instead to skip this time-consuming step

In [None]:
# full_games_df = pd.concat([
#     pd.read_csv(Config.paths_dct["MRegularSeasonCompactResults"]),
#     pd.read_csv(Config.paths_dct["WRegularSeasonCompactResults"]),
#     pd.read_csv(Config.paths_dct["MNCAATourneyCompactResults"]),
#     pd.read_csv(Config.paths_dct["WNCAATourneyCompactResults"])
# ], ignore_index=True)

# full_games_df = full_games_df.drop(["WLoc", "NumOT"], axis=1)

In [None]:
# swapIDs(full_games_df, "WTeamID", "LTeamID", "WScore", "LScore")
# full_games_df.to_csv("full_games_df.csv", index=False)

In [None]:
# Load the pre-built games file and keep only the seasons from 2003 onwards.
full_games_df = pd.read_csv("/kaggle/input/full-games-with-score-helper-file-mm-2023-3-15-23/full_games_df (1).csv")
_from_2003 = full_games_df["Season"] >= 2003
full_games_df = full_games_df[_from_2003].reset_index(drop=True)

In [None]:
# Columns that keep their name when the team statistics are prefixed with "A" / "B".
_unprefixed = ("Season", "DayNum", "Score")

rename_a_map = {col: "A" + col for col in teams_df.columns if col not in _unprefixed}
rename_a_map["Score"] = "ACumPts"

rename_b_map = {col: "B" + col for col in teams_df.columns if col not in _unprefixed}
rename_b_map["Score"] = "BCumPts"

# Attach the statistics of team A, then of team B, to every game. Each pass joins on
# (team id, Season, DayNum), removes the helper columns and prefixes what was added.
train_df = full_games_df
for _team_key, _prefix_map in (("ATeamID", rename_a_map), ("BTeamID", rename_b_map)):
    train_df = (
        train_df.merge(
            teams_df,
            how="left",
            left_on=[_team_key, "Season", "DayNum"],
            right_on=["TeamID", "Season", "DayNum"],
        )
        .drop(["count", "TeamID", "TeamName"], axis=1)
        .rename(_prefix_map, axis=1)
    )

# Discard games for which either side has missing values.
train_df = train_df.dropna()

In [None]:
# Create a win column based on the A and B scores: 1 when team A outscored team B, else 0.
# A vectorised comparison gives the same labels as a row-wise apply without the per-row overhead.
train_df["Win"] = (train_df["AScore"] > train_df["BScore"]).astype(int)
# Signed scoring margin from team A's point of view.
train_df["ScoreDiff"] = train_df["AScore"] - train_df["BScore"]

In [None]:
def findDiffCols(df, a_cols, b_cols):
    """
    Creates a dataframe with columns of difference from input columns
    Columns X, Y becomes one column X - Y

    ``a_cols[i]`` is paired with ``b_cols[i]``. Each pair is replaced by one
    column named after the A column with its first character removed and
    "-diff" appended (e.g. "ASeed", "BSeed" -> "Seed-diff").

    Parameters:
        df: input frame; it is not modified.
        a_cols: names of the minuend columns.
        b_cols: names of the subtrahend columns, same length as ``a_cols``.

    Returns:
        (new_df, features): a copy of ``df`` without the source columns and with the
        difference columns appended, and the list of new column names in order.

    Raises:
        ValueError: if ``a_cols`` and ``b_cols`` differ in length.
    """
    if len(a_cols) != len(b_cols):
        raise ValueError(
            f"a_cols and b_cols must have the same length, got {len(a_cols)} and {len(b_cols)}"
        )

    df = df.copy()
    features = []
    for col_a, col_b in zip(a_cols, b_cols):
        new_col = f'{col_a[1:]}-diff'
        features.append(new_col)
        df[new_col] = df[col_a] - df[col_b]

    # Remove all source columns in one pass instead of re-copying the frame for every pair.
    df = df.drop([*a_cols, *b_cols], axis=1)
    return df, features

In [None]:
# Cast both seed columns to int.
for _seed_col in ("ASeed", "BSeed"):
    train_df[_seed_col] = train_df[_seed_col].astype(int)

In [None]:
# Statistic names without the team prefix. The list below holds all "A" columns
# followed by the matching "B" columns in the same order, so its two halves line up pairwise.
_stat_names = ['WPct', 'PtDiff/g', 'Pts/g', 'CumScore',
               'FGM/FGA', 'FGM3/FGA3', 'FTM/FTA', 'OR/g', 'DR/g', 'Ast/g',
               'TO/g', 'Stl/g', 'Blk/g', 'PF/g', 'PtsAllowed/g', 'FGM/FGA_Allowed/g', 'Seed']

features_to_change = [f"A{stat}" for stat in _stat_names] + [f"B{stat}" for stat in _stat_names]

In [None]:
# Split features_to_change down the middle (A columns first, B columns second) and
# replace every A/B pair of train_df with its difference column.
_half = len(features_to_change) // 2
_a_features = features_to_change[:_half]
_b_features = features_to_change[_half:]
df, features = findDiffCols(train_df, _a_features, _b_features)

### Build and fit the machine learning model

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import xgboost as xgb

In [None]:
# Cast the two tourney flags to int.
_flag_cols = ["ATourney", "BTourney"]
df[_flag_cols] = df[_flag_cols].astype(int)

# Test set: rows from season 2017 onwards whose ATourney flag is 1.
_is_holdout = (df["Season"] >= 2017) & (df["ATourney"] == 1)
test_df = df[_is_holdout].reset_index(drop=True)
test_df

In [None]:
# Training set: every row before season 2017, plus later rows whose ATourney flag is 0.
# Both conditions are evaluated on df itself so the mask always shares df's index
# (a mask taken from another frame stops lining up once df has been re-indexed).
df = df[(df["Season"] < 2017) | (df["ATourney"] == 0)].reset_index(drop=True)
df

In [None]:
scaler = StandardScaler()
X_train, y_train, X_test, y_test = df[features].values, df["Win"], test_df[features].values, test_df["Win"]
# Learn mean and standard deviation from the training rows only and reuse them for the test rows;
# re-fitting on the test set would put the two sets on different scales.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# #     for season in seasons[1:]:
#     for fold in range(folds):
# #         print(f'\n Validation on season: {season}')
#         print(f'\n Validation on fold: {fold + 1}')
        
        
#         # For random folds
#         val_size = round(len(df) * 0.2)
#         val_indices = random.choices(range(len(df)), k=val_size)
#         train_indices = list(set(range(0, len(df))) - set(val_indices))
#         df_train = df.iloc[train_indices]
#         df_val = df.iloc[val_indices]
        
#         # For seasonal folds
# #         df_train = df[df["Season"] != season]
# #         df_val = df[df["Season"] == season]   

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def _min_max_scale(values):
    """Rescale ``values`` linearly onto [0, 1]; a constant array maps to 0.5 instead of NaN."""
    values = np.asarray(values, dtype=float)
    low, high = values.min(), values.max()
    if high == low:
        return np.full(values.shape, 0.5)
    return (values - low) / (high - low)


def KFolds(df,  df_test=None, df_submission=None, folds=None, mode='reg'):
    """Train one XGBoost model per random 80/20 split of ``df`` and collect its predictions.

    Each "fold" is an independent random hold-out: 20% of the rows (drawn without
    replacement) are used for validation, the rest for training. The feature
    columns are taken from the module-level ``features`` list and standardised
    with a scaler fitted on the training rows of that fold only; the same
    scaler is applied to the validation, test and submission rows.

    Parameters:
        df: frame with the ``features`` columns, a "Win" column and, for
            ``mode='reg'``, a "ScoreDiff" column.
        df_test: optional frame with the ``features`` columns and "Win";
            used to compute an accuracy per fold.
        df_submission: optional frame with the ``features`` columns to predict.
        folds: number of random splits; ``None`` means 5.
        mode: 'reg' fits an ``XGBRegressor`` on "ScoreDiff"; any other value
            fits an ``XGBClassifier`` on "Win".

    Returns:
        (pred_tests, best_index, pred_subs, best_acc_index)
        - pred_tests: per-fold predictions for ``df_test`` (min-max scaled
          regression output in 'reg' mode, class labels otherwise); empty
          when ``df_test`` is None.
        - best_index: fold with the lowest validation mean squared error
          between "Win" and the min-max scaled validation prediction.
        - pred_subs: per-fold predictions for ``df_submission``, same form as
          pred_tests; empty when ``df_submission`` is None.
        - best_acc_index: fold with the highest accuracy on ``df_test``
          (ties go to the later fold); 0 when ``df_test`` is None.
    """
    is_regression = mode == 'reg'
    target = "ScoreDiff" if is_regression else 'Win'
    n_folds = 5 if folds is None else folds

    cvs = []
    pred_tests = []
    pred_subs = []

    minScore = float("inf")
    best_index = 0

    maxAccScore = float(0)
    best_acc_index = 0

    n_rows = len(df)
    val_size = round(n_rows * 0.2)

    for fold in range(n_folds):
        print(f'\n Validation on fold: {fold + 1}')

        # Random hold-out without replacement, so the validation set has exactly
        # val_size distinct rows and never overlaps the training rows.
        val_indices = random.sample(range(n_rows), k=val_size)
        held_out = set(val_indices)
        train_indices = [i for i in range(n_rows) if i not in held_out]
        df_train = df.iloc[train_indices]
        df_val = df.iloc[val_indices]

        # Fit the scaler on the training rows only; everything the model later
        # sees goes through this same transformation.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(df_train[features])
        X_val = scaler.transform(df_val[features])
        y_train = df_train[target]

        if is_regression:
            model = xgb.XGBRegressor(n_estimators=10, seed=123)
        else:
            model = xgb.XGBClassifier()

        model.fit(X_train, y_train)

        if is_regression:
            pred = model.predict(X_val)
        else:
            pred = model.predict_proba(X_val)[:, 1]

        if df_test is not None:
            raw_test = model.predict(scaler.transform(df_test[features]))
            if is_regression:
                pred_test = _min_max_scale(raw_test)
                # A positive predicted margin means team A is predicted to win.
                predicted_win = (raw_test > 0).astype(int)
            else:
                pred_test = raw_test
                predicted_win = raw_test
            pred_tests.append(pred_test)

            # Accuracy is always measured against the binary "Win" column, which
            # keeps it defined in regression mode as well.
            accuracy = accuracy_score(df_test['Win'], predicted_win)
            if accuracy >= maxAccScore:
                maxAccScore = accuracy
                best_acc_index = fold

            print(f'Accuracy: {accuracy}, Best Accuracy: {maxAccScore}, Best Accuracy Fold: {best_acc_index+1}')

        if df_submission is not None:
            # NOTE(review): in classifier mode these are hard class labels (model.predict),
            # not probabilities - confirm that this is what the submission should contain.
            raw_submission = model.predict(scaler.transform(df_submission[features]))
            if is_regression:
                pred_submission = _min_max_scale(raw_submission)
            else:
                pred_submission = raw_submission
            pred_subs.append(pred_submission)

        # Validation score: mean squared error between the actual result and the
        # prediction rescaled onto [0, 1].
        pred = np.clip(_min_max_scale(pred), 0, 1)

        score = ((df_val['Win'].values - pred) ** 2).mean()
        cvs.append(score)

        if score < minScore:
            best_index = fold
            minScore = score
        print(f'\t Best Fold: {best_index+1}')

        print(f'\t -> Scored {score:.3f}')
    print(f'\n Local CV is {np.mean(cvs):.3f}')

    return pred_tests, best_index, pred_subs, best_acc_index
        

### Use for model Evaluation

In [None]:
# predictions, best_index = KFolds(df, test_df, folds=3, mode='lin')

# for i in range(len(predictions)):
#     print("Accuracy:", accuracy_score(y_test, predictions[i]))
#     print("Precision:", precision_score(y_test, predictions[i]))
#     print("Recall:", recall_score(y_test, predictions[i]))
#     print("F1 score:", f1_score(y_test, predictions[i]))
    
# print("Accuracy:", accuracy_score(y_test, predictions[best_index]))
# print("Precision:", precision_score(y_test, predictions[best_index]))
# print("Recall:", recall_score(y_test, predictions[best_index]))
# print("F1 score:", f1_score(y_test, predictions[best_index]))

In [None]:
# importance_scores = model.feature_importances_
# print(importance_scores)

In [None]:
# fig = plt.figure(figsize=(20,5))
# plt.bar(features, importance_scores)
# plt.xticks(rotation=90)
# plt.title("feature importance")

# Apply to testing df
____________________________________________________

In [None]:
# Per-team season averages. numeric_only=True restricts the mean to numeric columns;
# teams_df also carries the text column TeamName, which cannot be averaged.
team_season_avg = teams_df.groupby(["TeamID", "Season"]).mean(numeric_only=True).reset_index()
subs_df = pd.read_csv(Config.paths_dct["SampleSubmission2023"])

# The ID column is split on "_" once; parts 0, 1 and 2 are the season, team A and team B.
_id_parts = subs_df["ID"].str.split("_", expand=True)
season = _id_parts[0]
ATeamID = _id_parts[1]
BTeamID = _id_parts[2]

subs_df["Season"] = season.astype("int")
subs_df["ATeamID"] = ATeamID.astype("int")
subs_df["BTeamID"] = BTeamID.astype("int")
subs_df

In [None]:
df_2023 = teams_df[teams_df["Season"] == 2023].reset_index(drop=True)

teams_2023 = df_2023["TeamID"].unique()

# Last row of every 2023 team, i.e. its most recent set of statistics. One grouped tail()
# replaces growing a frame with one concat per team, and it keeps the column layout
# even when there are no 2023 rows at all.
most_recent_games_df = df_2023.groupby("TeamID", sort=False).tail(1).reset_index(drop=True)

most_recent_games_df

In [None]:
# Attach the most recent statistics of team A, then of team B, to every submission row.
# Each pass joins on (Season, team id), removes the helper columns and prefixes what was added.
for _team_key, _prefix_map in (("ATeamID", rename_a_map), ("BTeamID", rename_b_map)):
    subs_df = (
        subs_df.merge(
            most_recent_games_df,
            left_on=["Season", _team_key],
            right_on=["Season", "TeamID"],
            how='left',
        )
        .drop(["count", "TeamID", "DayNum"], axis=1)
        .rename(_prefix_map, axis=1)
    )

In [None]:
# Same A-minus-B difference columns as for the training frame; the list of names is not needed here.
_split_at = len(features_to_change) // 2
subs_df, _ = findDiffCols(subs_df, features_to_change[:_split_at], features_to_change[_split_at:])

In [None]:
# Run 15 random splits. mode='lin' is not 'reg', so KFolds takes its classifier branch
# (XGBClassifier trained on the "Win" column).
predictions, best_index, subs_preds, subs_best_index = KFolds(df, test_df, subs_df, folds=15, mode='lin')

## Prepare for Submission

In [None]:
# Sanity check (displayed as the cell output): one prediction per submission row.
len(subs_preds[subs_best_index]) == len(subs_df)

In [None]:
# Submission frame: the game IDs next to the predictions of the fold with the best test accuracy.
_chosen_preds = subs_preds[subs_best_index]
submission_df = pd.DataFrame({"ID": subs_df["ID"], "Pred": _chosen_preds})

In [None]:
# Hand-set predictions for individual games: game ID -> value written into "Pred".
_manual_preds = {
    "2023_1116_1228": 0,  # Loss
    "2023_1179_1274": 1,  # Win
    "2023_1235_1338": 0,  # Loss
    "2023_1181_1331": 0,  # Loss Duke vs Oral Roberts
}
for _game_id, _outcome in _manual_preds.items():
    submission_df.loc[submission_df["ID"] == _game_id, "Pred"] = _outcome

In [None]:
# Write the submission file without the index column.
submission_df.to_csv("/kaggle/working/submission.csv", index=False)

### Viewing the Tournament

In [None]:
# Load a previously saved submission (this replaces the submission_df built above).
submission_df = pd.read_csv("/kaggle/input/team-proba-df/submission_proba.csv")
submission_df
tournament_2023_df = submission_df.copy()

# The ID is split on "_"; parts 0, 1 and 2 become the Season, ATeam and BTeam columns.
for _part, _column in enumerate(("Season", "ATeam", "BTeam")):
    tournament_2023_df[_column] = tournament_2023_df["ID"].apply(
        lambda x, _part=_part: int(x.split("_")[_part])
    )

# One (TeamID, TeamName, Seed) row per distinct combination found in the 2023 rows.
_rows_2023 = teams_df[teams_df["Season"] == 2023]
names_df = _rows_2023[["TeamID", "TeamName", "Seed"]]
names_df = names_df.drop_duplicates()

# Add name and seed for side A, then for side B.
for _side in ("A", "B"):
    tournament_2023_df = (
        tournament_2023_df.merge(
            names_df,
            how="left",
            left_on=f"{_side}Team",
            right_on="TeamID",
        )
        .rename({"TeamName": f"{_side}TeamName", "Seed": f"{_side}Seed"}, axis=1)
        .drop("TeamID", axis=1)
    )
def searchDf(df, team1, team2):
    """Return the rows of ``df`` in which ``team1`` and ``team2`` face each other.

    A row matches when its "ATeamName" / "BTeamName" columns hold the two
    names in either order.
    """
    a_names = df["ATeamName"]
    b_names = df["BTeamName"]
    as_given = (a_names == team1) & (b_names == team2)
    reversed_order = (a_names == team2) & (b_names == team1)
    return df[as_given | reversed_order]
# Example lookup (displayed as the cell output): the row(s) for Missouri vs Utah St, in either order.
searchDf(tournament_2023_df, "Missouri", "Utah St")

### Handpicking Upsets blind from Seeds

In [None]:
# Matchups as a flat list: entries 0 and 1 form the first pair, 2 and 3 the second, and so on.
# NOTE(review): the unpaired entries 'North Carolina' (after 'UCLA') and 'Florida' (after
# 'Memphis') were removed. Each shifted every following pair by one position, which split
# matchups such as Missouri / Utah St that the notebook looks up explicitly. Confirm the
# list against the bracket.
names = ['Houston', 'N Kentucky', 'Iowa', 'Auburn', 'Miami FL', 'Drake', 'Indiana', 'Kent', 'Iowa St', 'Pittsburgh',
         'Xavier', 'Kennesaw', 'Texas A&M', 'Penn St', 'Texas', 'Colgate', 'Kansas', 'Howard', 'Arkansas', 'Illinois',
         "St Mary's CA", 'VCU', 'Connecticut', 'Iona', 'TCU', 'Arizona St', 'Gonzaga', 'Grand Canyon', 'Northwestern',
         'Boise St', 'UCLA', 'UNC Asheville', 'Alabama', 'TAM C. Christi', 'Maryland', 'West Virginia', 'San Diego St',
         'Col Charleston', 'Virginia', 'Furman', 'Creighton', 'NC State', 'Baylor', 'UC Santa Barbara', 'Missouri',
         'Utah St', 'Arizona', 'Princeton', 'Purdue', 'F Dickinson', 'Memphis', 'FL Atlantic', 'Duke',
         'Oral Roberts', 'Tennessee', 'Louisiana', 'Kentucky', 'Providence', 'Kansas St', 'Montana St',
         'Michigan St', 'USC', 'Marquette', 'Vermont']

# An odd number of names would leave one team without an opponent.
if len(names) % 2 != 0:
    raise ValueError(f"names must contain an even number of entries, got {len(names)}")

# Create a 2-dimensional array of name pairs
name_pairs = [[first, second] for first, second in zip(names[::2], names[1::2])]

In [None]:
# Show the matching tournament_2023_df row(s) for every pair in name_pairs.
for team1, team2 in name_pairs:
    display(searchDf(tournament_2023_df, team1, team2))