# Team Classifier
From the observations, I have decided to create a model to classify a team's division rather than an individuals division.

In [161]:
import pandas as pd

In [162]:
df = pd.read_csv("Dataset/ultimate_college_championship.csv")
df.head()

Unnamed: 0,player,level,gender,division,team_name,Turns,Ds,Assists,Points,plus_minus,team_games,turns_per_game,ds_per_game,ast_per_game,pts_per_game,pls_mns_per_game
0,Jacques Nissen,Division 1,Men,Division 1 Men,Brown Brownian Motion,12,8,38,13,47,8,1.5,1.0,4.75,1.625,5.875
1,Cal Nightingale,Division 1,Men,Division 1 Men,Brown Brownian Motion,3,0,12,27,36,8,0.375,0.0,1.5,3.375,4.5
2,Faye Burdick,Division 1,Women,Division 1 Women,Colorado Quandary,6,12,16,13,35,7,0.857143,1.714286,2.285714,1.857143,5.0
3,Matthew Gregor,Division 3,Men,Division 3 Men,Franciscan Fatal,2,6,3,26,33,6,0.333333,1.0,0.5,4.333333,5.5
4,Frankie Saraniti,Division 3,Women,Division 3 Women,Carleton Eclipse,11,15,12,17,33,6,1.833333,2.5,2.0,2.833333,5.5


In [163]:
# Split by gender and get team ids
male_df = df[df['gender'] == 'Men'].drop(columns='team_games')
female_df = df[df['gender'] == 'Women'].drop(columns='team_games')
team_divs = df[['team_name', 'level', 'gender']].drop_duplicates().set_index(keys='team_name')
male_df = male_df.loc[:, "player":"plus_minus"]
female_df = female_df.loc[:, "player":"plus_minus"]

In [164]:
def make_bipolar(cell):
    if cell == 'Division 1':
        return 1
    return -1

In [165]:
def prepare_data(df, div):
    df_mean = df.groupby('team_name').mean(numeric_only=True)
    df_mean.columns = [col + ' mean' for col in df_mean.columns]
    df_stdev = df.groupby('team_name').std(numeric_only=True)
    df_stdev.columns = [col + ' stdev' for col in df_stdev.columns]
    new_df = df_mean.join(df_stdev)
    new_df = new_df.join(div['level'])
    new_df['level'] = new_df['level'].apply(make_bipolar)
    return new_df

In [166]:
# male_teams_mean = male_df.groupby('team_name').mean(numeric_only=True)
# male_teams_mean.columns = [col + ' mean' for col in male_teams_mean.columns]
# male_teams_stdev = male_df.groupby('team_name').std(numeric_only=True)
# male_teams_stdev.columns = [col + ' stdev' for col in male_teams_stdev.columns]
# male_teams_all = male_teams_mean.join(male_teams_stdev)
# male_teams_all = male_teams_all.join(team_divs['level'])
# male_teams_all.head()
male_teams_all = prepare_data(male_df, team_divs)

In [167]:
male_train = male_teams_all[0:28]
male_x_train = male_train.drop(columns='level')
male_y_train = male_train['level']

male_test = male_teams_all[28:]
male_x_test = male_test.drop(columns='level')
male_y_test = male_test['level']

In [168]:
def get_results(prediction, actual):
    data = {
        'Prediction': prediction,
        'Actual': actual
    }
    results = pd.DataFrame(data)
    num_correct = len(results[results['Prediction'] == results['Actual']])
    total = len(results)
    accuracy = round(num_correct/total * 100, 2)
    print(f"Accuracy: {accuracy}%")
    return results

In [169]:
from sklearn import tree

In [170]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(male_x_train, male_y_train)

In [171]:
predictions = clf.predict(male_x_test)
results = get_results(predictions, male_y_test)

Accuracy: 70.0%


# Test with 2025 Data

In [172]:
df_2025 = pd.read_csv("Dataset/2025 Database.csv")
df_2025.head()

Unnamed: 0,No.,Player,PronounsMere Info PronounsInterested in learning more about pronouns? Click here.,Position,Year,Height,Points,Assists,Ds,Turns,team_name,level,gender,division
0,1,Nima Lhamo,,Cutter,College (JR),"6'0""",5.0,2.0,2.0,2.0,Massachusetts,Division 1,Men,Division 1Men
1,4,Ethan Lieman,,,College (FR),"8'2""",16.0,11.0,,4.0,Massachusetts,Division 1,Men,Division 1Men
2,6,Roan Dunkerley,,,,,4.0,,,4.0,Massachusetts,Division 1,Men,Division 1Men
3,7,Ian Buchanan,,,,,,1.0,,2.0,Massachusetts,Division 1,Men,Division 1Men
4,8,Tomo Liou,,Defense (Cutter),College (SO),"5'8""",3.0,,,2.0,Massachusetts,Division 1,Men,Division 1Men


In [173]:
clean_2025 = df_2025[['Player', 'level', 'gender', 'division', 'team_name', 'Turns', 'Ds', 'Assists', 'Points']]
clean_2025 = clean_2025.fillna(0)
clean_2025['plus_minus'] = clean_2025['Points'] + clean_2025['Assists'] + clean_2025['Ds'] - clean_2025['Turns']
clean_2025.head()

Unnamed: 0,Player,level,gender,division,team_name,Turns,Ds,Assists,Points,plus_minus
0,Nima Lhamo,Division 1,Men,Division 1Men,Massachusetts,2.0,2.0,2.0,5.0,7.0
1,Ethan Lieman,Division 1,Men,Division 1Men,Massachusetts,4.0,0.0,11.0,16.0,23.0
2,Roan Dunkerley,Division 1,Men,Division 1Men,Massachusetts,4.0,0.0,0.0,4.0,0.0
3,Ian Buchanan,Division 1,Men,Division 1Men,Massachusetts,2.0,0.0,1.0,0.0,-1.0
4,Tomo Liou,Division 1,Men,Division 1Men,Massachusetts,2.0,0.0,0.0,3.0,1.0


In [174]:
male_2025 = clean_2025[clean_2025['gender'] == 'Men']
women_2025 = clean_2025[clean_2025['gender'] == 'Women']
divs_2025 = clean_2025[['team_name', 'gender', 'level']].drop_duplicates().set_index(keys='team_name')

In [175]:
processed_2025 = prepare_data(male_2025, divs_2025)
processed_2025.head()

Unnamed: 0_level_0,Turns mean,Ds mean,Assists mean,Points mean,plus_minus mean,Turns stdev,Ds stdev,Assists stdev,Points stdev,plus_minus stdev,level
team_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Berry,4.259259,1.703704,2.185185,2.259259,1.888889,6.502027,2.569268,4.674292,3.13286,4.079341,-1
Bowdoin,3.428571,1.178571,2.642857,2.642857,3.035714,6.379531,1.467009,5.286,5.498918,6.50061,-1
British Columbia,2.478261,1.0,2.434783,2.434783,3.391304,4.230512,1.279204,5.623216,4.009376,5.844493,1
British Columbia,2.478261,1.0,2.434783,2.434783,3.391304,4.230512,1.279204,5.623216,4.009376,5.844493,1
Cal Poly-SLO,1.777778,1.037037,3.148148,3.148148,5.555556,2.485857,1.192331,5.695502,3.909591,7.438638,1


In [177]:
test_2025_x = processed_2025.drop(columns='level')
test_2025_y = processed_2025['level']

In [178]:
predict = clf.predict(test_2025_x)
results = get_results(predict, test_2025_y)

Accuracy: 80.0%
