# Team Classifier
Based on the earlier observations, I decided to build a model that classifies a team's division rather than an individual player's division.

In [40]:
import pandas as pd

In [41]:
# Load the player-level championship stats; the path is relative to the
# notebook's working directory.
csv_path = "Dataset/ultimate_college_championship.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head()

Unnamed: 0,player,level,gender,division,team_name,Turns,Ds,Assists,Points,plus_minus,team_games,turns_per_game,ds_per_game,ast_per_game,pts_per_game,pls_mns_per_game
0,Jacques Nissen,Division 1,Men,Division 1 Men,Brown Brownian Motion,12,8,38,13,47,8,1.5,1.0,4.75,1.625,5.875
1,Cal Nightingale,Division 1,Men,Division 1 Men,Brown Brownian Motion,3,0,12,27,36,8,0.375,0.0,1.5,3.375,4.5
2,Faye Burdick,Division 1,Women,Division 1 Women,Colorado Quandary,6,12,16,13,35,7,0.857143,1.714286,2.285714,1.857143,5.0
3,Matthew Gregor,Division 3,Men,Division 3 Men,Franciscan Fatal,2,6,3,26,33,6,0.333333,1.0,0.5,4.333333,5.5
4,Frankie Saraniti,Division 3,Women,Division 3 Women,Carleton Eclipse,11,15,12,17,33,6,1.833333,2.5,2.0,2.833333,5.5


In [42]:
# Build one player-level frame per gender (without the team_games column) and
# a team_name -> (level, gender) lookup table.
is_men = df['gender'].eq('Men')
is_women = df['gender'].eq('Women')
male_df = df.loc[is_men].drop(columns=['team_games'])
female_df = df.loc[is_women].drop(columns=['team_games'])

# One row per distinct (team_name, level, gender) combination, keyed by team name.
team_divs = (
    df.loc[:, ['team_name', 'level', 'gender']]
    .drop_duplicates()
    .set_index('team_name')
)

In [43]:
# Aggregate the men's player rows into one feature row per team: the mean and
# the standard deviation of every numeric stat, then attach the team's level.
male_by_team = male_df.groupby('team_name')

male_teams_mean = male_by_team.mean(numeric_only=True).add_suffix(' mean')
male_teams_stdev = male_by_team.std(numeric_only=True).add_suffix(' stdev')

# Both frames are indexed by team_name, so the joins line up on the team.
male_teams_all = (
    male_teams_mean
    .join(male_teams_stdev)
    .join(team_divs['level'])
)
male_teams_all.head()

Unnamed: 0_level_0,Turns mean,Ds mean,Assists mean,Points mean,plus_minus mean,turns_per_game mean,ds_per_game mean,ast_per_game mean,pts_per_game mean,pls_mns_per_game mean,...,Ds stdev,Assists stdev,Points stdev,plus_minus stdev,turns_per_game stdev,ds_per_game stdev,ast_per_game stdev,pts_per_game stdev,pls_mns_per_game stdev,level
team_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama-Huntsville Nightmares,3.26087,1.521739,2.26087,2.304348,2.826087,0.652174,0.304348,0.452174,0.46087,0.565217,...,2.371604,3.81644,2.835406,4.302771,0.900944,0.474321,0.763288,0.567081,0.860554,Division 1
Bates Orange Whip,5.235294,1.647059,4.588235,4.588235,5.588235,0.872549,0.27451,0.764706,0.764706,0.931373,...,2.644361,10.099869,4.925175,8.46359,1.231149,0.440727,1.683311,0.820863,1.410598,Division 3
Berry Bucks,3.653846,1.461538,3.0,3.0,3.807692,0.608974,0.24359,0.5,0.5,0.634615,...,1.838059,6.835203,4.758151,6.0796,0.921282,0.306343,1.1392,0.793025,1.013267,Division 3
Brown Brownian Motion,3.48,1.44,4.52,4.56,7.04,0.435,0.18,0.565,0.57,0.88,...,1.980741,10.364684,7.112665,13.308769,0.744004,0.247593,1.295585,0.889083,1.663596,Division 1
Cal Poly-SLO Slocore,3.625,1.791667,4.208333,4.208333,6.583333,0.517857,0.255952,0.60119,0.60119,0.940476,...,2.021174,7.672532,4.96929,8.662346,0.824226,0.288739,1.096076,0.709899,1.237478,Division 1


In [44]:
def make_bipolar(cell):
    """Encode a division label as a bipolar class.

    Returns 1 when ``cell`` equals the string 'Division 1' and -1 for any
    other value.
    """
    return 1 if cell == 'Division 1' else -1

In [45]:
# Encode the target column as a bipolar label via make_bipolar
# (+1 for 'Division 1', -1 for anything else).
# The guard makes this cell safe to re-run: applying make_bipolar to values that
# are already +1/-1 would map every +1 to -1 and silently erase the Division 1
# class, so the encoding is skipped once the column holds only +1/-1.
if not male_teams_all['level'].isin([1, -1]).all():
    male_teams_all['level'] = male_teams_all['level'].apply(make_bipolar)

In [46]:
# Positional hold-out split: the first n_train rows of male_teams_all are used
# for training and the remaining rows for testing. No shuffling is done here,
# so the split follows the frame's existing row order.
n_train = 28

male_train = male_teams_all.iloc[:n_train]
male_test = male_teams_all.iloc[n_train:]

# Features are every column except the 'level' target.
male_x_train, male_y_train = male_train.drop(columns=['level']), male_train['level']
male_x_test, male_y_test = male_test.drop(columns=['level']), male_test['level']

In [47]:
def get_results(prediction, actual):
    """Compare predicted labels with the true labels and report accuracy.

    Prints the share of rows where prediction and actual agree, as a
    percentage rounded to two decimals, and returns the side-by-side table.
    When there are no rows to score, a placeholder message is printed instead
    of raising ZeroDivisionError.

    Parameters
    ----------
    prediction : array-like
        Predicted class labels.
    actual : array-like
        True class labels; expected to have the same length as ``prediction``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Prediction'`` and ``'Actual'``, one row per sample.
    """
    results = pd.DataFrame({'Prediction': prediction, 'Actual': actual})
    total = len(results)
    if total == 0:
        # Nothing to score: accuracy is undefined, so skip the division.
        print("Accuracy: n/a (no samples)")
        return results

    num_correct = int((results['Prediction'] == results['Actual']).sum())
    accuracy = round(num_correct / total * 100, 2)
    print(f"Accuracy: {accuracy}%")
    return results

In [48]:
from sklearn import tree

In [49]:
# Fit a decision tree on the men's team-level features.
# random_state is pinned because the tree's split search is randomized (ties
# between equally good splits can be broken differently between runs); without
# a fixed seed the fitted tree, and therefore the reported accuracy, can change
# on every Restart & Run All.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(male_x_train, male_y_train)

In [50]:
# Score the fitted tree on the held-out men's teams; get_results prints the
# accuracy and returns the prediction-vs-actual table.
predictions = clf.predict(male_x_test)
results = get_results(prediction=predictions, actual=male_y_test)

Accuracy: 70.0%
