In [None]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold

resultsWin = pd.read_csv('data/RegularSeasonDetailedResults.csv')
teams = pd.read_csv('data/Teams.csv')

# Get only winning teams game stats to predict their score
resultsWin = resultsWin.drop(['WTeamID', 'LTeamID', 'WLoc'], axis=1)

def neuralNetwork(results) :
    train_features, test_features, train_outcome, test_outcome = train_test_split(
        results.drop("WScore", axis=1),
        results.WScore,
        test_size=0.30, 
        random_state=11
    )
    scaler = MinMaxScaler()
    mlp_reg = MLPClassifier()

    imputer = Imputer()
    selector = SelectPercentile()
    threshold = VarianceThreshold(.1)
    pipe = make_pipeline(imputer, threshold, selector, scaler, mlp_reg)

    param_grid = {
        'selectpercentile__percentile':range(10, 30, 5)
        }

    crossVal = KFold()
    grid = GridSearchCV(pipe, param_grid, cv = crossVal, scoring="neg_mean_absolute_error")
    grid.fit(train_features, train_outcome)
    grid.score(test_features, test_outcome)

    score = grid.score(test_features, test_outcome)

    predictedValues = grid.predict(test_features)

    return [score, predictedValues, grid, test_outcome]

neural = neuralNetwork(resultsWin)

# How many points off were we from predicting the winning score?
print(neural[0])

# This can be exactly replicated for the other team using LScore for losing score
# Then need to randomize which teams get which grid and we run the grid for each playoff game invididually
# in their own csv. Take the two scores, see who won, move them manually to the next round.



I'm not quite done withe the following but it is a way to predict the outcomes of the tournament games as they contunue. The games each round can be based off of the predicted outcome of the last round.

In [117]:
def next_round(teams_df):
    '''Takes a dataframe with two columns: School id, and ranking, none of 
    these teams should have been eliminated. It returns the next matchups for these 
    teams based on their ranking'''
    first_round_bracket = np.array([1,16,8,9,5,12,4,13,6,11,3,14,7,10,2,15])
    print(teams_df.size)
    if(len(teams_df) < 16):
        won_ranks = teams_df['ranking'].values.astype('int')
        first_round_bracket = np.array([x for x in first_round_bracket if x in won_ranks])
    print(first_round_bracket)
    picks1 = first_round_bracket[np.arange(1, len(teams_df), 2)]
    picks2 = first_round_bracket[np.arange(0, len(teams_df), 2)]
    print(picks1)
    print(picks2)
    teams_df = teams_df.set_index('ranking')
    teams_next = pd.DataFrame()
    teams_next['team1'] = teams_df.loc[picks1, 'school'].values
    teams_next['team2'] = teams_df.loc[picks2, 'school'].values
    teams_next['rank1'] = picks1
    teams_next['rank2'] = picks2
    return teams_next

def find_winners(nx):
    '''Takes a dataframe with teamsids and ranks from matches and keeps only the 
    winners'''
    nx['score'] = (nx['team1'] - nx['team2'])
    nx['school'] = nx.loc[:, 'team2']
    nx['ranking'] = nx.loc[:, 'rank2']
    
    condition = nx['score'] > 0
    #this can be replaced with an ouput from a neural net to predict winners
    underdogs = nx.loc[condition,['rank1', 'team1']]
    underdogs.columns = ['ranking', 'school']
    nx.update(underdogs)
    return(nx.loc[:, ['ranking', 'school']])

teams_df = pd.DataFrame()
teams_df['ranking'] = np.arange(1,17,1)
teams_df['school'] = np.random.randint(50, 100, size = 16)
print(teams_df)
nx = next_round(teams_df)
print(nx)
round2 = find_winners(nx)
nx2 = next_round(round2)
print(nx2)
round3 = find_winners(nx2)
nx3 = next_round(round3)
print(nx3)
round4 = find_winners(nx3)
nx4 = next_round(round4)
print(nx4)

    ranking  school
0         1      89
1         2      76
2         3      61
3         4      61
4         5      65
5         6      69
6         7      75
7         8      65
8         9      71
9        10      82
10       11      87
11       12      59
12       13      89
13       14      72
14       15      50
15       16      74
32
[ 1 16  8  9  5 12  4 13  6 11  3 14  7 10  2 15]
[16  9 12 13 11 14 10 15]
[1 8 5 4 6 3 7 2]
   team1  team2  rank1  rank2
0     74     89     16      1
1     71     65      9      8
2     59     65     12      5
3     89     61     13      4
4     87     69     11      6
5     72     61     14      3
6     82     75     10      7
7     50     76     15      2
16
[ 1  9  5 13 11 14 10  2]
[ 9 13 14  2]
[ 1  5 11 10]
   team1  team2  rank1  rank2
0   71.0   89.0      9      1
1   89.0   65.0     13      5
2   72.0   87.0     14     11
3   76.0   82.0      2     10
8
[ 1 13 11 10]
[13 10]
[ 1 11]
   team1  team2  rank1  rank2
0   89.0   89.0     13  

In [21]:
pd.DataFrame({'star':np.arange(1,17,1), 'ed':np.arange(1,17,1)})

Unnamed: 0,star,ed
0,1,1
1,2,2
2,3,3
3,4,4
4,5,5
5,6,6
6,7,7
7,8,8
8,9,9
9,10,10
