In [None]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold

resultsWin = pd.read_csv('data/RegularSeasonDetailedResults.csv')
teams = pd.read_csv('data/Teams.csv')

# Get only winning teams game stats to predict their score
resultsWin = resultsWin.drop(['WTeamID', 'LTeamID', 'WLoc'], axis=1)

def neuralNetwork(results) :
    train_features, test_features, train_outcome, test_outcome = train_test_split(
        results.drop("WScore", axis=1),
        results.WScore,
        test_size=0.30, 
        random_state=11
    )
    scaler = MinMaxScaler()
    mlp_reg = MLPClassifier()

    imputer = Imputer()
    selector = SelectPercentile()
    threshold = VarianceThreshold(.1)
    pipe = make_pipeline(imputer, threshold, selector, scaler, mlp_reg)

    param_grid = {
        'selectpercentile__percentile':range(10, 30, 5)
        }

    crossVal = KFold()
    grid = GridSearchCV(pipe, param_grid, cv = crossVal, scoring="neg_mean_absolute_error")
    grid.fit(train_features, train_outcome)
    grid.score(test_features, test_outcome)

    score = grid.score(test_features, test_outcome)

    predictedValues = grid.predict(test_features)

    return [score, predictedValues, grid, test_outcome]

neural = neuralNetwork(resultsWin)

# How many points off were we from predicting the winning score?
print(neural[0])

# This can be exactly replicated for the other team using LScore for losing score
# Then need to randomize which teams get which grid and we run the grid for each playoff game invididually
# in their own csv. Take the two scores, see who won, move them manually to the next round.



I'm not quite done withe the following but it is a way to predict the outcomes of the tournament games as they contunue. The games each round can be based off of the predicted outcome of the last round.

In [177]:
def next_round(teams_df):
    '''Takes a dataframe with two columns: School id, and ranking, none of 
    these teams should have been eliminated. It returns the next matchups for these 
    teams based on their ranking'''
    arar = np.char.array(['01','16','08','09','05','12','04','13','06','11','03','14','07','10','02','15'])
    arr = np.append(arar, arar)
    first_round_bracket = np.char.array(['w', 'x', 'y', 'z']).repeat(16) + np.append(arr, arr)
    if(len(teams_df) < 64):
        won_ranks = teams_df['Seed'].values
        first_round_bracket = np.array([x for x in first_round_bracket if x in won_ranks])
    #print(first_round_bracket)
    picks1 = first_round_bracket[np.arange(1, len(teams_df), 2)]
    picks2 = first_round_bracket[np.arange(0, len(teams_df), 2)]
    #print(picks1)
    #print(picks2)
    teams_df = teams_df.set_index('Seed')
    teams_next = pd.DataFrame()
    teams_next['team1'] = teams_df.loc[picks1, 'TeamID'].values
    teams_next['team2'] = teams_df.loc[picks2, 'TeamID'].values
    teams_next['rank1'] = picks1
    teams_next['rank2'] = picks2
    return teams_next

def find_winners(nx):
    '''Takes a dataframe with teamsids and ranks from matches and keeps only the 
    winners'''
    nx['score'] = (nx['team1'] - nx['team2'])
    nx['TeamID'] = nx.loc[:, 'team2']
    nx['Seed'] = nx.loc[:, 'rank2']
    
    condition = nx['score'] > 0
    #this can be replaced with an ouput from a neural net to predict winners
    underdogs = nx.loc[condition,['rank1', 'team1']]
    underdogs.columns = ['Seed', 'TeamID']
    nx.update(underdogs)
    return(nx)#.loc[:, ['Seed', 'TeamID']])

#makes a fake testing year
arar = np.char.array(['01','16','08','09','05','12','04','13','06','11','03','14','07','10','02','15'])
arr = np.append(arar, arar)
teams_df = pd.DataFrame()
teams_df['Seed'] = np.char.array(['w', 'x', 'y', 'z']).repeat(16) + np.append(arr, arr)
teams_df['TeamID'] = np.random.randint(50, 100, size = 64)

round = 1 
games_record = pd.DataFrame()
#runs until only 1 team remains
while len(teams_df) > 1:
    games = next_round(teams_df)
    #features = games.join()
    #
    ## use plt to find scores on nx
    if len(games_record) < 1:
        games_record = games#.loc[:,:]
    else:
        games_record = games_record.append(games, ignore_index=True)
    teams_df = find_winners(games)
    print(round)
    round = round + 1
    print(games)
print(games_record)

1
    team1  team2 rank1 rank2  score  TeamID Seed
0      93     56   w16   w01     37    93.0  w16
1      84     91   w09   w08     -7    91.0  w08
2      56     52   w12   w05      4    56.0  w12
3      58     69   w13   w04    -11    69.0  w04
4      78     68   w11   w06     10    78.0  w11
5      72     87   w14   w03    -15    87.0  w03
6      91     53   w10   w07     38    91.0  w10
7      96     59   w15   w02     37    96.0  w15
8      76     88   x16   x01    -12    88.0  x01
9      84     68   x09   x08     16    84.0  x09
10     90     55   x12   x05     35    90.0  x12
11     67     71   x13   x04     -4    71.0  x04
12     88     92   x11   x06     -4    92.0  x06
13     65     60   x14   x03      5    65.0  x14
14     53     69   x10   x07    -16    69.0  x07
15     90     98   x15   x02     -8    98.0  x02
16     52     93   y16   y01    -41    93.0  y01
17     83     87   y09   y08     -4    87.0  y08
18     74     75   y12   y05     -1    75.0  y05
19     52     56  

In [161]:
#np.array([1,16,8,9,5,12,4,13,6,11,3,14,7,10,2,15]).astype('str') #+ 'z'#np.array(['w','x','y','z']))
arar = np.char.array(['1','16','8','9','5','12','4','13','6','11','3','14','7','10','2','15'])
arr = np.append(arar, arar)
np.char.array(['w', 'x', 'y', 'z']).repeat(16) + np.append(arr, arr)
games_record.append(games_record)

Unnamed: 0,team1,team2,rank1,rank2,score,TeamID,Seed
0,85,61,w16,w01,24,85.0,w16
1,73,61,w09,w08,12,73.0,w09
2,65,91,w12,w05,-26,91.0,w05
3,50,59,w13,w04,-9,59.0,w04
4,67,96,w11,w06,-29,96.0,w06
5,91,90,w14,w03,1,91.0,w14
6,82,91,w10,w07,-9,91.0,w07
7,83,77,w15,w02,6,83.0,w15
8,87,73,x16,x01,14,87.0,x16
9,86,54,x09,x08,32,86.0,x09


In [21]:
pd.DataFrame({'star':np.arange(1,17,1), 'ed':np.arange(1,17,1)})

Unnamed: 0,star,ed
0,1,1
1,2,2
2,3,3
3,4,4
4,5,5
5,6,6
6,7,7
7,8,8
8,9,9
9,10,10
