In [None]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold

resultsWin = pd.read_csv('data/RegularSeasonDetailedResults.csv')
teams = pd.read_csv('data/Teams.csv')

# Get only winning teams game stats to predict their score
resultsWin = resultsWin.drop(['WTeamID', 'LTeamID', 'WLoc'], axis=1)

def neuralNetwork(results) :
    train_features, test_features, train_outcome, test_outcome = train_test_split(
        results.drop("WScore", axis=1),
        results.WScore,
        test_size=0.30, 
        random_state=11
    )
    scaler = MinMaxScaler()
    mlp_reg = MLPClassifier()

    imputer = Imputer()
    selector = SelectPercentile()
    threshold = VarianceThreshold(.1)
    pipe = make_pipeline(imputer, threshold, selector, scaler, mlp_reg)

    param_grid = {
        'selectpercentile__percentile':range(10, 30, 5)
        }

    crossVal = KFold()
    grid = GridSearchCV(pipe, param_grid, cv = crossVal, scoring="neg_mean_absolute_error")
    grid.fit(train_features, train_outcome)
    grid.score(test_features, test_outcome)

    score = grid.score(test_features, test_outcome)

    predictedValues = grid.predict(test_features)

    return [score, predictedValues, grid, test_outcome]

neural = neuralNetwork(resultsWin)

# How many points off were we from predicting the winning score?
print(neural[0])

# This can be exactly replicated for the other team using LScore for losing score
# Then need to randomize which teams get which grid and we run the grid for each playoff game invididually
# in their own csv. Take the two scores, see who won, move them manually to the next round.



In [187]:
resultsWin.head()

Unnamed: 0,Season,DayNum,WScore,LScore,NumOT,WFGM,WFGA,WFGM3,WFGA3,WFTM,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,68,62,0,27,58,3,14,11,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,70,63,0,26,62,8,20,10,...,24,9,20,20,25,7,12,8,6,16
2,2003,11,73,61,0,24,58,8,18,17,...,26,14,23,31,22,9,12,2,5,23
3,2003,11,56,50,0,18,38,3,9,17,...,22,8,15,17,20,9,19,4,3,23
4,2003,11,77,71,0,30,61,6,14,11,...,16,17,27,21,15,12,10,7,1,14


I'm not quite done withe the following but it is a way to predict the outcomes of the tournament games as they contunue. The games each round can be based off of the predicted outcome of the last round.

In [184]:
def next_round(teams_df):
    '''Takes a dataframe with two columns: School id, and ranking, none of 
    these teams should have been eliminated. It returns the next matchups for these 
    teams based on their ranking'''
    arar = np.char.array(['01','16','08','09','05','12','04','13','06','11','03','14','07','10','02','15'])
    arr = np.append(arar, arar)
    first_round_bracket = np.char.array(['w', 'x', 'y', 'z']).repeat(16) + np.append(arr, arr)
    if(len(teams_df) < 64):
        won_ranks = teams_df['Seed'].values
        first_round_bracket = np.array([x for x in first_round_bracket if x in won_ranks])
    #print(first_round_bracket)
    picks1 = first_round_bracket[np.arange(1, len(teams_df), 2)]
    picks2 = first_round_bracket[np.arange(0, len(teams_df), 2)]
    #print(picks1)
    #print(picks2)
    teams_df = teams_df.set_index('Seed')
    teams_next = pd.DataFrame()
    teams_next['team1'] = teams_df.loc[picks1, 'TeamID'].values
    teams_next['team2'] = teams_df.loc[picks2, 'TeamID'].values
    teams_next['rank1'] = picks1
    teams_next['rank2'] = picks2
    return teams_next

def find_winners(nx):
    '''Takes a dataframe with teamsids and ranks from matches and keeps only the 
    winners'''
    nx['score'] = (nx['team1'] - nx['team2'])
    nx['TeamID'] = nx.loc[:, 'team2']
    nx['Seed'] = nx.loc[:, 'rank2']
    
    condition = nx['score'] > 0
    #this can be replaced with an ouput from a neural net to predict winners
    underdogs = nx.loc[condition,['rank1', 'team1']]
    underdogs.columns = ['Seed', 'TeamID']
    nx.update(underdogs)
    return(nx)#.loc[:, ['Seed', 'TeamID']])

#makes a fake testing year
arar = np.char.array(['01','16','08','09','05','12','04','13','06','11','03','14','07','10','02','15'])
arr = np.append(arar, arar)
teams_df = pd.DataFrame()
teams_df['Seed'] = np.char.array(['w', 'x', 'y', 'z']).repeat(16) + np.append(arr, arr)
teams_df['TeamID'] = np.random.randint(50, 100, size = 64)

round = 1 
games_record = pd.DataFrame()
#runs until only 1 team remains
while len(teams_df) > 1:
    games = next_round(teams_df)
    #features = games.join()
    #
    ## use plt to find scores on nx
    games['round'] = round
    if len(games_record) < 1:
        games_record = games#.loc[:,:]
    else:
        games_record = games_record.append(games, ignore_index=True)
    teams_df = find_winners(games.copy())
    round = round + 1
print(games_record)

    team1  team2 rank1 rank2  round
0    70.0   59.0   w16   w01      1
1    76.0   99.0   w09   w08      1
2    89.0   90.0   w12   w05      1
3    88.0   62.0   w13   w04      1
4    90.0   90.0   w11   w06      1
5    57.0   68.0   w14   w03      1
6    79.0   98.0   w10   w07      1
7    76.0   73.0   w15   w02      1
8    74.0   52.0   x16   x01      1
9    70.0   83.0   x09   x08      1
10   56.0   69.0   x12   x05      1
11   58.0   96.0   x13   x04      1
12   67.0   99.0   x11   x06      1
13   66.0   93.0   x14   x03      1
14   50.0   90.0   x10   x07      1
15   51.0   50.0   x15   x02      1
16   88.0   97.0   y16   y01      1
17   55.0   62.0   y09   y08      1
18   94.0   98.0   y12   y05      1
19   62.0   75.0   y13   y04      1
20   85.0   75.0   y11   y06      1
21   52.0   85.0   y14   y03      1
22   88.0   63.0   y10   y07      1
23   61.0   74.0   y15   y02      1
24   70.0   58.0   z16   z01      1
25   92.0   97.0   z09   z08      1
26   80.0   58.0   z12   z05

In [193]:
regular_season_results = pd.read_csv('data/RegularSeasonDetailedResults.csv')

In [237]:
winners = regular_season_results.loc[:,['Season', 'WTeamID', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 
                                         'WFTM', 'WFTA', 'WOR', 'WDR', 'WAst', 'WTO', 'WStl', 'WBlk', 'WPF']]
losers = regular_season_results.loc[:,['Season', 'LTeamID', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3',
                                      'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF']]
winners.columns = ['Season', 'TeamID', 'FGM', 'FGA', 'FGM3', 'FGA3',
                                      'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
losers.columns = ['Season', 'TeamID', 'FGM', 'FGA', 'FGM3', 'FGA3',
                                      'FTM', 'FTA', 'OR', 'DR', 'Ast', 'TO', 'Stl', 'Blk', 'PF']
all_teams = winners.copy()
all_teams = all_teams.append(losers.copy(), ignore_index=True)
team_summary_stats = all_teams.groupby(['Season', 'TeamID'], as_index=False).mean()
print(team_summary_stats.head())

   Season  TeamID        FGM        FGA      FGM3       FGA3        FTM  \
0    2003    1102  19.142857  39.785714  7.821429  20.821429  11.142857   
1    2003    1103  27.148148  55.851852  5.444444  16.074074  19.037037   
2    2003    1104  24.035714  57.178571  6.357143  19.857143  14.857143   
3    2003    1105  24.384615  61.615385  7.576923  20.769231  15.423077   
4    2003    1106  23.428571  55.285714  6.107143  17.642857  10.642857   

         FTA         OR         DR        Ast         TO       Stl       Blk  \
0  17.107143   4.178571  16.821429  13.000000  11.428571  5.964286  1.785714   
1  25.851852   9.777778  19.925926  15.222222  12.629630  7.259259  2.333333   
2  20.928571  13.571429  23.928571  12.107143  13.285714  6.607143  3.785714   
3  21.846154  13.500000  23.115385  14.538462  18.653846  9.307692  2.076923   
4  16.464286  12.285714  23.857143  11.678571  17.035714  8.357143  3.142857   

          PF  
0  18.750000  
1  19.851852  
2  18.035714  
3  20.23