In [235]:
import numpy as np
import pandas as pd
import requests

import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from bs4 import BeautifulSoup as bs
import datetime
import time
from sklearn.model_selection import train_test_split
import pickle


rf = pickle.load(open('ncaa_rfc','rb'))
regr = pickle.load(open('ncaa_rfr','rb'))

In [30]:
stat_list = {'win_loss_pct':168,
             'assist_turnover_ratio':474, 
             'assists_per_game':216, 
             'd_rebounds':859, 
             'fg_pct':148,
             'd_fg_pct':149,
             'ft_pct':150,
             'o_rebounds':857,
             'd_scoring':147,
             'o_scoring':145,
             'three_pct':152,
             'turnover_margin':519}


team_stats = {}

for stat in stat_list.keys():
    status_code = True
    num = 1
    while status_code == True:
        
        r = requests.get(f'https://www.ncaa.com/stats/basketball-men/d1/current/team/{stat_list[stat]}/p{num}')
        
        if r.status_code == 200:
            status_code = True
            soup = bs(r.content,'html.parser')
            for team in soup.table.find_all('tr')[1:]:
                if stat == 'win_loss_pct' and team.a.text != 'Northern Ariz.':
                    team_stats[team.a.text] = {}
                    team_stats[team.a.text][stat] = team.find_all('td')[4].text
                if stat in ['assist_turnover_ratio','fg_pct','d_fg_pct','ft_pct','three_pct'] and team.a.text != 'Northern Ariz.':
                    team_stats[team.a.text][stat] = team.find_all('td')[5].text
                if stat in ['assists_per_game','d_rebounds','o_rebounds','d_scoring','o_scoring'] and team.a.text != 'Northern Ariz.':
                    team_stats[team.a.text][stat] = team.find_all('td')[4].text
                    
        else:
            status_code = False
        if not status_code:
            continue
        

        num += 1
        
        
           
df = pd.DataFrame.from_dict(team_stats,orient='index')
df.reset_index(inplace = True)
df.rename(columns={'index':'team_name'},inplace = True)

date = datetime.date.today()
new_games = []

r = requests.get(f'https://www.ncaa.com/scoreboard/basketball-men/d1/{str(date.year)}/{str(date.month).zfill(2)}/{str(date.day).zfill(2)}/all-conf')
r.content
if r.status_code == 200:
    soup = bs(r.content,'html.parser')
    for game in soup.find_all(class_="gamePod-game-teams"):
        team_one = game.find_all(class_="gamePod-game-team-name")[0].text
        team_two = game.find_all(class_="gamePod-game-team-name")[1].text
        game_score = {'team_one':team_one, 'team_two':team_two}
        new_games.append(game_score)

new_games_df = pd.DataFrame(new_games)



new_combined_df = pd.merge(new_games_df,df,how='inner',left_on = 'team_one',right_on = 'team_name')
new_combined_df.rename(columns = {'team_name':'one_team_name',
                              'win_loss_pct':'one_win_loss_pct',
                              'assist_turnover_ratio':'one_assist_turnover_ratio',
                              'assists_per_game':'one_assists_per_game',                              
                              'd_rebounds':'one_d_rebounds',
                              'fg_pct':'one_fg_pct',
                              'd_fg_pct':'one_d_fg_pct',
                              'ft_pct':'one_ft_pct',
                              'o_rebounds':'one_o_rebounds',
                              'd_scoring':'one_d_scoring',
                              'o_scoring':'one_o_scoring',
                              'three_pct':'one_three_pct'},inplace=True)
new_combined_df = pd.merge(new_combined_df,df,how='inner',left_on = 'team_two',right_on = 'team_name')
new_combined_df.rename(columns = {'team_name':'two_team_name',
                              'win_loss_pct':'two_win_loss_pct',
                              'assist_turnover_ratio':'two_assist_turnover_ratio',
                              'assists_per_game':'two_assists_per_game',                              
                              'd_rebounds':'two_d_rebounds',
                              'fg_pct':'two_fg_pct',
                              'd_fg_pct':'two_d_fg_pct',
                              'ft_pct':'two_ft_pct',
                              'o_rebounds':'two_o_rebounds',
                              'd_scoring':'two_d_scoring',
                              'o_scoring':'two_o_scoring',
                              'three_pct':'two_three_pct'},inplace=True)
new_combined_df.dropna(inplace=True)

new_x = new_combined_df[['one_win_loss_pct', 'one_assist_turnover_ratio',
       'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct',
       'one_ft_pct', 'one_o_rebounds', 'one_d_scoring', 'one_o_scoring',
       'one_three_pct','two_win_loss_pct', 'two_assist_turnover_ratio', 'two_assists_per_game',
       'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
       'two_o_rebounds', 'two_d_scoring', 'two_o_scoring', 'two_three_pct']].dropna()


new_x = new_combined_df[['one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct','one_ft_pct', 
                 'one_o_rebounds', 'one_three_pct',
                 'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
                 'two_o_rebounds', 'two_three_pct']].dropna()


new_preds = rf.predict(new_x)
new_regr_preds = regr.predict(new_x)
new_combined_df['team_one_win'] = new_preds
new_combined_df['team_one_score'] = new_regr_preds[:,0]
new_combined_df['team_two_score'] = new_regr_preds[:,1]
def new_winner(row):
    if row['team_one_win'] == True:
        return row['team_one']
    else:
        return row['team_two']
new_combined_df['pred_winner'] = new_combined_df.apply(new_winner, axis=1)
new_combined_df['total_score'] = new_combined_df['team_one_score'] + new_combined_df['team_two_score']
new_combined_df['one-two'] = new_combined_df['team_one_score'] - new_combined_df['team_two_score']
new_combined_df[['team_one','team_two','one_win_loss_pct','two_win_loss_pct','team_one_score','team_two_score','pred_winner','total_score','one-two']]

KeyboardInterrupt: 

In [36]:
first_combined_df = pd.merge(new_games_df,df,how='left',left_on = 'team_one',right_on = 'team_name')
first_combined_df.rename(columns = {'team_name':'one_team_name',
                              'win_loss_pct':'one_win_loss_pct',
                              'assist_turnover_ratio':'one_assist_turnover_ratio',
                              'assists_per_game':'one_assists_per_game',                              
                              'd_rebounds':'one_d_rebounds',
                              'fg_pct':'one_fg_pct',
                              'd_fg_pct':'one_d_fg_pct',
                              'ft_pct':'one_ft_pct',
                              'o_rebounds':'one_o_rebounds',
                              'd_scoring':'one_d_scoring',
                              'o_scoring':'one_o_scoring',
                              'three_pct':'one_three_pct'},inplace=True)
first_combined_df = pd.merge(first_combined_df,df,how='left',left_on = 'team_two',right_on = 'team_name')
first_combined_df.rename(columns = {'team_name':'two_team_name',
                              'win_loss_pct':'two_win_loss_pct',
                              'assist_turnover_ratio':'two_assist_turnover_ratio',
                              'assists_per_game':'two_assists_per_game',                              
                              'd_rebounds':'two_d_rebounds',
                              'fg_pct':'two_fg_pct',
                              'd_fg_pct':'two_d_fg_pct',
                              'ft_pct':'two_ft_pct',
                              'o_rebounds':'two_o_rebounds',
                              'd_scoring':'two_d_scoring',
                              'o_scoring':'two_o_scoring',
                              'three_pct':'two_three_pct'},inplace=True)

first_combined_df['o_rebound'] = first_combined_df['one_o_rebounds'].astype(float) - first_combined_df['two_o_rebounds'].astype(float)
first_combined_df['at_ratio'] = first_combined_df['one_assist_turnover_ratio'].astype(float) - first_combined_df['two_assist_turnover_ratio'].astype(float)
first_combined_df['assists'] = first_combined_df['one_assists_per_game'].astype(float) - first_combined_df['two_assists_per_game'].astype(float)
first_combined_df['d_rebound'] = first_combined_df['one_d_rebounds'].astype(float)- first_combined_df['two_d_rebounds'].astype(float)
first_combined_df['fg_pct'] = first_combined_df['one_fg_pct'].astype(float) - first_combined_df['two_fg_pct'].astype(float)
first_combined_df['d_fg_pct'] = first_combined_df['one_d_fg_pct'].astype(float) - first_combined_df['one_d_fg_pct'].astype(float)
first_combined_df['ft_pct'] = first_combined_df['one_ft_pct'].astype(float) - first_combined_df['two_ft_pct'].astype(float)
first_combined_df['d_scoring'] = first_combined_df['one_d_scoring'].astype(float) - first_combined_df['two_d_scoring'].astype(float)
first_combined_df['o_scoring'] = first_combined_df['one_o_scoring'].astype(float) - first_combined_df['two_o_scoring'].astype(float)
first_combined_df['three_pct'] = first_combined_df['one_three_pct'].astype(float) - first_combined_df['two_three_pct'].astype(float)

new_x = first_combined_df[['one_win_loss_pct', 'one_assist_turnover_ratio',
       'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct',
       'one_ft_pct', 'one_o_rebounds', 'one_d_scoring', 'one_o_scoring',
       'one_three_pct','two_win_loss_pct', 'two_assist_turnover_ratio', 'two_assists_per_game',
       'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
       'two_o_rebounds', 'two_d_scoring', 'two_o_scoring', 'two_three_pct']].dropna()

new_x = first_combined_df[['one_d_rebounds', 'one_assist_turnover_ratio','one_fg_pct', 'one_d_fg_pct','one_ft_pct', 
                 'one_o_rebounds','one_o_scoring','one_d_scoring', 'one_three_pct',
                 'two_d_rebounds', 'two_assist_turnover_ratio','two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
                 'two_o_rebounds', 'two_o_scoring','two_d_scoring', 'two_three_pct']].dropna()

new_x = first_combined_df[['o_rebound','at_ratio','assists','d_rebound','fg_pct','d_fg_pct','ft_pct','d_scoring','o_scoring','three_pct']].dropna()

new_preds = rf.predict(new_x)
new_preds_proba = rf.predict_proba(new_x)
mlist = np.array([probs.max() for probs in new_preds_proba])
new_regr_preds = regr.predict(new_x)
first_combined_df['team_one_win'] = new_preds
first_combined_df['one_prediction_proba'] = mlist
first_combined_df['team_one_score'] = new_regr_preds[:,0]
first_combined_df['team_two_score'] = new_regr_preds[:,1]
def new_winner(row):
    if row['team_one_win'] == True:
        return row['team_one']
    else:
        return row['team_two']
first_combined_df['pred_winner'] = first_combined_df.apply(new_winner, axis=1)
first_combined_df['total_score'] = first_combined_df['team_one_score'] + first_combined_df['team_two_score']
first_combined_df['one-two'] = first_combined_df['team_one_score'] - first_combined_df['team_two_score']

two_combined_df = pd.merge(new_games_df,df,how='left',left_on = 'team_two',right_on = 'team_name')
two_combined_df.rename(columns = {'team_name':'one_team_name',
                              'win_loss_pct':'one_win_loss_pct',
                              'assist_turnover_ratio':'one_assist_turnover_ratio',
                              'assists_per_game':'one_assists_per_game',                              
                              'd_rebounds':'one_d_rebounds',
                              'fg_pct':'one_fg_pct',
                              'd_fg_pct':'one_d_fg_pct',
                              'ft_pct':'one_ft_pct',
                              'o_rebounds':'one_o_rebounds',
                              'd_scoring':'one_d_scoring',
                              'o_scoring':'one_o_scoring',
                              'three_pct':'one_three_pct'},inplace=True)
two_combined_df = pd.merge(two_combined_df,df,how='left',left_on = 'team_one',right_on = 'team_name')
two_combined_df.rename(columns = {'team_name':'two_team_name',
                              'win_loss_pct':'two_win_loss_pct',
                              'assist_turnover_ratio':'two_assist_turnover_ratio',
                              'assists_per_game':'two_assists_per_game',                              
                              'd_rebounds':'two_d_rebounds',
                              'fg_pct':'two_fg_pct',
                              'd_fg_pct':'two_d_fg_pct',
                              'ft_pct':'two_ft_pct',
                              'o_rebounds':'two_o_rebounds',
                              'd_scoring':'two_d_scoring',
                              'o_scoring':'two_o_scoring',
                              'three_pct':'two_three_pct'},inplace=True)

two_combined_df['o_rebound'] = two_combined_df['one_o_rebounds'].astype(float) - two_combined_df['two_o_rebounds'].astype(float)
two_combined_df['at_ratio'] = two_combined_df['one_assist_turnover_ratio'].astype(float) - two_combined_df['two_assist_turnover_ratio'].astype(float)
two_combined_df['assists'] = two_combined_df['one_assists_per_game'].astype(float) - two_combined_df['two_assists_per_game'].astype(float)
two_combined_df['d_rebound'] = two_combined_df['one_d_rebounds'].astype(float)- two_combined_df['two_d_rebounds'].astype(float)
two_combined_df['fg_pct'] = two_combined_df['one_fg_pct'].astype(float) - two_combined_df['two_fg_pct'].astype(float)
two_combined_df['d_fg_pct'] = two_combined_df['one_d_fg_pct'].astype(float) - two_combined_df['one_d_fg_pct'].astype(float)
two_combined_df['ft_pct'] = two_combined_df['one_ft_pct'].astype(float) - two_combined_df['two_ft_pct'].astype(float)
two_combined_df['d_scoring'] = two_combined_df['one_d_scoring'].astype(float) - two_combined_df['two_d_scoring'].astype(float)
two_combined_df['o_scoring'] = two_combined_df['one_o_scoring'].astype(float) - two_combined_df['two_o_scoring'].astype(float)
two_combined_df['three_pct'] = two_combined_df['one_three_pct'].astype(float) - two_combined_df['two_three_pct'].astype(float)


new_x = two_combined_df[['one_win_loss_pct', 'one_assist_turnover_ratio',
       'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct',
       'one_ft_pct', 'one_o_rebounds', 'one_d_scoring', 'one_o_scoring',
       'one_three_pct','two_win_loss_pct', 'two_assist_turnover_ratio', 'two_assists_per_game',
       'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
       'two_o_rebounds', 'two_d_scoring', 'two_o_scoring', 'two_three_pct']].dropna()

new_x = two_combined_df[['one_d_rebounds', 'one_assist_turnover_ratio','one_fg_pct', 'one_d_fg_pct','one_ft_pct', 
                 'one_o_rebounds','one_o_scoring','one_d_scoring', 'one_three_pct',
                 'two_d_rebounds', 'two_assist_turnover_ratio','two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
                 'two_o_rebounds', 'two_o_scoring','two_d_scoring', 'two_three_pct']].dropna()

new_x = two_combined_df[['o_rebound','at_ratio','assists','d_rebound','fg_pct','d_fg_pct','ft_pct','d_scoring','o_scoring','three_pct']].dropna()

new_preds = rf.predict(new_x)
new_preds_proba = rf.predict_proba(new_x)
mlist = np.array([probs.max() for probs in new_preds_proba])
new_regr_preds = regr.predict(new_x)
two_combined_df['team_one_win'] = new_preds
two_combined_df['two_prediction_proba'] = mlist
two_combined_df['team_one_score'] = new_regr_preds[:,0]
two_combined_df['team_two_score'] = new_regr_preds[:,1]
def new_winner(row):
    if row['team_one_win'] == True:
        return row['team_two']
    else:
        return row['team_one']
two_combined_df['pred_winner'] = two_combined_df.apply(new_winner, axis=1)
two_combined_df['total_score'] = two_combined_df['team_one_score'] + two_combined_df['team_two_score']
two_combined_df['one-two'] = two_combined_df['team_one_score'] - two_combined_df['team_two_score']

first_combined_df['pred_winner_two'] = two_combined_df['pred_winner']
first_combined_df['one-two-two'] = two_combined_df['one-two']
first_combined_df['two_prediction_proba'] = two_combined_df['two_prediction_proba']
def total_winner(row):
    if row['one_prediction_proba'] >= row['two_prediction_proba']:
        return row['pred_winner']
    else:
        return row['pred_winner_two']
first_combined_df['total_winner'] = first_combined_df.apply(total_winner,axis=1)

first_combined_df[['team_one','team_two','one_win_loss_pct','two_win_loss_pct','team_one_score','team_two_score','pred_winner','pred_winner_two','one_prediction_proba','two_prediction_proba','total_score','one-two','one-two-two','total_winner']]


Unnamed: 0,team_one,team_two,one_win_loss_pct,two_win_loss_pct,team_one_score,team_two_score,pred_winner,pred_winner_two,one_prediction_proba,two_prediction_proba,total_score,one-two,one-two-two,total_winner
0,Texas St.,North Texas,75.0,80.0,61.253311,63.523069,North Texas,Texas St.,0.57459,0.521605,124.77638,-2.269758,-1.528937,North Texas
1,Cleveland St.,Xavier,66.7,58.1,70.110489,74.493455,Xavier,Cleveland St.,0.602917,0.626712,144.603944,-4.382966,-5.745716,Cleveland St.
2,Alcorn,Texas A&M,51.5,65.7,62.340866,76.221523,Texas A&M,Texas A&M,0.840309,0.6641,138.562389,-13.880657,6.217163,Texas A&M
3,Oregon,Utah St.,57.6,54.5,68.486486,76.614358,Utah St.,Oregon,0.763138,0.512797,145.100843,-8.127872,-1.323753,Utah St.
4,Indiana,Wyoming,60.6,75.8,65.424771,70.415272,Wyoming,Indiana,0.627469,0.649957,135.840043,-4.990502,-5.115816,Indiana
5,St. Bonaventure,Colorado,69.0,65.6,64.697968,69.537259,Colorado,St. Bonaventure,0.71357,0.636416,134.235227,-4.839291,-4.679333,Colorado
6,Santa Clara,Washington St.,65.6,57.6,71.726126,75.522232,Santa Clara,Santa Clara,0.546052,0.72216,147.248357,-3.796106,-5.149934,Santa Clara
7,USC Upstate,App State,46.7,57.6,68.775157,67.175896,App State,USC Upstate,0.519976,0.542001,135.951053,1.599261,-0.986544,USC Upstate
8,A&M-Corpus Christi,Texas Southern,67.6,60.0,69.095939,70.887974,Texas Southern,A&M-Corpus Christi,0.505508,0.753129,139.983914,-1.792035,-10.863796,A&M-Corpus Christi
9,Belmont,Vanderbilt,78.1,51.5,72.811885,72.00067,Belmont,Belmont,0.60281,0.750865,144.812555,0.811214,-7.329716,Belmont


In [143]:
date = datetime.date(2022,3,17)
new_games = []
r = requests.get(f'https://www.ncaa.com/scoreboard/basketball-men/d1/{str(date.year)}/{str(date.month).zfill(2)}/{str(date.day).zfill(2)}/all-conf')
r.content
if r.status_code == 200:
    soup = bs(r.content,'html.parser')
    for game in soup.find_all(class_="gamePod-game-teams"):
        team_one = game.find_all(class_="gamePod-game-team-name")[0].text
        team_two = game.find_all(class_="gamePod-game-team-name")[1].text
        game_score = {'team_one':team_one, 'team_two':team_two}
        new_games.append(game_score)
date = datetime.date(2022,3,18)
r = requests.get(f'https://www.ncaa.com/scoreboard/basketball-men/d1/{str(date.year)}/{str(date.month).zfill(2)}/{str(date.day).zfill(2)}/all-conf')
r.content
if r.status_code == 200:
    soup = bs(r.content,'html.parser')
    for game in soup.find_all(class_="gamePod-game-teams"):
        team_one = game.find_all(class_="gamePod-game-team-name")[0].text
        team_two = game.find_all(class_="gamePod-game-team-name")[1].text
        game_score = {'team_one':team_one, 'team_two':team_two}
        new_games.append(game_score)

new_games_df = pd.DataFrame(new_games)
new_games_df =new_games_df[new_games_df['team_one']!='Eastern Wash.']

In [156]:
game_1 = pd.DataFrame({'team_one':[new_games_df.loc[6]['team_one']],'team_two':[new_games_df.loc[6]['team_two']]})
game_2 = pd.DataFrame({'team_one':[new_games_df.loc[2]['team_one']],'team_two':[new_games_df.loc[2]['team_two']]})
game_3 = pd.DataFrame({'team_one':[new_games_df.loc[8]['team_one']],'team_two':[new_games_df.loc[8]['team_two']]})
game_4 = pd.DataFrame({'team_one':[new_games_df.loc[12]['team_one']],'team_two':[new_games_df.loc[12]['team_two']]})
game_5 = pd.DataFrame({'team_one':[new_games_df.loc[23]['team_one']],'team_two':[new_games_df.loc[23]['team_two']]})
game_6 = pd.DataFrame({'team_one':[new_games_df.loc[19]['team_one']],'team_two':[new_games_df.loc[19]['team_two']]})
game_7 = pd.DataFrame({'team_one':[new_games_df.loc[30]['team_one']],'team_two':[new_games_df.loc[30]['team_two']]})
game_8 = pd.DataFrame({'team_one':[new_games_df.loc[26]['team_one']],'team_two':[new_games_df.loc[26]['team_two']]})

game_9 = pd.DataFrame({'team_one':[new_games_df.loc[3]['team_one']],'team_two':[new_games_df.loc[3]['team_two']]})
game_10 = pd.DataFrame({'team_one':[new_games_df.loc[7]['team_one']],'team_two':[new_games_df.loc[7]['team_two']]})
game_11 = pd.DataFrame({'team_one':[new_games_df.loc[10]['team_one']],'team_two':[new_games_df.loc[10]['team_two']]})
game_12 = pd.DataFrame({'team_one':[new_games_df.loc[14]['team_one']],'team_two':[new_games_df.loc[14]['team_two']]})
game_13 = pd.DataFrame({'team_one':[new_games_df.loc[24]['team_one']],'team_two':[new_games_df.loc[24]['team_two']]})
game_14 = pd.DataFrame({'team_one':[new_games_df.loc[20]['team_one']],'team_two':[new_games_df.loc[20]['team_two']]})
game_15 = pd.DataFrame({'team_one':[new_games_df.loc[13]['team_one']],'team_two':[new_games_df.loc[13]['team_two']]})
game_16 = pd.DataFrame({'team_one':[new_games_df.loc[9]['team_one']],'team_two':[new_games_df.loc[9]['team_two']]})

game_17 = pd.DataFrame({'team_one':[new_games_df.loc[28]['team_one']],'team_two':[new_games_df.loc[28]['team_two']]})
game_18 = pd.DataFrame({'team_one':[new_games_df.loc[32]['team_one']],'team_two':[new_games_df.loc[32]['team_two']]})
game_19 = pd.DataFrame({'team_one':[new_games_df.loc[29]['team_one']],'team_two':[new_games_df.loc[29]['team_two']]})
game_20 = pd.DataFrame({'team_one':[new_games_df.loc[25]['team_one']],'team_two':[new_games_df.loc[25]['team_two']]})
game_21 = pd.DataFrame({'team_one':[new_games_df.loc[0]['team_one']],'team_two':[new_games_df.loc[0]['team_two']]})
game_22 = pd.DataFrame({'team_one':[new_games_df.loc[4]['team_one']],'team_two':[new_games_df.loc[4]['team_two']]})
game_23 = pd.DataFrame({'team_one':[new_games_df.loc[17]['team_one']],'team_two':[new_games_df.loc[17]['team_two']]})
game_24 = pd.DataFrame({'team_one':[new_games_df.loc[21]['team_one']],'team_two':[new_games_df.loc[21]['team_two']]})

game_25 = pd.DataFrame({'team_one':[new_games_df.loc[15]['team_one']],'team_two':[new_games_df.loc[15]['team_two']]})
game_26 = pd.DataFrame({'team_one':[new_games_df.loc[11]['team_one']],'team_two':[new_games_df.loc[11]['team_two']]})
game_27 = pd.DataFrame({'team_one':[new_games_df.loc[5]['team_one']],'team_two':[new_games_df.loc[5]['team_two']]})
game_28 = pd.DataFrame({'team_one':[new_games_df.loc[1]['team_one']],'team_two':[new_games_df.loc[1]['team_two']]})
game_29 = pd.DataFrame({'team_one':[new_games_df.loc[27]['team_one']],'team_two':[new_games_df.loc[27]['team_two']]})
game_30 = pd.DataFrame({'team_one':[new_games_df.loc[31]['team_one']],'team_two':[new_games_df.loc[31]['team_two']]})
game_31 = pd.DataFrame({'team_one':[new_games_df.loc[22]['team_one']],'team_two':[new_games_df.loc[22]['team_two']]})
game_32 = pd.DataFrame({'team_one':[new_games_df.loc[18]['team_one']],'team_two':[new_games_df.loc[18]['team_two']]})


In [245]:
rf = pickle.load(open('ncaa_rfc','rb'))
regr = pickle.load(open('ncaa_rfr','rb'))
def play_game(i):
    first_combined_df = pd.merge(i,df,how='left',left_on = 'team_one',right_on = 'team_name')
    first_combined_df.rename(columns = {'team_name':'one_team_name',
                                  'win_loss_pct':'one_win_loss_pct',
                                  'assist_turnover_ratio':'one_assist_turnover_ratio',
                                  'assists_per_game':'one_assists_per_game',                              
                                  'd_rebounds':'one_d_rebounds',
                                  'fg_pct':'one_fg_pct',
                                  'd_fg_pct':'one_d_fg_pct',
                                  'ft_pct':'one_ft_pct',
                                  'o_rebounds':'one_o_rebounds',
                                  'd_scoring':'one_d_scoring',
                                  'o_scoring':'one_o_scoring',
                                  'three_pct':'one_three_pct'},inplace=True)
    first_combined_df = pd.merge(first_combined_df,df,how='left',left_on = 'team_two',right_on = 'team_name')
    first_combined_df.rename(columns = {'team_name':'two_team_name',
                                  'win_loss_pct':'two_win_loss_pct',
                                  'assist_turnover_ratio':'two_assist_turnover_ratio',
                                  'assists_per_game':'two_assists_per_game',                              
                                  'd_rebounds':'two_d_rebounds',
                                  'fg_pct':'two_fg_pct',
                                  'd_fg_pct':'two_d_fg_pct',
                                  'ft_pct':'two_ft_pct',
                                  'o_rebounds':'two_o_rebounds',
                                  'd_scoring':'two_d_scoring',
                                  'o_scoring':'two_o_scoring',
                                  'three_pct':'two_three_pct'},inplace=True)
    
    first_combined_df['o_rebound'] = first_combined_df['one_o_rebounds'].astype(float) - first_combined_df['two_o_rebounds'].astype(float)
    first_combined_df['at_ratio'] = first_combined_df['one_assist_turnover_ratio'].astype(float) - first_combined_df['two_assist_turnover_ratio'].astype(float)
    first_combined_df['assists'] = first_combined_df['one_assists_per_game'].astype(float) - first_combined_df['two_assists_per_game'].astype(float)
    first_combined_df['d_rebound'] = first_combined_df['one_d_rebounds'].astype(float)- first_combined_df['two_d_rebounds'].astype(float)
    first_combined_df['fg_pct'] = first_combined_df['one_fg_pct'].astype(float) - first_combined_df['two_fg_pct'].astype(float)
    first_combined_df['d_fg_pct'] = first_combined_df['one_d_fg_pct'].astype(float) - first_combined_df['one_d_fg_pct'].astype(float)
    first_combined_df['ft_pct'] = first_combined_df['one_ft_pct'].astype(float) - first_combined_df['two_ft_pct'].astype(float)
    first_combined_df['d_scoring'] = first_combined_df['one_d_scoring'].astype(float) - first_combined_df['two_d_scoring'].astype(float)
    first_combined_df['o_scoring'] = first_combined_df['one_o_scoring'].astype(float) - first_combined_df['two_o_scoring'].astype(float)
    first_combined_df['three_pct'] = first_combined_df['one_three_pct'].astype(float) - first_combined_df['two_three_pct'].astype(float)

    new_x = first_combined_df[['one_win_loss_pct', 'one_assist_turnover_ratio',
           'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct',
           'one_ft_pct', 'one_o_rebounds', 'one_d_scoring', 'one_o_scoring',
           'one_three_pct','two_win_loss_pct', 'two_assist_turnover_ratio', 'two_assists_per_game',
           'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
           'two_o_rebounds', 'two_d_scoring', 'two_o_scoring', 'two_three_pct']].dropna()

    new_x = first_combined_df[['o_rebound','at_ratio','assists','d_rebound','fg_pct','d_fg_pct','ft_pct','d_scoring','o_scoring','three_pct']].dropna()
    
    new_x = first_combined_df[['one_d_rebounds', 'one_assist_turnover_ratio','one_fg_pct', 'one_d_fg_pct','one_ft_pct', 
                 'one_o_rebounds','one_o_scoring','one_d_scoring', 'one_three_pct',
                 'two_d_rebounds', 'two_assist_turnover_ratio','two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
                 'two_o_rebounds', 'two_o_scoring','two_d_scoring', 'two_three_pct']].dropna()
    
#     new_x = first_combined_df[['one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct', 
#                      'one_o_rebounds','one_three_pct',
#                      'two_d_rebounds','two_fg_pct', 'two_d_fg_pct', 
#                      'two_o_rebounds', 'two_three_pct']].dropna()
    
    #### Second Model
#     new_x = first_combined_df[['one_assist_turnover_ratio', 'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 
#            'one_ft_pct', 'one_o_rebounds','one_three_pct',
#                  'two_assist_turnover_ratio', 'two_assists_per_game',
#            'two_d_rebounds', 'two_fg_pct', 'two_ft_pct','two_o_rebounds', 'two_three_pct']]

    new_preds = rf.predict(new_x)
    new_preds_proba = rf.predict_proba(new_x)
    mlist = np.array([probs.max() for probs in new_preds_proba])
    new_regr_preds = regr.predict(new_x)
    first_combined_df['team_one_win'] = new_preds
    first_combined_df['one_prediction_proba'] = mlist
    first_combined_df['team_one_score'] = new_regr_preds[:,0]
    first_combined_df['team_two_score'] = new_regr_preds[:,1]
    def new_winner(row):
        if row['team_one_win'] == True:
            return row['team_one']
        else:
            return row['team_two']
    first_combined_df['pred_winner'] = first_combined_df.apply(new_winner, axis=1)
    first_combined_df['total_score'] = first_combined_df['team_one_score'] + first_combined_df['team_two_score']
    first_combined_df['one-two'] = first_combined_df['team_one_score'] - first_combined_df['team_two_score']

    two_combined_df = pd.merge(i,df,how='left',left_on = 'team_two',right_on = 'team_name')
    two_combined_df.rename(columns = {'team_name':'one_team_name',
                                  'win_loss_pct':'one_win_loss_pct',
                                  'assist_turnover_ratio':'one_assist_turnover_ratio',
                                  'assists_per_game':'one_assists_per_game',                              
                                  'd_rebounds':'one_d_rebounds',
                                  'fg_pct':'one_fg_pct',
                                  'd_fg_pct':'one_d_fg_pct',
                                  'ft_pct':'one_ft_pct',
                                  'o_rebounds':'one_o_rebounds',
                                  'd_scoring':'one_d_scoring',
                                  'o_scoring':'one_o_scoring',
                                  'three_pct':'one_three_pct'},inplace=True)
    two_combined_df = pd.merge(two_combined_df,df,how='left',left_on = 'team_one',right_on = 'team_name')
    two_combined_df.rename(columns = {'team_name':'two_team_name',
                                  'win_loss_pct':'two_win_loss_pct',
                                  'assist_turnover_ratio':'two_assist_turnover_ratio',
                                  'assists_per_game':'two_assists_per_game',                              
                                  'd_rebounds':'two_d_rebounds',
                                  'fg_pct':'two_fg_pct',
                                  'd_fg_pct':'two_d_fg_pct',
                                  'ft_pct':'two_ft_pct',
                                  'o_rebounds':'two_o_rebounds',
                                  'd_scoring':'two_d_scoring',
                                  'o_scoring':'two_o_scoring',
                                  'three_pct':'two_three_pct'},inplace=True)

    two_combined_df['o_rebound'] = two_combined_df['one_o_rebounds'].astype(float) - two_combined_df['two_o_rebounds'].astype(float)
    two_combined_df['at_ratio'] = two_combined_df['one_assist_turnover_ratio'].astype(float) - two_combined_df['two_assist_turnover_ratio'].astype(float)
    two_combined_df['assists'] = two_combined_df['one_assists_per_game'].astype(float) - two_combined_df['two_assists_per_game'].astype(float)
    two_combined_df['d_rebound'] = two_combined_df['one_d_rebounds'].astype(float)- two_combined_df['two_d_rebounds'].astype(float)
    two_combined_df['fg_pct'] = two_combined_df['one_fg_pct'].astype(float) - two_combined_df['two_fg_pct'].astype(float)
    two_combined_df['d_fg_pct'] = two_combined_df['one_d_fg_pct'].astype(float) - two_combined_df['one_d_fg_pct'].astype(float)
    two_combined_df['ft_pct'] = two_combined_df['one_ft_pct'].astype(float) - two_combined_df['two_ft_pct'].astype(float)
    two_combined_df['d_scoring'] = two_combined_df['one_d_scoring'].astype(float) - two_combined_df['two_d_scoring'].astype(float)
    two_combined_df['o_scoring'] = two_combined_df['one_o_scoring'].astype(float) - two_combined_df['two_o_scoring'].astype(float)
    two_combined_df['three_pct'] = two_combined_df['one_three_pct'].astype(float) - two_combined_df['two_three_pct'].astype(float)

    
    new_x = two_combined_df[['one_win_loss_pct', 'one_assist_turnover_ratio',
           'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct',
           'one_ft_pct', 'one_o_rebounds', 'one_d_scoring', 'one_o_scoring',
           'one_three_pct','two_win_loss_pct', 'two_assist_turnover_ratio', 'two_assists_per_game',
           'two_d_rebounds', 'two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
           'two_o_rebounds', 'two_d_scoring', 'two_o_scoring', 'two_three_pct']].dropna()

    
    new_x = two_combined_df[['one_d_rebounds', 'one_assist_turnover_ratio','one_fg_pct', 'one_d_fg_pct','one_ft_pct', 
                 'one_o_rebounds','one_o_scoring','one_d_scoring', 'one_three_pct',
                 'two_d_rebounds', 'two_assist_turnover_ratio','two_fg_pct', 'two_d_fg_pct', 'two_ft_pct',
                 'two_o_rebounds', 'two_o_scoring','two_d_scoring', 'two_three_pct']].dropna()
    
#     new_x = two_combined_df[['one_d_rebounds', 'one_fg_pct', 'one_d_fg_pct', 
#                      'one_o_rebounds','one_three_pct',
#                      'two_d_rebounds','two_fg_pct', 'two_d_fg_pct', 
#                      'two_o_rebounds', 'two_three_pct']].dropna()
#     new_x = two_combined_df[['o_rebound','at_ratio','assists','d_rebound','fg_pct','d_fg_pct','ft_pct','d_scoring','o_scoring','three_pct']].dropna()

    
#     ########## Second Model
#     new_x = two_combined_df[['one_assist_turnover_ratio', 'one_assists_per_game', 'one_d_rebounds', 'one_fg_pct', 
#            'one_ft_pct', 'one_o_rebounds','one_three_pct',
#                  'two_assist_turnover_ratio', 'two_assists_per_game',
#            'two_d_rebounds', 'two_fg_pct', 'two_ft_pct','two_o_rebounds', 'two_three_pct']]

    new_preds = rf.predict(new_x)
    new_preds_proba = rf.predict_proba(new_x)
    mlist = np.array([probs.max() for probs in new_preds_proba])
    new_regr_preds = regr.predict(new_x)
    two_combined_df['team_one_win'] = new_preds
    two_combined_df['two_prediction_proba'] = mlist
    two_combined_df['team_one_score'] = new_regr_preds[:,0]
    two_combined_df['team_two_score'] = new_regr_preds[:,1]
    def new_winner(row):
        if row['team_one_win'] == True:
            return row['team_two']
        else:
            return row['team_one']
    two_combined_df['pred_winner'] = two_combined_df.apply(new_winner, axis=1)
    two_combined_df['total_score'] = two_combined_df['team_one_score'] + two_combined_df['team_two_score']
    two_combined_df['one-two'] = two_combined_df['team_one_score'] - two_combined_df['team_two_score']

    first_combined_df['pred_winner_two'] = two_combined_df['pred_winner']
    first_combined_df['one-two-two'] = two_combined_df['one-two']
    first_combined_df['two_prediction_proba'] = two_combined_df['two_prediction_proba']
    def total_winner(row):
        if row['one_prediction_proba'] >= row['two_prediction_proba']:
            return row['pred_winner']
        else:
            return row['pred_winner_two']
    first_combined_df['total_winner'] = first_combined_df.apply(total_winner,axis=1)
    print(first_combined_df['team_one'][0], 'Vs', first_combined_df['team_two'][0])
    print(first_combined_df['total_winner'][0], 'WINS')
    return first_combined_df['total_winner'][0]


In [250]:
game_33 = pd.DataFrame({'team_one':[play_game(game_1)],'team_two':[play_game(game_2)]})
game_34 = pd.DataFrame({'team_one':[play_game(game_3)],'team_two':[play_game(game_4)]})
game_35 = pd.DataFrame({'team_one':[play_game(game_5)],'team_two':[play_game(game_6)]})
game_36 = pd.DataFrame({'team_one':[play_game(game_7)],'team_two':[play_game(game_8)]})
game_37 = pd.DataFrame({'team_one':[play_game(game_9)],'team_two':[play_game(game_10)]})
game_38 = pd.DataFrame({'team_one':[play_game(game_11)],'team_two':[play_game(game_12)]})
game_39 = pd.DataFrame({'team_one':[play_game(game_13)],'team_two':[play_game(game_14)]})
game_40 = pd.DataFrame({'team_one':[play_game(game_15)],'team_two':[play_game(game_16)]})
game_41 = pd.DataFrame({'team_one':[play_game(game_17)],'team_two':[play_game(game_18)]})
game_42 = pd.DataFrame({'team_one':[play_game(game_19)],'team_two':[play_game(game_20)]})
game_43 = pd.DataFrame({'team_one':[play_game(game_21)],'team_two':[play_game(game_22)]})
game_44 = pd.DataFrame({'team_one':[play_game(game_23)],'team_two':[play_game(game_24)]})
game_45 = pd.DataFrame({'team_one':[play_game(game_25)],'team_two':[play_game(game_26)]})
game_46 = pd.DataFrame({'team_one':[play_game(game_27)],'team_two':[play_game(game_28)]})
game_47 = pd.DataFrame({'team_one':[play_game(game_29)],'team_two':[play_game(game_30)]})
game_48 = pd.DataFrame({'team_one':[play_game(game_31)],'team_two':[play_game(game_32)]})

game_49 = pd.DataFrame({'team_one':[play_game(game_33)],'team_two':[play_game(game_34)]})
game_50 = pd.DataFrame({'team_one':[play_game(game_35)],'team_two':[play_game(game_36)]})
game_51 = pd.DataFrame({'team_one':[play_game(game_37)],'team_two':[play_game(game_38)]})
game_52 = pd.DataFrame({'team_one':[play_game(game_39)],'team_two':[play_game(game_40)]})
game_53 = pd.DataFrame({'team_one':[play_game(game_41)],'team_two':[play_game(game_42)]})
game_54 = pd.DataFrame({'team_one':[play_game(game_43)],'team_two':[play_game(game_44)]})
game_55 = pd.DataFrame({'team_one':[play_game(game_45)],'team_two':[play_game(game_46)]})
game_56 = pd.DataFrame({'team_one':[play_game(game_47)],'team_two':[play_game(game_48)]})

game_57 = pd.DataFrame({'team_one':[play_game(game_49)],'team_two':[play_game(game_50)]})
game_58 = pd.DataFrame({'team_one':[play_game(game_51)],'team_two':[play_game(game_52)]})
game_59 = pd.DataFrame({'team_one':[play_game(game_53)],'team_two':[play_game(game_54)]})
game_60 = pd.DataFrame({'team_one':[play_game(game_55)],'team_two':[play_game(game_56)]})

game_61 = pd.DataFrame({'team_one':[play_game(game_57)],'team_two':[play_game(game_58)]})
game_62 = pd.DataFrame({'team_one':[play_game(game_59)],'team_two':[play_game(game_60)]})

game_63 = pd.DataFrame({'team_one':[play_game(game_61)],'team_two':[play_game(game_62)]})

play_game(game_63)

Georgia St. Vs Gonzaga
Gonzaga WINS
Memphis Vs Boise St.
Memphis WINS
New Mexico St. Vs UConn
New Mexico St. WINS
Vermont Vs Arkansas
Vermont WINS
Rutgers Vs Alabama
Alabama WINS
Montana St. Vs Texas Tech
Texas Tech WINS
Davidson Vs Michigan St.
Davidson WINS
Cal St. Fullerton Vs Duke
Duke WINS
Norfolk St. Vs Baylor
Norfolk St. WINS
Marquette Vs North Carolina
Marquette WINS
Indiana Vs Saint Mary's (CA)
Saint Mary's (CA) WINS
Akron Vs UCLA
UCLA WINS
Virginia Tech Vs Texas
Virginia Tech WINS
Yale Vs Purdue
Purdue WINS
San Francisco Vs Murray St.
Murray St. WINS
Saint Peter's Vs Kentucky
Kentucky WINS
Wright St. Vs Arizona
Arizona WINS
TCU Vs Seton Hall
Seton Hall WINS
UAB Vs Houston
UAB WINS
Chattanooga Vs Illinois
Chattanooga WINS
Michigan Vs Colorado St.
Colorado St. WINS
Longwood Vs Tennessee
Tennessee WINS
Loyola Chicago Vs Ohio St.
Loyola Chicago WINS
Delaware Vs Villanova
Villanova WINS
Texas Southern Vs Kansas
Kansas WINS
Creighton Vs San Diego St.
San Diego St. WINS
Richmond Vs 

'Arizona'

In [153]:
df[df['team_name']=='Wright St.']

Unnamed: 0,team_name,win_loss_pct,assist_turnover_ratio,assists_per_game,d_rebounds,fg_pct,d_fg_pct,ft_pct,o_rebounds,d_scoring,o_scoring,three_pct
116,Wright St.,61.8,1.13,13.9,24.35,46.47,44.995,76.79,10.53,75.5,75.5,32.87


In [164]:
def make_game(team_one,team_two):
    this_game = pd.DataFrame({'team_one':[team_one],'team_two':[team_two]})
    play_game(this_game)

In [251]:
make_game('Notre Dame',"Texas Tech")

Notre Dame Vs Texas Tech
Texas Tech WINS
