# Decision Trees
In this notebook, we will be using the Decision Trees in order to try and predict individual games. We will be using the data that was collected from the sportsreference API from the summer of 2020. Models will be trained on the moving averages that we generated.

In [88]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

from tqdm import tqdm
from sklearn.model_selection import train_test_split

pd.set_option("display.max_rows", None, "display.max_columns", None)

# Getting Data Ready

In [89]:
# Lead data.csv
data_ma = pd.read_csv('../assets/data/cleaned_data/cleaned_data.csv')
data_ma.head()

Unnamed: 0,date,location,home,away,winner,home_assist_percentage_SMA,home_assist_percentage_CMA,home_assist_percentage_EMA,home_assists_SMA,home_assists_CMA,home_assists_EMA,home_block_percentage_SMA,home_block_percentage_CMA,home_block_percentage_EMA,home_blocks_SMA,home_blocks_CMA,home_blocks_EMA,home_defensive_rating_SMA,home_defensive_rating_CMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_SMA,home_defensive_rebound_percentage_CMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_SMA,home_defensive_rebounds_CMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_SMA,home_effective_field_goal_percentage_CMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_SMA,home_field_goal_attempts_CMA,home_field_goal_attempts_EMA,home_field_goal_percentage_SMA,home_field_goal_percentage_CMA,home_field_goal_percentage_EMA,home_field_goals_SMA,home_field_goals_CMA,home_field_goals_EMA,home_free_throw_attempt_rate_SMA,home_free_throw_attempt_rate_CMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_SMA,home_free_throw_attempts_CMA,home_free_throw_attempts_EMA,home_free_throw_percentage_SMA,home_free_throw_percentage_CMA,home_free_throw_percentage_EMA,home_free_throws_SMA,home_free_throws_CMA,home_free_throws_EMA,home_losses_SMA,home_losses_CMA,home_losses_EMA,home_minutes_played_SMA,home_minutes_played_CMA,home_minutes_played_EMA,home_offensive_rating_SMA,home_offensive_rating_CMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_SMA,home_offensive_rebound_percentage_CMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_SMA,home_offensive_rebounds_CMA,home_offensive_rebounds_EMA,home_personal_fouls_SMA,home_personal_fouls_CMA,home_personal_fouls_EMA,home_points_SMA,home_points_CMA,home_points_EMA,home_steal_percentage_SMA,home_steal_percentage_CMA,home_steal_percentage_EMA,home_steals_SMA,home_steals_CMA,home_steals_EMA,home_three_point_attempt_rate_SMA,home_three_point_attempt_rate_CMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_SMA,home_three_point_field_goal_attempts_CMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_SMA,home_three_point_field_goal_percentage_CMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_SMA,home_three_point_field_goals_CMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_SMA,home_total_rebound_percentage_CMA,home_total_rebound_percentage_EMA,home_total_rebounds_SMA,home_total_rebounds_CMA,home_total_rebounds_EMA,home_true_shooting_percentage_SMA,home_true_shooting_percentage_CMA,home_true_shooting_percentage_EMA,home_turnover_percentage_SMA,home_turnover_percentage_CMA,home_turnover_percentage_EMA,home_turnovers_SMA,home_turnovers_CMA,home_turnovers_EMA,home_two_point_field_goal_attempts_SMA,home_two_point_field_goal_attempts_CMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_SMA,home_two_point_field_goal_percentage_CMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_SMA,home_two_point_field_goals_CMA,home_two_point_field_goals_EMA,home_win_percentage_SMA,home_win_percentage_CMA,home_win_percentage_EMA,home_wins_SMA,home_wins_CMA,home_wins_EMA,away_assist_percentage_SMA,away_assist_percentage_CMA,away_assist_percentage_EMA,away_assists_SMA,away_assists_CMA,away_assists_EMA,away_block_percentage_SMA,away_block_percentage_CMA,away_block_percentage_EMA,away_blocks_SMA,away_blocks_CMA,away_blocks_EMA,away_defensive_rating_SMA,away_defensive_rating_CMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_SMA,away_defensive_rebound_percentage_CMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_SMA,away_defensive_rebounds_CMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_SMA,away_effective_field_goal_percentage_CMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_SMA,away_field_goal_attempts_CMA,away_field_goal_attempts_EMA,away_field_goal_percentage_SMA,away_field_goal_percentage_CMA,away_field_goal_percentage_EMA,away_field_goals_SMA,away_field_goals_CMA,away_field_goals_EMA,away_free_throw_attempt_rate_SMA,away_free_throw_attempt_rate_CMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_SMA,away_free_throw_attempts_CMA,away_free_throw_attempts_EMA,away_free_throw_percentage_SMA,away_free_throw_percentage_CMA,away_free_throw_percentage_EMA,away_free_throws_SMA,away_free_throws_CMA,away_free_throws_EMA,away_losses_SMA,away_losses_CMA,away_losses_EMA,away_minutes_played_SMA,away_minutes_played_CMA,away_minutes_played_EMA,away_offensive_rating_SMA,away_offensive_rating_CMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_SMA,away_offensive_rebound_percentage_CMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_SMA,away_offensive_rebounds_CMA,away_offensive_rebounds_EMA,away_personal_fouls_SMA,away_personal_fouls_CMA,away_personal_fouls_EMA,away_points_SMA,away_points_CMA,away_points_EMA,away_steal_percentage_SMA,away_steal_percentage_CMA,away_steal_percentage_EMA,away_steals_SMA,away_steals_CMA,away_steals_EMA,away_three_point_attempt_rate_SMA,away_three_point_attempt_rate_CMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_SMA,away_three_point_field_goal_attempts_CMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_SMA,away_three_point_field_goal_percentage_CMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_SMA,away_three_point_field_goals_CMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_SMA,away_total_rebound_percentage_CMA,away_total_rebound_percentage_EMA,away_total_rebounds_SMA,away_total_rebounds_CMA,away_total_rebounds_EMA,away_true_shooting_percentage_SMA,away_true_shooting_percentage_CMA,away_true_shooting_percentage_EMA,away_turnover_percentage_SMA,away_turnover_percentage_CMA,away_turnover_percentage_EMA,away_turnovers_SMA,away_turnovers_CMA,away_turnovers_EMA,away_two_point_field_goal_attempts_SMA,away_two_point_field_goal_attempts_CMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_SMA,away_two_point_field_goal_percentage_CMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_SMA,away_two_point_field_goals_CMA,away_two_point_field_goals_EMA,away_win_percentage_SMA,away_win_percentage_CMA,away_win_percentage_EMA,away_wins_SMA,away_wins_CMA,away_wins_EMA
0,2009-11-28,"The Palestra, Philadelphia, Pennsylvania",Brown,Siena,Away,62.98,62.98,62.004938,14.8,14.8,15.037037,8.92,8.92,9.353086,4.2,4.2,4.37037,106.24,106.24,105.667901,55.4,55.4,53.988889,21.4,21.4,21.580247,0.5278,0.5278,0.542457,51.6,51.6,52.012346,0.4588,0.4588,0.46884,23.6,23.6,24.308642,0.326,0.326,0.336951,16.4,16.4,17.111111,0.6874,0.6874,0.716889,12.0,12.0,13.012346,1.8,1.8,2.061728,200.0,200.0,200.0,100.66,100.66,106.697531,31.38,31.38,32.645679,7.6,7.6,7.728395,15.6,15.6,15.197531,66.2,66.2,69.197531,6.26,6.26,5.653086,4.2,4.2,3.790123,0.3854,0.3854,0.404148,20.2,20.2,21.271605,0.3514,0.3514,0.360296,7.0,7.0,7.567901,44.6,44.6,44.724691,29.0,29.0,29.308642,0.559,0.559,0.576741,20.9,20.9,19.002469,15.6,15.6,14.160494,31.4,31.4,30.740741,0.5302,0.5302,0.546012,16.6,16.6,16.740741,0.4966,0.4966,0.485136,1.2,1.2,1.333333,58.98,58.98,59.146914,16.0,16.0,16.419753,9.12,9.12,8.658025,3.6,3.6,3.382716,89.5,89.5,91.364198,69.74,69.74,71.012346,24.8,24.8,24.604938,0.5096,0.5096,0.511,57.6,57.6,59.296296,0.4794,0.4794,0.478296,27.2,27.2,27.82716,0.46,0.46,0.439531,25.8,25.8,25.296296,0.6358,0.6358,0.635605,16.4,16.4,16.074074,0.8,0.8,1.037037,200.0,200.0,200.0,100.62,100.62,101.174074,41.2,41.2,41.734568,12.0,12.0,12.259259,16.0,16.0,16.851852,74.2,74.2,75.506173,13.06,13.06,12.179012,9.6,9.6,9.08642,0.2328,0.2328,0.242407,13.6,13.6,14.617284,0.2612,0.2612,0.273926,3.4,3.4,3.777778,55.5,55.5,55.933333,36.8,36.8,36.864198,0.5348,0.5348,0.534086,19.52,19.52,19.382716,16.4,16.4,16.62963,44.0,44.0,44.679012,0.544,0.544,0.542716,23.8,23.8,24.049383,0.8034,0.8034,0.761778,2.2,2.2,2.358025
1,2009-11-28,"Hytche Athletic Center, Princess Anne, Maryland",Maryland-Eastern Shore,American,Home,54.46,54.46,52.204938,10.4,10.4,9.925926,4.4,4.4,3.781481,1.6,1.6,1.320988,119.42,119.42,121.093827,61.14,61.14,63.783951,19.6,19.6,19.82716,0.414,0.414,0.406852,52.2,52.2,52.728395,0.3654,0.3654,0.360926,19.0,19.0,18.962963,0.4338,0.4338,0.45142,22.4,22.4,23.530864,0.6358,0.6358,0.634691,13.6,13.6,14.358025,3.0,3.0,3.395062,200.0,200.0,200.0,85.54,85.54,86.409877,30.68,30.68,31.037037,9.0,9.0,9.432099,18.8,18.8,18.592593,56.6,56.6,57.037037,6.3,6.3,5.969136,4.2,4.2,3.975309,0.3244,0.3244,0.328235,17.0,17.0,17.345679,0.3114,0.3114,0.288222,5.0,5.0,4.753086,44.76,44.76,45.819753,28.6,28.6,29.259259,0.4518,0.4518,0.44716,18.64,18.64,17.402469,14.0,14.0,13.123457,35.2,35.2,35.382716,0.3992,0.3992,0.403074,14.0,14.0,14.209877,0.0,0.0,0.0,0.0,0.0,0.0,64.06,64.06,64.554321,14.2,14.2,14.08642,9.14,9.14,9.62716,3.8,3.8,4.234568,103.34,103.34,100.969136,68.84,68.84,70.982716,24.8,24.8,26.123457,0.4594,0.4594,0.466395,53.8,53.8,52.246914,0.4142,0.4142,0.419284,22.2,22.2,21.814815,0.3294,0.3294,0.346432,17.6,17.6,17.91358,0.655,0.655,0.650889,10.6,10.6,10.851852,4.0,4.0,4.395062,200.0,200.0,200.0,89.4,89.4,87.774074,33.66,33.66,33.150617,9.0,9.0,8.530864,17.6,17.6,16.938272,59.8,59.8,59.333333,5.74,5.74,4.992593,3.8,3.8,3.296296,0.2618,0.2618,0.267222,14.0,14.0,13.888889,0.3418,0.3418,0.348457,4.8,4.8,4.851852,51.8,51.8,53.04321,33.8,33.8,34.654321,0.4816,0.4816,0.488914,20.36,20.36,22.316049,15.8,15.8,17.395062,39.8,39.8,38.358025,0.438,0.438,0.442778,17.4,17.4,16.962963,0.0,0.0,0.0,0.0,0.0,0.0
2,2009-11-28,"UIC Pavilion, Chicago, Illinois",Tennessee State,Liberty,Away,54.72,54.72,54.185185,12.6,12.6,12.925926,6.94,6.94,5.85679,2.0,2.0,1.765432,111.08,111.08,109.480247,56.52,56.52,56.780247,17.8,17.8,18.345679,0.47,0.47,0.480593,56.6,56.6,56.518519,0.4074,0.4074,0.421605,23.0,23.0,23.790123,0.27,0.27,0.271901,15.2,15.2,15.308642,0.639,0.639,0.672556,9.8,9.8,10.382716,2.8,2.8,3.061728,200.0,200.0,200.0,90.22,90.22,93.345679,41.92,41.92,45.044444,11.2,11.2,12.012346,20.8,20.8,20.271605,62.8,62.8,64.555556,14.38,14.38,14.22963,10.2,10.2,10.012346,0.3428,0.3428,0.325037,19.4,19.4,18.382716,0.3616,0.3616,0.360346,7.0,7.0,6.592593,47.42,47.42,48.97284,29.0,29.0,30.358025,0.4932,0.4932,0.506667,23.46,23.46,23.519753,18.6,18.6,18.790123,37.2,37.2,38.135802,0.4298,0.4298,0.450247,16.0,16.0,17.197531,0.04,0.04,0.066667,0.2,0.2,0.333333,49.02,49.02,52.893827,9.6,9.6,10.82716,5.54,5.54,5.544444,2.2,2.2,2.0,105.24,105.24,101.125926,63.34,63.34,65.146914,22.6,22.6,23.469136,0.41,0.41,0.424222,49.8,49.8,50.950617,0.3728,0.3728,0.385,18.8,18.8,19.864198,0.394,0.394,0.393753,19.6,19.6,20.012346,0.7106,0.7106,0.714049,14.0,14.0,14.382716,3.8,3.8,4.061728,200.0,200.0,200.0,80.86,80.86,86.391358,32.3,32.3,36.834568,8.4,8.4,9.481481,15.4,15.4,15.419753,55.4,55.4,58.197531,9.88,9.88,10.017284,6.8,6.8,6.814815,0.3394,0.3394,0.346025,17.0,17.0,17.740741,0.2144,0.2144,0.223457,3.8,3.8,4.08642,48.68,48.68,51.791358,31.0,31.0,32.950617,0.4632,0.4632,0.475827,24.44,24.44,23.044444,18.8,18.8,17.91358,32.8,32.8,33.209877,0.4554,0.4554,0.472395,15.0,15.0,15.777778,0.2472,0.2472,0.252543,1.2,1.2,1.333333
3,2009-11-28,"UIC Pavilion, Chicago, Illinois",Iowa State,Northwestern,Away,59.64,60.65,57.632099,18.0,18.833333,17.37037,11.92,11.5,9.925514,5.0,5.0,4.26749,80.52,83.066667,81.50535,76.38,73.333333,72.783539,27.6,26.666667,26.176955,0.5788,0.582667,0.564086,59.0,59.833333,59.119342,0.5032,0.5105,0.499214,29.8,30.666667,29.539095,0.3574,0.336833,0.340782,20.4,19.5,19.641975,0.6248,0.642833,0.666638,12.8,12.5,12.983539,0.0,0.0,0.0,200.0,200.0,200.0,115.2,116.65,113.055144,37.72,40.95,38.20823,10.8,11.0,10.296296,16.2,15.833333,15.687243,81.4,82.5,79.773663,9.56,9.616667,8.278601,6.8,6.833333,5.91358,0.289,0.279833,0.265947,17.2,16.833333,15.81893,0.5126,0.505,0.478457,9.0,8.666667,7.711934,57.64,57.633333,56.039095,38.4,37.666667,36.473251,0.5924,0.596833,0.582523,16.84,17.266667,16.885597,13.8,14.333333,13.823045,41.8,43.0,43.300412,0.4968,0.509167,0.505342,20.8,22.0,21.82716,1.0,1.0,1.0,4.0,3.5,4.263374,77.06,77.06,75.711111,17.0,17.0,17.271605,12.76,12.76,14.239506,3.8,3.8,4.320988,93.92,93.92,91.638272,59.78,59.78,59.535802,20.2,20.2,21.160494,0.533,0.533,0.531827,49.6,49.6,51.283951,0.4434,0.4434,0.44342,22.0,22.0,22.740741,0.4482,0.4482,0.434938,21.6,21.6,21.481481,0.703,0.703,0.717741,15.4,15.4,15.604938,0.8,0.8,0.802469,200.0,200.0,200.0,107.78,107.78,110.622222,31.3,31.3,30.783951,7.0,7.0,6.851852,18.2,18.2,18.419753,68.2,68.2,70.098765,11.26,11.26,11.935802,7.2,7.2,7.62963,0.5052,0.5052,0.490123,24.8,24.8,24.901235,0.3482,0.3482,0.356148,8.8,8.8,9.012346,46.6,46.6,46.661728,27.2,27.2,28.012346,0.5708,0.5708,0.571457,16.32,16.32,14.166667,11.2,11.2,9.654321,24.8,24.8,26.382716,0.54,0.54,0.528519,13.2,13.2,13.728395,0.7434,0.7434,0.779062,2.2,2.2,2.592593
4,2009-11-28,"Winfield Dunn Center, Clarksville, Tennessee",Austin Peay,Drake,Away,55.28,55.28,56.783951,13.0,13.0,13.716049,5.02,5.02,6.462963,1.8,1.8,2.160494,107.42,107.42,105.4,62.24,62.24,65.585185,21.4,21.4,21.555556,0.4744,0.4744,0.48079,55.2,55.2,54.753086,0.4322,0.4322,0.443049,23.6,23.6,24.049383,0.3682,0.3682,0.335568,20.2,20.2,18.283951,0.6422,0.6422,0.63784,13.0,13.0,11.654321,2.0,2.0,2.259259,200.0,200.0,200.0,95.04,95.04,93.859259,35.24,35.24,34.855556,9.4,9.4,9.333333,17.4,17.4,17.493827,65.0,65.0,64.012346,6.2,6.2,5.759259,4.2,4.2,3.888889,0.2644,0.2644,0.24684,15.0,15.0,13.864198,0.3142,0.3142,0.299988,4.8,4.8,4.259259,48.22,48.22,49.959259,30.8,30.8,30.888889,0.5022,0.5022,0.505519,19.64,19.64,20.4,15.6,15.6,16.049383,40.2,40.2,40.888889,0.4606,0.4606,0.47684,18.8,18.8,19.790123,0.2966,0.2966,0.287605,1.0,1.0,1.135802,51.36,51.36,51.782716,11.6,11.6,11.320988,4.5,4.5,4.22963,1.8,1.8,1.567901,112.26,112.26,108.185185,60.86,60.86,64.091358,17.2,17.2,17.481481,0.545,0.545,0.511728,50.2,50.2,51.580247,0.4568,0.4568,0.429432,22.4,22.4,21.604938,0.3056,0.3056,0.27921,14.6,14.6,13.62963,0.8204,0.8204,0.841457,11.6,11.6,10.938272,2.4,2.4,2.691358,200.0,200.0,200.0,102.48,102.48,98.804938,24.82,24.82,26.822222,5.0,5.0,5.407407,16.4,16.4,17.320988,65.2,65.2,62.518519,12.98,12.98,14.297531,8.2,8.2,9.0,0.4548,0.4548,0.458198,22.8,22.8,23.604938,0.3892,0.3892,0.359074,8.8,8.8,8.37037,42.3,42.3,43.865432,22.2,22.2,22.888889,0.578,0.578,0.545667,17.7,17.7,17.308642,11.8,11.8,11.666667,27.4,27.4,27.975309,0.5148,0.5148,0.489543,13.6,13.6,13.234568,0.1566,0.1566,0.171556,0.6,0.6,0.703704


In [90]:
# These fields aren't necessary to train the model
DROPPING = ['date',
            'location',
            'home',
            'away']

ma = data_ma.copy()
ma['target'] = np.where(ma['winner'] == "Home", 0, 1)
ma.drop(columns=DROPPING, inplace=True)
ma.dropna(inplace=True)
ma.drop_duplicates(inplace=True)
ma.pop('winner')

target = ma.pop('target')
ma = ma.filter(regex="EMA", axis=1)
ma['target'] = target
ma.head()

Unnamed: 0,home_assist_percentage_EMA,home_assists_EMA,home_block_percentage_EMA,home_blocks_EMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_EMA,home_field_goal_percentage_EMA,home_field_goals_EMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_EMA,home_free_throw_percentage_EMA,home_free_throws_EMA,home_losses_EMA,home_minutes_played_EMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_EMA,home_personal_fouls_EMA,home_points_EMA,home_steal_percentage_EMA,home_steals_EMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_EMA,home_total_rebounds_EMA,home_true_shooting_percentage_EMA,home_turnover_percentage_EMA,home_turnovers_EMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_EMA,home_win_percentage_EMA,home_wins_EMA,away_assist_percentage_EMA,away_assists_EMA,away_block_percentage_EMA,away_blocks_EMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_EMA,away_field_goal_percentage_EMA,away_field_goals_EMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_EMA,away_free_throw_percentage_EMA,away_free_throws_EMA,away_losses_EMA,away_minutes_played_EMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_EMA,away_personal_fouls_EMA,away_points_EMA,away_steal_percentage_EMA,away_steals_EMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_EMA,away_total_rebounds_EMA,away_true_shooting_percentage_EMA,away_turnover_percentage_EMA,away_turnovers_EMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_EMA,away_win_percentage_EMA,away_wins_EMA,target
0,62.004938,15.037037,9.353086,4.37037,105.667901,53.988889,21.580247,0.542457,52.012346,0.46884,24.308642,0.336951,17.111111,0.716889,13.012346,2.061728,200.0,106.697531,32.645679,7.728395,15.197531,69.197531,5.653086,3.790123,0.404148,21.271605,0.360296,7.567901,44.724691,29.308642,0.576741,19.002469,14.160494,30.740741,0.546012,16.740741,0.485136,1.333333,59.146914,16.419753,8.658025,3.382716,91.364198,71.012346,24.604938,0.511,59.296296,0.478296,27.82716,0.439531,25.296296,0.635605,16.074074,1.037037,200.0,101.174074,41.734568,12.259259,16.851852,75.506173,12.179012,9.08642,0.242407,14.617284,0.273926,3.777778,55.933333,36.864198,0.534086,19.382716,16.62963,44.679012,0.542716,24.049383,0.761778,2.358025,1
1,52.204938,9.925926,3.781481,1.320988,121.093827,63.783951,19.82716,0.406852,52.728395,0.360926,18.962963,0.45142,23.530864,0.634691,14.358025,3.395062,200.0,86.409877,31.037037,9.432099,18.592593,57.037037,5.969136,3.975309,0.328235,17.345679,0.288222,4.753086,45.819753,29.259259,0.44716,17.402469,13.123457,35.382716,0.403074,14.209877,0.0,0.0,64.554321,14.08642,9.62716,4.234568,100.969136,70.982716,26.123457,0.466395,52.246914,0.419284,21.814815,0.346432,17.91358,0.650889,10.851852,4.395062,200.0,87.774074,33.150617,8.530864,16.938272,59.333333,4.992593,3.296296,0.267222,13.888889,0.348457,4.851852,53.04321,34.654321,0.488914,22.316049,17.395062,38.358025,0.442778,16.962963,0.0,0.0,0
2,54.185185,12.925926,5.85679,1.765432,109.480247,56.780247,18.345679,0.480593,56.518519,0.421605,23.790123,0.271901,15.308642,0.672556,10.382716,3.061728,200.0,93.345679,45.044444,12.012346,20.271605,64.555556,14.22963,10.012346,0.325037,18.382716,0.360346,6.592593,48.97284,30.358025,0.506667,23.519753,18.790123,38.135802,0.450247,17.197531,0.066667,0.333333,52.893827,10.82716,5.544444,2.0,101.125926,65.146914,23.469136,0.424222,50.950617,0.385,19.864198,0.393753,20.012346,0.714049,14.382716,4.061728,200.0,86.391358,36.834568,9.481481,15.419753,58.197531,10.017284,6.814815,0.346025,17.740741,0.223457,4.08642,51.791358,32.950617,0.475827,23.044444,17.91358,33.209877,0.472395,15.777778,0.252543,1.333333,1
3,57.632099,17.37037,9.925514,4.26749,81.50535,72.783539,26.176955,0.564086,59.119342,0.499214,29.539095,0.340782,19.641975,0.666638,12.983539,0.0,200.0,113.055144,38.20823,10.296296,15.687243,79.773663,8.278601,5.91358,0.265947,15.81893,0.478457,7.711934,56.039095,36.473251,0.582523,16.885597,13.823045,43.300412,0.505342,21.82716,1.0,4.263374,75.711111,17.271605,14.239506,4.320988,91.638272,59.535802,21.160494,0.531827,51.283951,0.44342,22.740741,0.434938,21.481481,0.717741,15.604938,0.802469,200.0,110.622222,30.783951,6.851852,18.419753,70.098765,11.935802,7.62963,0.490123,24.901235,0.356148,9.012346,46.661728,28.012346,0.571457,14.166667,9.654321,26.382716,0.528519,13.728395,0.779062,2.592593,1
4,56.783951,13.716049,6.462963,2.160494,105.4,65.585185,21.555556,0.48079,54.753086,0.443049,24.049383,0.335568,18.283951,0.63784,11.654321,2.259259,200.0,93.859259,34.855556,9.333333,17.493827,64.012346,5.759259,3.888889,0.24684,13.864198,0.299988,4.259259,49.959259,30.888889,0.505519,20.4,16.049383,40.888889,0.47684,19.790123,0.287605,1.135802,51.782716,11.320988,4.22963,1.567901,108.185185,64.091358,17.481481,0.511728,51.580247,0.429432,21.604938,0.27921,13.62963,0.841457,10.938272,2.691358,200.0,98.804938,26.822222,5.407407,17.320988,62.518519,14.297531,9.0,0.458198,23.604938,0.359074,8.37037,43.865432,22.888889,0.545667,17.308642,11.666667,27.975309,0.489543,13.234568,0.171556,0.703704,1


In [91]:
# create training and test samples
train_ma, test_ma = train_test_split(ma, test_size=0.2)
print(len(train_ma), 'train examples')
print(len(test_ma), 'test examples')

# extract training and test labels
train_true = train_ma.pop('target')
test_true = test_ma.pop('target')

32588 train examples
8147 test examples


# Random Forest Classifier

In [125]:
parameters = {'verbose':0,
              'n_jobs':-1,
              'bootstrap': False,
              'min_samples_leaf': 3,
              'n_estimators': 50,
              'min_samples_split': 10,
              'max_features': 'sqrt',
              'max_depth': 20}
clf = RandomForestClassifier(**parameters)
clf.fit(train_ma, train_true)
accuracy = clf.score(test_ma, test_true)
print(f'Accuracy of model: {accuracy*100:.2f}%')

Accuracy of model: 70.13%


# Gradient Boosting Classifier

In [137]:
parameters = {'verbose':0,
              'loss': 'exponential',
              'n_estimators':100,
              'min_samples_leaf': 3,
              'min_samples_split': 10,
              'max_features': 'sqrt',
              'max_depth': 20}
clf = GradientBoostingClassifier(**parameters)
clf.fit(train_ma, train_true)
accuracy = clf.score(test_ma, test_true)
print(f'Accuracy of model: {accuracy*100:.2f}%')

Accuracy of model: 70.55%


# Analysis
As seen above, we were able to surpass 70%, which is quite impressive when predicting sports models. I believe we will be able to increase accuracy when we retrain these models on a larger dataset. Stay tuned when we retrain on our 2010-2020 dataset versus our current 2020 dataset.

In [12]:
from joblib import dump, load

parameters = {'verbose':0,
              'n_jobs':-1,
              'bootstrap': False,
              'min_samples_leaf': 3,
              'n_estimators': 50,
              'min_samples_split': 10,
              'max_features': 'sqrt',
              'max_depth': 20}

accuracy = 0
while accuracy < .7:
    clf = RandomForestClassifier(**parameters)
    clf.fit(train_ma, train_true)
    accuracy = clf.score(test_ma, test_true)
    print(f'Accuracy of model: {accuracy*100:.2f}%')

dump(clf, 'rfc.joblib') 

Accuracy of model: 68.56%
Accuracy of model: 69.84%
Accuracy of model: 70.13%


['rfc.joblib']

In [14]:
clf.predict_proba(np.array([[ 50.18853319,  12.83365005,   7.39975925,   2.8530138 ,
        103.21410579,  69.34915761,  21.54884196,   0.51349807,
         56.43708545,   0.4528056 ,  25.50194843,   0.33992502,
         19.12640233,   0.66247622,  12.5104348 ,  10.6712573 ,
        200.28978332, 102.28501627,  33.95708128,  10.89455019,
         14.32178333,  70.38661533,  11.33309182,   7.81303643,
          0.36325609,  20.55728647,   0.33363987,   6.87228367,
         51.39635693,  32.44339216,   0.53815396,  18.28670799,
         14.67612811,  35.87979898,   0.52098572,  18.62966476,
          0.63308065,  18.32875313,  41.66260908,   9.4091232 ,
         10.98609931,   3.67935434, 111.15390629,  67.04092165,
         21.07336026,   0.40108225,  59.40800058,   0.36711136,
         21.88518278,   0.30188099,  17.64448853,   0.63085532,
         12.46386906,  14.5807768 , 200.02966541,  85.99923067,
         31.58889834,  11.65310409,  18.82367792,  60.30295817,
          7.63371439,   5.3567371 ,   0.29073411,  17.23424975,
          0.23374772,   4.06872355,  47.92145449,  32.72646435,
          0.44322548,  16.99650986,  13.79067675,  42.17375082,
          0.42040243,  17.81645923,   0.49954837,  14.41923363]]))

array([[0.32494444, 0.67505556]])

In [15]:
from joblib import dump, load

parameters = {'verbose':0,
              'loss': 'exponential',
              'n_estimators':100,
              'min_samples_leaf': 3,
              'min_samples_split': 10,
              'max_features': 'sqrt',
              'max_depth': 20}

accuracy = 0
while accuracy < .7:
    clf = GradientBoostingClassifier(**parameters)
    clf.fit(train_ma, train_true)
    accuracy = clf.score(test_ma, test_true)
    print(f'Accuracy of model: {accuracy*100:.2f}%')

dump(clf, 'rfc.joblib') 

Accuracy of model: 68.14%
Accuracy of model: 69.56%
Accuracy of model: 71.12%


['rfc.joblib']

In [16]:
clf.predict_proba(np.array([[ 50.18853319,  12.83365005,   7.39975925,   2.8530138 ,
        103.21410579,  69.34915761,  21.54884196,   0.51349807,
         56.43708545,   0.4528056 ,  25.50194843,   0.33992502,
         19.12640233,   0.66247622,  12.5104348 ,  10.6712573 ,
        200.28978332, 102.28501627,  33.95708128,  10.89455019,
         14.32178333,  70.38661533,  11.33309182,   7.81303643,
          0.36325609,  20.55728647,   0.33363987,   6.87228367,
         51.39635693,  32.44339216,   0.53815396,  18.28670799,
         14.67612811,  35.87979898,   0.52098572,  18.62966476,
          0.63308065,  18.32875313,  41.66260908,   9.4091232 ,
         10.98609931,   3.67935434, 111.15390629,  67.04092165,
         21.07336026,   0.40108225,  59.40800058,   0.36711136,
         21.88518278,   0.30188099,  17.64448853,   0.63085532,
         12.46386906,  14.5807768 , 200.02966541,  85.99923067,
         31.58889834,  11.65310409,  18.82367792,  60.30295817,
          7.63371439,   5.3567371 ,   0.29073411,  17.23424975,
          0.23374772,   4.06872355,  47.92145449,  32.72646435,
          0.44322548,  16.99650986,  13.79067675,  42.17375082,
          0.42040243,  17.81645923,   0.49954837,  14.41923363]]))

array([[3.21587005e-06, 9.99996784e-01]])

In [4]:
from sportsipy.ncaab.teams import Teams

for team in Teams(2021):
    print(team, team.name, team.abbreviation)

Abilene Christian (ABILENE-CHRISTIAN) - 2021 Abilene Christian ABILENE-CHRISTIAN
Air Force (AIR-FORCE) - 2021 Air Force AIR-FORCE
Akron (AKRON) - 2021 Akron AKRON
Alabama A&M (ALABAMA-AM) - 2021 Alabama A&M ALABAMA-AM
Alabama-Birmingham (ALABAMA-BIRMINGHAM) - 2021 Alabama-Birmingham ALABAMA-BIRMINGHAM
Alabama State (ALABAMA-STATE) - 2021 Alabama State ALABAMA-STATE
Alabama (ALABAMA) - 2021 Alabama ALABAMA
Albany (NY) (ALBANY-NY) - 2021 Albany (NY) ALBANY-NY
Alcorn State (ALCORN-STATE) - 2021 Alcorn State ALCORN-STATE
American (AMERICAN) - 2021 American AMERICAN
Appalachian State (APPALACHIAN-STATE) - 2021 Appalachian State APPALACHIAN-STATE
Arizona State (ARIZONA-STATE) - 2021 Arizona State ARIZONA-STATE
Arizona (ARIZONA) - 2021 Arizona ARIZONA
Little Rock (ARKANSAS-LITTLE-ROCK) - 2021 Little Rock ARKANSAS-LITTLE-ROCK
Arkansas-Pine Bluff (ARKANSAS-PINE-BLUFF) - 2021 Arkansas-Pine Bluff ARKANSAS-PINE-BLUFF
Arkansas State (ARKANSAS-STATE) - 2021 Arkansas State ARKANSAS-STATE
Arkansas (AR

In [5]:
ma = {}
for team in Teams(2021):
    try:
        dataset = pd.read_csv(f'./team_ma/{team.abbreviation}_ma.csv')
        dataset = dataset.tail(1)
        dataset = dataset.filter(regex="EMA", axis=1)
        # dataset.drop(columns=['date', 'location', 'away', 'home', 'winner'], inplace=True)
        # print(dataset.values.tolist()[0])
        ma[team.name] = dataset.values.tolist()[0]
    except:
        print(f'Did not find: {team.abbreviation}')

In [34]:
dataset = pd.read_csv(f'./team_ma/CONNECTICUT_ma.csv')
dataset = dataset.tail(1)
dataset = dataset.filter(regex="EMA", axis=1)
dataset.drop(columns=['date', 'location', 'away', 'home', 'winner'], inplace=True)
dataset.style

KeyError: "['date' 'location' 'away' 'home' 'winner'] not found in axis"

In [6]:
ma

{'Abilene Christian': [68.78546170419655,
  20.52530945192172,
  8.331377903428756,
  2.775699349433268,
  91.83629680864608,
  80.87978618442426,
  22.952510332696143,
  0.5627146295522558,
  61.01895555610673,
  0.4887032818523159,
  29.77609612326477,
  0.349583996101608,
  20.464160348009937,
  0.7516310843902458,
  15.041659142894112,
  3.796619583204828,
  200.00000763090983,
  114.81567149013934,
  32.402133971478335,
  10.03123753106822,
  18.38624777405365,
  83.81996811109289,
  13.906547238831209,
  10.22419350403314,
  0.3706343661802985,
  22.946347630777765,
  0.4025295242491961,
  9.226116721669223,
  55.64463389793092,
  32.983747863764364,
  0.5913901865606139,
  15.90625256236792,
  13.173948744759066,
  38.07260792532895,
  0.5399867843678758,
  20.54997940159555,
  0.8415347704622544,
  20.20427806590112],
 'Air Force': [57.001019929392015,
  10.613583654369862,
  10.89080740888036,
  2.882016663266901,
  110.50873744389088,
  63.240213847602206,
  16.76910399766253

In [7]:
import json
with open("ma.json", "w") as write_file:
    json.dump(ma, write_file, sort_keys=True, indent=4)

In [92]:
from joblib import dump, load

lr_clf = load('./models/logisticreg.joblib')
svm_clf = load('./models/svm.joblib')
rfc_clf = load('./models/rfc.joblib')
gbc_clf = load('./models/gbc.joblib')

In [93]:
labels = ma.pop('target')

In [94]:
ma.head()

Unnamed: 0,home_assist_percentage_EMA,home_assists_EMA,home_block_percentage_EMA,home_blocks_EMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_EMA,home_field_goal_percentage_EMA,home_field_goals_EMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_EMA,home_free_throw_percentage_EMA,home_free_throws_EMA,home_losses_EMA,home_minutes_played_EMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_EMA,home_personal_fouls_EMA,home_points_EMA,home_steal_percentage_EMA,home_steals_EMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_EMA,home_total_rebounds_EMA,home_true_shooting_percentage_EMA,home_turnover_percentage_EMA,home_turnovers_EMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_EMA,home_win_percentage_EMA,home_wins_EMA,away_assist_percentage_EMA,away_assists_EMA,away_block_percentage_EMA,away_blocks_EMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_EMA,away_field_goal_percentage_EMA,away_field_goals_EMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_EMA,away_free_throw_percentage_EMA,away_free_throws_EMA,away_losses_EMA,away_minutes_played_EMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_EMA,away_personal_fouls_EMA,away_points_EMA,away_steal_percentage_EMA,away_steals_EMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_EMA,away_total_rebounds_EMA,away_true_shooting_percentage_EMA,away_turnover_percentage_EMA,away_turnovers_EMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_EMA,away_win_percentage_EMA,away_wins_EMA
0,62.004938,15.037037,9.353086,4.37037,105.667901,53.988889,21.580247,0.542457,52.012346,0.46884,24.308642,0.336951,17.111111,0.716889,13.012346,2.061728,200.0,106.697531,32.645679,7.728395,15.197531,69.197531,5.653086,3.790123,0.404148,21.271605,0.360296,7.567901,44.724691,29.308642,0.576741,19.002469,14.160494,30.740741,0.546012,16.740741,0.485136,1.333333,59.146914,16.419753,8.658025,3.382716,91.364198,71.012346,24.604938,0.511,59.296296,0.478296,27.82716,0.439531,25.296296,0.635605,16.074074,1.037037,200.0,101.174074,41.734568,12.259259,16.851852,75.506173,12.179012,9.08642,0.242407,14.617284,0.273926,3.777778,55.933333,36.864198,0.534086,19.382716,16.62963,44.679012,0.542716,24.049383,0.761778,2.358025
1,52.204938,9.925926,3.781481,1.320988,121.093827,63.783951,19.82716,0.406852,52.728395,0.360926,18.962963,0.45142,23.530864,0.634691,14.358025,3.395062,200.0,86.409877,31.037037,9.432099,18.592593,57.037037,5.969136,3.975309,0.328235,17.345679,0.288222,4.753086,45.819753,29.259259,0.44716,17.402469,13.123457,35.382716,0.403074,14.209877,0.0,0.0,64.554321,14.08642,9.62716,4.234568,100.969136,70.982716,26.123457,0.466395,52.246914,0.419284,21.814815,0.346432,17.91358,0.650889,10.851852,4.395062,200.0,87.774074,33.150617,8.530864,16.938272,59.333333,4.992593,3.296296,0.267222,13.888889,0.348457,4.851852,53.04321,34.654321,0.488914,22.316049,17.395062,38.358025,0.442778,16.962963,0.0,0.0
2,54.185185,12.925926,5.85679,1.765432,109.480247,56.780247,18.345679,0.480593,56.518519,0.421605,23.790123,0.271901,15.308642,0.672556,10.382716,3.061728,200.0,93.345679,45.044444,12.012346,20.271605,64.555556,14.22963,10.012346,0.325037,18.382716,0.360346,6.592593,48.97284,30.358025,0.506667,23.519753,18.790123,38.135802,0.450247,17.197531,0.066667,0.333333,52.893827,10.82716,5.544444,2.0,101.125926,65.146914,23.469136,0.424222,50.950617,0.385,19.864198,0.393753,20.012346,0.714049,14.382716,4.061728,200.0,86.391358,36.834568,9.481481,15.419753,58.197531,10.017284,6.814815,0.346025,17.740741,0.223457,4.08642,51.791358,32.950617,0.475827,23.044444,17.91358,33.209877,0.472395,15.777778,0.252543,1.333333
3,57.632099,17.37037,9.925514,4.26749,81.50535,72.783539,26.176955,0.564086,59.119342,0.499214,29.539095,0.340782,19.641975,0.666638,12.983539,0.0,200.0,113.055144,38.20823,10.296296,15.687243,79.773663,8.278601,5.91358,0.265947,15.81893,0.478457,7.711934,56.039095,36.473251,0.582523,16.885597,13.823045,43.300412,0.505342,21.82716,1.0,4.263374,75.711111,17.271605,14.239506,4.320988,91.638272,59.535802,21.160494,0.531827,51.283951,0.44342,22.740741,0.434938,21.481481,0.717741,15.604938,0.802469,200.0,110.622222,30.783951,6.851852,18.419753,70.098765,11.935802,7.62963,0.490123,24.901235,0.356148,9.012346,46.661728,28.012346,0.571457,14.166667,9.654321,26.382716,0.528519,13.728395,0.779062,2.592593
4,56.783951,13.716049,6.462963,2.160494,105.4,65.585185,21.555556,0.48079,54.753086,0.443049,24.049383,0.335568,18.283951,0.63784,11.654321,2.259259,200.0,93.859259,34.855556,9.333333,17.493827,64.012346,5.759259,3.888889,0.24684,13.864198,0.299988,4.259259,49.959259,30.888889,0.505519,20.4,16.049383,40.888889,0.47684,19.790123,0.287605,1.135802,51.782716,11.320988,4.22963,1.567901,108.185185,64.091358,17.481481,0.511728,51.580247,0.429432,21.604938,0.27921,13.62963,0.841457,10.938272,2.691358,200.0,98.804938,26.822222,5.407407,17.320988,62.518519,14.297531,9.0,0.458198,23.604938,0.359074,8.37037,43.865432,22.888889,0.545667,17.308642,11.666667,27.975309,0.489543,13.234568,0.171556,0.703704


In [95]:
avg = np.mean( np.array([lr_clf.predict_proba(ma), svm_clf.predict_proba(ma), rfc_clf.predict_proba(ma), gbc_clf.predict_proba(ma)]), axis=0 )


In [96]:
print(avg)

[[0.69136992 0.30863008]
 [0.45840657 0.54159343]
 [0.57792409 0.42207591]
 [0.79473303 0.20526697]
 [0.70411229 0.29588771]
 [0.93995628 0.06004372]
 [0.58034748 0.41965252]
 [0.84655828 0.15344172]
 [0.73731182 0.26268818]
 [0.88170943 0.11829057]
 [0.41531759 0.58468241]
 [0.71231145 0.28768855]
 [0.65130088 0.34869912]
 [0.69611306 0.30388694]
 [0.55837433 0.44162567]
 [0.58815313 0.41184687]
 [0.45857805 0.54142195]
 [0.75549879 0.24450121]
 [0.67411635 0.32588365]
 [0.63023672 0.36976328]
 [0.89215014 0.10784986]
 [0.57401047 0.42598953]
 [0.92037322 0.07962678]
 [0.81214963 0.18785037]
 [0.51719614 0.48280386]
 [0.54461393 0.45538607]
 [0.72300405 0.27699595]
 [0.84906199 0.15093801]
 [0.76367137 0.23632863]
 [0.44980458 0.55019542]
 [0.7008096  0.2991904 ]
 [0.92886493 0.07113507]
 [0.87453012 0.12546988]
 [0.65435623 0.34564377]
 [0.77964581 0.22035419]
 [0.76470209 0.23529791]
 [0.77654114 0.22345886]
 [0.86471005 0.13528995]
 [0.94199875 0.05800125]
 [0.86582868 0.13417132]


In [97]:
avg[:,1].round()

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.,
       0., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
       0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1.,
       0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       1., 0., 0., 0., 0.

In [98]:
accuracy_score(avg[:,1].round(), labels)

0.6983429483245367

In [68]:
train_true.values

array([1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,

In [20]:
lr_clf.predict_proba(ma)

array([[0.60826199, 0.39173801],
       [0.88277012, 0.11722988],
       [0.37067934, 0.62932066],
       ...,
       [0.39907044, 0.60092956],
       [0.60932703, 0.39067297],
       [0.88546   , 0.11454   ]])

In [22]:
svm_clf.predict_proba(ma)

array([[0.61266827, 0.38733173],
       [0.85593622, 0.14406378],
       [0.5       , 0.5       ],
       ...,
       [0.50620377, 0.49379623],
       [0.59013673, 0.40986327],
       [0.79903225, 0.20096775]])

In [23]:
rfc_clf.predict_proba(ma)

array([[0.19294375, 0.80705625],
       [0.95109524, 0.04890476],
       [0.83838095, 0.16161905],
       ...,
       [0.13653175, 0.86346825],
       [0.81397863, 0.18602137],
       [0.32494444, 0.67505556]])

In [24]:
gbc_clf.predict_proba(ma)

array([[2.67891165e-06, 9.99997321e-01],
       [9.99999815e-01, 1.85123324e-07],
       [9.99998667e-01, 1.33275534e-06],
       ...,
       [1.46475906e-06, 9.99998535e-01],
       [9.99999180e-01, 8.20303562e-07],
       [3.21587005e-06, 9.99996784e-01]])

In [55]:
ma.values[:,0]

array([60.74814815, 47.31646091, 58.08641975, ..., 37.75432806,
       50.78054505, 50.18853319])

In [87]:
c = np.array([[0.86962275, 0.13037725]])