In [420]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.model_selection import KFold
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
df=pd.read_csv("owl-with-odds.csv")

In [4]:
subset = ['t1_odds', 't2_odds']
print(len(df))
df.dropna(subset=subset ,inplace=True)
print(len(df))

df['t1_odds'] = pd.to_numeric(df['t1_odds'], errors='coerce')
df['t2_odds'] = pd.to_numeric(df['t2_odds'], errors='coerce')
df.dropna(subset=subset ,inplace=True)
print(len(df))

651
538
536


In [7]:
#Split the validation set
df_train = df[:-60]

df_test = df[-60:]
print(len(df_test))
print(len(df_train))

60
476


In [8]:
#inclusive features
features = ['corona_virus_isolation', 't1_wins_season',
       't1_losses_season', 't2_wins_season', 't2_losses_season',
       't1_matches_season', 't2_matches_season', 't1_win_percent_season',
       't2_win_percent_season', 't1_wins_alltime', 't1_losses_alltime',
       't2_wins_alltime', 't2_losses_alltime', 't1_matches_alltime',
       't2_matches_alltime', 't1_win_percent_alltime',
       't2_win_percent_alltime', 't1_wins_last_3', 't1_losses_last_3',
       't2_wins_last_3', 't2_losses_last_3', 't1_win_percent_last_3',
       't2_win_percent_last_3', 't1_wins_last_5', 't1_losses_last_5',
       't2_wins_last_5', 't2_losses_last_5', 't1_win_percent_last_5',
       't2_win_percent_last_5', 't1_wins_last_10', 't1_losses_last_10',
       't2_wins_last_10', 't2_losses_last_10', 't1_win_percent_last_10',
       't2_win_percent_last_10', 
        't1_wins_vs_t2', 't1_losses_vs_t2',
       't1_matches_vs_t2', 't1_odds', 't2_odds', 'winner_label']

In [16]:
df_test_filtered = df_test[features].copy()
df_train_filtered = df_train[features].copy()

(476, 41)

In [17]:
df_test_filtered.dropna(inplace=True)
df_train_filtered.dropna(inplace=True)

In [200]:
def get_bet_ev(odds, prob):
    if odds>0:
        return ((odds * prob) - (100 * (1-prob)) )
    else:
        return ((100 / abs(odds))*100*prob - (100 * (1-prob)))

In [199]:
def get_bet_return(odds):
    if odds>0:
        return odds
    else:
        return (100 / abs(odds))*100

In [192]:
#Input DF must have these columns:
#t1_odds (American)
#t2_odds (American)
#t1_prob (0->1)
#t2_prob (0->1)
#winner (0 or 1)



def get_ev_from_df(ev_df, print_stats = False):
    num_matches = 0
    num_bets = 0
    num_wins = 0
    num_losses= 0
    num_under= 0
    num_under_losses = 0
    num_under_wins = 0
    num_even = 0
    num_even_losses = 0
    num_even_wins = 0
    num_fav = 0
    num_fav_wins = 0
    num_fav_losses = 0
    profit = 0
    profit_per_bet = 0
    profit_per_match = 0    

    for index, row in ev_df.iterrows():
        num_matches = num_matches+1
        t1_bet_ev = get_bet_ev(row['t1_odds'], row['t1_prob'])
        #print(f"ODDS:{row['t1_odds']} PROB: {row['t1_prob']} EV: {t1_bet_ev}")
        t2_bet_ev = get_bet_ev(row['t2_odds'], row['t2_prob'])
        #print(f"ODDS:{row['t2_odds']} PROB: {row['t2_prob']} EV: {t2_bet_ev}")
        #print()
        
        t1_bet_return = get_bet_return(row['t1_odds'])
        t2_bet_return = get_bet_return(row['t2_odds'])
        
        
        if (t1_bet_ev > 0 or t2_bet_ev > 0):
            num_bets = num_bets+1

            
        if t1_bet_ev > 0:
            if row['winner'] == 0:
                num_wins += 1
                profit = profit + t1_bet_return
                #print(t1_bet_return)
            elif row['winner'] == 1:
                num_losses += 1
                profit = profit - 100
            if (t1_bet_return > t2_bet_return):
                num_under += 1
                if row['winner'] == 0:
                    num_under_wins += 1
                elif row['winner'] == 1:
                    num_under_losses += 1
            elif (t1_bet_return < t2_bet_return):
                num_fav += 1
                if row['winner'] == 0:
                    num_fav_wins += 1
                elif row['winner'] == 1:
                    num_fav_losses += 1
            else:
                num_even += 1
                if row['winner'] == 0:
                    num_even_wins += 1
                elif row['winner'] == 1:
                    num_even_losses += 1

        if t2_bet_ev > 0:
            if row['winner'] == 1:
                num_wins += 1                    
                profit = profit + t2_bet_return
            elif row['winner'] == 0:
                num_losses += 1
                profit = profit - 100
            if (t2_bet_return > t1_bet_return):
                num_under += 1
                if row['winner'] == 1:
                    num_under_wins += 1
                elif row['winner'] == 0:
                    num_under_losses += 1
            elif (t2_bet_return < t1_bet_return):
                num_fav += 1
                if row['winner'] == 1:
                    num_fav_wins += 1
                elif row['winner'] == 0:
                    num_fav_losses += 1
            else:
                num_even += 1
                if row['winner'] == 1:
                    num_even_wins += 1
                elif row['winner'] == 0:
                    num_even_losses += 1
            
    profit_per_bet = profit / num_bets
    profit_per_match = profit / num_matches
        
        
    if print_stats:
        print(f"""
          Number of matches: {num_matches}
          Number of bets: {num_bets}
          Number of winning bets: {num_wins}
          Number of losing bets: {num_losses}
          Number of underdog bets: {num_under}
          Number of underdog wins: {num_under_wins}
          Number of underdog losses: {num_under_losses}
          Number of Favorite bets: {num_fav}
          Number of favorite wins: {num_fav_wins}
          Number of favorite losses: {num_fav_losses}
          Number of even bets: {num_even}
          Number of even wins: {num_even_wins}
          Number of even losses: {num_even_losses}
          Profit: {profit}
          Profit per bet: {profit_per_bet}
          Profit per match: {profit_per_match}
          
          """)
        
    return (profit_per_bet)
        

In [359]:
#Input the train df and model and we will return a customer 5x cross validation score based off of expected value
#t1_odds and t2_odd MUST be the last 2 columns or this will break.

def custom_cv_eval(df, m):
    #We need to split away the winner...
    y = df['winner_label'].copy()
    #display(y)
    X = df.drop('winner_label', axis=1)
    
    ##We need a numpy array it seems like...
    X = np.array(X)
    y = np.array(y)
    #display(X)
    running_total = 0
    count=1
    kf = KFold(n_splits=5, shuffle=True, random_state=75)
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        #print(test_index)
        model.fit(X_train, y_train)
        probs=model.predict_proba(X_test)
        #We need to prep the dataframe to evaluate....
        #X_odds = X_test[['t1_odds', 't2_odds']]
        #print(X_test)
        #print(X_test[:, -1])
        #print(X_test[:, -2])
        X_odds = list(zip(X_test[:, -2], X_test[:, -1], probs[:, 0], probs[:, 1], y_test))
        ev_prepped_df = pd.DataFrame(X_odds, columns=['t1_odds', 't2_odds', 't1_prob', 't2_prob', 'winner'])
        #display(temp_df)
        #print(f"{count}: {get_ev_from_df(ev_prepped_df, print_stats = False)}")
        count=count+1
        running_total = running_total + get_ev_from_df(ev_prepped_df, print_stats = False)
    return running_total

In [360]:
model = tree.DecisionTreeClassifier(max_depth=10, min_samples_leaf=3, random_state=75)

In [361]:
print(f"total: {custom_cv_eval(df_train_filtered, model)}")

total: -26.38405656100527


In [362]:
###Let's automate finding features!
pos_features = ['corona_virus_isolation', 't1_wins_season',
       't1_losses_season', 't2_wins_season', 't2_losses_season',
       't1_matches_season', 't2_matches_season', 't1_win_percent_season',
       't2_win_percent_season', 't1_wins_alltime', 't1_losses_alltime',
       't2_wins_alltime', 't2_losses_alltime', 't1_matches_alltime',
       't2_matches_alltime', 't1_win_percent_alltime',
       't2_win_percent_alltime', 't1_wins_last_3', 't1_losses_last_3',
       't2_wins_last_3', 't2_losses_last_3', 't1_win_percent_last_3',
       't2_win_percent_last_3', 't1_wins_last_5', 't1_losses_last_5',
       't2_wins_last_5', 't2_losses_last_5', 't1_win_percent_last_5',
       't2_win_percent_last_5', 't1_wins_last_10', 't1_losses_last_10',
       't2_wins_last_10', 't2_losses_last_10', 't1_win_percent_last_10',
       't2_win_percent_last_10', 
        't1_wins_vs_t2', 't1_losses_vs_t2',
       't1_matches_vs_t2']

In [363]:
current_features = ['winner_label', 't1_odds', 't2_odds']

In [364]:
def get_best_features(features, model, df, current_features, scale=False):
    best_feature = ""
    winner_labels = df['winner_label'].copy()
    initial_df = df[current_features]
    #display(initial_df)
    #display(winner_labels)
    
    best_score = custom_cv_eval(df[current_features], model)
    best_feature = ""
    
    print(f"Current best score is: {best_score}")
    for f in features:
        if f not in current_features:
            new_features = [f] + current_features
            df_sel=df[new_features]
            if scale == True:
                sc = StandardScaler()
                df_sel = sc.fit_transform(df_sel)
            new_score = custom_cv_eval(df_sel, model)
            #print(f"Total score for {f} is: {new_score}")
            if new_score > best_score:
                best_score = new_score
                best_feature = f
            #print()
    #Keep running until we don't improve
    if best_feature != "":
        print(f"The best feature was {best_feature}.  It scored {best_score}")
        current_features = [best_feature] + current_features
        
        return(get_best_features(features, model, df, current_features, scale))
    else:
        print("NO IMPROVEMENT")
        print(f"FINAL BEST SCORE: {best_score}")
        return current_features

In [365]:
f=get_best_features(pos_features, model,  df_train_filtered, current_features)

Current best score is: -30.218773001761235
The best feature was t2_matches_alltime.  It scored 30.874776448625802
Current best score is: 30.874776448625802
The best feature was t1_wins_last_3.  It scored 74.52979394547228
Current best score is: 74.52979394547228
NO IMPROVEMENT
FINAL BEST SCORE: 74.52979394547228


In [366]:
print(f)

['t1_wins_last_3', 't2_matches_alltime', 'winner_label', 't1_odds', 't2_odds']


In [367]:


custom_cv_eval(df_train_filtered[f], model)

74.52979394547228

In [368]:
#Try linear regression
model = LogisticRegression(random_state=75, max_iter=1000)

In [369]:
f_lr=get_best_features(pos_features, model,  df_train_filtered, current_features)

Current best score is: -32.01901772935724
The best feature was t1_losses_alltime.  It scored 5.611100018475831
Current best score is: 5.611100018475831
The best feature was t2_win_percent_last_10.  It scored 11.127997276752339
Current best score is: 11.127997276752339
NO IMPROVEMENT
FINAL BEST SCORE: 11.127997276752339


In [370]:
print(f_lr)

['t2_win_percent_last_10', 't1_losses_alltime', 'winner_label', 't1_odds', 't2_odds']


In [379]:
model = RandomForestClassifier(random_state=75)

In [380]:
f_rf=get_best_features(pos_features, model,  df_train_filtered, current_features)

Current best score is: -12.261964022635624
The best feature was t1_wins_vs_t2.  It scored 34.165873463357855
Current best score is: 34.165873463357855
The best feature was t2_losses_last_5.  It scored 60.097907893308125
Current best score is: 60.097907893308125
The best feature was t1_losses_season.  It scored 106.61346749144504
Current best score is: 106.61346749144504
The best feature was t1_win_percent_season.  It scored 109.27608407019953
Current best score is: 109.27608407019953
The best feature was t2_wins_last_5.  It scored 128.6612151144792
Current best score is: 128.6612151144792
NO IMPROVEMENT
FINAL BEST SCORE: 128.6612151144792


In [381]:
print(f_rf)

['t2_wins_last_5', 't1_win_percent_season', 't1_losses_season', 't2_losses_last_5', 't1_wins_vs_t2', 'winner_label', 't1_odds', 't2_odds']


In [374]:
model=MLPClassifier(random_state=75)
f_nn=f_rf=get_best_features(pos_features, model,  df_train_filtered, current_features)

Current best score is: -79.48500001896443
The best feature was t1_matches_season.  It scored 15.772090093861332
Current best score is: 15.772090093861332
NO IMPROVEMENT
FINAL BEST SCORE: 15.772090093861332


In [377]:
print(f_nn)

['t1_matches_season', 'winner_label', 't1_odds', 't2_odds']


In [382]:
#Let's pick some hyperparameters
model = tree.DecisionTreeClassifier(max_depth=10, min_samples_leaf=3, random_state=75)
features = ['t1_wins_last_3', 't2_matches_alltime', 'winner_label', 't1_odds', 't2_odds']

In [388]:
criterion_pos = ['gini', 'entropy']
max_depth_pos = [3, 6, 9, 12, None, 10]
min_samples_leaf_pos = [1,2,3,4,5]


In [396]:
best_score = 0
best_features = []
for c in criterion_pos:
    for md in max_depth_pos:
        for msl in min_samples_leaf_pos:
            model = tree.DecisionTreeClassifier(max_depth=md, min_samples_leaf=msl, criterion=c, random_state=75)
            possible_score=custom_cv_eval(df_train_filtered[features], model)
            if possible_score > best_score:
                best_score=possible_score
                best_features=[c, md, msl]
                best_model = model
                print(f"{best_score} {best_features}")

            
            

53.821078751411875 ['gini', 6, 2]
66.96480219928249 ['gini', 6, 3]
74.68854411482693 ['entropy', 12, 2]
74.96142463876302 ['entropy', 12, 4]


In [394]:
model = tree.DecisionTreeClassifier(max_depth=10, min_samples_leaf=3, random_state=75)


In [395]:
print(custom_cv_eval(df_train_filtered[features], model))

74.52979394547228


In [397]:
print(best_model)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=12, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=4, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=75, splitter='best')


In [398]:
model = LogisticRegression(random_state=75, max_iter=1000)
features = ['t2_win_percent_last_10', 't1_losses_alltime', 'winner_label', 't1_odds', 't2_odds']


In [406]:
tol_pos = [1e-4, 1e-3, 1e-2]
fit_intercept_pos = [True, False]
solver_pos = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
max_iter = [10000]


In [407]:
best_score = 0
best_features = 0
for tp in tol_pos:
    for fi in fit_intercept_pos:
        for s in solver_pos:
            for mi in max_iter:
                model = LogisticRegression(random_state=75, tol=tp, fit_intercept=fi, solver=s, max_iter=mi)
                possible_score=custom_cv_eval(df_train_filtered[features], model)
                if possible_score > best_score:
                    best_score=possible_score
                    best_features=[c, md, msl]
                    best_model = model
                    print(f"{best_score} {best_features}")                

7.557676825909205 ['entropy', None, 5]
11.127997276752339 ['entropy', None, 5]


In [408]:
model = RandomForestClassifier(random_state=75)
features = ['t2_wins_last_5', 't1_win_percent_season', 't1_losses_season', 't2_losses_last_5', 't1_wins_vs_t2',
            'winner_label', 't1_odds', 't2_odds']

In [410]:
n_estimators_pos=[10,50,100,150]
criterion_pos=['gini', 'entropy']
max_depth_pos=[None, 2,4,6,8,10]
min_samples_leaf_pos=[1,2,3,4,5,6]


In [412]:
best_score=0
best_features=0
for ne in n_estimators_pos:
    for c in criterion_pos:
        for md in max_depth_pos:
            for msl in min_samples_leaf_pos:
                model = RandomForestClassifier(random_state=75, n_estimators=ne, criterion=c, max_depth=md,
                                              min_samples_leaf=msl)
                possible_score=custom_cv_eval(df_train_filtered[features], model)
                if possible_score > best_score:
                    best_score=possible_score
                    best_features=[ne, c, md, msl]
                    best_model = model
                    print(f"{best_score} {best_features}")                          

26.488971559339962 [10, 'gini', None, 1]
83.33567705218252 [10, 'gini', None, 2]
88.54367924927315 [10, 'gini', 2, 1]
94.45318697450092 [10, 'gini', 2, 2]
111.02744486691587 [10, 'gini', 2, 3]
120.69470875257205 [10, 'gini', 6, 1]
136.77540311682964 [50, 'gini', 6, 1]


In [413]:
print(best_model)
print(features)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=6, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=75, verbose=0,
                       warm_start=False)
['t2_wins_last_5', 't1_win_percent_season', 't1_losses_season', 't2_losses_last_5', 't1_wins_vs_t2', 'winner_label', 't1_odds', 't2_odds']


In [414]:
model=MLPClassifier(random_state=75) 
features = ['t1_matches_season', 'winner_label', 't1_odds', 't2_odds']


In [418]:
hidden_layer_sizes_pos = [(50,50,50), (50,100,50), (100,)]
activation_pos = ['tanh', 'relu', 'logistic', 'identity']
solver_pos = ['lbfgs', 'sgd', 'adam']
alpha_pos = [.0001, .001, .01]
max_iter = [500]


In [419]:
best_score=0
best_features=0
for hls in hidden_layer_sizes_pos:
    for a in activation_pos:
        for s in solver_pos:
            for al in alpha_pos:
                for mi in max_iter:
                    model = MLPClassifier(random_state=75, hidden_layer_sizes=hls, activation=a, solver=s, alpha=al,
                                          max_iter=mi)
                    possible_score=custom_cv_eval(df_train_filtered[features], model)
                    if possible_score > best_score:
                        best_score=possible_score
                        best_features=[hls, a, s, al, mi]
                        best_model = model
                        print(f"{best_score} {best_features}")                                              

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

5.4824612494733564 [(50, 50, 50), 'tanh', 'sgd', 0.001, 500]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

21.62378994273036 [(50, 50, 50), 'logistic', 'sgd', 0.0001, 500]


  ret = a @ b
  ret = a @ b


ZeroDivisionError: division by zero

In [421]:
features = ['corona_virus_isolation', 't1_wins_season',
       't1_losses_season', 't2_wins_season', 't2_losses_season',
       't1_matches_season', 't2_matches_season', 't1_win_percent_season',
       't2_win_percent_season', 't1_wins_alltime', 't1_losses_alltime',
       't2_wins_alltime', 't2_losses_alltime', 't1_matches_alltime',
       't2_matches_alltime', 't1_win_percent_alltime',
       't2_win_percent_alltime', 't1_wins_last_3', 't1_losses_last_3',
       't2_wins_last_3', 't2_losses_last_3', 't1_win_percent_last_3',
       't2_win_percent_last_3', 't1_wins_last_5', 't1_losses_last_5',
       't2_wins_last_5', 't2_losses_last_5', 't1_win_percent_last_5',
       't2_win_percent_last_5', 't1_wins_last_10', 't1_losses_last_10',
       't2_wins_last_10', 't2_losses_last_10', 't1_win_percent_last_10',
       't2_win_percent_last_10', 
        't1_wins_vs_t2', 't1_losses_vs_t2',
       't1_matches_vs_t2', 'winner_label', 't1_odds', 't2_odds']

In [423]:
model=GradientBoostingClassifier(random_state=75)

In [426]:
learning_rate_pos=[.05, 0.1, .2]
n_estimators_pos=[25, 50, 100, 200]
min_samples_leaf_pos=[1,2,3,4,5,6]
max_depth_pos=[2,3,4,6,8,10,12,14,16]
max_features_pos=['sqrt', 'log2', None]


In [427]:
best_score=0
best_features=0
for lr in learning_rate_pos:
    for ne in n_estimators_pos:
        for ms in min_samples_leaf_pos:
            for md in max_depth_pos:
                for mf in max_features_pos:
                    model=GradientBoostingClassifier(random_state=75, learning_rate=lr, n_estimators=ne,
                                                    min_samples_leaf=ms, max_depth=md, max_features=mf)
                    possible_score=custom_cv_eval(df_train_filtered[features], model)
                    if possible_score > best_score:
                        best_score=possible_score
                        best_features=[lr, ne, ms, md, mf]
                        best_model=model
                        print(f"{best_score} {best_features}")

56.526795644613316 [0.05, 25, 1, 2, 'sqrt']
62.995473791949244 [0.05, 25, 1, 3, 'log2']
65.8596120835575 [0.05, 25, 1, 4, 'log2']
85.70035624211746 [0.05, 25, 1, 8, 'sqrt']
92.64616006551302 [0.05, 25, 1, 10, 'sqrt']
101.07348973671972 [0.05, 25, 1, 12, 'log2']


In [428]:
print(model)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.2, loss='deviance', max_depth=16,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=6, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=200,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=75, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)


In [429]:
print(features)

['corona_virus_isolation', 't1_wins_season', 't1_losses_season', 't2_wins_season', 't2_losses_season', 't1_matches_season', 't2_matches_season', 't1_win_percent_season', 't2_win_percent_season', 't1_wins_alltime', 't1_losses_alltime', 't2_wins_alltime', 't2_losses_alltime', 't1_matches_alltime', 't2_matches_alltime', 't1_win_percent_alltime', 't2_win_percent_alltime', 't1_wins_last_3', 't1_losses_last_3', 't2_wins_last_3', 't2_losses_last_3', 't1_win_percent_last_3', 't2_win_percent_last_3', 't1_wins_last_5', 't1_losses_last_5', 't2_wins_last_5', 't2_losses_last_5', 't1_win_percent_last_5', 't2_win_percent_last_5', 't1_wins_last_10', 't1_losses_last_10', 't2_wins_last_10', 't2_losses_last_10', 't1_win_percent_last_10', 't2_win_percent_last_10', 't1_wins_vs_t2', 't1_losses_vs_t2', 't1_matches_vs_t2', 'winner_label', 't1_odds', 't2_odds']
