# Regression Forest

Our main model will be a regression forest. First, we'll design a generic regression tree model that can be applied to any player based on the Federer pilot tree. We'll then populate a forest with any number of these trees to obtain a distribution of predictions. Hyperparameter selection will be done through GridSearchCV. I'm chosing this approach instead of a classic Forest Regressor because, due to the volatile nature of our problem, it's hard to say whether any individual tree's prediction is better than another's. Thus, I'd like to visualize the whole range of predictions offered by my trees.

In [1]:
### IMPORTS ###

import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score


!jupyter nbextension enable --py widgetsnbextension --sys-prefix
!jupyter serverextension enable voila --sys-prefix

We select the player and generate a table containing that player's matches which we will store in "player.csv", as to avoid creating one table for each player.  
Note : This setup step has to be repeated each time we change players.

In [2]:
def preprocess_atp(ATP):
    """
    Args:
        ATP (pandas dataframe): ATP table
    """
    
    ATP = ATP.drop(['tourney_id', 'draw_size', 'match_num', 'winner_id', 'winner_seed', 'winner_entry', 'loser_id', 'loser_seed', 'loser_entry'], axis=1)
    ATP = ATP.dropna(subset=['minutes', 'surface'])
    ATP = ATP.drop(labels=["w_ace", "w_df", "w_svpt", "w_1stIn", "w_1stWon", "w_2ndWon","w_SvGms", "w_bpSaved", "w_bpFaced",
                        "l_ace", "l_df", "l_svpt", "l_1stIn", "l_1stWon", "l_2ndWon","l_SvGms", "l_bpSaved", "l_bpFaced"], axis=1)
    return ATP

In [3]:
def preprocessing(player_table, scale_x=False, scale_y=False) :
    target='minutes'

    scale_x = False
    scale_y = False
    # 1. X, y unscaled
    X = player_table.drop([target], axis=1)
    y = np.asarray(player_table[target])
    y = y.reshape(-1,1)

    xscaler = StandardScaler()
    yscaler = StandardScaler()

    if (scale_x) :
        print("Scaling X...")
        xscaler.fit(X[['opp_ht', 'opp_age', 'rank_diff']])
        X[['opp_ht', 'opp_age', 'rank_diff']] = xscaler.transform(X[['opp_ht', 'opp_age', 'rank_diff']])

    if (scale_y) :
        print("Scaling y...")
        yscaler.fit(y)
        y = yscaler.transform(y)

    #display(pd.DataFrame(X).head())
    #display(pd.DataFrame(y).head())

    return X, y, xscaler, yscaler

In [4]:
### CUSTOM PREDICTION ###

def custom_predict(dtr, scale_x, scale_y, height, age, rank_diff, consecutive, surface, best_of, hand, level, round) :
    # returns the predicted length of a match given a set of match conditions

    # surface : clay(1), grass(2), hard(3)
    # best of : 3(1), 5(2)
    # hand : left(1), right(2)
    # level : A(1), D(2), G(3), M(4)
    # round : F(1), QF(2), R128(3), R16(4), R32(5), R64(6), R(7), SF(8)

    surface_input = [0, 0, 0]
    surface_input[surface - 1] = 1

    bo_input = [0, 0]
    bo_input[best_of - 1] = 1

    hand_input = [0, 0]
    hand_input[hand - 1] = 1

    level_input = [0, 0, 0]
    level_input[level - 1] = 1

    round_input = [0, 0, 0, 0, 0, 0, 0, 0]
    round_input[round - 1] = 1

    X_custom = pd.DataFrame(columns=X.columns)


    if (not scale_x and not scale_y) :  # 1. X, y unscaled
        input = [height, age, rank_diff] + [consecutive] + surface_input + bo_input + hand_input + level_input + round_input
        X_custom = pd.DataFrame(columns=X.columns)
        print(input)
        X_custom.loc[0] = input
        display(X_custom)

        print("Prediction : ", dtr.predict(X_custom), " minutes")

        return(dtr.predict(X_custom)[0])

    elif (scale_x and scale_y) :    # 2. X, y scaled
        input = xscaler.transform([[height, age, rank_diff]])
        print(input)
        input = np.append(input[0], [consecutive] + surface_input + bo_input + hand_input + level_input + round_input)

        X_custom.loc[0] = input
        # print("Scaled input : ")
        # display(X_custom)

        print("Scaled Prediction : ", dtr.predict(X_custom), " minutes")
        print("Prediction : ", yscaler.inverse_transform(dtr.predict(X_custom)), " minutes")

        return(yscaler.inverse_transform(dtr.predict(X_custom))[0])

    elif (scale_x and not scale_y) :    # 3. X scaled, y unscaled
        input = xscaler.transform([[height, age, rank_diff]]).tolist()
        input = np.append(input[0], [consecutive] + surface_input + bo_input + hand_input + level_input + round_input)

        X_custom.loc[0] = input
        # print("Scaled input : ")
        # display(X_custom)

        print("Prediction : ", dtr.predict(X_custom)[0], " minutes")

        return(dtr.predict(X_custom)[0])


# e.g. custom_predict(dtr, scale_x, scale_y, 180, 22, 0, 0, 4, 1, 2, 4, 5)

In [5]:
df = pd.DataFrame(
[[4, 7, 10],
[5, 8, 11],
[6, 9, 12]],
index=[1, 2, 3],
columns=['a', 'b', 'c'])


In [6]:
df = pd.DataFrame(df, columns=['a','b','d'])

In [7]:
df.dropna(axis=1, how='any')


Unnamed: 0,a,b
1,4,7
2,5,8
3,6,9


In [None]:
### REGRESSION FOREST ###

test_scores = []
predictions = []
average_prediction = -1

iter = 10

# UI

iter_label = widgets.Label('Nb of trees')
iter_slider = widgets.IntSlider(value=iter, min=2, max=100)
iter_text = widgets.Text()
widgets.jslink((iter_slider, 'value'), (iter_text, 'value'))
iter_input = widgets.HBox([iter_label, iter_slider, iter_text])

loading_bar = widgets.IntProgress(
    value=0,
    min=0,
    max=iter,
    bar_style='',
    orientation='horizontal'
)
loading_label = widgets.Label(value='Planting Trees...')
loading = widgets.HBox([loading_label, loading_bar])

# The Loop

def random_forest_loop() :
    
    global average_prediction

    for i in range(iter) :

        Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.3)
        dtr_cv.fit(Xtrain, ytrain)

        best_params = dtr_cv.best_params_
        print(f"Best parameters: {best_params})")

        dtr = DecisionTreeRegressor(**best_params)
        dtr.fit(Xtrain, ytrain)

        # Testing :

        ypred = dtr.predict(Xtest)

        print(f"Training score {i+1} : ", dtr.score(Xtrain, ytrain))
        print(f"Testing score {i+1} : ", dtr.score(Xtest, ytest))
        test_scores.append(dtr.score(Xtest, ytest))

        predictions.append(custom_predict(dtr, scale_x, scale_y, ht, a, rk, c, s, bo, h, l, r))     # ???

        # visualize_tree(dtr)

        loading_bar.value += 1
        if (i==iter-1) :
            loading_bar.bar_style='success'


        print("------------------------------------------------------------------------------------------------")

    print("------------------------------------------------------------------------------------------------")
    print("Test scores : ", test_scores)
    print("Test scores average : ", sum(test_scores)/len(test_scores))

    average_prediction = sum(predictions) / len(predictions)

    print("------------------------------------------------------------------------------------------------")
    print(f"Predictions : {predictions}")
    print(f"Average predicted length : {average_prediction}")


launch_btn = widgets.Button(description='Plant Forest')

def launch_btn_onclick(change) :
    global iter
    iter = iter_slider.value
    loading_bar.max = iter
    display(loading)
    random_forest_loop()

launch_btn.on_click(launch_btn_onclick)

display(iter_input)
display(launch_btn)

# launch_btn.click()

HBox(children=(Label(value='Nb of trees'), IntSlider(value=10, min=2), Text(value='')))

Button(description='Plant Forest', style=ButtonStyle())

TraitError: The 'value' trait of a Text instance expected a unicode string, not the int 10.

In [9]:
def preprocess_atp_for_player(atp, PLAYER="Felix Auger Aliassime", train_df_columns=[]):

    ### CLEAN PLAYER MATCHES TABLES ###
    
    setup = True

    if (setup) :    # Creating the table (if the player.csv table currently contains information for another player)

        
        # atp = atp[atp.loser_hand != 'U']
        # atp.to_csv("atp_cat.csv", index=False)
        player_won = atp[atp["winner_name"] == PLAYER]
        player_lost = atp[atp["loser_name"] == PLAYER]
        
        if player_won.shape[0] == 0 and player_lost.shape[0] == 0:
            return pd.DataFrame(), np.array([]) 

        # We'll drop all of the player's information except his age and rank
        player_won = player_won.drop(labels=["winner_name", "winner_hand", "winner_ht", "winner_ioc"], axis=1)
        player_won = player_won.rename(columns={"winner_age": "player_age", "winner_rank": "player_rank", "winner_rank_points": "player_rank_points",
                                        "loser_name":"opp_name", "loser_hand":"opp_hand", "loser_ht":"opp_ht", "loser_ioc":"opp_ioc", "loser_age":"opp_age",
                                        "loser_rank": "opp_rank", "loser_rank_points": "opp_rank_points"})
        player_won["index2"] = player_won.index
        player_won["player_won"] = "1"

        player_lost = player_lost.drop(labels=["loser_name", "loser_hand", "loser_ht", "loser_ioc"], axis=1)
        player_lost = player_lost.rename(columns={"loser_age": "player_age", "loser_rank": "player_rank", "loser_rank_points": "player_rank_points",
                                            "winner_name":"opp_name", "winner_hand":"opp_hand", "winner_ht":"opp_ht", "winner_ioc":"opp_ioc", "winner_age":"opp_age",
                                            "winner_rank": "opp_rank", "winner_rank_points": "opp_rank_points"})
        player_lost["index2"] = player_lost.index
        player_lost["player_won"] = "0"

        player = pd.concat([player_won, player_lost])
        # player.drop(list(player.filter(regex = 'Unnamed')), axis = 1, inplace = True)
        player.sort_index(inplace=True)

        # Win streaks
        result = player.player_won.astype(int)
        consecutive = result.groupby((result != result.shift()).cumsum()).cumcount()
        wins = pd.DataFrame({"win" : result, "consecutive" : consecutive})
        m = wins.win == 1
        wins.consecutive = wins.consecutive.where(m, 0)
        player["consecutive"] = wins.consecutive

        # player.to_csv("player.csv", index=False)     # Saving the table for ease of use

        # atp_players = pd.read_csv("atp_players.csv")
        # atp_players['name'] = atp_players['name_first'] + ' ' + atp_players['name_last']
        # atp_players.to_csv("atp_players.csv", index=False)     # Saving the table for ease of use


    else :  # player.csv already contains this player's information

        player = pd.read_csv("player.csv")

        #atp_players = pd.read_csv("atp_players.csv")
        
    # atp_players = pd.read_csv("atp_players.csv") #todo: check if atp_players is actually useless later
    # display(player.tail())
    print(f"{PLAYER} has {len(player)} recorded matches.")

    player1_pf = pd.DataFrame(player[["minutes", "surface", "best_of", "opp_hand", "opp_ht", "opp_age", "tourney_level", "round"]])
    player1_pf.loc[:,"rank_diff"] = player.loc[:,"player_rank"] - player.loc[:,"opp_rank"]
    player1_pf.loc[:,"consecutive"] = player.loc[:,"consecutive"]

    #todo : check which columns have been dumped 
    needed_columns = ['minutes', 'opp_ht', 'opp_age', 'rank_diff', 'consecutive',
        'surface_Clay', 'surface_Grass', 'surface_Hard', 
        'best_of_3', 'best_of_5', 'opp_hand_L', 'opp_hand_R', 
        'tourney_level_A', 'tourney_level_G', 'tourney_level_M', 
        'round_F', 'round_QF', 'round_R128', 'round_R16',
        'round_R32', 'round_R64', 'round_RR', 'round_SF']

    # One-Hot Encoding
    player1 = pd.get_dummies(data=player1_pf, columns=["surface", "best_of", "opp_hand", "tourney_level", "round"])
    player1 = player1.dropna(axis=0, how='any')

    is_train_set = len(train_df_columns) == 0
    if is_train_set:
        if not (set(needed_columns)).issubset(set(player1.columns.tolist())):
            print("Not enough columns data but still training a model")
            #print (player1.columns)
            #raise SystemExit(0)    
        player1 = pd.DataFrame(player1, columns=needed_columns)
        # Dropping empty columns
        player1 = player1.dropna(axis=1, how='any')
    
    else:
        player1 = pd.DataFrame(player1, columns=train_df_columns)
        player1 = player1.fillna(0)
        # todo : fillna: point à surveiller!! 

    #player1.tail(5)
    ### PRE-PROCESSING ###

    scale_x = False
    scale_y = False

    X, y, xscaler, yscaler = preprocessing(player1, scale_x, scale_y)
    # print(y.shape)
    
    return X, y

In [10]:
def perform_regression_for_player_rf_sklearn(Xtrain, ytrain, Xtest, ytest):
    params = {
    # "criterion":("squared_error", "friedman_mse", "absolute_error", "poisson"), 
    "max_depth":np.arange(3, 8, 2), 
    "min_samples_leaf":np.arange(1, 102, 50), 
    # "min_weight_fraction_leaf":[0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.075, 0.001], 
    # "max_features":np.arange(1, 25)
    }
    #todo : add n_estimators as an hyperparameter.
    #todo : probably 100 by default

    #dtr = DecisionTreeRegressor(random_state=42)
    #dtr_cv = GridSearchCV(dtr, params, scoring="r2", n_jobs=-1, verbose=0, cv=10)
    #rf_regr = RandomForestClassifier(random_state=42, max_depth = 6)
    rf_regr = RandomForestRegressor(random_state=42)
    rf_regr_cv = GridSearchCV(rf_regr, params, scoring="r2", n_jobs=-1, verbose=0, cv=10)

    # Perform hyperparameter optimisation if necessary
    perform_Grid_Search = False
    if perform_Grid_Search:
        rf_regr_cv.fit(Xtrain, ytrain)
        best_params = rf_regr_cv.best_params_
        
    else: 
        best_params = {'max_depth': 7, 'min_samples_leaf': 51}
    #print(f"Best parameters: {best_params})")


    # Fitting the Regressor
    rf_regr = RandomForestRegressor(**best_params)
    rf_regr.fit(Xtrain, ytrain)

    # Testing and Computing scores:

    ypred = rf_regr.predict(Xtest)
    train_score = rf_regr.score(Xtrain, ytrain)
    #print(f"Training score : ", rf_regr.score(Xtrain, ytrain))
    test_score = rf_regr.score(Xtest, ytest)
    #print(f"Testing score : ", test_score)

    #test_scores.append(rf_regr.score(Xtest, ytest))
    #predictions.append(custom_predict(dtr, scale_x, scale_y, ht, a, rk, c, s, bo, h, l, r))     # ???
        
    return train_score, test_score, ypred


In [11]:
def perform_regression_for_player_rf_Bowen(Xtrain, ytrain, Xtest, ytest):
    ### PARAMETER SELECTION ###

    params = {
        # "criterion":("squared_error", "friedman_mse", "absolute_error", "poisson"), 
        "max_depth":np.arange(3, 6), 
        "min_samples_leaf":np.arange(1, 100), 
        # "min_weight_fraction_leaf":[0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.075, 0.001], 
        # "max_features":np.arange(1, 25)
    }

    dtr = DecisionTreeRegressor(random_state=42)
    dtr_cv = GridSearchCV(dtr, params, scoring="r2", n_jobs=-1, verbose=0, cv=10)

    # Perform hyperparameter optimisation if necessary
    perform_Grid_Search = False
    if perform_Grid_Search:
        dtr_cv.fit(Xtrain, ytrain)
        best_params = dtr_cv.best_params_
        
    else: 
        best_params = {'max_depth': 7, 'min_samples_leaf': 51}
        # Parameters used for Leo's first random forest model
    print(f"Best parameters: {best_params})")

    ### REGRESSION FOREST ###

    test_scores = []
    predictions = []
    average_prediction = -1

    iter = 10  #todo : original Bowen's model
    iter = 100 #todo : to compare with Leo's first random forest.

    # UI

    # iter_label = widgets.Label('Nb of trees')
    # iter_slider = widgets.IntSlider(value=iter, min=2, max=100)
    # iter_text = widgets.Text()
    # widgets.jslink((iter_slider, 'value'), (iter_text, 'value'))
    # iter_input = widgets.HBox([iter_label, iter_slider, iter_text])

    # loading_bar = widgets.IntProgress(
    #     value=0,
    #     min=0,
    #     max=iter,
    #     bar_style='',
    #     orientation='horizontal'
    # )
    # loading_label = widgets.Label(value='Planting Trees...')
    # loading = widgets.HBox([loading_label, loading_bar])

    # The Loop

    for i in range(iter) :

        # Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.3)
        
        dtr = DecisionTreeRegressor(**best_params)
        dtr.fit(Xtrain, ytrain)

        # Testing :

        ypred = dtr.predict(Xtest)
        predictions.append(ypred)

        # print(f"Training score {i+1} : ", dtr.score(Xtrain, ytrain))
        # print(f"Testing score {i+1} : ", dtr.score(Xtest, ytest))
        # test_scores.append(dtr.score(Xtest, ytest))
        # predictions.append(custom_predict(dtr, scale_x, scale_y, ht, a, rk, c, s, bo, h, l, r))     # ???

        # visualize_tree(dtr)

        # loading_bar.value += 1
        # if (i==iter-1) :
        #     loading_bar.bar_style='success'


        # print("------------------------------------------------------------------------------------------------")

    #print("------------------------------------------------------------------------------------------------")
    
    #test_scores = np.array(test_scores)
    #average_score = np.sum(test_scores, axis=0)/len(test_scores)
    
    #print("Test scores : ", test_scores)
    #print("Test scores average : ", average_score)
    # todo : implement real test score using average prediction.

    predictions = np.array(predictions)
    average_prediction = np.sum(predictions, axis=0) / len(predictions)

    #print("------------------------------------------------------------------------------------------------")
    #print(f"Predictions : {predictions}")
    #print(f"Average predicted length : {average_prediction}")
    average_score = 0
    return average_prediction, average_score


In [51]:
list(atp_test.columns)

['tourney_name',
 'surface',
 'tourney_level',
 'tourney_date',
 'winner_name',
 'winner_hand',
 'winner_ht',
 'winner_ioc',
 'winner_age',
 'loser_name',
 'loser_hand',
 'loser_ht',
 'loser_ioc',
 'loser_age',
 'score',
 'best_of',
 'round',
 'minutes',
 'winner_rank',
 'winner_rank_points',
 'loser_rank',
 'loser_rank_points']

In [58]:
a = [name in all_player_names for name in test_names]
len(test_names) - np.count_nonzero(a)

54

In [13]:
atp_test.head(5)

Unnamed: 0,tourney_name,surface,tourney_level,tourney_date,winner_name,winner_hand,winner_ht,winner_ioc,winner_age,loser_name,...,loser_ioc,loser_age,score,best_of,round,minutes,winner_rank,winner_rank_points,loser_rank,loser_rank_points
1938,Canada Masters,Hard,M,20220808,Pablo Carreno Busta,R,188.0,ESP,31.0,Hubert Hurkacz,...,POL,25.4,3-6 6-3 6-3,3,F,105.0,23.0,1555.0,10.0,3015.0
1939,Canada Masters,Hard,M,20220808,Hubert Hurkacz,R,196.0,POL,25.4,Casper Ruud,...,NOR,23.6,5-7 6-3 6-2,3,SF,122.0,10.0,3015.0,7.0,4685.0
1940,Canada Masters,Hard,M,20220808,Pablo Carreno Busta,R,188.0,ESP,31.0,Daniel Evans,...,GBR,32.2,7-5 6-7(7) 6-2,3,SF,178.0,23.0,1555.0,39.0,1170.0
1941,Canada Masters,Hard,M,20220808,Hubert Hurkacz,R,196.0,POL,25.4,Nick Kyrgios,...,AUS,27.2,7-6(4) 6-7(5) 6-1,3,QF,106.0,10.0,3015.0,37.0,1205.0
1942,Canada Masters,Hard,M,20220808,Casper Ruud,R,183.0,NOR,23.6,Felix Auger Aliassime,...,CAN,21.9,6-1 6-2,3,QF,74.0,7.0,4685.0,9.0,3490.0


In [18]:
Xtrain.columns

Index(['opp_ht', 'opp_age', 'rank_diff', 'consecutive', 'surface_Clay',
       'surface_Grass', 'surface_Hard', 'best_of_3', 'best_of_5', 'opp_hand_L',
       'opp_hand_R', 'tourney_level_A', 'tourney_level_G', 'tourney_level_M',
       'round_F', 'round_QF', 'round_R128', 'round_R16', 'round_R32',
       'round_R64', 'round_RR', 'round_SF'],
      dtype='object')

In [14]:
atp_all_players = pd.read_csv("atp_cat.csv")
Xtrain, ytrain = preprocess_atp_for_player(atp_all_players, PLAYER="Roger Federer")
Xtrain.head(5)

Roger Federer has 1424 recorded matches.


Unnamed: 0,opp_ht,opp_age,rank_diff,consecutive,surface_Clay,surface_Grass,surface_Hard,best_of_3,best_of_5,opp_hand_L,...,tourney_level_G,tourney_level_M,round_F,round_QF,round_R128,round_R16,round_R32,round_R64,round_RR,round_SF
24174,180.0,23.731691,614.0,0,1,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
24932,180.0,28.618754,833.0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
24941,196.0,28.418891,835.0,1,0,0,1,1,0,0,...,0,0,0,0,0,1,0,0,0,0
24946,183.0,28.457221,858.0,0,0,0,1,1,0,1,...,0,0,0,1,0,0,0,0,0,0
24954,180.0,28.435318,388.0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0


In [92]:
atp_test = pd.read_csv("ATP_matches/atp_matches_2022.csv")
atp_test = atp_test[atp_test['tourney_date'] > 20220801] # keeping rows that are not in the training test
atp_test = preprocess_atp(atp_test)

atp_all_players = pd.read_csv("atp_cat.csv")

tested_player_names = pd.concat([atp_test["winner_name"], atp_test["loser_name"]])
tested_player_names = pd.unique(tested_player_names)

weighted_test_score = 0
weighted_train_score = 0
total_length_test = 0
total_length_train = 0
ypred_list = []
ytest_list = []

for player_name in tested_player_names:
    print(player_name)
    Xtrain, ytrain = preprocess_atp_for_player(atp_all_players, PLAYER=player_name) # atp_player
        
    if Xtrain.shape[0]!=0:
        columns_used_by_model = ['minutes'] + list(Xtrain.columns)
        Xtest, ytest = preprocess_atp_for_player(atp_test, PLAYER=player_name, train_df_columns=columns_used_by_model) # atp_player_test
        # If we only have non-empty dataframes
        train_score_player, test_score_player, ypred = perform_regression_for_player_rf_sklearn(Xtrain, ytrain, Xtest, ytest)
        
        ypred_list.append(ypred)
        ytest_list.append(ytest) #todo : add ytest when Xtrain.shape[0]==0
        
        if not math.isnan(test_score_player): 
            weighted_test_score += test_score_player * len(ytest)
        if not math.isnan(train_score_player):
            weighted_train_score += train_score_player * len(ytrain)
        total_length_test += len(ytest)
        total_length_train += len(ytrain)
        
    else:
        shape1 = atp_test[atp_test["winner_name"] == player_name].shape[0]
        shape2 = atp_test[atp_test["loser_name"] == player_name].shape[0]
        total_length_test += shape1 + shape2
        print(f"no {player_name} in train data")
        # weighted_test_score += 0
        # weighted_train_score += 0
    
    

print(f"Training score : ", weighted_train_score/total_length_train)
print(f"Testing score : ", weighted_test_score/total_length_test)




Pablo Carreno Busta
Pablo Carreno Busta has 389 recorded matches.
Pablo Carreno Busta has 11 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       11
surface_Grass      11
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    11
tourney_level_G     0
tourney_level_M     0
round_F             0
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           11
round_SF            0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Hubert Hurkacz
Hubert Hurkacz has 154 recorded matches.
Hubert Hurkacz has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    8
tourney_level_G    0
tourney_level_M    0
round_F            0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Casper Ruud
Casper Ruud has 166 recorded matches.
Casper Ruud has 12 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       11
surface_Grass      11
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    11
tourney_level_G     0
tourney_level_M     0
round_F             0
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           11
round_SF            0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Daniel Evans
Daniel Evans has 187 recorded matches.
Daniel Evans has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       9
surface_Grass      9
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    9
tourney_level_G    0
tourney_level_M    0
round_F            9
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           9
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Nick Kyrgios
Nick Kyrgios has 239 recorded matches.
Nick Kyrgios has 11 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       11
surface_Grass      11
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L         11
opp_hand_R          0
tourney_level_A    11
tourney_level_G     0
tourney_level_M     0
round_F            11
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           11
round_SF           11
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Felix Auger Aliassime
Felix Auger Aliassime has 171 recorded matches.
Felix Auger Aliassime has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jack Draper
Jack Draper has 5 recorded matches.
Not enough columns data but still training a model
Jack Draper has 10 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Grass      10
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A     0
tourney_level_G     0
tourney_level_M     0
round_QF            0
round_R128          0
round_R16           0
round_R32           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Tommy Paul
Tommy Paul has 95 recorded matches.
Not enough columns data but still training a model
Tommy Paul has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       9
surface_Grass      9
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    9
tourney_level_G    0
tourney_level_M    0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           9
round_SF           9
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alex De Minaur
Alex De Minaur has 183 recorded matches.
Alex De Minaur has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    8
tourney_level_G    0
tourney_level_M    0
round_F            8
round_QF           8
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           8
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Albert Ramos
Albert Ramos has 491 recorded matches.
Albert Ramos has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            8
round_QF           8
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           8
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Roberto Bautista Agut
Roberto Bautista Agut has 501 recorded matches.
Roberto Bautista Agut has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         7
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           7
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Cameron Norrie
Cameron Norrie has 181 recorded matches.
Cameron Norrie has 12 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       11
surface_Grass      11
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    11
tourney_level_G     0
tourney_level_M     0
round_F            11
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           11
round_SF            0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jannik Sinner
Jannik Sinner has 106 recorded matches.
Jannik Sinner has 10 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       10
surface_Grass      10
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    10
tourney_level_G     0
tourney_level_M     0
round_F            10
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           10
round_SF           10
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Gael Monfils
Gael Monfils has 771 recorded matches.
Gael Monfils has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          3
opp_hand_L         0
opp_hand_R         0
tourney_level_A    3
tourney_level_G    3
tourney_level_M    0
round_F            3
round_QF           3
round_R128         3
round_R16          0
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Taylor Fritz
Taylor Fritz has 240 recorded matches.
Taylor Fritz has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         7
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Marin Cilic
Marin Cilic has 775 recorded matches.
Marin Cilic has 10 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       10
surface_Grass      10
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    10
tourney_level_G     0
tourney_level_M     0
round_F            10
round_QF           10
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           10
round_SF           10
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Grigor Dimitrov
Grigor Dimitrov has 539 recorded matches.
Grigor Dimitrov has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Grass      5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            5
round_QF           5
round_R128         0
round_R16          5
round_R32          0
round_R64          0
round_RR           5
round_SF           5
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Diego Schwartzman
Diego Schwartzman has 346 recorded matches.
Diego Schwartzman has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    8
tourney_level_G    0
tourney_level_M    0
round_F            8
round_QF           8
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           8
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Emil Ruusuvuori
Emil Ruusuvuori has 67 recorded matches.
Not enough columns data but still training a model
Emil Ruusuvuori has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       6
surface_Grass      6
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_QF           6
round_R128         0
round_R16          6
round_R32          0
round_R64          0
round_RR           6
round_SF           6
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alex Molcan
Alex Molcan has 15 recorded matches.
Not enough columns data but still training a model
Alex Molcan has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jenson Brooksby
Jenson Brooksby has 21 recorded matches.
Not enough columns data but still training a model
Jenson Brooksby has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       6
surface_Grass      6
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_R         0
tourney_level_A    6
tourney_level_G    0
tourney_level_M    0
round_F            6
round_QF           6
round_R128         0
round_R16          6
round_R32          0
round_R64          0
round_SF           6
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Botic Van De Zandschulp
Botic Van De Zandschulp has 4 recorded matches.
Not enough columns data but still training a model
Botic Van De Zandschulp has 10 recorded matches.


  rf_regr.fit(Xtrain, ytrain)


Yoshihito Nishioka
Yoshihito Nishioka has 163 recorded matches.
Yoshihito Nishioka has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Adrian Mannarino
Adrian Mannarino has 441 recorded matches.
Adrian Mannarino has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Holger Rune
Holger Rune has 19 recorded matches.
Not enough columns data but still training a model
Holger Rune has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    5
tourney_level_G    0
tourney_level_M    0
round_QF           5
round_R128         0
round_R16          5
round_R32          0
round_R64          0
round_RR           5
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Maxime Cressy
Maxime Cressy has 22 recorded matches.
Not enough columns data but still training a model
Maxime Cressy has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Frances Tiafoe
Frances Tiafoe has 199 recorded matches.
Frances Tiafoe has 10 recorded matches.
minutes             0
opp_ht              0
opp_age             0
rank_diff           0
consecutive         0
surface_Clay       10
surface_Grass      10
surface_Hard        0
best_of_3           0
best_of_5           0
opp_hand_L          0
opp_hand_R          0
tourney_level_A    10
tourney_level_G     0
tourney_level_M     0
round_F            10
round_QF            0
round_R128          0
round_R16           0
round_R32           0
round_R64           0
round_RR           10
round_SF            0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Karen Khachanov
Karen Khachanov has 293 recorded matches.
Karen Khachanov has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       9
surface_Grass      9
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    9
tourney_level_G    0
tourney_level_M    0
round_F            9
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           9
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Borna Coric
Borna Coric has 276 recorded matches.
Borna Coric has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       9
surface_Grass      9
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    9
tourney_level_G    0
tourney_level_M    0
round_F            0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           9
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Stefanos Tsitsipas
Stefanos Tsitsipas has 275 recorded matches.
Stefanos Tsitsipas has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          7
round_RR           7
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Daniil Medvedev
Daniil Medvedev has 297 recorded matches.
Daniil Medvedev has 9 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       9
surface_Grass      9
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    9
tourney_level_G    0
tourney_level_M    0
round_F            9
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           9
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


John Isner
John Isner has 683 recorded matches.
John Isner has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Grass      5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    5
tourney_level_G    0
tourney_level_M    0
round_F            5
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           5
round_SF           5
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Carlos Alcaraz
no Carlos Alcaraz in train data
Denis Shapovalov
Denis Shapovalov has 236 recorded matches.
Denis Shapovalov has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           7
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Andrey Rublev
Andrey Rublev has 268 recorded matches.
Andrey Rublev has 8 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       8
surface_Grass      8
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    8
tourney_level_G    0
tourney_level_M    0
round_F            8
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           8
round_SF           8
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Sebastian Korda
Sebastian Korda has 46 recorded matches.
Not enough columns data but still training a model
Sebastian Korda has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Grass      5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    5
tourney_level_G    0
tourney_level_M    0
round_F            5
round_QF           5
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_SF           5
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Ben Shelton
no Ben Shelton in train data
Fabio Fognini
Fabio Fognini has 669 recorded matches.
Fabio Fognini has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           7
round_R128         0
round_R16          7
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Filip Krajinovic
Filip Krajinovic has 190 recorded matches.
Filip Krajinovic has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Aslan Karatsev
Aslan Karatsev has 61 recorded matches.
Aslan Karatsev has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Andy Murray
Andy Murray has 838 recorded matches.
Andy Murray has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       6
surface_Grass      6
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    6
tourney_level_G    0
tourney_level_M    0
round_F            6
round_QF           6
round_R128         0
round_R16          6
round_R32          0
round_R64          0
round_RR           6
round_SF           6
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Mackenzie Mcdonald
Mackenzie Mcdonald has 3 recorded matches.
Not enough columns data but still training a model
Mackenzie Mcdonald has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          0
opp_hand_R         0
tourney_level_A    4
round_R16          4
round_R32          0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Miomir Kecmanovic
Miomir Kecmanovic has 109 recorded matches.
Not enough columns data but still training a model
Miomir Kecmanovic has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_SF           4
dtype: int64
Marcos Giron
Marcos Giron has 68 recorded matches.
Not enough columns data but still training a model
Marcos Giron has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4


  rf_regr.fit(Xtrain, ytrain)


Laslo Djere
Laslo Djere has 156 recorded matches.
Laslo Djere has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    7
round_F            0
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Marc Andrea Huesler
Marc Andrea Huesler has 14 recorded matches.
Not enough columns data but still training a model
Marc Andrea Huesler has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
round_QF           0
round_R16          0
round_R32          0
round_RR           5
round_SF           0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Richard Gasquet
Richard Gasquet has 854 recorded matches.
Richard Gasquet has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    7
round_F            7
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Benjamin Bonzi
Benjamin Bonzi has 18 recorded matches.
Not enough columns data but still training a model
Benjamin Bonzi has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Dominic Thiem
Dominic Thiem has 429 recorded matches.
Dominic Thiem has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    4
round_F            4
round_QF           4
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Ilya Ivashka
Ilya Ivashka has 96 recorded matches.
Ilya Ivashka has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       6
surface_Grass      6
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    6
round_F            6
round_QF           6
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           6
round_SF           6
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jason Kubler
Jason Kubler has 11 recorded matches.
Not enough columns data but still training a model
Jason Kubler has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Steve Johnson
Steve Johnson has 330 recorded matches.
Steve Johnson has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    4
round_F            4
round_QF           4
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Lorenzo Sonego
Lorenzo Sonego has 130 recorded matches.
Not enough columns data but still training a model
Lorenzo Sonego has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Thiago Monteiro
Thiago Monteiro has 136 recorded matches.
Not enough columns data but still training a model
Thiago Monteiro has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       5
surface_Grass      5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    5
round_QF           5
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           5
round_SF           5
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jaume Munar
Jaume Munar has 113 recorded matches.
Jaume Munar has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Peter Gojowczyk
Peter Gojowczyk has 153 recorded matches.
Not enough columns data but still training a model
Peter Gojowczyk has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Mikael Ymer
Mikael Ymer has 75 recorded matches.
Mikael Ymer has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Soon Woo Kwon
Soon Woo Kwon has 64 recorded matches.
Soon Woo Kwon has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    4
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


John Millman
John Millman has 221 recorded matches.
John Millman has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alejandro Tabilo
Alejandro Tabilo has 15 recorded matches.
Not enough columns data but still training a model
Alejandro Tabilo has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


James Duckworth
James Duckworth has 106 recorded matches.
James Duckworth has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    4
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Kyle Edmund
Kyle Edmund has 226 recorded matches.
Kyle Edmund has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Chun Hsin Tseng
Chun Hsin Tseng has 5 recorded matches.
Not enough columns data but still training a model
Chun Hsin Tseng has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          0
opp_hand_R         0
tourney_level_A    0
tourney_level_M    3
round_R128         0
round_R32          0
round_RR           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Tallon Griekspoor
Tallon Griekspoor has 24 recorded matches.
Not enough columns data but still training a model
Tallon Griekspoor has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Arthur Rinderknech
Arthur Rinderknech has 36 recorded matches.
Not enough columns data but still training a model
Arthur Rinderknech has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Nuno Borges
Nuno Borges has 2 recorded matches.
Not enough columns data but still training a model
Nuno Borges has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
best_of_3          1
opp_hand_R         0
tourney_level_A    1
round_R16          1
round_R32          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Yibing Wu
Yibing Wu has 10 recorded matches.
Not enough columns data but still training a model
Yibing Wu has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          3
opp_hand_L         3
opp_hand_R         0
tourney_level_A    3
tourney_level_M    3
round_R32          0
round_R64          0
round_RR           3
dtype: int64
J J Wolf
no J J Wolf in train data
Alexander Bublik
Alexander Bublik has 131 recorded matches.
Alexander Bublik has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R

  rf_regr.fit(Xtrain, ytrain)


Cristian Garin
Cristian Garin has 141 recorded matches.
Cristian Garin has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          2
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    2
tourney_level_G    0
tourney_level_M    2
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Daniel Elahi Galan
Daniel Elahi Galan has 40 recorded matches.
Not enough columns data but still training a model
Daniel Elahi Galan has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          3
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
tourney_level_M    3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jordan Thompson
Jordan Thompson has 172 recorded matches.
Jordan Thompson has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          2
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    2
tourney_level_G    0
tourney_level_M    2
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alejandro Davidovich Fokina
Alejandro Davidovich Fokina has 75 recorded matches.
Not enough columns data but still training a model
Alejandro Davidovich Fokina has 6 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       6
surface_Grass      6
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    6
tourney_level_G    0
tourney_level_M    0
round_QF           6
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_SF           6
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Marton Fucsovics
Marton Fucsovics has 190 recorded matches.
Marton Fucsovics has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Emilio Nava
Emilio Nava has 3 recorded matches.
Not enough columns data but still training a model
Emilio Nava has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_R128         0
round_R32          3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Hugo Grenier
Hugo Grenier has 2 recorded matches.
Not enough columns data but still training a model
Hugo Grenier has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          2
opp_hand_L         2
opp_hand_R         0
tourney_level_A    2
round_R16          2
round_R32          2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Matteo Berrettini
Matteo Berrettini has 174 recorded matches.
Matteo Berrettini has 7 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       7
surface_Grass      7
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         7
opp_hand_R         0
tourney_level_A    7
tourney_level_G    0
tourney_level_M    0
round_F            7
round_QF           0
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           7
round_SF           7
dtype: int64
Brandon Holt
no Brandon Holt in train data
Pedro Cachin
Pedro Cachin has 7 recorded matches.
Not enough columns data but still training a model
Pedro Cachin has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
best_of_3          2
opp_hand_L         0
opp_hand_R         0
tourney_level_A    2
round_QF 

  rf_regr.fit(Xtrain, ytrain)


Corentin Moutet
Corentin Moutet has 81 recorded matches.
Not enough columns data but still training a model
Corentin Moutet has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          4
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
tourney_level_M    4
round_F            4
round_QF           4
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Tim Van Rijthoven
Tim Van Rijthoven has 1 recorded matches.
Not enough columns data but still training a model
Tim Van Rijthoven has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          2
opp_hand_L         2
tourney_level_A    2
round_R64          0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Gijs Brouwer
no Gijs Brouwer in train data
Lorenzo Musetti
Lorenzo Musetti has 43 recorded matches.
Not enough columns data but still training a model
Lorenzo Musetti has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_QF           4
round_R128         0
round_R16          4
round_R32          0
round_R64          0
round_SF           4
dtype: int64
Brandon Nakashima
Brandon Nakashima has 30 recorded matches.
Not enough columns data but still training a model
Brandon Nakashima has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Grass      5
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L  

  rf_regr.fit(Xtrain, ytrain)


Christopher Eubanks
Christopher Eubanks has 21 recorded matches.
Not enough columns data but still training a model
Christopher Eubanks has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Grass      2
surface_Hard       0
best_of_3          2
best_of_5          0
opp_hand_R         0
tourney_level_A    2
tourney_level_G    0
tourney_level_M    2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Federico Coria
Federico Coria has 47 recorded matches.
Not enough columns data but still training a model
Federico Coria has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Joao Sousa
Joao Sousa has 370 recorded matches.
Joao Sousa has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    3
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Roberto Carballes Baena
Roberto Carballes Baena has 143 recorded matches.
Not enough columns data but still training a model
Roberto Carballes Baena has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          2
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    2
tourney_level_G    0
tourney_level_M    2
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alexei Popyrin
Alexei Popyrin has 84 recorded matches.
Alexei Popyrin has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          2
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    2
tourney_level_G    0
tourney_level_M    2
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Rafael Nadal
Rafael Nadal has 1176 recorded matches.
Rafael Nadal has 5 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         4
opp_hand_R         0
tourney_level_A    4
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          0
round_R32          0
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Sebastian Baez
Sebastian Baez has 5 recorded matches.
Not enough columns data but still training a model
Sebastian Baez has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Hard       0
best_of_3          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    3
round_R16          3
round_R32          3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alexis Galarneau
no Alexis Galarneau in train data
David Goffin
David Goffin has 441 recorded matches.
David Goffin has 4 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       4
surface_Grass      4
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            4
round_QF           4
round_R128         0
round_R16          4
round_R32          4
round_R64          0
round_RR           4
round_SF           4
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Stan Wawrinka
Stan Wawrinka has 771 recorded matches.
Stan Wawrinka has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_RR           3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Benoit Paire
Benoit Paire has 474 recorded matches.
Benoit Paire has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         2
tourney_level_A    2
tourney_level_G    0
tourney_level_M    0
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Pedro Martinez
Pedro Martinez has 53 recorded matches.
Not enough columns data but still training a model
Pedro Martinez has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Hugo Gaston
Hugo Gaston has 19 recorded matches.
Not enough columns data but still training a model
Hugo Gaston has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Francisco Cerundolo
Francisco Cerundolo has 15 recorded matches.
Not enough columns data but still training a model
Francisco Cerundolo has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    3
tourney_level_G    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_SF           3
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Vasek Pospisil
Vasek Pospisil has 251 recorded matches.
Vasek Pospisil has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          0
best_of_5          1
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    1
tourney_level_M    0
round_F            1
round_QF           1
round_R128         1
round_R16          1
round_R32          1
round_R64          0
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Nikoloz Basilashvili
Nikoloz Basilashvili has 252 recorded matches.
Nikoloz Basilashvili has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          0
round_R64          0
round_RR           3
round_SF           3
dtype: int64
Henri Laaksonen
Henri Laaksonen has 95 recorded matches.
Not enough columns data but still training a model
Henri Laaksonen has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          0
best_of_5          1
opp_hand_L         1
opp_hand_R  

  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Thanasi Kokkinakis
Thanasi Kokkinakis has 70 recorded matches.
Not enough columns data but still training a model
Thanasi Kokkinakis has 3 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       3
surface_Grass      3
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         3
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    0
round_F            3
round_QF           3
round_R128         0
round_R16          3
round_R32          3
round_R64          0
round_SF           3
dtype: int64
Dusan Lajovic
Dusan Lajovic has 321 recorded matches.
Dusan Lajovic has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    0
tou

  rf_regr.fit(Xtrain, ytrain)


Jiri Lehecka
Jiri Lehecka has 1 recorded matches.
Not enough columns data but still training a model
no Jiri Lehecka in train data
Daniel Altmaier
Daniel Altmaier has 29 recorded matches.
Not enough columns data but still training a model
Daniel Altmaier has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Kamil Majchrzak
Kamil Majchrzak has 36 recorded matches.
Not enough columns data but still training a model
Kamil Majchrzak has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Christopher Oconnell
Christopher Oconnell has 2 recorded matches.
Not enough columns data but still training a model
Christopher Oconnell has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          0
opp_hand_R         0
tourney_level_A    0
round_R32          2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Shintaro Mochizuki
Shintaro Mochizuki has 2 recorded matches.
Not enough columns data but still training a model
no Shintaro Mochizuki in train data
Denis Kudla
Denis Kudla has 157 recorded matches.
Not enough columns data but still training a model
Denis Kudla has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         0
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Michail Pervolarakis
Michail Pervolarakis has 8 recorded matches.
Not enough columns data but still training a model
Michail Pervolarakis has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    0
round_RR           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Taro Daniel
Taro Daniel has 138 recorded matches.
Taro Daniel has 2 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       2
surface_Grass      2
surface_Hard       0
best_of_3          0
best_of_5          0
opp_hand_L         2
opp_hand_R         0
tourney_level_A    0
tourney_level_G    0
tourney_level_M    2
round_F            2
round_QF           2
round_R128         0
round_R16          2
round_R32          2
round_R64          0
round_RR           2
round_SF           2
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Stefan Kozlov
Stefan Kozlov has 23 recorded matches.
Not enough columns data but still training a model
Stefan Kozlov has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_M    1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Quentin Halys
Quentin Halys has 30 recorded matches.
Not enough columns data but still training a model
Quentin Halys has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
round_R128         0
round_R16          1
round_R32          1
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Ugo Humbert
Ugo Humbert has 114 recorded matches.
Ugo Humbert has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Alexander Ritschard
no Alexander Ritschard in train data
Tomas Martin Etcheverry
Tomas Martin Etcheverry has 3 recorded matches.
Not enough columns data but still training a model
Tomas Martin Etcheverry has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Hard       0
best_of_3          1
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
round_R16          1
round_R32          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Nicolas Jarry
Nicolas Jarry has 105 recorded matches.
Nicolas Jarry has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Aljaz Bedene
Aljaz Bedene has 254 recorded matches.
Aljaz Bedene has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Tomas Machac
Tomas Machac has 4 recorded matches.
Not enough columns data but still training a model
Tomas Machac has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
round_R128         0
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Bernabe Zapata Miralles
Bernabe Zapata Miralles has 19 recorded matches.
Not enough columns data but still training a model
Bernabe Zapata Miralles has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
round_R128         0
round_R16          1
round_R32          1
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Facundo Bagnis
Facundo Bagnis has 90 recorded matches.
Not enough columns data but still training a model
Facundo Bagnis has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Zhizhen Zhang
Zhizhen Zhang has 11 recorded matches.
Not enough columns data but still training a model
Zhizhen Zhang has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
dtype: int64




Oscar Otte
Oscar Otte has 19 recorded matches.
Not enough columns data but still training a model
Oscar Otte has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
round_R128         0
round_R16          1
round_R32          1
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Sam Querrey
Sam Querrey has 659 recorded matches.
Sam Querrey has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Pavel Kotov
Pavel Kotov has 2 recorded matches.
Not enough columns data but still training a model
Pavel Kotov has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          1
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
round_R32          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Maximilian Marterer
Maximilian Marterer has 75 recorded matches.
Not enough columns data but still training a model
Maximilian Marterer has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Norbert Gombos
Norbert Gombos has 59 recorded matches.
Not enough columns data but still training a model
Norbert Gombos has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         0
opp_hand_R         1
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Jiri Vesely
Jiri Vesely has 251 recorded matches.
Jiri Vesely has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Enzo Couacaud
Enzo Couacaud has 4 recorded matches.
Not enough columns data but still training a model
Enzo Couacaud has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
round_R128         0
round_R32          1
round_R64          1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Federico Delbonis
Federico Delbonis has 315 recorded matches.
Federico Delbonis has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)


Fernando Verdasco
Fernando Verdasco has 947 recorded matches.
Fernando Verdasco has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64




Jack Sock
Jack Sock has 272 recorded matches.
Jack Sock has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Clay       1
surface_Grass      1
surface_Hard       0
best_of_3          1
best_of_5          0
opp_hand_L         1
opp_hand_R         0
tourney_level_A    1
tourney_level_G    0
tourney_level_M    1
round_F            1
round_QF           1
round_R128         0
round_R16          1
round_R32          1
round_R64          1
round_RR           1
round_SF           1
dtype: int64


  rf_regr.fit(Xtrain, ytrain)
  rf_regr.fit(Xtrain, ytrain)


Learner Tien
no Learner Tien in train data
Rinky Hijikata
Rinky Hijikata has 1 recorded matches.
Not enough columns data but still training a model
Rinky Hijikata has 1 recorded matches.
minutes            0
opp_ht             0
opp_age            0
rank_diff          0
consecutive        0
surface_Hard       0
best_of_3          1
opp_hand_R         1
tourney_level_A    1
round_R32          1
dtype: int64
Training score :  0.17753484715859413
Testing score :  -7.320937496339719




In [93]:
len(ytest_list)

124

In [94]:
len(ypred_list)

124

In [103]:
ypred = []
for ypred_player in ypred_list:
    for pred in ypred_player:
        ypred.append(pred)
        
ytest = []
for ytest_player in ytest_list:
    for test in ytest_player:
        ytest.append(test[0])

In [104]:
ypred[:10]

[113.67166161315592,
 112.58074790611323,
 113.31878101923766,
 113.90221430410016,
 113.90221430410016,
 113.90221430410016,
 113.2578435998127,
 120.64835195064465,
 120.58753760143372,
 120.43322472014273]

In [105]:
ytest[:10]

[105.0, 178.0, 113.0, 84.0, 55.0, 76.0, 175.0, 198.0, 180.0, 179.0]

In [108]:
from sklearn.metrics import r2_score

r2_score(ytest, ypred)

-0.18498178029905787

In [None]:
### REGRESSION FOREST ###

test_scores = []
predictions = []
average_prediction = -1

iter = 10

# UI

iter_label = widgets.Label('Nb of trees')
iter_slider = widgets.IntSlider(value=iter, min=2, max=100)
iter_text = widgets.Text()
widgets.jslink((iter_slider, 'value'), (iter_text, 'value'))
iter_input = widgets.HBox([iter_label, iter_slider, iter_text])

loading_bar = widgets.IntProgress(
    value=0,
    min=0,
    max=iter,
    bar_style='',
    orientation='horizontal'
)
loading_label = widgets.Label(value='Planting Trees...')
loading = widgets.HBox([loading_label, loading_bar])

# The Loop

def random_forest_loop() :
    
    global average_prediction

    for i in range(iter) :

        Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.3)
        dtr_cv.fit(Xtrain, ytrain)

        best_params = dtr_cv.best_params_
        print(f"Best parameters: {best_params})")

        dtr = DecisionTreeRegressor(**best_params)
        dtr.fit(Xtrain, ytrain)

        # Testing :

        ypred = dtr.predict(Xtest)

        print(f"Training score {i+1} : ", dtr.score(Xtrain, ytrain))
        print(f"Testing score {i+1} : ", dtr.score(Xtest, ytest))
        test_scores.append(dtr.score(Xtest, ytest))

        predictions.append(custom_predict(dtr, scale_x, scale_y, ht, a, rk, c, s, bo, h, l, r))     # ???

        # visualize_tree(dtr)

        loading_bar.value += 1
        if (i==iter-1) :
            loading_bar.bar_style='success'


        print("------------------------------------------------------------------------------------------------")

    print("------------------------------------------------------------------------------------------------")
    print("Test scores : ", test_scores)
    print("Test scores average : ", sum(test_scores)/len(test_scores))

    average_prediction = sum(predictions) / len(predictions)

    print("------------------------------------------------------------------------------------------------")
    print(f"Predictions : {predictions}")
    print(f"Average predicted length : {average_prediction}")


launch_btn = widgets.Button(description='Plant Forest')

def launch_btn_onclick(change) :
    global iter
    iter = iter_slider.value
    loading_bar.max = iter
    display(loading)
    random_forest_loop()

launch_btn.on_click(launch_btn_onclick)

display(iter_input)
display(launch_btn)

# launch_btn.click()

HBox(children=(Label(value='Nb of trees'), IntSlider(value=10, min=2), Text(value='')))

Button(description='Plant Forest', style=ButtonStyle())

TraitError: The 'value' trait of a Text instance expected a unicode string, not the int 10.

## Regression Decision Tree

We will create a decision tree using these features :
- Surface  
- Best of  
- Opponent hand  
- Opponent height  
- Opponent country  
- Ranking difference  
- Tournament level  
- Match round (QF, SF, F, etc...)
- Player form  

Some of these features seem irrelevant, and they probably are. However, since we will use GridSearchCV to perform feature selection, we will feed the model all the information we have and let it select what is important (ie. the optimal splits in terms of information gain).

### Formatting

The "player.csv" table contains many unecessary columns. Here, we create a player1 table containing all the features we potentially need for our model and format them correctly. Specifically, we use one-hot encoding to split categorical data into multiple boolean columns.


#### Pre-processing

For pre-processing we have three options :
- not scaling x or y
- scaling both x and y
- scaling x but not y   
  
I feel like scaling gives us better results but adds a hurdle in interpreting the tree's visualization since it displays scaled values. We can scale both input and output values back, just not display them with sklearns's plot_tree (or any tree visualization that I've found so far). For now, the roundabout solution is just to print out the scaled input and output.

#### Building the tree

We will setup 3 functions in order to build and exploit our tree model :  

**1. Parameter Selection** : To build a good tree, we need to select values for the tree's parameters (e.g. max depth, minimum samples per leaf). We'll perform this "Hyperparameter Tuning" using GridSearchCV, a library which selects the best tree by trying every combination of parameters we give it and performing a cross validation. The trees are commpared based on the R² criteria. We could say this is the actual building of the tree.
  
![score formula](res/score.png "sklearn score")  

**2. Custom Prediction** : Our end goal is to predict a match with any given variables. This function does that by building input and output wrapping around the native *dtr.predict* function. It looks complicated because we distinguish 3 cases depending on if we're scaling x and y. The usefulness of this disjunction remains to be discussed.

**3. Tree Visualization** : Sklearn provides a native *dtr.plot* function but doesn't label the branches with True/False. I've looked for another library - pydot - which does that.

In [10]:
### TREE VISUALIZATION ###

# Without True/False (sklearn)

# x_ax = range(len(ytest))
# plt.plot(x_ax, ytest, linewidth=1, label="original")
# plt.plot(x_ax, ypred, linewidth=1.1, label="predicted")
# plt.title("Y-test and y-predicted data")
# plt.xlabel('X-axis')
# plt.ylabel('Y-axis')
# plt.legend(loc='best',fancybox=True, shadow=True)
# plt.grid(True)
# plt.show()

# plt.figure(figsize=(30,15))
# tree.plot_tree(dtr,
#           filled=True,
#           rounded=True,
#           fontsize=10,
#           feature_names=["opp_ht", "opp_age", "rank_diff", "consecutive", "surface_Carpet", "surface_Clay", "surface_Grass", "surface_Hard", 
#                          "best_of_3", "best_of_5", "opp_hand_L", "opp_hand_R", 
#                          'tourney_level_A', 'tourney_level_D', 'tourney_level_G', 'tourney_level_M', 
#                          'round_F', 'round_QF', 'round_R128', 'round_R16', 'round_R32', 'round_R64', 'round_RR', 'round_SF'])

# # plt.savefig('tree_high_dpi', dpi=600)

# # With True/False label (pydot)

from IPython.display import Image
from six import StringIO
from sklearn.tree import export_graphviz
import pydot

def visualize_tree(dtr) :

    features = list(player1.columns)
    features.remove("minutes")

    dot_data = StringIO()
    export_graphviz(dtr, out_file=dot_data, feature_names=features, filled=True)
    graph = pydot.graph_from_dot_data(dot_data.getvalue())
    display(Image(graph[0].create_png()))

ModuleNotFoundError: No module named 'pydot'

### User Interface

We're building a simple UI to input match and player variables using ipywidgets.

In [12]:
import ipywidgets as widgets
from IPython.display import display

In [13]:
### INTERFACE ###

PLAYER1 = PLAYER
PLAYER2 = 'Rafael Nadal'

# Match Settings

match_settings_title = widgets.Label(value='MATCH CONDITIONS')

surface_radio = widgets.RadioButtons(
    options=['Hard', 'Grass', 'Clay'],
    description='Surface',
    disabled=False
)

best_of_radio = widgets.RadioButtons(
    options=['3', '5'],
    description='Best of',
    disabled=False
)

level_dropdown = widgets.Dropdown(
    description="Level",
    options=['Grand Slam (G)', 'Masters 1000s (M)', 'Other (A)']
)

round_dropdown = widgets.Dropdown(
    description="Round",
    options=['Final', 'Semifinals', 'Quarterfinals', 'R16', 'R32', 'R64', 'R128', 'R']
)

match_inputs_1 = widgets.HBox([surface_radio, best_of_radio])
match_inputs_2 = widgets.HBox([level_dropdown, round_dropdown])

# Players

player1_title = widgets.Label(value='PLAYER 1')

player1_text = widgets.Text(value=PLAYER1, placeholder="Player 1", description='Name')

player1_height_display = widgets.Text(description='Height')
player1_hand_display = widgets.Text(description='Hand')
player1_rank_display = widgets.Text(description='Rank')
player1_age_display = widgets.Text(description='Age')
player1_cons_display = widgets.Text(description='Win streak')

player1_height_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'height'].values[0])
player1_hand_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'hand'].values[0])
player1_rank_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'rank'].values[0])
player1_age_display.value = '25'
player1_cons_display.value = '0'

player1_widgets = widgets.VBox([player1_title, player1_text, player1_height_display, player1_hand_display, player1_rank_display, player1_age_display, player1_cons_display])


player2_title = widgets.Label(value='PLAYER 2')

player2_text = widgets.Text(value=PLAYER2, placeholder="Player 2", description='Name')

player2_height_display = widgets.Text(description='Height')
player2_hand_display = widgets.Text(description='Hand')
player2_rank_display = widgets.Text(description='Rank')
player2_age_display = widgets.Text(description='Age')
player2_cons_display = widgets.Text(description='Win streak')

player2_height_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'height'].values[0])
player2_hand_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'hand'].values[0])
player2_rank_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'rank'].values[0])
player2_age_display.value = '25'
player2_cons_display.value = '0'

player2_widgets = widgets.VBox([player2_title, player2_text, player2_height_display, player2_hand_display, player2_rank_display, player2_age_display, player2_cons_display])


player_inputs = widgets.HBox([player1_widgets, player2_widgets])

display(match_settings_title, match_inputs_1, match_inputs_2)
display(player_inputs)



### BEHAVIOR ###

def player1_eventhandler(change):
    global PLAYER1
    if change.new in atp_players.name.values :
        PLAYER1 = change.new
        player1_height_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'height'].values[0])
        player1_hand_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'hand'].values[0])
        player1_rank_display.value = str(atp_players.loc[atp_players.name==PLAYER1, 'rank'].values[0])
        
def player2_eventhandler(change):
    global PLAYER2
    if change.new in atp_players.name.values :
        PLAYER2 = change.new
        player2_height_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'height'].values[0])
        player2_hand_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'hand'].values[0])
        player2_rank_display.value = str(atp_players.loc[atp_players.name==PLAYER2, 'rank'].values[0])

player1_text.observe(player1_eventhandler, names='value')
player2_text.observe(player2_eventhandler, names='value')

Label(value='MATCH CONDITIONS')

HBox(children=(RadioButtons(description='Surface', options=('Hard', 'Grass', 'Clay'), value='Hard'), RadioButt…

HBox(children=(Dropdown(description='Level', options=('Grand Slam (G)', 'Masters 1000s (M)', 'Other (A)'), val…

HBox(children=(VBox(children=(Label(value='PLAYER 1'), Text(value='Felix Auger Aliassime', description='Name',…

In [14]:
print(PLAYER1)

Felix Auger Aliassime


## Regression Forest

Finally, we're gonna setup a loop to build any number of trees to form a Decision Forest. We'll then visualize the predictions via a histogram acting as a distribution chart. We can set the number of trees (to get an idea, a tree takes 8-10 seconds) as well as the time intervals for the distribution.

In [16]:
# Converting the user inputs into inputs usable by the custom_predict function

sd = { 'Clay':1, 'Grass':2, 'Hard':3 }                                                                  # Surface dictionary
bod = { '3':1, '5':2 }                                                                                  # Best of dictionary
hd = { 'L':1, 'R':2 }                                                                                   # Hand dictionary
ld = { 'Other (A)':1, 'Grand Slam (G)':2, 'Masters 1000s (M)':3 }    # Level dictionary
rd = { 'Final':1, 'Quarterfinals':2, 'R128':3, 'R16':4, 'R32':5, 'R64':6, 'R':7, 'Semifinals':8 }       # Round dictionary

def t2i(dict, text) :   # text to input
    return dict[text]

ht = float(player2_height_display.value)
a = float(player2_age_display.value)
rk = float(player1_rank_display.value) - float(player2_rank_display.value)
c = int(player1_cons_display.value)
s = t2i(sd, surface_radio.value)
bo = int(t2i(bod, best_of_radio.value))
h = t2i(hd, player2_hand_display.value)
l = t2i(ld, level_dropdown.value)
r = t2i(rd, round_dropdown.value)

In [12]:
### HISTOGRAM ###

# ATP Colors
# Dark blue : #002865
# Light blue : #00AFF0
# Rolland Garros Orange : #CB5A19
# Tennis Ball Yellow : #8CFF00

time_step = 15  # precision : 15/20/30 minutes

def plot_distribution(time_step) : 
    
    fig, ax = plt.subplots(1, figsize=(18,6))

    plt.suptitle(f'Distribution of Match Length Predictions : {PLAYER1} v. {PLAYER2}', fontweight='bold')
    plt.xlabel("Minutes")
    # plt.ylabel("Nb of predictions")

    ax.spines['bottom'].set_visible(True)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['right'].set_visible(False)

    plt.yticks([])
    
    
    intervals=np.arange(0, 300, time_step)
    n, bins, patches = plt.hist(predictions, bins=intervals, color='#00AFF0')

    plt.xticks(bins)
    plt.grid(color='white', lw = 1, axis='x')

    xticks = [(bins[idx+1] + value)/2 for idx, value in enumerate(bins[:-1])]

    for idx, value in enumerate(n) :
        if value > 0 :
            plt.text(xticks[idx], value * 1.05, f"{int(100 * value / iter)}%", ha='center')
            plt.text(xticks[idx], value / 2, int(value), ha='center', color='w', fontweight='semibold')
        
    if (average_prediction >= 0) : plt.axvline(x=average_prediction, color='#CB5A19')
    # plt.axvline(x=116, color='#8CFF00')
    # plt.legend()


    plt.show()

out = widgets.Output()
time_step_slider = widgets.IntSlider(value=time_step, min=5, max=45, step=5, description='Time Step')

with out:
    plot_distribution(time_step)

def time_step_slider_eventhandler(change) :
    out.clear_output()
    with out:
        plot_distribution(change.new)

time_step_slider.observe(time_step_slider_eventhandler, names='value')

display(time_step_slider, out)

IntSlider(value=15, description='Time Step', max=45, min=5, step=5)

Output()