In [1]:
import polars as pl
import joblib
from sklearn.preprocessing import StandardScaler
import numpy as np
import os

In [2]:
# Charger le modèle et les données
model = joblib.load("../data/best_model.joblib")
player_data = pl.read_csv("../data/tennis_dataset_app.csv")


In [3]:
print("Colonnes :\n", player_data.columns)

Colonnes :
 ['player1_name', 'player1_age', 'player1_ranking', 'player1_points', 'player1_win_rate', 'player1_win_rate_3_sets', 'player1_win_rate_tiebreak', 'player1_total_matches_dure', 'player1_win_rate_dure', 'player1_total_matches_terre battue', 'player1_win_rate_terre battue', 'player1_total_matches_gazon', 'player1_win_rate_gazon', 'player1_total_matches_salle', 'player1_win_rate_salle', 'player1_total_matches_carpet', 'player1_win_rate_carpet', 'player1_total_matches_acryl', 'player1_win_rate_acryl', 'player1_avg_first_serve_pct', 'player1_avg_first_serve_won_pct', 'player1_avg_second_serve_won_pct', 'player1_avg_return_points_won_pct', 'player1_avg_break_point_won_pct', 'player1_avg_double_fautes', 'player1_avg_aces']


In [4]:
players_list = sorted(player_data["player1_name"].unique().to_list())

print(players_list)

['Adam Walton', 'Adrian Mannarino', 'Albert Ramos-Vinolas', 'Alejandro Davidovich Fokina', 'Alejandro Moro Canas', 'Alejandro Tabilo', 'Aleksandar Kovacevic', 'Aleksandar Vukic', 'Alex Bolt', 'Alex De Minaur', 'Alex Michelsen', 'Alexander Bublik', 'Alexander Ritschard', 'Alexander Shevchenko', 'Alexander Zverev', 'Alexandre Muller', 'Alexei Popyrin', 'Alibek Kachmazov', 'Andrey Rublev', 'Arthur Cazaux', 'Arthur Fils', 'Arthur Rinderknech', 'August Holmgren', 'Ben Shelton', 'Benjamin Bonzi', 'Billy Harris', 'Borna Coric', 'Botic Van De Zandschulp', 'Brandon Holt', 'Brandon Nakashima', 'Bu Yunchaokete', 'Cameron Norrie', 'Camilo Ugo Carabelli', 'Carlos Alcaraz', 'Carlos Taberner', 'Casper Ruud', 'Chak Lam Coleman Wong', 'Christopher Eubanks', "Christopher O'Connell", 'Chun Hsin Tseng', 'Clement Chidekh', 'Constant Lestienne', 'Corentin Moutet', 'Cristian Garin', 'Damir Dzumhur', 'Daniel Altmaier', 'Daniel Elahi Galan', 'Daniel Evans', 'Daniil Medvedev', 'David Goffin', 'Denis Shapovalov'

In [5]:
player1 = players_list[0]  # Premier joueur de la liste
player2 = players_list[1]  # Deuxième joueur de la liste
print(f"\nJoueurs sélectionnés : {player1} vs {player2}")


Joueurs sélectionnés : Adam Walton vs Adrian Mannarino


In [6]:
# Récupérer les données des joueurs
player1_stats = player_data.filter(pl.col(player_data.columns[0]) == player1).select(
    pl.col(player_data.columns[1:26])
).to_numpy()

player2_stats = player_data.filter(pl.col(player_data.columns[0]) == player2).select(
    pl.col(player_data.columns[1:26])
).to_numpy()


print(player1_stats)

print(player2_stats)

[[2.50000000e+01 9.30000000e+01 6.36000000e+02 5.10204082e-01
  4.11764706e-01 6.84210526e-01 3.20000000e+01 5.31250000e-01
  1.00000000e+00 0.00000000e+00 9.00000000e+00 4.44444444e-01
  7.00000000e+00 5.71428571e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 6.33420064e-01 7.47603834e-01
  5.32334385e-01 3.55411499e-01 3.75939850e-01 2.22448980e+00
  6.42857143e+00]]
[[3.60000000e+01 6.60000000e+01 7.79000000e+02 3.54166667e-01
  4.61538462e-01 4.66666667e-01 2.40000000e+01 3.33333333e-01
  5.00000000e+00 0.00000000e+00 6.00000000e+00 1.66666667e-01
  1.30000000e+01 6.15384615e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 5.94896332e-01 6.72475424e-01
  5.11154856e-01 3.41594828e-01 3.86029412e-01 2.31914894e+00
  4.44680851e+00]]


In [7]:
match_data = np.concatenate([player1_stats[0], player2_stats[0]]).reshape(1, -1)

print(match_data)

[[2.50000000e+01 9.30000000e+01 6.36000000e+02 5.10204082e-01
  4.11764706e-01 6.84210526e-01 3.20000000e+01 5.31250000e-01
  1.00000000e+00 0.00000000e+00 9.00000000e+00 4.44444444e-01
  7.00000000e+00 5.71428571e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 6.33420064e-01 7.47603834e-01
  5.32334385e-01 3.55411499e-01 3.75939850e-01 2.22448980e+00
  6.42857143e+00 3.60000000e+01 6.60000000e+01 7.79000000e+02
  3.54166667e-01 4.61538462e-01 4.66666667e-01 2.40000000e+01
  3.33333333e-01 5.00000000e+00 0.00000000e+00 6.00000000e+00
  1.66666667e-01 1.30000000e+01 6.15384615e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 5.94896332e-01
  6.72475424e-01 5.11154856e-01 3.41594828e-01 3.86029412e-01
  2.31914894e+00 4.44680851e+00]]


In [8]:
#Diff ranking
ranking_player1 = match_data[0][1]  # Correspond à 9.30000000e+01
ranking_player2 = match_data[0][26]  # Correspond à 6.60000000e+01

# Calculer la différence absolue de ranking
rank_difference = abs(ranking_player1 - ranking_player2)


In [9]:
print(ranking_player1, ranking_player2)

print(rank_difference)

93.0 66.0
27.0


In [10]:
#points_diff
points_player1 = match_data[0][2]  
points_player2 = match_data[0][27]  

# Calculer la différence absolue de ranking
points_difference = abs(points_player1 - points_player2)

In [11]:
print(points_player1, points_player2, points_difference)

636.0 779.0 143.0


In [12]:
#win_rate_diff

win_rate_player1 = match_data[0][3]
win_rate_player2 = match_data[0][28]

win_rate_difference = abs(win_rate_player1 - win_rate_player2)

In [13]:
print(win_rate_player1, win_rate_player2, win_rate_difference)

0.5102040816326531 0.3541666666666667 0.1560374149659864


In [14]:
#win_rate_diff3

win_rate3_player1 = match_data[0][4]
win_rate3_player2 = match_data[0][29]

win_rate_difference3 = abs(win_rate3_player1 - win_rate3_player2)

In [15]:
print(win_rate3_player1, win_rate3_player2, win_rate_difference3)

0.4117647058823529 0.46153846153846156 0.04977375565610864


In [16]:
#win_rate_tiebreak_diff

win_rate_tiebreak_player1 = match_data[0][5]
win_rate_tiebreak_player2 = match_data[0][30]

win_rate_tiebreak_difference = abs(win_rate_tiebreak_player1 - win_rate_tiebreak_player2)

In [17]:
print(win_rate_tiebreak_player1, win_rate_tiebreak_player2, win_rate_tiebreak_difference)

0.6842105263157895 0.4666666666666667 0.21754385964912282


In [18]:
#aces_diff

aces_player1 = match_data[0][24]
aces_player2 = match_data[0][49]

aces_difference = abs(aces_player1 - aces_player2)

In [19]:
print(aces_player1, aces_player2, aces_difference)

6.428571428571429 4.446808510638298 1.981762917933131


In [20]:
#double_faults_diff

double_faults_player1 = match_data[0][23]
double_faults_player2 = match_data[0][48]

double_faults_difference = abs(double_faults_player1 - double_faults_player2)

In [21]:
print(double_faults_player1, double_faults_player2, double_faults_difference)

2.2244897959183674 2.3191489361702127 0.0946591402518453


In [22]:
#win_rate_dure_diff

win_rate_dure_player1 = match_data[0][7]
win_rate_dure_player2 = match_data[0][32]

win_rate_dure_difference = abs(win_rate_dure_player1 - win_rate_dure_player2)

In [23]:
print(win_rate_dure_player1, win_rate_dure_player2, win_rate_dure_difference)

0.53125 0.3333333333333333 0.19791666666666669


In [24]:
#win_rate_terre_diff

win_rate_terre_player1 = match_data[0][9]
win_rate_terre_player2 = match_data[0][34]

win_rate_terre_difference = abs(win_rate_terre_player1 - win_rate_terre_player2)

In [25]:
print(win_rate_terre_player1, win_rate_terre_player2, win_rate_terre_difference)

0.0 0.0 0.0


In [26]:
#win_rate_gazon_diff

win_rate_gazon_player1 = match_data[0][11]
win_rate_gazon_player2 = match_data[0][36]

win_rate_gazon_difference = abs(win_rate_gazon_player1 - win_rate_gazon_player2)

In [27]:
print( win_rate_gazon_player1, win_rate_gazon_player2, win_rate_gazon_difference)

0.4444444444444444 0.16666666666666666 0.2777777777777778


In [28]:
#win_rate_salle_diff

win_rate_salle_player1 = match_data[0][13]
win_rate_salle_player2 = match_data[0][38]

win_rate_salle_difference = abs(win_rate_salle_player1 - win_rate_salle_player2)

In [29]:
print(win_rate_salle_player1, win_rate_salle_player2, win_rate_salle_difference)

0.5714285714285714 0.6153846153846154 0.04395604395604402


In [30]:
# Ajouter cette valeur à la fin de match_data
additional_features = [
    rank_difference,
    points_difference,
    win_rate_difference,
    win_rate_difference3,
    win_rate_tiebreak_difference,
    aces_difference,
    double_faults_difference,
    win_rate_dure_difference,
    win_rate_terre_difference,
    win_rate_gazon_difference,
    win_rate_salle_difference
]

# Ajouter ces nouvelles valeurs à la fin de match_data
match_data_with_diff = np.append(match_data, [additional_features], axis=1)

In [31]:
print(match_data_with_diff)

[[2.50000000e+01 9.30000000e+01 6.36000000e+02 5.10204082e-01
  4.11764706e-01 6.84210526e-01 3.20000000e+01 5.31250000e-01
  1.00000000e+00 0.00000000e+00 9.00000000e+00 4.44444444e-01
  7.00000000e+00 5.71428571e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 6.33420064e-01 7.47603834e-01
  5.32334385e-01 3.55411499e-01 3.75939850e-01 2.22448980e+00
  6.42857143e+00 3.60000000e+01 6.60000000e+01 7.79000000e+02
  3.54166667e-01 4.61538462e-01 4.66666667e-01 2.40000000e+01
  3.33333333e-01 5.00000000e+00 0.00000000e+00 6.00000000e+00
  1.66666667e-01 1.30000000e+01 6.15384615e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 5.94896332e-01
  6.72475424e-01 5.11154856e-01 3.41594828e-01 3.86029412e-01
  2.31914894e+00 4.44680851e+00 2.70000000e+01 1.43000000e+02
  1.56037415e-01 4.97737557e-02 2.17543860e-01 1.98176292e+00
  9.46591403e-02 1.97916667e-01 0.00000000e+00 2.77777778e-01
  4.39560440e-02]]


In [32]:




# Faire la prédiction
probabilities = model.predict_proba(match_data)
print(f"\nProbabilités de victoire :")
print(f"{player1}: {probabilities[0][0]*100:.1f}%")
print(f"{player2}: {probabilities[0][1]*100:.1f}%")

ValueError: X has 50 features, but RandomForestClassifier is expecting 66 features as input.