In [None]:
import requests
import pandas as pd
from data_processor import TennisDataProcessor, to_player, to_average,get_fatigue_stats

# Tournament round codes.
# NOTE(review): presumably ordered from earliest round to the final, with "RR"
# standing for round robin — confirm. Not referenced in the cells visible here.
ROUND_ORDER = ["RR", "R128", "R64", "R32", "R16", "QF", "SF", "F"]

# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None
# URL template for the per-year ATP match CSVs; `{}` is filled with the year.
url = "https://raw.githubusercontent.com/JeffSackmann/tennis_atp/refs/heads/master/atp_matches_{}.csv"
# Download one CSV per season for 2000 through 2024 (range end is exclusive) and
# stack them into a single frame. This performs 25 network requests on every run.
# NOTE(review): concat is called without ignore_index=True, so each yearly frame
# keeps its own index and the combined index contains repeated labels — confirm
# that downstream code does not rely on a unique index.
data_20s = pd.concat([pd.read_csv(url.format(year)) for year in range(2000, 2025)])


In [None]:
# Hand the processor a copy so the raw download in `data_20s` is not shared with it.
tdp = TennisDataProcessor(data_20s.copy())
# Match-level data produced by the project helper; its exact columns are defined in
# data_processor and are not visible in this notebook.
derived_data = tdp.derive_match_data()

In [None]:
def build_player_frame(player_name, matches, lookback=10):
    """Run the per-player pipeline for one player.

    Chains the project helpers ``to_player`` -> ``to_average`` ->
    ``get_fatigue_stats`` (all imported from ``data_processor``; their exact
    semantics are defined there, not in this notebook).

    Parameters
    ----------
    player_name : str
        Player name passed to ``to_player``.
    matches : pandas.DataFrame
        Match-level data passed to ``to_player`` (``derived_data`` in this notebook).
    lookback : int, default 10
        Forwarded to ``to_average`` as its ``lookback`` argument.

    Returns
    -------
    Whatever ``get_fatigue_stats`` returns (used below as a DataFrame with an
    ``opponent_name`` column).
    """
    return get_fatigue_stats(
        to_average(to_player(player_name, matches), lookback=lookback)
    )


player_1_name = "Novak Djokovic"
player_2_name = "Roger Federer"

# The previous version called `sort_values(by=["player_rank_points"], ...)` on each
# frame but discarded the result (sort_values is not in-place and the call was not
# the last expression of the cell), so it neither sorted nor displayed anything.
# Those no-op calls are removed; row order is exactly what the pipeline returns.
# If a ranking-points ordering is wanted, assign the sorted frame explicitly.
player_1_full = build_player_frame(player_1_name, derived_data)
player_2_full = build_player_frame(player_2_name, derived_data)

# Head-to-head slices: each player's rows against the other, first 15 rows only.
# The name variables are used instead of repeating the string literals so that
# changing a player above keeps these filters consistent.
novvsfed = player_1_full.loc[player_1_full["opponent_name"] == player_2_name].head(15)
fedvsnov = player_2_full.loc[player_2_full["opponent_name"] == player_1_name].head(15)

In [None]:
# Columns shared by both head-to-head slices that are used to pair up the two
# players' rows for the same match.
match_keys = [
    'surface',
    'tourney_level',
    'tourney_date',
    'tourney_name',
    'round',
    'best_of',
]

# Inner join: keep only matches present in both slices. Non-key columns that exist
# in both frames get the `_djokovic` (left) / `_federer` (right) suffix.
df = novvsfed.merge(
    fedvsnov,
    how='inner',
    on=match_keys,
    suffixes=('_djokovic', '_federer'),
)
def reorder_players(row, keys=None, suffixes=('_djokovic', '_federer')):
    """Re-label one merged head-to-head row from per-player to winner/loser columns.

    The row is expected to come from a merge whose overlapping columns carry the
    two ``suffixes``. Columns ending in the winner's suffix are renamed to
    ``winner_<base>`` and columns ending in the loser's suffix to ``loser_<base>``,
    where ``<base>`` is the column name with the suffix stripped.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged frame (as passed by ``DataFrame.apply(axis=1)``).
        Must contain ``result<first suffix>`` (``result_djokovic`` by default).
    keys : list of str, optional
        Match-identifying columns copied through unchanged. Defaults to the
        notebook-level ``match_keys`` so existing ``df.apply(reorder_players, axis=1)``
        calls keep working.
    suffixes : tuple of (str, str), default ``('_djokovic', '_federer')``
        The merge suffixes of the first and second player.

    Returns
    -------
    pandas.Series
        Key columns first, then ``winner_*`` columns, then ``loser_*`` columns.

    Notes
    -----
    The first player is treated as the winner only when ``result<first suffix>``
    equals the string ``'win'``; any other value makes the second player the winner.
    Suffixes are matched at the END of the column name only. The earlier substring
    test (``'_djokovic' in col`` plus ``str.replace``) would also hit a column that
    merely contains the token in the middle and strip it from there.
    """
    key_cols = match_keys if keys is None else keys
    first_suffix, second_suffix = suffixes

    if row[f'result{first_suffix}'] == 'win':
        winner_suffix, loser_suffix = first_suffix, second_suffix
    else:
        winner_suffix, loser_suffix = second_suffix, first_suffix

    def _relabel(prefix, suffix):
        # Collect every column carrying `suffix` and rename it to `<prefix>_<base>`.
        return {
            f'{prefix}_{col[:len(col) - len(suffix)]}': row[col]
            for col in row.index
            if col.endswith(suffix)
        }

    match_data = {key: row[key] for key in key_cols}
    return pd.Series({
        **match_data,
        **_relabel('winner', winner_suffix),
        **_relabel('loser', loser_suffix),
    })

# Apply reorder_players to every merged row (axis=1 passes each row as a Series);
# the returned Series become the rows of the winner/loser-oriented frame.
df_final = df.apply(reorder_players, axis=1)
# Last expression of the cell: display the resulting frame.
df_final

In [None]:
# Next steps:
# - Change the columns of df_final
# - Build the model