In [66]:
# !git pull

In [104]:
# Notebook display helpers. `IPython.display` is the public import location;
# pulling these names from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject CSS so the notebook container uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

from ipywidgets import interact, interact_manual, interactive
import ipywidgets as widgets

In [117]:
# import libraries
import os, glob, wget

import pandas as pd
import numpy as np

import plotly.express as px
import difflib

import warnings
# Ignore every FutureWarning raised after this point so deprecation notices
# from the libraries used below do not clutter the cell outputs.
warnings.simplefilter(action = 'ignore', category = FutureWarning)

In [108]:
# Build the full historical match dataset from the yearly result files
# (`atp_matches_YYYY.csv`) found in the working directory.
# The glob pattern only matches four-character year suffixes, so
# `atp_players.csv` can never be picked up here.
Files = sorted(glob.glob('atp_matches_????.csv'))
if not Files:
    raise FileNotFoundError("No 'atp_matches_????.csv' file found in the working directory")

# Read every season once and concatenate in a single call; growing a frame
# with `pd.concat` inside a loop copies all previous rows at each iteration.
df = pd.concat([pd.read_csv(file, sep = ',') for file in Files], sort = False)

# `tourney_date` is parsed as YYYYMMDD; the explicit format makes format
# inference options unnecessary.
df['tourney_date'] = pd.to_datetime(df['tourney_date'].astype(str), format = '%Y%m%d')
df['tourney_year'] = df['tourney_date'].dt.year
df['tourney_month'] = df['tourney_date'].dt.month
df['tourney_weekday'] = df['tourney_date'].dt.day_name()
df = df.sort_values('tourney_date').reset_index(drop = True)

# Only matches played on or after this date are kept.
Date_Min = '2000-01-01'
Mask_Date = (df['tourney_date'] >= Date_Min)

Tourne_Level_Dict = {
    'G' : 'Grand Slam',
    'M' : 'Masters 1000',
    'A' : 'Other tour-level event',
    'C' : 'Challenger',
    'S' : 'Satellite/ITF',
    'F' : 'Tour final',
    'D' : 'Davis Cup',
}

# Translate the level codes in `tourney_level` ONLY. A frame-wide
# `df.replace(...)` would also rewrite identical one-letter codes stored in
# unrelated columns (any cell equal to 'F', 'A', 'S', ... in any column).
df['tourney_level'] = df['tourney_level'].replace(Tourne_Level_Dict)

df = df[Mask_Date]

In [109]:
# Player reference table. The file is read as header-less and the six columns
# are named here.
df_Players = pd.read_csv('atp_players.csv', sep = ',', header = None)

df_Players.columns = ['id', 'First Name', 'Family Name', 'Hand', 'Birth Date', 'Nationality']

# Birth dates become strings such as '19810808.0' when the column is parsed as
# float; strip that trailing '.0' only. The pattern is escaped and anchored
# because an unescaped '.0' treated as a regular expression means "any
# character followed by 0" and would delete digits inside the date itself.
df_Players['Birth Date'] = df_Players['Birth Date'].astype(str).str.replace(r'\.0$', '', regex = True)

# NaN when either name part is missing.
df_Players['Full Name'] = df_Players['First Name'] + ' ' + df_Players['Family Name']

In [110]:
# Attach the player table twice with left joins: first keyed on the winner,
# then on the loser. On the second join the player columns already present on
# the left keep their names, and the ones coming from the right receive the
# '_loser' suffix.
df = (
    df
    .merge(df_Players, left_on = 'winner_id', right_on = 'id', how = 'left', suffixes = ('', '_winner'))
    .merge(df_Players, left_on = 'loser_id', right_on = 'id', how = 'left', suffixes = ('', '_loser'))
)

In [111]:
# Give the unsuffixed player columns an explicit '_winner' suffix so they
# mirror the '_loser' columns.
df = df.rename(columns = {
    Name: Name + '_winner'
    for Name in ('id', 'First Name', 'Family Name', 'Full Name', 'Hand', 'Birth Date', 'Nationality')
})

In [112]:
# Extract every match of one player, whether won or lost.
Player = 'Benoit Paire'

# The player must be looked up in BOTH name columns; testing the winner column
# twice would silently drop all of the player's defeats.
# `na = False` treats missing names as "no match" so the mask stays boolean,
# and `regex = False` makes the name a literal substring.
Mask_Player = (
    df['Full Name_winner'].str.contains(Player, regex = False, na = False)
    | df['Full Name_loser'].str.contains(Player, regex = False, na = False)
)

df_Player = df[Mask_Player].reset_index(drop = True)

# 1 when the player won the match, 0 otherwise.
Mask_Player_wins = df_Player['Full Name_winner'] == Player
df_Player['Player Wins Flag'] = np.where(Mask_Player_wins, 1, 0)
# Running match counter, taken from the freshly reset index.
df_Player['time_idx'] = df_Player.index

# df_Player.to_csv('data.csv')

In [113]:
# Players offered in the dashboard: winners of at least one 2021 match while
# ranked inside the top `Top`.
Mask_Active_Player = (df['tourney_year'] == 2021)
Top = 20
Mask_Top_Players = (df['winner_rank'] <= Top)

# Missing names are dropped before sorting: a NaN mixed with strings cannot be
# ordered and is not a usable widget option.
Player_List = sorted(
    df.loc[Mask_Active_Player & Mask_Top_Players, 'Full Name_winner'].dropna().unique().tolist()
)

# Distinct tournament levels present in the data (missing values excluded).
Tourney_Level_List = df['tourney_level'].dropna().unique().tolist()

In [114]:
# Every column selectable in the dashboard: the columns of `df` plus the
# derived columns that `Plot` adds to its working frame.
Variables = df.columns.tolist() + ['Player Wins Flag', 'Player Loses Flag', 'Player Age', 'Opponent Age', 'Opponent', 'Opponent Nationality', 'Player Ranking', 'Opponent Ranking', 'Opponent Hand']


def _Variable_Combobox(Default, Label):
    """Return a combobox that selects one entry of `Variables`.

    Default -- initially selected column name.
    Label   -- text used for the placeholder ('<Label>') and for the bold
               description ('<b>Label:</b>'); the bold tag is now closed.
    """
    return widgets.Combobox(
        value = Default,
        placeholder = '<' + Label + '>',
        description = '<b>' + Label + ':</b>',
        options = Variables,
        ensure_option = True,
        disabled = False,
        continuous_update = False)


x_Widget       = _Variable_Combobox('tourney_year', 'Bars')
y_Widget       = _Variable_Combobox('Player Wins Flag', 'Variable')
Color_Widget   = _Variable_Combobox('surface', 'Colors')
Pattern_Widget = _Variable_Combobox('tourney_name', 'Pattern')

# Player_List = ['Roger Federer', 'Rafael Nadal', 'Novak Djokovic', 'Benoit Paire', 'Stefanos Tsitsipas']
# Player_List = ['Federer', 'Nadal', 'Djokovic', 'Paire']

# Multi-selection of players. `SelectMultiple` does not define `placeholder`,
# `ensure_option` or `continuous_update`, so those arguments are not passed.
# The preferred default is used only when it is actually one of the options;
# otherwise the first available player (if any) is preselected.
Players_Widget = widgets.SelectMultiple(
    value = ['Roger Federer'] if 'Roger Federer' in Player_List else Player_List[:1],
    description = '<b>Players:</b>',
    options = Player_List,
    disabled = False)

# Multi-selection of tournament levels; every level is selected initially.
Level_Widget = widgets.SelectMultiple(
    value = Tourney_Level_List,
    description = '<b>Level:</b>',
    options = Tourney_Level_List,
    disabled = False)

# Inclusive range of seasons to display.
Widget_Years = widgets.IntRangeSlider(
    value = [2019, 2021],
    min = 2000,
    max = 2021,
    step = 1,
    description='Years:',
    disabled = False,
    continuous_update = False,
    orientation = 'horizontal'
)

In [116]:
display(HTML('<h2>ATP Tennis Stats Dashboard</h2>'))

@interact
def Plot(Years = Widget_Years, Tourney_Levels = Level_Widget, Players = Players_Widget, x = x_Widget, y = y_Widget, Color = Color_Widget):
    """Draw a grouped bar chart for the matches of the selected players.

    Parameters
    ----------
    Years : sequence whose first and last items are the first and last
        season to include (both inclusive).
    Tourney_Levels : iterable of `tourney_level` values to keep.
    Players : iterable of full player names; a match is kept when either the
        winner or the loser is one of them.
    x : column placed on the horizontal axis.
    y : column aggregated into the bar heights. The two 0/1 flag columns are
        summed (so 'Player Wins Flag' really shows wins); any other column is
        counted.
    Color : column used to split and colour the bars.

    The module-level `df` is read but never modified. Nothing is returned:
    the figure is shown, or a short message is printed when there is nothing
    to draw or when an error occurs.
    """
    try:
        Players = list(Players)
        Tourney_Levels = list(Tourney_Levels)

        Mask_Players = df['Full Name_winner'].isin(Players) | df['Full Name_loser'].isin(Players)
        Mask_Levels = df['tourney_level'].isin(Tourney_Levels)
        Mask_Years = df['tourney_year'].isin(range(Years[0], Years[-1] + 1))

        df_Player = df[Mask_Players & Mask_Years & Mask_Levels].reset_index(drop = True)

        # Nothing selected: say so instead of failing on a 0/0 win ratio.
        if df_Player.empty:
            print('No matches found for the current selection.')
            return

        Mask_Player_Wins = df_Player['Full Name_winner'].isin(Players)
        Mask_Player_Loses = df_Player['Full Name_loser'].isin(Players)
        df_Player['Player Wins Flag'] = Mask_Player_Wins.astype(int)
        df_Player['Player Loses Flag'] = Mask_Player_Loses.astype(int)

        # Derived column -> (source column when the selected player won,
        #                    source column when the selected player lost).
        Derived_Columns = {
            'Player Age'           : ('winner_age', 'loser_age'),
            'Opponent Age'         : ('loser_age', 'winner_age'),
            'Opponent'             : ('Full Name_loser', 'Full Name_winner'),
            'Opponent Nationality' : ('Nationality_loser', 'Nationality_winner'),
            'Player Ranking'       : ('winner_rank', 'loser_rank'),
            'Opponent Ranking'     : ('loser_rank', 'winner_rank'),
            'Opponent Hand'        : ('loser_hand', 'winner_hand'),
        }
        for New_Column, (If_Win, If_Loss) in Derived_Columns.items():
            df_Player[New_Column] = np.where(Mask_Player_Wins, df_Player[If_Win], df_Player[If_Loss])

        # Rankings become integers only when every value is known: casting a
        # missing ranking to int raises and used to abort the whole chart.
        for Rank_Column in ('Player Ranking', 'Opponent Ranking'):
            if not df_Player[Rank_Column].isna().any():
                df_Player[Rank_Column] = df_Player[Rank_Column].astype(int)

        # Share of the selected matches won, truncated to a whole percent.
        Win_Ratio = 100 * int(Mask_Player_Wins.sum()) // len(df_Player)

        # Group by the bar and colour columns (listed once if identical).
        Group_Keys = list(dict.fromkeys([x, Color]))
        if y in Group_Keys:
            print('Variable must differ from Bars and Colors.')
            return

        # Counting a 0/1 flag returns the number of matches, not the number
        # of wins/losses, so flags are summed; other columns are counted.
        Aggregation = 'sum' if y in ('Player Wins Flag', 'Player Loses Flag') else 'count'
        df_Groups = (
            df_Player[Group_Keys + [y]]
            .groupby(Group_Keys)
            .agg(Aggregation)
            .reset_index()
            .sort_values([x])
            .reset_index(drop = True)
        )

        Labels = {'Player Wins Flag' : 'Wins', 'Player Loses Flag' : 'Losses', 'tourney_year' : 'Year'}

        fig = px.bar(df_Groups, x = x, y = y, color = Color, labels = Labels)

        Players_Str = ''.join(Name + ', ' for Name in Players)
        Title = 'Player(s): ' + Players_Str + 'Win Ratio: ' + str(Win_Ratio) + '%'

        # Treat the x values as labels so that years are not interpolated.
        fig.update_xaxes(type = 'category')
        fig.update_layout(barmode = 'group', title_text = Title)

        fig.show()

    except Exception as Error:
        # Keep the dashboard usable after a bad selection, but report the
        # problem instead of silently displaying nothing.
        print('Could not draw the chart: ' + repr(Error))

interactive(children=(IntRangeSlider(value=(2019, 2021), continuous_update=False, description='Years:', max=20…

In [32]:
# Candidate family names for the fuzzy search. Missing names are dropped first
# so that the literal string 'nan' never becomes a candidate.
Players = df_Players['Family Name'].dropna().astype(str).unique().tolist()

# Free-text input holding the name to look up.
Widget_Name = widgets.Text(
    value = 'Flayac',
    placeholder = '<Type Name>',
    description = '<b>Name</b>',
    disabled = False,
    continuous_update = False)

display(HTML('<h3>Find Closest Match:</h3>'))

@interact
def get_closest_player(Name = Widget_Name):
    """Display the players whose family name is closest to `Name`.

    Up to three family names are picked from the module-level `Players` list
    with `difflib.get_close_matches` (cutoff 0.1), and every row of
    `df_Players` carrying one of those family names is displayed. The birth
    date is reduced to its first four characters, and a missing date is
    shown as 'Unknown'.
    """
    Matches = difflib.get_close_matches(Name, Players, n = 3, cutoff = 0.1)

    Results = df_Players[df_Players['Family Name'].isin(Matches)].reset_index(drop = True)

    # Truncate to the year FIRST, then substitute the placeholder: doing it the
    # other way round cuts 'Unknown' down to 'Unkn'.
    Results['Birth Date'] = Results['Birth Date'].str[:4].replace({'nan': 'Unknown'})

    display(Results)
    

interactive(children=(Text(value='Flayac', continuous_update=False, description='<b>Name<b>', placeholder='<Ty…