# ATP_Matches_2021 Tennis Data Analysis 

In [95]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Load the 2021 ATP match results; the path is relative to the notebook's working directory.
df = pd.read_csv("atp_matches_2021.csv")

## Sorting the tourney_date column 
1. Turn into string 
2. Turn into datetime 
3. Sort the dataframe by date, then by match number 
4. Reset the indices

In [96]:
# Parse tourney_date into a real datetime column, then order the matches
# chronologically.
#
# The values are cast to string before parsing (if the raw column is numeric,
# pd.to_datetime would otherwise read the numbers as epoch offsets rather than
# calendar dates) and parsed with an explicit format instead of relying on
# format inference.
# NOTE(review): assumes the raw values look like YYYYMMDD (e.g. 20210724) - confirm against the CSV.
df["tourney_date"] = pd.to_datetime(
    df["tourney_date"].astype("string"), format="%Y%m%d"
)

# Sort by date, using match_num to break ties within the same date.
df = df.sort_values(by=["tourney_date", "match_num"])

# reset_index() without drop=True keeps the pre-sort row labels as a new
# "index" column next to the fresh 0..n-1 index.
df = df.reset_index()

## Dealing with the Olympics, Davis Cup, The ATP Finals, and Grand Slams

### Tourney Level Codes 
* A = Olympics, ATP 250, ATP 500
* D = Davis Cup
* F = ATP Finals 
* G = Grand Slam 
* M = ATP Masters 1000


In [97]:
# Split the Olympic matches out into their own dataframe, then drop those
# same rows (by index label) from df so it no longer contains them.
is_olympics = df["tourney_name"] == "Tokyo Olympics"
df_olympics = df[is_olympics]
df = df.drop(index=df_olympics.index)

In [98]:
# Split the Davis Cup matches (tourney_level "D") out into their own
# dataframe, then drop those same rows (by index label) from df.
is_davis_cup = df["tourney_level"] == "D"
df_davis_cup = df[is_davis_cup]
df = df.drop(index=df_davis_cup.index)

In [99]:
# Split the ATP Finals matches (tourney_level "F") out into their own
# dataframe, then drop those same rows (by index label) from df.
is_atp_finals = df["tourney_level"] == "F"
df_finals = df[is_atp_finals]
df = df.drop(index=df_finals.index)

In [100]:
# Split the Grand Slam matches (tourney_level "G") out into their own
# dataframe, then drop those same rows (by index label) from df.
is_grand_slam = df["tourney_level"] == "G"
df_slams = df[is_grand_slam]
df = df.drop(index=df_slams.index)

### This leaves the main dataframe with only ATP 250, ATP 500 and Masters 1000 events

In [101]:
# Number of matches left in df for each tourney_level code.
df.groupby(by="tourney_level")["tourney_level"].count()

tourney_level
A    1505
M     512
Name: tourney_level, dtype: int64

### The Olympics

In [102]:
# Lift the column display limit so wide match rows are shown in full.
# This is a global pandas option, so it also applies to later cells.
pd.set_option("display.max_columns", None)

# Every Olympic match involving Novak Djokovic, as winner or as loser.
played_by_djokovic = (
    df_olympics[["winner_name", "loser_name"]].eq("Novak Djokovic").any(axis=1)
)
df_olympics[played_by_djokovic]

Unnamed: 0,index,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,winner_id,winner_seed,winner_entry,winner_name,winner_hand,winner_ht,winner_ioc,winner_age,loser_id,loser_seed,loser_entry,loser_name,loser_hand,loser_ht,loser_ioc,loser_age,score,best_of,round,minutes,w_ace,w_df,w_svpt,w_1stIn,w_1stWon,w_2ndWon,w_SvGms,w_bpSaved,w_bpFaced,l_ace,l_df,l_svpt,l_1stIn,l_1stWon,l_2ndWon,l_SvGms,l_bpSaved,l_bpFaced,winner_rank,winner_rank_points,loser_rank,loser_rank_points
1774,31,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,268,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,106198,,,Hugo Dellien,R,180.0,BOL,28.0,6-2 6-2,3,R64,,,,,,,,,,,,,,,,,,,,1.0,12113.0,139.0,558.0
1790,47,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,284,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,105526,,,Jan Lennard Struff,R,193.0,GER,31.2,6-4 6-3,3,R32,,,,,,,,,,,,,,,,,,,,1.0,12113.0,48.0,1410.0
1798,55,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,292,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,200221,16.0,,Alejandro Davidovich Fokina,R,183.0,ESP,22.1,6-3 6-1,3,R16,,,,,,,,,,,,,,,,,,,,1.0,12113.0,35.0,1723.0
1802,59,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,296,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,105453,,,Kei Nishikori,R,178.0,JPN,31.5,6-2 6-0,3,QF,,,,,,,,,,,,,,,,,,,,1.0,12113.0,69.0,1003.0
1804,61,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,298,100644,4.0,,Alexander Zverev,R,198.0,GER,24.2,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,1-6 6-3 6-1,3,SF,,,,,,,,,,,,,,,,,,,,5.0,7340.0,1.0,12113.0
1806,63,2021-0096,Tokyo Olympics,Hard,64,A,2021-07-24,300,105807,6.0,,Pablo Carreno Busta,R,188.0,ESP,30.0,104925,1.0,,Novak Djokovic,R,188.0,SRB,34.1,6-4 6-7(6) 6-3,3,BR,,,,,,,,,,,,,,,,,,,,11.0,3260.0,1.0,12113.0


### Grand Slams

In [110]:
# NaN in winner_seed just means the winner wasn't seeded.
# NaN in winner_entry just means the winner was in the main draw, so the
# non-null rows are the Grand Slam matches whose winner has an entry code
# (values such as Q, WC and LL appear in the data).
df_slams[df_slams["winner_entry"].notna()]

Unnamed: 0,index,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,winner_id,winner_seed,winner_entry,winner_name,winner_hand,winner_ht,winner_ioc,winner_age,loser_id,loser_seed,loser_entry,loser_name,loser_hand,loser_ht,loser_ioc,loser_age,score,best_of,round,minutes,w_ace,w_df,w_svpt,w_1stIn,w_1stWon,w_2ndWon,w_SvGms,w_bpSaved,w_bpFaced,l_ace,l_df,l_svpt,l_1stIn,l_1stWon,l_2ndWon,l_SvGms,l_bpSaved,l_bpFaced,winner_rank,winner_rank_points,loser_rank,loser_rank_points
212,1820,2021-580,Australian Open,Hard,128,G,2021-02-08,114,202195,,Q,Maxime Cressy,R,198.0,USA,23.7,106121,,LL,Taro Daniel,R,191.0,JPN,28.0,7-6(1) 7-6(3) 6-4,5,R128,147.0,26.0,4.0,91.0,59.0,52.0,21.0,17.0,0.0,1.0,4.0,4.0,129.0,83.0,62.0,18.0,17.0,6.0,8.0,172.0,389.0,118.0,591.0
219,1827,2021-580,Australian Open,Hard,128,G,2021-02-08,121,106109,,WC,Alex Bolt,L,183.0,AUS,28.0,105613,,,Norbert Gombos,R,193.0,SVK,30.4,6-2 6-2 4-6 6-3,5,R128,134.0,11.0,3.0,106.0,67.0,50.0,22.0,18.0,0.0,2.0,7.0,2.0,117.0,85.0,52.0,16.0,17.0,12.0,18.0,166.0,400.0,89.0,797.0
223,1831,2021-580,Australian Open,Hard,128,G,2021-02-08,125,106071,,Q,Bernard Tomic,R,193.0,AUS,28.3,105216,,,Yuichi Sugita,R,173.0,JPN,32.3,3-6 6-1 4-1 RET,5,R128,86.0,10.0,3.0,75.0,48.0,38.0,9.0,11.0,6.0,7.0,6.0,2.0,65.0,35.0,25.0,13.0,10.0,3.0,7.0,233.0,253.0,104.0,723.0
227,1835,2021-580,Australian Open,Hard,128,G,2021-02-08,129,106234,,Q,Aslan Karatsev,R,185.0,RUS,27.4,126149,,,Gianluca Mager,R,185.0,ITA,26.1,6-3 6-3 6-4,5,R128,98.0,11.0,3.0,69.0,42.0,37.0,17.0,14.0,1.0,2.0,6.0,2.0,86.0,55.0,36.0,12.0,14.0,6.0,11.0,114.0,642.0,96.0,770.0
228,1836,2021-580,Australian Open,Hard,128,G,2021-02-08,130,124186,,LL,Alexandre Muller,R,183.0,FRA,24.0,106228,,,Juan Ignacio Londero,R,180.0,ARG,27.4,4-6 6-3 6-0 6-3,5,R128,150.0,6.0,3.0,91.0,63.0,44.0,13.0,17.0,0.0,4.0,0.0,7.0,123.0,72.0,40.0,21.0,17.0,2.0,11.0,210.0,305.0,83.0,832.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2154,1776,2021-560,Us Open,Hard,128,G,2021-08-30,197,202385,,WC,Jenson Brooksby,R,188.0,USA,20.8,106234,21.0,,Aslan Karatsev,R,185.0,RUS,27.9,6-2 3-6 2-6 6-3 6-3,5,R32,230.0,9.0,3.0,145.0,89.0,59.0,25.0,22.0,5.0,12.0,8.0,5.0,126.0,67.0,40.0,32.0,21.0,8.0,16.0,99.0,812.0,25.0,2109.0
2155,1777,2021-560,Us Open,Hard,128,G,2021-08-30,198,106214,,Q,Oscar Otte,R,196.0,GER,28.1,104312,,,Andreas Seppi,R,190.0,ITA,37.5,6-3 6-4 2-6 7-5,5,R32,149.0,15.0,3.0,109.0,60.0,49.0,26.0,20.0,2.0,5.0,4.0,5.0,108.0,63.0,45.0,23.0,19.0,4.0,8.0,144.0,543.0,89.0,873.0
2163,1785,2021-560,Us Open,Hard,128,G,2021-08-30,206,105376,,Q,Peter Gojowczyk,R,185.0,GER,32.1,105967,,Q,Henri Laaksonen,R,185.0,SUI,29.4,3-6 6-3 6-1 6-4,5,R32,128.0,8.0,9.0,109.0,56.0,44.0,29.0,18.0,5.0,7.0,10.0,5.0,105.0,53.0,37.0,25.0,17.0,6.0,11.0,141.0,563.0,130.0,613.0
2165,1787,2021-560,Us Open,Hard,128,G,2021-08-30,208,122298,,Q,Botic Van De Zandschulp,R,188.0,NED,25.9,105487,,,Facundo Bagnis,L,183.0,ARG,31.5,3-6 6-0 6-2 6-2,5,R32,150.0,11.0,5.0,105.0,60.0,51.0,17.0,15.0,7.0,9.0,5.0,7.0,118.0,79.0,49.0,13.0,16.0,13.0,21.0,117.0,698.0,80.0,913.0
