### This is a program to identify a winner / loser in a tournament in Women's Tennis for all Grand Slams

#### Import all the libraries

In [None]:
import pandas as pd
import logging
import sys
from importlib import reload
import matplotlib as plt
import datetime

In [None]:
# Reload the logging module before configuring it. basicConfig() does nothing
# when the root logger already has handlers, so starting from a fresh module
# state guarantees the format/level/stream below are actually applied.
reload(logging)

logging.basicConfig(format=' %(levelname)s : %(message)s',
                    level=logging.INFO, stream=sys.stdout)

# Fetch the root logger only after the reload: a reference obtained earlier
# would point at the discarded pre-reload root rather than the configured one.
logger = logging.getLogger()

In [None]:
# Match-table column names singled out for the analysis, grouped by theme.
cols_of_interest = [
    # match context
    'year',
    'best_of',
    'loser_age',
    'winner_age',
    'draw_size',
    # loser details
    'loser_id',
    'loser_ioc',
    'loser_name',
    'loser_rank',
    # winner details
    'winner_id',
    'winner_name',
    'winner_ioc',
    'winner_rank',
    # tournament details
    'tourney_id',
    'tourney_name',
    'tourney_date',
    'score',
]

#### Read data from the WTA (women's tennis) for the last 20 years

In [None]:
# Matches: load the match records from ../wta/matches.csv into a DataFrame.
# low_memory=False has pandas process the file in one pass instead of in
# chunks, so each column gets a single inferred dtype.
matches = pd.read_csv("../wta/matches.csv", low_memory=False)

In [None]:
# Get all the players: read ../wta/players.csv as comma-separated text,
# decoding it as Latin-1.
players = pd.read_csv("../wta/players.csv", sep=',', encoding='latin-1')
# Show the first 10 rows as the cell output.
players.head(n=10)

In [None]:
# Get all the qualifying matches from ../wta/qualifying_matches.csv.
# low_memory=False has pandas process the file in one pass instead of in
# chunks, so each column gets a single inferred dtype.
qualifying_matches = pd.read_csv("../wta/qualifying_matches.csv", low_memory=False)

In [None]:
# Get all the rankings from ../wta/rankings.csv (default read_csv settings).
rankings = pd.read_csv("../wta/rankings.csv")

#### Enrich the matches data

In [None]:
# Player IDs: coerce every value to a Python int.
matches['loser_id'] = matches['loser_id'].apply(int)
matches['winner_id'] = matches['winner_id'].apply(int)

# Ages: a missing age becomes 0, then every age is truncated to a whole number.
matches['loser_age'] = matches['loser_age'].fillna(0).apply(int)
matches['winner_age'] = matches['winner_age'].fillna(0).apply(int)

# Year: the first four characters of the tournament date's string form.
matches['year'] = matches['tourney_date'].apply(lambda date_value: str(date_value)[:4])



# Features

1. Number of matches the player has won against the other player

2. number of matches the player has LOST against the other player

3. The number of matches the player has won in the past year from the date of the search

4. The surface of the court
    - Can figure out which surface the player is best at by finding the number of matches 
    the player has won on each surface in the past year from the date of the search 
5. The age of the player - (older players are prone to have deteriorating play level, health, and stamina, which means
    they are more likely to lose)



In [None]:
def findNextWinner(matches, player1, player2):
    """Estimate player1's chance (in percent) of beating player2.

    The estimate is built from head-to-head results found in ``matches``:

    * the all-time share of their meetings won by ``player1``, and
    * the share won by ``player1`` in the previous calendar year (the year
      of the date 365 days before today), if the two met in that year.

    When both figures are available they are blended 60/40; otherwise the
    all-time share is used on its own.

    Args:
        matches: DataFrame with ``winner_name``, ``loser_name`` and ``year``
            columns; ``year`` holds the four-digit year (text or integer).
        player1: Name of the player whose chance is estimated.
        player2: Name of the opponent.

    Returns:
        A ``(player1, chance)`` tuple. ``chance`` is 0 when the two players
        have never met, an int when only the all-time share is available,
        and a float when the 60/40 blend is applied.
    """
    player1_wins = matches[(matches['winner_name'] == player1) & (matches['loser_name'] == player2)]
    player2_wins = matches[(matches['winner_name'] == player2) & (matches['loser_name'] == player1)]

    # The 'year' column holds the year as text, so compare against the year
    # as a string. Comparing against a datetime object never matches.
    prev_year = str((datetime.datetime.today() - datetime.timedelta(days=365)).year)

    player1_win_count = len(player1_wins)
    player2_win_count = len(player2_wins)
    player1_win_last_year_count = int((player1_wins['year'].astype(str) == prev_year).sum())
    player2_win_last_year_count = int((player2_wins['year'].astype(str) == prev_year).sum())

    logger.info(" %s has won %s matches || %s has won %s matches" % (player1, player1_win_count, player2, player2_win_count))

    # Use `and`, not `&`: `&` binds tighter than `==`, so `a == 0 & b == 0`
    # is evaluated as `a == (0 & b) == 0` and ignores b entirely.
    if player1_win_count == 0 and player2_win_count == 0:
        logger.info("Sorry, %s has not played with %s. So, we don't have enough information to predict winner" % (player1, player2))
        return player1, 0

    player1chance = int(100 * player1_win_count / (player1_win_count + player2_win_count))

    logger.info(" In last year, %s has won %s matches || %s has won %s matches" % (player1, player1_win_last_year_count, player2, player2_win_last_year_count))

    last_year_wins = player1_win_last_year_count + player2_win_last_year_count

    if last_year_wins != 0:
        # Recent form is available: blend all-time (60%) with last year (40%).
        player1chanceyear = int(100 * player1_win_last_year_count / last_year_wins)
        player1totalchance = (0.6 * player1chance) + (0.4 * player1chanceyear)
    else:
        # No meetings last year: fall back to the all-time share alone.
        logger.info("Sorry, the two players haven't played in past one year.")
        player1totalchance = player1chance

    logger.info("Based on this information, %s has a %s percent chance of winning the next match" % (player1, player1totalchance))
    return player1, player1totalchance


In [None]:
def applyScoreWeights():
    """Placeholder for score weighting; currently only logs a marker line."""
    marker = "apply score weights"
    logger.info(marker)

In [None]:
def playerOpps(matches, player):
    """Return the distinct opponents ``player`` has faced in ``matches``.

    Args:
        matches: DataFrame with ``winner_name`` and ``loser_name`` columns.
        player: Name of the player whose opponents are collected.

    Returns:
        A list of opponent names without repeats: first the players that
        ``player`` has beaten, then any additional players that ``player``
        has lost to, each in order of first appearance.
    """
    beaten = matches.loc[matches['winner_name'] == player, 'loser_name']
    lost_to = matches.loc[matches['loser_name'] == player, 'winner_name']

    # An opponent the player has both beaten and lost to shows up in both
    # series. dict.fromkeys drops the repeats while keeping first-seen order,
    # so the logged total counts every opponent exactly once.
    player_opps = list(dict.fromkeys([*beaten, *lost_to]))
    logger.info("%s has played a total of %d other players" % (player, len(player_opps)))
    return player_opps
    

In [None]:
def commonPlayer(list1, list2):
    """Return the set of players that appear in both ``list1`` and ``list2``.

    Args:
        list1: Iterable of (hashable) player names.
        list2: Iterable of (hashable) player names.

    Returns:
        A set containing every name present in both inputs.
    """
    # Set intersection avoids the repeated linear scans of list2 and makes the
    # logged count match the size of the returned set even when list1
    # contains repeated names.
    shared = set(list1) & set(list2)
    logger.info(" There are %s common players." % (len(shared)))
    return shared
    

In [None]:
def calculateChancesEachPlayer(common_players, player1, player2 ):
    """Estimate player1's chance against player2 via their common opponents.

    For every common opponent, ``findNextWinner`` is asked for each player's
    chance against that opponent (using the module-level ``matches`` table).
    player1's relative share ``c1 / (c1 + c2)`` is recorded per opponent, and
    the integer-truncated shares are averaged.

    Args:
        common_players: Iterable of opponents both players have faced.
        player1: Name of the player whose chance is estimated.
        player2: Name of the other player.

    Returns:
        player1's average relative chance as an int percentage, or 0 when no
        common opponent yields a usable comparison.
    """
    common_player_chances = {}
    for common_player in common_players:
        logger.info("****************************************\n")
        logger.info("Common Player is: %s" % ( common_player))
        _, c1 = findNextWinner(matches, player1, common_player)
        _, c2 = findNextWinner(matches, player2, common_player)
        # Use `and`, not `&`: `&` binds tighter than `==`, so the bitwise form
        # skipped every opponent where c1 was 0 even if c2 was positive.
        if c1 == 0 and c2 == 0:
            logger.error("Sorry unable to calcualte chances")
            continue
        common_player_chances[common_player] = (c1 / (c1 + c2)) * 100

    # Nothing to average: avoid dividing by zero below.
    if not common_player_chances:
        logger.warning("No common opponent produced a usable comparison for %s and %s", player1, player2)
        return 0

    total_chances = sum(int(chance) for chance in common_player_chances.values())
    return int(total_chances / len(common_player_chances))



#### Identify the Players

In [None]:
# Names of the two players that are compared in the cells below.
player1, player2 = "Monica Puig", "Serena Williams"

#### Invoke different Functions to find the chances

In [None]:
# Collect everyone each of the two players has faced.
player1_opps = playerOpps(matches, player1)
player2_opps = playerOpps(matches, player2)
logger.info("Player 1  opponents: %s" % ( len(player1_opps)) ) 
# Opponents both players have faced are the basis for the comparison.
common_players = commonPlayer(player1_opps, player2_opps)
# player1's estimated chance, derived from results against those opponents.
player1chance = calculateChancesEachPlayer(common_players, player1, player2)
logger.info("Player1 chance >> %s", player1chance)


Goals for Next week ( week of May 2nd )
1. Fix the unique list of common players
2. Work on returning the percentage for each player
3. Weightage for scores

In [None]:
# player_chances = []
# total_chances = 0
# for k, v in common_player_chances.items():
#     total_chances = total_chances + int(v)
# int( total_chances/len(common_player_chances))  

In [None]:
# Candidate features, described from a single player's perspective.
features = [
    'date',
    '# of Matches Won',
    '# of Matches Lost',
    "year",
    'Surface Type',
    'age',
    'Scores',
]

# Percentage weight per signal; the values appear to correspond to the
# 0.6 / 0.4 blend used in findNextWinner.
featuresToWeights = {"WinCounts": 60, "LastYear": 40}




### Data visualization

We will visualize some of the data - e.g., who has won the most Grand Slams, who is second, etc.

In [None]:
# Preview the first rows of the matches table (head() defaults to five rows).
matches.head()

In [None]:
# Despite their names, both variables hold full datetime objects: "now" and
# the moment 365 days before now.
curr_year = datetime.datetime.today()
prev_year = datetime.datetime.today() - datetime.timedelta(365)

# Cell output: the calendar year of the date 365 days ago.
prev_year.year
