## Extracció de freqüències

Aquest codi pren les dades recopilades dels partits jugats i en fa un fitxer amb els rendiments per equips. Aquest fitxer és una `xarray` de dimensions `player` vs. `teammate`. És a dir, tenim informació de cada possible combinació d'equips.

In [1]:
import asyncio
import sys

if sys.platform.startswith('win'):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

In [2]:
# Importem les llibreries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr # per guardar les dades 3D
from collections import Counter

In [3]:
# Carreguem les dades
data_df = pd.read_csv('results.csv')

# Emplenem els espais en blanc amb 0
data_df = data_df.fillna(0.)

In [4]:
# Read the number of games played per each player
dataarray = xr.open_dataset('stats.nc', engine='scipy')
played_games = dataarray['GamesPlayed'].isel(matchday=-1)

In [5]:
# Obtenim una llista amb tots els noms dels participants
players_names = np.unique(data_df[['Jugador 1', 'Jugador 2', 'Jugador 3', 'Jugador 4']].values.flatten())

# Llista de dies jugats
matchdays = pd.unique(data_df['D'])

Crearem les matrius de `xarray` a partir de `pd.DataFrame()`. Per crear els dataframes, iniciarem els diccionaris buits on hi guardarem les dades per cada jugador. Això serà una de les dimensions de la matriu. Després, per cada partit, escriurem al diccionari el paràmetre corresponent a l'alineació. (ESCRIURE MILLOR AIXÒ)

In [18]:
# DataFrame with the number of team occurrences
mates_df = pd.DataFrame(columns = players_names) # number of games played with each mate
matesplayed_df = pd.DataFrame(columns = players_names) # number of games played with each mate divided by number of games playe by player
winmates_df = pd.DataFrame(columns = players_names) # number of games won by each mate
mates_attack_df = pd.DataFrame(columns = players_names) # number of games played with the mate playing as defender (player in attack)
winmates_attack_df = pd.DataFrame(columns = players_names) # number of games won with the mate playing as defender (player in attack)
closematches_df = pd.DataFrame(columns = players_names) # number of games where the game ended 2-3 or 3-2 (won or lost)
closewins_df = pd.DataFrame(columns = players_names) # number of games won by 1 goal (3-2 or 2-3)
closewinsplayed_df = pd.DataFrame(columns = players_names) # number of games won by 1 goal divided by total number of close matches

for player in players_names: # for eack player
    # Initialize a dictionary where the number of team-ups will be stored
    mate_count = {}
    mateplayed_count = {} # number of games played with mate divided by total games played
    winmate_count = {}
    mates_attack_count = {}
    winmates_attack_count = {}
    closematches_count = {}
    closewins_count = {}
    closewinsplayed_count = {}
    
    for mate in players_names:
        mate_count[mate] = 0 # set counter to 0
        mateplayed_count[mate] = 0
        winmate_count[mate] = 0
        mates_attack_count[mate] = 0
        winmates_attack_count[mate] = 0
        closematches_count[mate] = 0
        closewins_count[mate] = 0
        closewinsplayed_count[mate] = 0

    # Possible lineups
    lineups = [['Jugador 1', 'Jugador 2'], ['Jugador 2', 'Jugador 1'], ['Jugador 3', 'Jugador 4'], ['Jugador 4', 'Jugador 3']]

    # Calculate number of playing counts and victories
    for lineup in lineups:
        lineup_df = data_df[data_df[lineup[0]] == player] # pick games when player plays in position lineup[0]
        teammate_list = lineup_df[lineup[1]] # pick teammate name when player plays in position lineup[1]
        teammate_list_counts = teammate_list.value_counts() # count how many team occurences (teammate in position lineup[1])

        for mate in teammate_list_counts.keys():
            mate_count[mate] = mate_count.get(mate, 0) + teammate_list_counts.loc[mate] # store the teammate join number

            mask_mate = (teammate_list == mate) # mask selecting the games that involve 'mate'

            close_victories = 0 # initialize close victories
            close_losses = 0

            # For the overall games (regardless of position)
            if lineup in [lineups[0], lineups[1]]: # pick lineups playing as local
                if lineup == lineups[1]: # if the game lineup had the player as attacker
                    mates_attack_count[mate] = mates_attack_count.get(mate, 0) + teammate_list_counts.loc[mate] # store occurrences
                
                number_victories = (lineup_df['Local'][mask_mate] > lineup_df['Visitant'][mask_mate]).sum() # calculate number of victories

                if number_victories: # if the game was won
                    winmate_count[mate] = winmate_count.get(mate, 0) + number_victories # add number of victories

                    if lineup == lineups[1]: # if the game was won with player playing as attacker
                        winmates_attack_count[mate] = winmates_attack_count.get(mate, 0) + number_victories

                close_victories += ((lineup_df['Local'][mask_mate] - lineup_df['Visitant'][mask_mate]) == 1).sum() # calculate number of close victories (3-2)
                print(((lineup_df['Local'][mask_mate] - lineup_df['Visitant'][mask_mate]) == 1).sum())
                print(player, mate, close_victories)
                close_losses += ((lineup_df['Visitant'][mask_mate] - lineup_df['Local'][mask_mate]) == 1).sum() # calculate number of close losses (2-3)

            elif lineup in [lineups[2], lineups[3]]: # pick lineups playing as visitor    
                if lineup == lineups[3]: # if the game lineup had the player as attacker
                    mates_attack_count[mate] = mates_attack_count.get(mate, 0) + teammate_list_counts.loc[mate] # store occurrences
                number_victories = (lineup_df['Local'][mask_mate] < lineup_df['Visitant'][mask_mate]).sum() # calculate number of victories
                if number_victories: # if the game was won
                    winmate_count[mate] = winmate_count.get(mate, 0) + number_victories # add number of victories
                   
                    if lineup == lineups[3]: # if the game was won with player playing as attacker
                        winmates_attack_count[mate] = winmates_attack_count.get(mate, 0) + number_victories

                close_victories += ((lineup_df['Visitant'][mask_mate] - lineup_df['Local'][mask_mate]) == 1).sum() # calculate number of close victories (3-2)
                close_losses += ((lineup_df['Local'][mask_mate] - lineup_df['Visitant'][mask_mate]) == 1).sum() # calculate number of close losses (2-3)
                print(player, mate, close_victories)

            # Store close victories and losses
            if (close_victories + close_losses) == 0: # if there are no close victories or losses
                closematches_count[mate] = 0
                closewins_count[mate] = np.nan # set to 0
                closewinsplayed_count[mate] = np.nan
            else: # if there are close victories or losses
                closematches_count[mate] = close_victories + close_losses # total number of close victories and losses with this mate
                closewins_count[mate] = close_victories # total number of close victories with this mate
                closewinsplayed_count[mate] = close_victories / (close_victories + close_losses) # relative number of close victories

            # If we take into acount player position
            #if lineup in [lineups[0], lineups[2]]: # pick lineups playing as defender
 #               number_victories = lineup_df[

                
    # Divide count by number of games played by player
    for mate in mate_count.keys():   
        if played_games.sel(player = player).values == 0:
            continue
        else:
            mateplayed_count[mate] = mate_count.get(mate, 0) / played_games.sel(player = player).values

    # Append this player result to the overall property dataframe
    mates_df = pd.concat([mates_df, pd.DataFrame([mate_count])])
    matesplayed_df = pd.concat([matesplayed_df, pd.DataFrame([mateplayed_count])])
    winmates_df = pd.concat([winmates_df, pd.DataFrame([winmate_count])])
    mates_attack_df = pd.concat([mates_attack_df, pd.DataFrame([mates_attack_count])])
    winmates_attack_df = pd.concat([winmates_attack_df, pd.DataFrame([winmates_attack_count])])
    closematches_df = pd.concat([closematches_df, pd.DataFrame([closematches_count])])
    closewins_df = pd.concat([closewins_df, pd.DataFrame([closewins_count])])
    closewinsplayed_df = pd.concat([closewinsplayed_df, pd.DataFrame([closewinsplayed_count])])

# Transpose to match dimesnions with xarray dimensions (x: player, y: teammate)
matesplayed_df = matesplayed_df.transpose() # necessary if matrix is not symmetric
#closematches_df = closematches_df.transpose()
#closewins_df = closewins_df.transpose()

# Set index to teammate name
mates_df = mates_df.set_index(players_names)
winmates_df = winmates_df.set_index(players_names)
mates_attack_df = mates_attack_df.set_index(players_names)
winmates_attack_df = winmates_attack_df.set_index(players_names)
closematches_df = closematches_df.set_index(players_names)
closewins_df = closewins_df.set_index(players_names)
closewinsplayed_df = closewinsplayed_df.set_index(players_names)

# Create win / played ratio for each team (substitute 0 in the denominator by NaN, then recover 0 in the result)
winmatesplayed_df = winmates_df.div(mates_df.replace(0, pd.NA)).replace(pd.NA, 0)


2
Dani Pau 2
Luis Pedro 1
2
Pau Dani 2
Pedro Luis 1


  matesplayed_df = pd.concat([matesplayed_df, pd.DataFrame([mateplayed_count])])
  closewinsplayed_df = pd.concat([closewinsplayed_df, pd.DataFrame([closewinsplayed_count])])
  winmatesplayed_df = winmates_df.div(mates_df.replace(0, pd.NA)).replace(pd.NA, 0)


In [20]:
closewinsplayed_df

Unnamed: 0,Dani,Luis,Pau,Pedro
Dani,0.0,0.0,0.666667,0.0
Luis,0.0,0.0,0.0,0.333333
Pau,0.666667,0.0,0.0,0.0
Pedro,0.0,0.333333,0.0,0.0


In [7]:
# Creem una DataArray de xarray. Hi especifiquem els noms de cada dimensió
mates_da = xr.DataArray(mates_df.values, dims = ('teammate', 'player'),
                                      coords = {'teammate': mates_df.index, 'player': mates_df.columns})
matesplayed_da = xr.DataArray(matesplayed_df.values, dims = ('teammate', 'player'),
                                      coords = {'teammate': mates_df.index, 'player': mates_df.columns})
winmates_da = xr.DataArray(winmates_df.values, dims = ('teammate', 'player'),
                                      coords = {'teammate': winmates_df.index, 'player': winmates_df.columns})
winmatesplayed_da = xr.DataArray(winmatesplayed_df.values, dims = ('teammate', 'player'),
                                      coords = {'teammate': winmatesplayed_df.index, 'player': winmatesplayed_df.columns})
closewins_da = xr.DataArray(closewins_df.values, dims = ('teammate', 'player'),
                                      coords = {'teammate': winmatesplayed_df.index, 'player': winmatesplayed_df.columns})


# Combinem tots els DataArrays a un únic Dataset de xarray (cal que les coords siguin les mateixes per a tots)
dataset = xr.Dataset({"Teammates": mates_da,
                      "TeammatesPlayed": matesplayed_da,
                      "TeammatesWins": winmates_da,
                      "TeammatesWinsPlayed": winmatesplayed_da,
                      "CloseWinsRatio": closewins_da})

# TODO: el procés de crear el DataArray a partir del DataFrame es pot automatitzar amb una funció que faci un concat al dataframe. 

# Sote dataset
dataset.to_netcdf('teammates.nc', mode='w')

# dataset['goals'] = goals_da # si volem afegir un nou element
dataset