In [50]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import warnings

warnings.filterwarnings("ignore")

In [70]:
# Load datasets
table = pd.read_csv('../scraping/tables/variable_data.csv')
laliga = pd.read_csv('../scraping/tables/laliga/laliga_all_seasons.csv', index_col=0)

# Set column names for `laliga` from the first row of `table` (excluding the first column)
laliga.columns = table.iloc[0, 1:]

# Get columns to keep based on the second row of `table`
columns_to_keep = table.T.loc[table.iloc[1] == '1'].iloc[:, 0]

# Remove duplicate columns and keep only the desired ones
laliga = laliga.loc[:, ~laliga.columns.duplicated() & laliga.columns.isin(columns_to_keep)]

# Reorder columns: move key identifiers to the front
columns_to_front = ['season_id', 'match_id', 'team_name', 'team_id']
laliga = laliga[columns_to_front + [col for col in laliga.columns if col not in columns_to_front]]

# Split the position and save the 1st one
laliga['Position'] = laliga['Position'].str.split(',').str[0].str.strip()
# Resulting DataFrame
laliga

Unnamed: 0,season_id,match_id,team_name,team_id,Player Name,Shirt Number,Nationality,Position,Age at Season Start,Minutes Played,...,Aerial Duels Won,Shots on Target Against,Goals Against,Saves,Save Percentage,Post-Shot Expected Goals (PSxG),Opponent Crosses Faced,Crosses Stopped,Crosses Stopped Percentage,Defensive Actions Outside Penalty Area
0,1,1,Almería,H,Adri Embarba,10.0,es ESP,LW,31-096,90,...,1,,,,,,,,,
1,1,1,Almería,H,Alejandro Pozo Pozo,17.0,es ESP,RB,24-170,65,...,1,,,,,,,,,
2,1,1,Almería,H,Diego Mariño,1.0,es ESP,GK,33-094,90,...,0,4.0,2.0,2.0,100.0,2.1,6.0,0.0,0.0,2.0
3,1,1,Almería,H,Dion Lopy,6.0,sn SEN,CM,21-190,13,...,0,,,,,,,,,
4,1,1,Almería,H,Gonzalo Melero,11.0,es ESP,CM,29-221,14,...,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79170,7,380,Real Sociedad,A,Raúl Navas,22.0,es ESP,CB,30-009,90,...,0,,,,,,,,,
79171,7,380,Real Sociedad,A,Sergio Canales,16.0,es ESP,RW,27-093,28,...,0,,,,,,,,,
79172,7,380,Real Sociedad,A,Willian José,12.0,br BRA,FW,26-178,90,...,1,,,,,,,,,
79173,7,380,Real Sociedad,A,Xabi Prieto,10.0,es ESP,DM,34-264,2,...,0,,,,,,,,,


In [53]:
# try to solve
w = {
    "Gls": {"defensa": 0.05, "medio": 0.06, "ataque": 0.18},
    "Ast": {"defensa": 0.08, "medio": 0.14, "ataque": 0.1},
    "PK": {"defensa": 0.0, "medio": 0.02, "ataque": 0.04},
    # "PKatt": {"defensa": 0.0, "medio": 0.02, "ataque": 0.04},
    "Sh": {"defensa": 0.01, "medio": 0.04, "ataque": 0.1},
    "SoT": {"defensa": 0.02, "medio": 0.04, "ataque": 0.12},
    # "CrdY": {"defensa": 0.04, "medio": 0.03, "ataque": 0.02},
    # "CrdR": {"defensa": 0.05, "medio": 0.03, "ataque": 0.01},
    "Touches": {"defensa": 0.06, "medio": 0.07, "ataque": 0.04},
    "Tkl": {"defensa": 0.18, "medio": 0.08, "ataque": 0.01},
    "Int": {"defensa": 0.18, "medio": 0.05, "ataque": 0.01},
    "Blocks": {"defensa": 0.14, "medio": 0.04, "ataque": 0.01},
    # "xG": {"defensa": 0.01, "medio": 0.03, "ataque": 0.1},
    # "npxG": {"defensa": 0.01, "medio": 0.03, "ataque": 0.1},
    # "xAG": {"defensa": 0.01, "medio": 0.04, "ataque": 0.1},
    "SCA": {"defensa": 0.02, "medio": 0.07, "ataque": 0.09},
    "GCA": {"defensa": 0.01, "medio": 0.05, "ataque": 0.11},
    "Cmp": {"defensa": 0.08, "medio": 0.14, "ataque": 0.04},
    # "Att": {"defensa": 0.06, "medio": 0.06, "ataque": 0.03},
    "Cmp%": {"defensa": 0.07, "medio": 0.07, "ataque": 0.02},
    "PrgP": {"defensa": 0.05, "medio": 0.08, "ataque": 0.05},
    # "Carries": {"defensa": 0.04, "medio": 0.06, "ataque": 0.05},
    # "PrgC": {"defensa": 0.03, "medio": 0.06, "ataque": 0.06},
    "Succ": {"defensa": 0.05, "medio": 0.05, "ataque": 0.08}
}

# Verificación de que los pesos sumen 1 por posición
for position in ['defensa', 'medio', 'ataque']:
    total = sum(w[stat][position] for stat in w)
    print(f"Total para {position}: {total}")

Total para defensa: 1.0000000000000002
Total para medio: 1.0
Total para ataque: 1.0000000000000002


In [60]:
w2 = pd.read_csv("../scraping/tables/variable_data.csv")
w2
w2 = w2.transpose()
w2

columns_to_keep = w2[w2[1]=='1'].reset_index()
columns_to_keep = columns_to_keep.iloc[:,1:6]

columns_to_keep.set_index(0)
columns_to_keep.drop(1, axis=1, inplace=True)
columns_to_keep.columns = ["name","def", "mid", "off"]
w2 = columns_to_keep
w2 = w2.dropna()

w2["def"] = w2["def"].astype(int)
w2["mid"] = w2["mid"].astype(int)
w2["off"] = w2["off"].astype(int)



w2["def_w"] = w2["def"] / w2["def"].sum()
w2["mid_w"] = w2["mid"] / w2["mid"].sum()
w2["off_w"] = w2["off"] / w2["off"].sum()
w2

Unnamed: 0,name,def,mid,off,def_w,mid_w,off_w
6,Goals Scored,1,2,3,0.014925,0.02439,0.038462
7,Assists Provided,2,3,3,0.029851,0.036585,0.038462
8,Penalty Kicks Made,1,1,1,0.014925,0.012195,0.012821
9,Total Shots,1,2,3,0.014925,0.02439,0.038462
10,Shots on Target,1,2,3,0.014925,0.02439,0.038462
11,Non-Penalty Expected Goals (npxG),1,2,3,0.014925,0.02439,0.038462
12,Shot-Creating Actions,2,3,3,0.029851,0.036585,0.038462
13,Goal-Creating Actions,2,3,3,0.029851,0.036585,0.038462
14,Total Passes Completed,2,3,2,0.029851,0.036585,0.025641
15,Total Passes Attempted,1,2,1,0.014925,0.02439,0.012821


In [63]:
import pandas as pd

# Supongamos que tienes un DataFrame llamado 'df'
# A continuación, el código para convertirlo al formato deseado:

w = {
    row['name']: {
        'def': row['def_w'],
        'mid': row['mid_w'],
        'off': row['off_w']
    }
    for _, row in w2.iterrows()
}

w


{'Goals Scored': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Assists Provided': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Penalty Kicks Made': {'def': 0.014925373134328358,
  'mid': 0.012195121951219513,
  'off': 0.01282051282051282},
 'Total Shots': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Shots on Target': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Non-Penalty Expected Goals (npxG)': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Shot-Creating Actions': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Goal-Creating Actions': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Total Passes Completed': {'def': 0.029850746268656716,
  'mid': 0.03

In [112]:
medias = {
    "Gls": {"defensa": 0.02, "medio": 0.1, "ataque": 0.7},
    "Ast": {"defensa": 0.03, "medio": 0.15, "ataque": 0.25},
    "PK": {"defensa": 0.0, "medio": 0.02, "ataque": 0.05},
    # "PKatt": {"defensa": 0.0, "medio": 0.03, "ataque": 0.06},
    "Sh": {"defensa": 0.2, "medio": 0.8, "ataque": 3.5},
    "SoT": {"defensa": 0.1, "medio": 0.4, "ataque": 1.5},
    # "CrdY": {"defensa": 0.2, "medio": 0.15, "ataque": 0.1},
    # "CrdR": {"defensa": 0.03, "medio": 0.02, "ataque": 0.01},
    "Touches": {"defensa": 50, "medio": 80, "ataque": 40},
    "Tkl": {"defensa": 2.5, "medio": 1.5, "ataque": 0.5},
    "Int": {"defensa": 1.8, "medio": 1.0, "ataque": 0.3},
    "Blocks": {"defensa": 1.2, "medio": 0.5, "ataque": 0.1},
    # "xG": {"defensa": 0.02, "medio": 0.12, "ataque": 0.5},
    # "npxG": {"defensa": 0.01, "medio": 0.1, "ataque": 0.4},
    # "xAG": {"defensa": 0.01, "medio": 0.14, "ataque": 0.3},
    "SCA": {"defensa": 0.2, "medio": 2.0, "ataque": 3.5},
    "GCA": {"defensa": 0.1, "medio": 1.2, "ataque": 2.5},
    "Cmp": {"defensa": 40, "medio": 55, "ataque": 30},
    # "Att": {"defensa": 50, "medio": 70, "ataque": 35},
    "Cmp%": {"defensa": 0.85, "medio": 0.83, "ataque": 0.78},
    "PrgP": {"defensa": 2.0, "medio": 5.0, "ataque": 3.0},
    # "Carries": {"defensa": 10, "medio": 20, "ataque": 15},
    # "PrgC": {"defensa": 3.0, "medio": 7.0, "ataque": 4.5},
    "Succ": {"defensa": 0.5, "medio": 1.2, "ataque": 2.0}
}


In [113]:
media_y_desv = {
    "medias": {"defensa": 7, "medio": 7, "ataque": 2},
    "desviaciones estándar": {"defensa": 2, "medio": 2, "ataque": 2},
}

In [67]:
import numpy as np
positions = laliga["Position"].unique()
unique_positions = np.unique(np.concatenate([pos.split(",") for pos in positions]))
unique_positions

array(['AM', 'CB', 'CM', 'DM', 'FW', 'GK', 'LB', 'LM', 'LW', 'RB', 'RM',
       'RW', 'WB'], dtype='<U2')

In [None]:
attack = ['FW', 'LW', 'RW']
midfield = ['AM', 'CM', 'DM', 'LM', 'RM']
defense = ['CB', 'LB', 'RB', 'WB']
goalkeeper = ['GK']

In [114]:
posiciones_ataque = ["FW"]

posiciones_medio = ["LM", "RM", "CM", "RM,LM"]

posiciones_defensa = ["LB", "CB", "RB"]

In [115]:
posiciones_ataque = ["FW", "RW", "LW", "AM"]

posiciones_medio = ["LM", "RM", "CM", "RM,LM", "DM"]

posiciones_defensa = ["LB", "CB", "RB", "DM,AM", "DM"]  

posiciones_especiales = ["GK"]

posiciones_sin_especificar = ["NaN"]

In [116]:
rm = list_tables[0] #cambiar por "7" para el barça
rm.columns = rm.columns.droplevel(0)
rm = rm.set_index("Player")
rm = rm[:-1]

In [117]:
def define_group(position):
    if position in posiciones_ataque:
        return "ataque"
    if position in posiciones_medio:
        return "medio"
    if position in posiciones_defensa:
        return "defensa"

In [118]:
for player, stat in rm.iterrows():
    group = define_group(rm.loc[player,"Pos"])
    rm.loc[player, "Group"] = group

In [119]:
rm

Unnamed: 0_level_0,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,...,GCA,Cmp,Att,Cmp%,PrgP,Carries,PrgC,Att,Succ,Group
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Vinicius Júnior,7.0,br BRA,FW,24-106,90,0,0,0,0,2,...,0,8,15,53.3,1,24,4,7,1,ataque
Kylian Mbappé,9.0,fr FRA,FW,25-311,90,0,0,0,0,3,...,0,13,15,86.7,1,17,2,4,1,ataque
Eduardo Camavinga,6.0,fr FRA,LM,21-351,76,0,0,0,0,0,...,0,24,29,82.8,5,20,2,1,1,medio
Brahim Díaz,21.0,ma MAR,RM,25-084,14,0,0,0,0,0,...,0,2,2,100.0,0,3,0,0,0,medio
Aurélien Tchouaméni,14.0,fr FRA,CM,24-273,62,0,0,0,0,0,...,0,13,15,86.7,2,7,0,1,0,medio
Luka Modrić,10.0,hr CRO,CM,39-047,28,0,0,0,0,1,...,0,14,19,73.7,2,10,1,0,0,medio
Federico Valverde,8.0,uy URU,CM,26-096,90,0,0,0,0,1,...,0,22,38,57.9,1,22,0,2,1,medio
Jude Bellingham,5.0,eng ENG,"RM,LM",21-119,90,0,0,0,0,1,...,0,13,16,81.3,1,15,0,0,0,medio
Ferland Mendy,23.0,fr FRA,LB,29-140,85,0,0,0,0,0,...,0,34,43,79.1,2,29,0,0,0,defensa
Fran Garcia,20.0,es ESP,LB,25-073,5,0,0,0,0,0,...,0,1,1,100.0,0,0,0,0,0,defensa


In [120]:
for player, stat in rm.iterrows():
    # puntuacion_inicial = sum(w[])
    group = define_group(rm.loc[player,"Pos"])


In [121]:
for player, stat in rm.iterrows():
    print(stat.index)
    print("******************")

Index(['#', 'Nation', 'Pos', 'Age', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh',
       'SoT', 'CrdY', 'CrdR', 'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG',
       'xAG', 'SCA', 'GCA', 'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC',
       'Att', 'Succ', 'Group'],
      dtype='object')
******************
Index(['#', 'Nation', 'Pos', 'Age', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh',
       'SoT', 'CrdY', 'CrdR', 'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG',
       'xAG', 'SCA', 'GCA', 'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC',
       'Att', 'Succ', 'Group'],
      dtype='object')
******************
Index(['#', 'Nation', 'Pos', 'Age', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh',
       'SoT', 'CrdY', 'CrdR', 'Touches', 'Tkl', 'Int', 'Blocks', 'xG', 'npxG',
       'xAG', 'SCA', 'GCA', 'Cmp', 'Att', 'Cmp%', 'PrgP', 'Carries', 'PrgC',
       'Att', 'Succ', 'Group'],
      dtype='object')
******************
Index(['#', 'Nation', 'Pos', 'Age', 'Min', 'Gls', 'Ast', 'PK', 'PKatt', 'Sh',
 

In [124]:
for player, stat in rm.iterrows():
    print(f"calculando puntuación inicial para {player}")
    
    group = define_group(rm.loc[player,"Pos"])

    n_to_sum = []

    for i in stat.keys():
        print(f"calculando {i} para {player}, que es un {rm.loc[player,'Pos']}")
        if i in w and rm.loc[player, "Pos"] != "GK":
            print(f"dividir {stat[i]} entre {medias[i][group]}")
            if medias[i][group] != 0:
                step_1 = stat[i] / medias[i][group]
            else:
                step_1 = 0
            print(step_1)
            step_2 = w[i][group] * step_1
            n_to_sum.append(step_2)
    print(f"lista de puntuaciones para cada estadística: {n_to_sum}")
    if rm.loc[player, "Pos"] != "GK":
        puntuacion_inicial = sum(n_to_sum)
        print(f"puntuación incial (sin normalizar por posiciones) para {player} es {puntuacion_inicial}")
        rm.loc[player, "Puntuación Inicial"] = puntuacion_inicial
        puntuacion_normalizada = (puntuacion_inicial / media_y_desv["medias"][group]) / media_y_desv["desviaciones estándar"][group]
        puntuacion_normalizada = puntuacion_normalizada * 10
        print(f"puntuación normalizada para {player} es {puntuacion_normalizada}")
        rm.loc[player, "Puntuación Normalizada"] = puntuacion_normalizada
    print("**********")


calculando puntuación inicial para Vinicius Júnior
calculando # para Vinicius Júnior, que es un FW
calculando Nation para Vinicius Júnior, que es un FW
calculando Pos para Vinicius Júnior, que es un FW
calculando Age para Vinicius Júnior, que es un FW
calculando Min para Vinicius Júnior, que es un FW
calculando Gls para Vinicius Júnior, que es un FW
dividir 0 entre 0.7
0.0
calculando Ast para Vinicius Júnior, que es un FW
dividir 0 entre 0.25
0.0
calculando PK para Vinicius Júnior, que es un FW
dividir 0 entre 0.05
0.0
calculando PKatt para Vinicius Júnior, que es un FW
calculando Sh para Vinicius Júnior, que es un FW
dividir 2 entre 3.5
0.5714285714285714
calculando SoT para Vinicius Júnior, que es un FW
dividir 0 entre 1.5
0.0
calculando CrdY para Vinicius Júnior, que es un FW
calculando CrdR para Vinicius Júnior, que es un FW
calculando Touches para Vinicius Júnior, que es un FW
dividir 30 entre 40
0.75
calculando Tkl para Vinicius Júnior, que es un FW
dividir 0 entre 0.5
0.0
calcul

In [125]:
rm

Unnamed: 0_level_0,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,...,Att,Cmp%,PrgP,Carries,PrgC,Att,Succ,Group,Puntuación Inicial,Puntuación Normalizada
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Vinicius Júnior,7.0,br BRA,FW,24-106,90,0,0,0,0,2,...,15,53.3,1,24,4,7,1,ataque,1.698286,4.245714
Kylian Mbappé,9.0,fr FRA,FW,25-311,90,0,0,0,0,3,...,15,86.7,1,17,2,4,1,ataque,2.69922,6.748049
Eduardo Camavinga,6.0,fr FRA,LM,21-351,76,0,0,0,0,0,...,29,82.8,5,20,2,1,1,medio,7.641598,5.458285
Brahim Díaz,21.0,ma MAR,RM,25-084,14,0,0,0,0,0,...,2,100.0,0,3,0,0,0,medio,8.495659,6.068328
Aurélien Tchouaméni,14.0,fr FRA,CM,24-273,62,0,0,0,0,0,...,15,86.7,2,7,0,1,0,medio,7.635597,5.453998
Luka Modrić,10.0,hr CRO,CM,39-047,28,0,0,0,0,1,...,19,73.7,2,10,1,0,0,medio,6.507549,4.648249
Federico Valverde,8.0,uy URU,CM,26-096,90,0,0,0,0,1,...,38,57.9,1,22,0,2,1,medio,5.451258,3.893755
Jude Bellingham,5.0,eng ENG,"RM,LM",21-119,90,0,0,0,0,1,...,16,81.3,1,15,0,0,0,medio,7.171884,5.122774
Ferland Mendy,23.0,fr FRA,LB,29-140,85,0,0,0,0,0,...,43,79.1,2,29,0,0,0,defensa,6.958118,4.970084
Fran Garcia,20.0,es ESP,LB,25-073,5,0,0,0,0,0,...,1,100.0,0,0,0,0,0,defensa,8.414094,6.010067
