In [63]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import warnings

warnings.filterwarnings("ignore")

In [64]:
# Load datasets
table = pd.read_csv('../scraping/tables/variable_data.csv')
laliga = pd.read_csv('../scraping/tables/laliga/laliga_all_seasons.csv', index_col=0)

# Set column names for `laliga` from the first row of `table` (excluding the first column)
laliga.columns = table.iloc[0, 1:]

# Get columns to keep based on the second row of `table`
columns_to_keep = table.T.loc[table.iloc[1] == '1'].iloc[:, 0]

# Remove duplicate columns and keep only the desired ones
laliga = laliga.loc[:, ~laliga.columns.duplicated() & laliga.columns.isin(columns_to_keep)]

# Reorder columns: move key identifiers to the front
columns_to_front = ['season_id', 'match_id', 'team_name', 'team_id']
laliga = laliga[columns_to_front + [col for col in laliga.columns if col not in columns_to_front]]

# Split the position and save the 1st one
laliga['Position'] = laliga['Position'].str.split(',').str[0].str.strip()
# Resulting DataFrame
laliga

Unnamed: 0,season_id,match_id,team_name,team_id,Player Name,Shirt Number,Nationality,Position,Age at Season Start,Minutes Played,...,Aerial Duels Won,Shots on Target Against,Goals Against,Saves,Save Percentage,Post-Shot Expected Goals (PSxG),Opponent Crosses Faced,Crosses Stopped,Crosses Stopped Percentage,Defensive Actions Outside Penalty Area
0,1,1,Almería,H,Adri Embarba,10.0,es ESP,LW,31-096,90,...,1,,,,,,,,,
1,1,1,Almería,H,Alejandro Pozo Pozo,17.0,es ESP,RB,24-170,65,...,1,,,,,,,,,
2,1,1,Almería,H,Diego Mariño,1.0,es ESP,GK,33-094,90,...,0,4.0,2.0,2.0,100.0,2.1,6.0,0.0,0.0,2.0
3,1,1,Almería,H,Dion Lopy,6.0,sn SEN,CM,21-190,13,...,0,,,,,,,,,
4,1,1,Almería,H,Gonzalo Melero,11.0,es ESP,CM,29-221,14,...,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79170,7,380,Real Sociedad,A,Raúl Navas,22.0,es ESP,CB,30-009,90,...,0,,,,,,,,,
79171,7,380,Real Sociedad,A,Sergio Canales,16.0,es ESP,RW,27-093,28,...,0,,,,,,,,,
79172,7,380,Real Sociedad,A,Willian José,12.0,br BRA,FW,26-178,90,...,1,,,,,,,,,
79173,7,380,Real Sociedad,A,Xabi Prieto,10.0,es ESP,DM,34-264,2,...,0,,,,,,,,,


In [65]:
w2 = pd.read_csv("../scraping/tables/variable_data.csv")
w2
w2 = w2.transpose()
w2

columns_to_keep = w2[w2[1]=='1'].reset_index()
columns_to_keep = columns_to_keep.iloc[:,1:6]

columns_to_keep.set_index(0)
columns_to_keep.drop(1, axis=1, inplace=True)
columns_to_keep.columns = ["name","def", "mid", "off"]
w2 = columns_to_keep
w2 = w2.dropna()

w2["def"] = w2["def"].astype(int)
w2["mid"] = w2["mid"].astype(int)
w2["off"] = w2["off"].astype(int)

w2["def_w"] = w2["def"] / w2["def"].sum()
w2["mid_w"] = w2["mid"] / w2["mid"].sum()
w2["off_w"] = w2["off"] / w2["off"].sum()
w2

Unnamed: 0,name,def,mid,off,def_w,mid_w,off_w
6,Goals Scored,1,2,3,0.014925,0.02439,0.038462
7,Assists Provided,2,3,3,0.029851,0.036585,0.038462
8,Penalty Kicks Made,1,1,1,0.014925,0.012195,0.012821
9,Total Shots,1,2,3,0.014925,0.02439,0.038462
10,Shots on Target,1,2,3,0.014925,0.02439,0.038462
11,Non-Penalty Expected Goals (npxG),1,2,3,0.014925,0.02439,0.038462
12,Shot-Creating Actions,2,3,3,0.029851,0.036585,0.038462
13,Goal-Creating Actions,2,3,3,0.029851,0.036585,0.038462
14,Total Passes Completed,2,3,2,0.029851,0.036585,0.025641
15,Total Passes Attempted,1,2,1,0.014925,0.02439,0.012821


In [66]:
import pandas as pd

# Supongamos que tienes un DataFrame llamado 'df'
# A continuación, el código para convertirlo al formato deseado:

w = {
    row['name']: {
        'def': row['def_w'],
        'mid': row['mid_w'],
        'off': row['off_w']
    }
    for _, row in w2.iterrows()
}

w


{'Goals Scored': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Assists Provided': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Penalty Kicks Made': {'def': 0.014925373134328358,
  'mid': 0.012195121951219513,
  'off': 0.01282051282051282},
 'Total Shots': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Shots on Target': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Non-Penalty Expected Goals (npxG)': {'def': 0.014925373134328358,
  'mid': 0.024390243902439025,
  'off': 0.038461538461538464},
 'Shot-Creating Actions': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Goal-Creating Actions': {'def': 0.029850746268656716,
  'mid': 0.036585365853658534,
  'off': 0.038461538461538464},
 'Total Passes Completed': {'def': 0.029850746268656716,
  'mid': 0.03

In [67]:
media_y_desv = {
    "medias": {"def": 7, "mid": 7, "off": 2},
    "desviaciones estándar": {"def": 2, "mid": 2, "off": 2},
}

In [68]:
import numpy as np
positions = laliga["Position"].unique()
unique_positions = np.unique(np.concatenate([pos.split(",") for pos in positions]))
unique_positions

array(['AM', 'CB', 'CM', 'DM', 'FW', 'GK', 'LB', 'LM', 'LW', 'RB', 'RM',
       'RW', 'WB'], dtype='<U2')

In [69]:
off_pos = ['FW', 'LW', 'RW']
mid_pos = ['AM', 'CM', 'DM', 'LM', 'RM']
def_pos = ['CB', 'LB', 'RB', 'WB']
gk_pos = ['GK']

In [70]:
posiciones_especiales = ["GK"]

In [71]:
def define_group(position):
    if position in off_pos:
        return "off"
    if position in mid_pos:
        return "mid"
    if position in def_pos:
        return "def"

In [None]:
for player, stat in laliga.iterrows():
    group = define_group(laliga.loc[player,"Position"])
    laliga.loc[player, "Group"] = group

In [None]:
laliga.head(5)

Unnamed: 0,season_id,match_id,team_name,team_id,Player Name,Shirt Number,Nationality,Position,Age at Season Start,Minutes Played,...,Shots on Target Against,Goals Against,Saves,Save Percentage,Post-Shot Expected Goals (PSxG),Opponent Crosses Faced,Crosses Stopped,Crosses Stopped Percentage,Defensive Actions Outside Penalty Area,Group
0,1,1,Almería,H,Adri Embarba,10.0,es ESP,LW,31-096,90,...,,,,,,,,,,off
1,1,1,Almería,H,Alejandro Pozo Pozo,17.0,es ESP,RB,24-170,65,...,,,,,,,,,,def
2,1,1,Almería,H,Diego Mariño,1.0,es ESP,GK,33-094,90,...,4.0,2.0,2.0,100.0,2.1,6.0,0.0,0.0,2.0,
3,1,1,Almería,H,Dion Lopy,6.0,sn SEN,CM,21-190,13,...,,,,,,,,,,mid
4,1,1,Almería,H,Gonzalo Melero,11.0,es ESP,CM,29-221,14,...,,,,,,,,,,mid


In [None]:
# laliga.insert(0, 'ID', range(len(laliga)))

laliga.columns

new = laliga

new = new.drop(new.columns[-10:-1], axis=1)

new = new.drop(new.columns[0:10], axis=1)

df_grouped = new.groupby('Group').mean().iloc[:, 1:]

medias = {col: df_grouped[col].to_dict() for col in df_grouped.columns}

In [None]:
def compute_score(row):
    score = 0
    for col in new.columns:
        if col == "Group":
            continue
        group = row["Group"]
        score += (row[col] / medias[col][group]) * w[col][group]
    return score

new["Score"] = new.apply(compute_score, axis=1)

KeyError: 'Goals Scored'