In [171]:
from functools import reduce
import time
import numpy as np
import pandas as pd
import joblib
import warnings
warnings.filterwarnings("ignore")

In [172]:
# Show every column when a DataFrame is rendered (None removes the column limit);
# the merged per-player table built in this notebook is very wide.
pd.set_option('display.max_columns', None)

In [173]:
def limpiar_tabla(tabla: pd.DataFrame, nombre_tabla: str) -> pd.DataFrame:
    """Flatten the two-level header of a scraped stats table and prune columns.

    The input frame is never modified: all work happens on a copy, so the
    function can be called again on the same raw table (or on its own output)
    without corrupting the column names.

    Args:
        tabla: Stats table. Normally it has two-level (group, stat) columns;
            a table whose columns are already flat skips the flattening step.
        nombre_tabla: Table identifier of the form ``home_<kind>`` or
            ``away_<kind>`` where ``<kind>`` is one of ``standard``, ``pass``,
            ``pass_types``, ``defense``, ``possession``, ``misc`` or ``gk``.
            Any other name still works: every column is then flattened to
            ``"<group>-<stat>"`` and nothing is dropped.

    Returns:
        A new DataFrame with flat column names and the configured columns
        removed.
    """
    # Header groups whose stat name is kept WITHOUT the "<group>-" prefix.
    # Matching is by substring on the group label, so 'Unnamed' covers every
    # auto-generated "Unnamed: N_level_0" label.
    grupos_sin_prefijo = {
        'standard': ['Unnamed', 'Performance', 'Expected', 'SCA', 'Carries'],
        'pass': ['Unnamed'],
        'pass_types': ['Unnamed', 'Pass Types', 'Corner Kicks', 'Outcomes'],
        'defense': ['Unnamed'],
        'possession': ['Unnamed'],
        'misc': ['Unnamed', 'Performance'],
        'gk': ['Unnamed', 'Shot Stopping'],
    }

    # Flattened column names removed from each kind of table.
    columnas_a_eliminar = {
        'standard': ['Touches', 'Blocks', 'Int', 'Tkl', 'CrdY', 'CrdR', 'xAG'],
        'pass': ['Total-Cmp', 'Total-Att', 'Total-Cmp%', 'Short-Cmp%', 'Medium-Cmp%', 'Long-Cmp%'],
        'pass_types': ['In', 'Out', 'Str', 'Cmp', 'Off', 'Blocks'],
        'defense': ['Tackles-Def 3rd', 'Tackles-Mid 3rd', 'Tackles-Att 3rd', 'Challenges-Tkl%', 'Tkl+Int'],
        'possession': ['Touches-Touches', 'Touches-Live', 'Take-Ons-Succ%', 'Take-Ons-Tkld', 'Take-Ons-Tkld%'],
        'misc': ['Crs', 'Int', 'TklW', 'Aerial Duels-Won%'],
        'gk': ['Save%', 'Launched-Cmp%', 'Passes-Launch%', 'Goal Kicks-Launch%', 'Crosses-Stp%'],
    }

    # Home and away tables share one configuration; only "home_"/"away_"
    # prefixed names are looked up, anything else gets the empty defaults.
    lado, _, tipo = nombre_tabla.partition('_')
    if lado not in ('home', 'away'):
        tipo = None
    sin_prefijo = grupos_sin_prefijo.get(tipo, [])

    # Work on a copy so the caller's DataFrame keeps its original header.
    tabla = tabla.copy()

    # Only flatten real two-level headers; already-flat names are left alone,
    # which makes a repeated call harmless.
    if isinstance(tabla.columns, pd.MultiIndex):
        tabla.columns = [
            c[1] if any(grupo in c[0] for grupo in sin_prefijo) else f'{c[0]}-{c[1]}'
            for c in tabla.columns
        ]

    # The standard table loses its last 8 columns (presumably summary stats
    # that the dedicated tables repeat -- TODO confirm against the page layout).
    if tipo == 'standard':
        tabla = tabla.iloc[:, :-8]

    # errors='ignore': a listed column that is absent is simply skipped.
    return tabla.drop(columns=columnas_a_eliminar.get(tipo, []), errors='ignore')

In [174]:
# Matchweek to process; compared against the schedule's 'Wk' column.
fecha = 1
# Schedule page (scores and fixtures) that the match-report links are read from.
URL = 'https://fbref.com/en/comps/21/schedule/Liga-Profesional-Argentina-Scores-and-Fixtures'
# Fitted model used to predict player ratings.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -- only load trusted files.
modelo = joblib.load('../modelo_puntajes/modelos/primer_modelo.pkl')

In [175]:
# First HTML table found at URL, parsed as plain text (hyperlinks are not kept in this read).
df = pd.read_html(URL)[0]
df.head()

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Fri,2024-05-10,19:00,Sarmiento,1.1,1–2,1.2,Instituto,,Estadio Eva Peron de Junín,,Match Report,
1,1.0,Sat,2024-05-11,14:30,Huracán,1.6,3–1,0.5,Defensa y Just,,Estadio Tomás Adolfo Ducó,,Match Report,
2,1.0,Sat,2024-05-11,15:30,Godoy Cruz,1.9,0–1,0.6,Barracas Central,,Estadio Feliciano Gambarte,,Match Report,
3,1.0,Sat,2024-05-11,17:30,Independiente,0.4,1–3,1.3,Talleres,,Estadio Libertadores de América,,Match Report,
4,1.0,Sat,2024-05-11,19:45,River Plate,2.6,3–0,0.3,Cen. Córdoba–SdE,,Estadio Mâs Monumental,,Match Report,


In [176]:
# Second read of the same page, this time keeping hyperlinks: with
# extract_links='body' every body cell becomes a (text, href) tuple.
# NOTE(review): this downloads URL a second time; consider caching the HTML.
links = pd.read_html(URL, extract_links='body')[0]

# Rows of the selected matchweek. .copy() detaches the result from `df`, so
# later cells can add or overwrite columns without writing into a slice.
df_fecha = df[df['Wk'] == fecha].copy()

# Same filter on the link table, comparing on the cell text (first tuple
# element) rather than on the whole (text, href) tuple.
links_fecha = links[links['Wk'].str[0] == str(fecha)]

In [177]:
# Replace the 'Match Report' text with the absolute URL of each report.
# Built as one column assignment instead of `df_fecha[col].iloc[i] = ...`:
# that chained assignment raises SettingWithCopyWarning and silently does
# nothing when pandas Copy-on-Write is active.
# Rows are paired by position, exactly as the per-index loop did.
celdas_reporte = links_fecha['Match Report'].iloc[:len(df_fecha)]
df_fecha = df_fecha.assign(**{
    'Match Report': [
        # each cell is a (text, href) tuple; a missing href yields None
        'https://fbref.com' + celda[1] if celda[1] else None
        for celda in celdas_reporte
    ]
})

In [178]:
df_fecha.head()

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Fri,2024-05-10,19:00,Sarmiento,1.1,1–2,1.2,Instituto,,Estadio Eva Peron de Junín,,https://fbref.com/en/matches/4fefca20/Sarmient...,
1,1.0,Sat,2024-05-11,14:30,Huracán,1.6,3–1,0.5,Defensa y Just,,Estadio Tomás Adolfo Ducó,,https://fbref.com/en/matches/ffacd80b/Huracan-...,
2,1.0,Sat,2024-05-11,15:30,Godoy Cruz,1.9,0–1,0.6,Barracas Central,,Estadio Feliciano Gambarte,,https://fbref.com/en/matches/c22ece76/Godoy-Cr...,
3,1.0,Sat,2024-05-11,17:30,Independiente,0.4,1–3,1.3,Talleres,,Estadio Libertadores de América,,https://fbref.com/en/matches/30e8b80d/Independ...,
4,1.0,Sat,2024-05-11,19:45,River Plate,2.6,3–0,0.3,Cen. Córdoba–SdE,,Estadio Mâs Monumental,,https://fbref.com/en/matches/9c49643c/River-Pl...,


In [179]:

# Label every fixture as "<Home> - <Away>" and keep only the three columns
# used from here on.
df_fecha = df_fecha.assign(
    match=lambda partidos: partidos['Home'] + ' - ' + partidos['Away']
).loc[:, ['match', 'Match Report', 'Score']]

In [180]:
df_fecha.head()

Unnamed: 0,match,Match Report,Score
0,Sarmiento - Instituto,https://fbref.com/en/matches/4fefca20/Sarmient...,1–2
1,Huracán - Defensa y Just,https://fbref.com/en/matches/ffacd80b/Huracan-...,3–1
2,Godoy Cruz - Barracas Central,https://fbref.com/en/matches/c22ece76/Godoy-Cr...,0–1
3,Independiente - Talleres,https://fbref.com/en/matches/30e8b80d/Independ...,1–3
4,River Plate - Cen. Córdoba–SdE,https://fbref.com/en/matches/9c49643c/River-Pl...,3–0


In [181]:
# Report URL, final score and label of the first fixture of the matchweek.
url, score, match = df_fecha.iloc[0][['Match Report', 'Score', 'match']].tolist()

In [182]:
# Parse every HTML table of the match-report page into a list of DataFrames.
dfs = pd.read_html(url)

In [183]:
# Identifiers for the 14 per-team stats tables: the seven home tables first,
# then the same seven kinds for the away side.
nombres_tablas = [
    f'{lado}_{tipo}'
    for lado in ('home', 'away')
    for tipo in ('standard', 'pass', 'pass_types', 'defense', 'possession', 'misc', 'gk')
]

In [184]:
# Clean the 14 stats tables (dfs[3:17], paired by position with
# nombres_tablas) and tag each one with match, team and result columns.
tablas_partido = dfs[3:17]
if len(tablas_partido) != len(nombres_tablas):
    # zip() would silently truncate and the 7-table groups merged later
    # would no longer line up with one team each.
    raise ValueError(
        f'Se esperaban {len(nombres_tablas)} tablas en dfs[3:17], '
        f'se obtuvieron {len(tablas_partido)}'
    )

# Parse the fixture label and the score once instead of once per column.
equipos = match.split(' - ')
marcador = score.split('–')  # en dash, as used in the 'Score' column
goles_local, goles_visitante = int(marcador[0]), int(marcador[1])

tablas_clean = []
for tabla, nombre_tabla in zip(tablas_partido, nombres_tablas):
    # Clean a copy so the raw tables in `dfs` stay untouched and this cell
    # can be re-run without re-downloading the page.
    tabla_limpia = limpiar_tabla(tabla.copy(), nombre_tabla)

    # Goals are stored from the point of view of the table's own team.
    es_local = 'home' in nombre_tabla
    if es_local:
        goles_favor, goles_contra = goles_local, goles_visitante
    else:
        goles_favor, goles_contra = goles_visitante, goles_local

    tabla_limpia = tabla_limpia.assign(
        match=match,
        team=equipos[0] if es_local else equipos[1],
        team_goals=goles_favor,
        conceded_goals=goles_contra,
        win=goles_favor > goles_contra,
        tie=goles_favor == goles_contra,
    )

    # Every table except the goalkeeper ones loses its last row
    # (presumably a team-totals row -- TODO confirm).
    if nombre_tabla not in ['home_gk', 'away_gk']:
        tabla_limpia = tabla_limpia.drop(index=tabla_limpia.index[-1])

    tablas_clean.append(tabla_limpia)

In [185]:
tablas_clean[0]

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie
0,Agustín Fontana,19.0,ar ARG,"FW,LW",27-334,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
1,Joaquín Gho,28.0,ar ARG,FW,20-362,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
2,David Gallardo,30.0,ar ARG,RM,27-072,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
3,Yair Ezequiel Arismendi,26.0,ar ARG,LM,26-035,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
4,Manuel Mónaco,11.0,,"RW,LM",,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
5,Diego Calcaterra,15.0,ar ARG,CM,22-289,90,0,0,0,0,1,0,0.1,0.1,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
6,José Mauri,8.0,it ITA,CM,27-360,69,0,0,0,0,1,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
7,Fernando Godoy,6.0,ar ARG,CM,34-009,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
8,Gabriel Gudiño,20.0,ar ARG,RM,32-055,69,0,0,0,0,0,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
9,Iván Morales Bravo,18.0,cl CHI,FW,24-286,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False


In [186]:
def primera_posicion(pos_str):
    """Return the first entry of a comma-separated position string.

    Missing values (anything ``pd.isna`` reports as missing) come back as
    ``np.nan``; e.g. ``'FW,LW'`` becomes ``'FW'``.
    """
    if not pd.isna(pos_str):
        primera, _, _ = pos_str.partition(',')
        return primera
    return np.nan

# Reduce 'Pos' to the primary position in every table that has that column.
# The loop variable is deliberately not named `df`: that name holds the
# schedule table read at the top of the notebook, and rebinding it here would
# break re-running the cell that filters it by 'Wk'.
for tabla_jugadores in tablas_clean:
    if 'Pos' in tabla_jugadores.columns:
        tabla_jugadores['Pos'] = tabla_jugadores['Pos'].apply(primera_posicion)

In [187]:
tablas_clean[0]

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie
0,Agustín Fontana,19.0,ar ARG,FW,27-334,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
1,Joaquín Gho,28.0,ar ARG,FW,20-362,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
2,David Gallardo,30.0,ar ARG,RM,27-072,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
3,Yair Ezequiel Arismendi,26.0,ar ARG,LM,26-035,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
4,Manuel Mónaco,11.0,,RW,,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
5,Diego Calcaterra,15.0,ar ARG,CM,22-289,90,0,0,0,0,1,0,0.1,0.1,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
6,José Mauri,8.0,it ITA,CM,27-360,69,0,0,0,0,1,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
7,Fernando Godoy,6.0,ar ARG,CM,34-009,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
8,Gabriel Gudiño,20.0,ar ARG,RM,32-055,69,0,0,0,0,0,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False
9,Iván Morales Bravo,18.0,cl CHI,FW,24-286,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False


In [188]:
def merge_and_concat(tablas_clean, group_size=7) -> pd.DataFrame:
    """Merge consecutive groups of tables per player and stack the results.

    ``tablas_clean`` is consumed in chunks of ``group_size`` tables. Within a
    chunk, the first table is the base and every following table is
    left-merged onto it on ``['Player', 'Nation', 'Age', 'team']``. A column
    that the accumulated frame already has is not brought in again, so the
    first occurrence of each column wins.

    Args:
        tablas_clean: Sequence of DataFrames, each containing the four key
            columns.
        group_size: Number of consecutive tables that belong together.

    Returns:
        The merged chunks concatenated with a fresh integer index; an empty
        DataFrame when ``tablas_clean`` is empty.
    """
    claves = ['Player', 'Nation', 'Age', 'team']
    grupos_combinados = []

    for inicio in range(0, len(tablas_clean), group_size):
        grupo = tablas_clean[inicio:inicio + group_size]
        combinado = grupo[0]
        for tabla in grupo[1:]:
            # Keep the join keys plus only the columns not seen yet. Selecting
            # them up front avoids generating suffixed duplicates that would
            # have to be filtered out by name afterwards.
            es_nueva = [
                col in claves or col not in combinado.columns
                for col in tabla.columns
            ]
            combinado = combinado.merge(tabla.loc[:, es_nueva], how='left', on=claves)
        grupos_combinados.append(combinado)

    # pd.concat raises on an empty list; return an empty frame instead.
    if not grupos_combinados:
        return pd.DataFrame()
    return pd.concat(grupos_combinados, ignore_index=True)

# Merge the tables in groups of seven (one group per team, following the
# order of nombres_tablas) and stack the two teams into a single frame.
df_merged = merge_and_concat(tablas_clean, group_size=7)

In [189]:
df_merged.head()

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie,Total-TotDist,Total-PrgDist,Short-Cmp,Short-Att,Medium-Cmp,Medium-Att,Long-Cmp,Long-Att,xAG,xA,KP,1/3,PPA,CrsPA,PrgP,Att,Live,Dead,FK,TB,Sw,Crs,TI,CK,Tackles-Tkl,Tackles-TklW,Challenges-Tkl,Challenges-Att,Challenges-Lost,Blocks-Blocks,Blocks-Sh,Blocks-Pass,Int,Clr,Err,Touches-Def Pen,Touches-Def 3rd,Touches-Mid 3rd,Touches-Att 3rd,Touches-Att Pen,Take-Ons-Att,Take-Ons-Succ,Carries-Carries,Carries-TotDist,Carries-PrgDist,Carries-PrgC,Carries-1/3,Carries-CPA,Carries-Mis,Carries-Dis,Receiving-Rec,Receiving-PrgR,CrdY,CrdR,2CrdY,Fls,Fld,Off,PKwon,PKcon,OG,Recov,Aerial Duels-Won,Aerial Duels-Lost,SoTA,GA,Saves,PSxG,Launched-Cmp,Launched-Att,Passes-Att (GK),Passes-Thr,Passes-AvgLen,Goal Kicks-Att,Goal Kicks-AvgLen,Crosses-Opp,Crosses-Stp,Sweeper-#OPA,Sweeper-AvgDist
0,Agustín Fontana,19.0,ar ARG,FW,27-334,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,182,10,3,6,4,6,2,3,0.3,0.0,1,0,0,0,1,18,15,3,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,14,16,6,2,0,16,48,20,0,0,0,5,1,20,4,0,0,0,1,0,2,0,0,0,1,0,4,,,,,,,,,,,,,,,
1,Joaquín Gho,28.0,ar ARG,FW,20-362,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,110,39,4,10,3,6,0,2,0.1,0.0,1,1,1,0,2,21,21,0,0,0,0,4,0,0,2,1,2,2,0,2,0,2,2,0,0,0,6,16,10,1,0,0,11,82,17,1,1,0,3,1,12,7,0,0,0,0,1,0,0,0,0,6,2,3,,,,,,,,,,,,,,,
2,David Gallardo,30.0,ar ARG,RM,27-072,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,57,28,3,3,1,3,0,1,0.0,0.0,0,0,0,0,0,7,5,2,0,0,0,3,2,0,1,1,1,1,0,0,0,0,0,0,0,0,2,6,5,0,1,1,6,75,49,1,0,0,2,1,7,1,0,0,0,0,1,0,0,0,0,2,1,0,,,,,,,,,,,,,,,
3,Yair Ezequiel Arismendi,26.0,ar ARG,LM,26-035,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,112,36,3,4,3,7,1,3,0.0,0.0,0,0,1,0,1,15,12,3,0,0,0,4,2,1,0,0,0,0,0,2,0,2,0,0,0,0,2,6,12,1,0,0,4,88,1,0,0,0,0,0,9,3,1,0,0,1,1,0,0,0,0,3,1,2,,,,,,,,,,,,,,,
4,Manuel Mónaco,11.0,,RW,,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,54,11,5,7,1,2,0,1,0.0,0.0,0,0,0,0,0,11,11,0,0,0,0,1,0,0,1,1,1,3,2,0,0,0,0,0,0,0,1,7,8,1,2,1,10,25,17,0,1,0,1,0,7,3,0,0,0,0,1,0,0,0,0,2,0,1,,,,,,,,,,,,,,,


In [190]:
# Collapse left/right variants into one side-agnostic position code.
df_merged['Pos'] = (
    df_merged['Pos']
    .replace(['RW', 'LW'], 'W')    # wingers
    .replace(['LM', 'RM'], 'M')    # wide midfielders
    .replace(['LB', 'RB'], 'FB')   # full-backs
)

In [191]:
df_merged.head()

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie,Total-TotDist,Total-PrgDist,Short-Cmp,Short-Att,Medium-Cmp,Medium-Att,Long-Cmp,Long-Att,xAG,xA,KP,1/3,PPA,CrsPA,PrgP,Att,Live,Dead,FK,TB,Sw,Crs,TI,CK,Tackles-Tkl,Tackles-TklW,Challenges-Tkl,Challenges-Att,Challenges-Lost,Blocks-Blocks,Blocks-Sh,Blocks-Pass,Int,Clr,Err,Touches-Def Pen,Touches-Def 3rd,Touches-Mid 3rd,Touches-Att 3rd,Touches-Att Pen,Take-Ons-Att,Take-Ons-Succ,Carries-Carries,Carries-TotDist,Carries-PrgDist,Carries-PrgC,Carries-1/3,Carries-CPA,Carries-Mis,Carries-Dis,Receiving-Rec,Receiving-PrgR,CrdY,CrdR,2CrdY,Fls,Fld,Off,PKwon,PKcon,OG,Recov,Aerial Duels-Won,Aerial Duels-Lost,SoTA,GA,Saves,PSxG,Launched-Cmp,Launched-Att,Passes-Att (GK),Passes-Thr,Passes-AvgLen,Goal Kicks-Att,Goal Kicks-AvgLen,Crosses-Opp,Crosses-Stp,Sweeper-#OPA,Sweeper-AvgDist
0,Agustín Fontana,19.0,ar ARG,FW,27-334,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,182,10,3,6,4,6,2,3,0.3,0.0,1,0,0,0,1,18,15,3,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,14,16,6,2,0,16,48,20,0,0,0,5,1,20,4,0,0,0,1,0,2,0,0,0,1,0,4,,,,,,,,,,,,,,,
1,Joaquín Gho,28.0,ar ARG,FW,20-362,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,110,39,4,10,3,6,0,2,0.1,0.0,1,1,1,0,2,21,21,0,0,0,0,4,0,0,2,1,2,2,0,2,0,2,2,0,0,0,6,16,10,1,0,0,11,82,17,1,1,0,3,1,12,7,0,0,0,0,1,0,0,0,0,6,2,3,,,,,,,,,,,,,,,
2,David Gallardo,30.0,ar ARG,M,27-072,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,57,28,3,3,1,3,0,1,0.0,0.0,0,0,0,0,0,7,5,2,0,0,0,3,2,0,1,1,1,1,0,0,0,0,0,0,0,0,2,6,5,0,1,1,6,75,49,1,0,0,2,1,7,1,0,0,0,0,1,0,0,0,0,2,1,0,,,,,,,,,,,,,,,
3,Yair Ezequiel Arismendi,26.0,ar ARG,M,26-035,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,112,36,3,4,3,7,1,3,0.0,0.0,0,0,1,0,1,15,12,3,0,0,0,4,2,1,0,0,0,0,0,2,0,2,0,0,0,0,2,6,12,1,0,0,4,88,1,0,0,0,0,0,9,3,1,0,0,1,1,0,0,0,0,3,1,2,,,,,,,,,,,,,,,
4,Manuel Mónaco,11.0,,W,,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,54,11,5,7,1,2,0,1,0.0,0.0,0,0,0,0,0,11,11,0,0,0,0,1,0,0,1,1,1,3,2,0,0,0,0,0,0,0,1,7,8,1,2,1,10,25,17,0,1,0,1,0,7,3,0,0,0,0,1,0,0,0,0,2,0,1,,,,,,,,,,,,,,,


In [192]:
# Rows that had no match in one of the left-merged tables carry NaN in that
# table's columns; every missing value becomes 0. This also turns missing
# text fields such as Nation and Age into 0.
df_merged = df_merged.fillna(0)

In [193]:
# Feature matrix for the model: drop the identifier columns (those that are
# present) and one-hot encode the position as integer 0/1 columns.
X_test = pd.get_dummies(
    df_merged.drop(columns=['Player', '#', 'Nation', 'team', 'Age'], errors='ignore'),
    columns=['Pos'],
    dtype=int,
)

# Align with the columns the model was fitted on, in the same order: features
# absent from this match (e.g. a position nobody played) are added as 0 and
# columns the model does not know are discarded.
X_test = X_test.reindex(columns=modelo.feature_names_in_, fill_value=0)

In [194]:
# Raw model prediction for every player row, stored unrounded.
df_merged['puntaje_modelo'] = modelo.predict(X_test)

In [195]:
df_merged.head()

Unnamed: 0,Player,#,Nation,Pos,Age,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie,Total-TotDist,Total-PrgDist,Short-Cmp,Short-Att,Medium-Cmp,Medium-Att,Long-Cmp,Long-Att,xAG,xA,KP,1/3,PPA,CrsPA,PrgP,Att,Live,Dead,FK,TB,Sw,Crs,TI,CK,Tackles-Tkl,Tackles-TklW,Challenges-Tkl,Challenges-Att,Challenges-Lost,Blocks-Blocks,Blocks-Sh,Blocks-Pass,Int,Clr,Err,Touches-Def Pen,Touches-Def 3rd,Touches-Mid 3rd,Touches-Att 3rd,Touches-Att Pen,Take-Ons-Att,Take-Ons-Succ,Carries-Carries,Carries-TotDist,Carries-PrgDist,Carries-PrgC,Carries-1/3,Carries-CPA,Carries-Mis,Carries-Dis,Receiving-Rec,Receiving-PrgR,CrdY,CrdR,2CrdY,Fls,Fld,Off,PKwon,PKcon,OG,Recov,Aerial Duels-Won,Aerial Duels-Lost,SoTA,GA,Saves,PSxG,Launched-Cmp,Launched-Att,Passes-Att (GK),Passes-Thr,Passes-AvgLen,Goal Kicks-Att,Goal Kicks-AvgLen,Crosses-Opp,Crosses-Stp,Sweeper-#OPA,Sweeper-AvgDist,puntaje_modelo
0,Agustín Fontana,19.0,ar ARG,FW,27-334,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,182,10,3,6,4,6,2,3,0.3,0.0,1,0,0,0,1,18,15,3,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,14,16,6,2,0,16,48,20,0,0,0,5,1,20,4,0,0,0,1,0,2,0,0,0,1,0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.012663
1,Joaquín Gho,28.0,ar ARG,FW,20-362,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,110,39,4,10,3,6,0,2,0.1,0.0,1,1,1,0,2,21,21,0,0,0,0,4,0,0,2,1,2,2,0,2,0,2,2,0,0,0,6,16,10,1,0,0,11,82,17,1,1,0,3,1,12,7,0,0,0,0,1,0,0,0,0,6,2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.087499
2,David Gallardo,30.0,ar ARG,M,27-072,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,57,28,3,3,1,3,0,1,0.0,0.0,0,0,0,0,0,7,5,2,0,0,0,3,2,0,1,1,1,1,0,0,0,0,0,0,0,0,2,6,5,0,1,1,6,75,49,1,0,0,2,1,7,1,0,0,0,0,1,0,0,0,0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.996081
3,Yair Ezequiel Arismendi,26.0,ar ARG,M,26-035,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,112,36,3,4,3,7,1,3,0.0,0.0,0,0,1,0,1,15,12,3,0,0,0,4,2,1,0,0,0,0,0,2,0,2,0,0,0,0,2,6,12,1,0,0,4,88,1,0,0,0,0,0,9,3,1,0,0,1,1,0,0,0,0,3,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.837017
4,Manuel Mónaco,11.0,0,W,0,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,54,11,5,7,1,2,0,1,0.0,0.0,0,0,0,0,0,11,11,0,0,0,0,1,0,0,1,1,1,3,2,0,0,0,0,0,0,0,1,7,8,1,2,1,10,25,17,0,1,0,1,0,7,3,0,0,0,0,1,0,0,0,0,2,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.958793


In [196]:
# asignar figura del partido
# Player of the match: whoever holds the highest raw model score (every
# player tied at the maximum is flagged).
df_merged['figura'] = df_merged['puntaje_modelo'].eq(df_merged['puntaje_modelo'].max())

# Displayed rating: rounded to a whole number and limited to the 1-10 range.
df_merged['puntaje'] = df_merged['puntaje_modelo'].round().clip(lower=1, upper=10)

In [197]:
df_merged[['Player', 'Pos', 'team', 'puntaje', 'figura']].sort_values('puntaje', ascending=False).head(10)

Unnamed: 0,Player,Pos,team,puntaje,figura
18,Santiago Rodriguez,FW,Instituto,9.0,True
13,Juan Guasone,CB,Sarmiento,7.0,False
21,Gastón Lodico,CM,Instituto,7.0,False
31,Manuel Roffo,GK,Instituto,7.0,False
20,Jonás Acevedo,M,Instituto,7.0,False
19,Gregorio Rodríguez,FW,Instituto,7.0,False
29,Gonzalo Requena,CB,Instituto,7.0,False
23,Damián Puebla,M,Instituto,7.0,False
27,Fernando Alarcón,CB,Instituto,7.0,False
30,Giuliano Cerato,FB,Instituto,7.0,False


In [198]:
print(df_merged[['Player', 'Pos', 'team', 'puntaje', 'figura']].sort_values('puntaje', ascending=False).head(10))

                Player Pos       team  puntaje  figura
18  Santiago Rodriguez  FW  Instituto      9.0    True
13        Juan Guasone  CB  Sarmiento      7.0   False
21       Gastón Lodico  CM  Instituto      7.0   False
31        Manuel Roffo  GK  Instituto      7.0   False
20       Jonás Acevedo   M  Instituto      7.0   False
19  Gregorio Rodríguez  FW  Instituto      7.0   False
29     Gonzalo Requena  CB  Instituto      7.0   False
23       Damián Puebla   M  Instituto      7.0   False
27    Fernando Alarcón  CB  Instituto      7.0   False
30     Giuliano Cerato  FB  Instituto      7.0   False


In [199]:
df_merged[['Gls', 'Ast', 'PK', 'conceded_goals', 'CrdY', 'CrdR', 'OG', 'PKatt', 'figura']].dtypes

Gls               int64
Ast               int64
PK                int64
conceded_goals    int64
CrdY              int64
CrdR              int64
OG                int64
PKatt             int64
figura             bool
dtype: object

In [200]:
# calculamos el puntaje total (el que cuenta en el juego)
def calcular_puntaje_total(row):
    """Turn a player's base rating into the total game score.

    ``row['puntaje']`` is updated in place and the same ``row`` is returned.
    Applied on top of the base rating:

    * a position-dependent bonus per non-penalty goal (``Gls - PK``) and per
      assist, plus a clean-sheet bonus for DM, FB/WB, CB and GK when
      ``conceded_goals`` is 0;
    * penalties: 4 per red card, 3 per own goal, 2 per penalty attempt that
      was not scored (``PKatt - PK``) and 2 per yellow card;
    * 4 extra points when ``figura`` is true.

    An unknown position gets no positional bonus and a message is printed.
    """
    # position -> (points per non-penalty goal, points per assist, clean-sheet bonus)
    bonos_por_posicion = {
        'FW': (3, 1, 0),
        'W': (4, 1, 0),
        'AM': (4, 1, 0),
        'M': (5, 1, 0),
        'CM': (5, 1, 0),
        'DM': (6, 2, 1),
        'FB': (8, 2, 2),
        'WB': (8, 2, 2),
        'CB': (6, 3, 2),
        'GK': (10, 4, 3),
    }

    bonos = bonos_por_posicion.get(row['Pos'])
    if bonos is None:
        print(f'Posición no reconocida: {row["Pos"]}')
    else:
        por_gol, por_asistencia, por_valla_invicta = bonos
        row['puntaje'] += (row['Gls'] - row['PK']) * por_gol + row['Ast'] * por_asistencia
        # Attacking positions have no clean-sheet bonus (value 0 in the table).
        if por_valla_invicta and row['conceded_goals'] == 0:
            row['puntaje'] += por_valla_invicta

    # Deductions are the same for every position.
    descuento = (
        row['CrdR'] * 4
        + row['OG'] * 3
        + (row['PKatt'] - row['PK']) * 2
        + row['CrdY'] * 2
    )
    row['puntaje'] -= descuento

    if row['figura'] == True:
        row['puntaje'] += 4

    return row

In [201]:
# Apply the game scoring rules row by row. The function adds to the existing
# 'puntaje' value, so re-running this cell without first recomputing
# 'puntaje' applies the bonuses and deductions a second time.
df_merged = df_merged.apply(calcular_puntaje_total, axis=1)

In [202]:
# Drop identifier columns that are no longer needed. Non-inplace with
# errors='ignore' so re-running the cell is harmless (the in-place version
# raised KeyError once the columns were gone).
df_merged = df_merged.drop(columns=['#', 'Nation', 'Age'], errors='ignore')

In [203]:
df_merged.head()

Unnamed: 0,Player,Pos,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie,Total-TotDist,Total-PrgDist,Short-Cmp,Short-Att,Medium-Cmp,Medium-Att,Long-Cmp,Long-Att,xAG,xA,KP,1/3,PPA,CrsPA,PrgP,Att,Live,Dead,FK,TB,Sw,Crs,TI,CK,Tackles-Tkl,Tackles-TklW,Challenges-Tkl,Challenges-Att,Challenges-Lost,Blocks-Blocks,Blocks-Sh,Blocks-Pass,Int,Clr,Err,Touches-Def Pen,Touches-Def 3rd,Touches-Mid 3rd,Touches-Att 3rd,Touches-Att Pen,Take-Ons-Att,Take-Ons-Succ,Carries-Carries,Carries-TotDist,Carries-PrgDist,Carries-PrgC,Carries-1/3,Carries-CPA,Carries-Mis,Carries-Dis,Receiving-Rec,Receiving-PrgR,CrdY,CrdR,2CrdY,Fls,Fld,Off,PKwon,PKcon,OG,Recov,Aerial Duels-Won,Aerial Duels-Lost,SoTA,GA,Saves,PSxG,Launched-Cmp,Launched-Att,Passes-Att (GK),Passes-Thr,Passes-AvgLen,Goal Kicks-Att,Goal Kicks-AvgLen,Crosses-Opp,Crosses-Stp,Sweeper-#OPA,Sweeper-AvgDist,puntaje_modelo,figura,puntaje
0,Agustín Fontana,FW,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,182,10,3,6,4,6,2,3,0.3,0.0,1,0,0,0,1,18,15,3,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,14,16,6,2,0,16,48,20,0,0,0,5,1,20,4,0,0,0,1,0,2,0,0,0,1,0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.012663,False,6.0
1,Joaquín Gho,FW,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,110,39,4,10,3,6,0,2,0.1,0.0,1,1,1,0,2,21,21,0,0,0,0,4,0,0,2,1,2,2,0,2,0,2,2,0,0,0,6,16,10,1,0,0,11,82,17,1,1,0,3,1,12,7,0,0,0,0,1,0,0,0,0,6,2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.087499,False,6.0
2,David Gallardo,M,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,57,28,3,3,1,3,0,1,0.0,0.0,0,0,0,0,0,7,5,2,0,0,0,3,2,0,1,1,1,1,0,0,0,0,0,0,0,0,2,6,5,0,1,1,6,75,49,1,0,0,2,1,7,1,0,0,0,0,1,0,0,0,0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.996081,False,6.0
3,Yair Ezequiel Arismendi,M,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,112,36,3,4,3,7,1,3,0.0,0.0,0,0,1,0,1,15,12,3,0,0,0,4,2,1,0,0,0,0,0,2,0,2,0,0,0,0,2,6,12,1,0,0,4,88,1,0,0,0,0,0,9,3,1,0,0,1,1,0,0,0,0,3,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.837017,False,4.0
4,Manuel Mónaco,W,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,54,11,5,7,1,2,0,1,0.0,0.0,0,0,0,0,0,11,11,0,0,0,0,1,0,0,1,1,1,3,2,0,0,0,0,0,0,0,1,7,8,1,2,1,10,25,17,0,1,0,1,0,7,3,0,0,0,0,1,0,0,0,0,2,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.958793,False,6.0


In [204]:
# List that collects the scored per-match DataFrames (only one match is
# appended in this notebook).
rendimientos = []

In [205]:
# Store this match's scored table. Re-running this cell appends the same
# table again, duplicating its rows in the concatenation below.
rendimientos.append(df_merged)

In [206]:
# Stack every collected match table into one frame with a fresh integer index.
df_fecha_concat = pd.concat(rendimientos, ignore_index=True)

In [207]:
# Tag every row with the matchweek number being processed.
df_fecha_concat['fecha'] = fecha

In [208]:
df_fecha_concat

Unnamed: 0,Player,Pos,Min,Gls,Ast,PK,PKatt,Sh,SoT,xG,npxG,SCA,GCA,match,team,team_goals,conceded_goals,win,tie,Total-TotDist,Total-PrgDist,Short-Cmp,Short-Att,Medium-Cmp,Medium-Att,Long-Cmp,Long-Att,xAG,xA,KP,1/3,PPA,CrsPA,PrgP,Att,Live,Dead,FK,TB,Sw,Crs,TI,CK,Tackles-Tkl,Tackles-TklW,Challenges-Tkl,Challenges-Att,Challenges-Lost,Blocks-Blocks,Blocks-Sh,Blocks-Pass,Int,Clr,Err,Touches-Def Pen,Touches-Def 3rd,Touches-Mid 3rd,Touches-Att 3rd,Touches-Att Pen,Take-Ons-Att,Take-Ons-Succ,Carries-Carries,Carries-TotDist,Carries-PrgDist,Carries-PrgC,Carries-1/3,Carries-CPA,Carries-Mis,Carries-Dis,Receiving-Rec,Receiving-PrgR,CrdY,CrdR,2CrdY,Fls,Fld,Off,PKwon,PKcon,OG,Recov,Aerial Duels-Won,Aerial Duels-Lost,SoTA,GA,Saves,PSxG,Launched-Cmp,Launched-Att,Passes-Att (GK),Passes-Thr,Passes-AvgLen,Goal Kicks-Att,Goal Kicks-AvgLen,Crosses-Opp,Crosses-Stp,Sweeper-#OPA,Sweeper-AvgDist,puntaje_modelo,figura,puntaje,fecha
0,Agustín Fontana,FW,90,0,0,0,0,4,1,0.6,0.6,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,182,10,3,6,4,6,2,3,0.3,0.0,1,0,0,0,1,18,15,3,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,14,16,6,2,0,16,48,20,0,0,0,5,1,20,4,0,0,0,1,0,2,0,0,0,1,0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.012663,False,6.0,1
1,Joaquín Gho,FW,69,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,110,39,4,10,3,6,0,2,0.1,0.0,1,1,1,0,2,21,21,0,0,0,0,4,0,0,2,1,2,2,0,2,0,2,2,0,0,0,6,16,10,1,0,0,11,82,17,1,1,0,3,1,12,7,0,0,0,0,1,0,0,0,0,6,2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.087499,False,6.0,1
2,David Gallardo,M,21,0,0,0,0,1,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,57,28,3,3,1,3,0,1,0.0,0.0,0,0,0,0,0,7,5,2,0,0,0,3,2,0,1,1,1,1,0,0,0,0,0,0,0,0,2,6,5,0,1,1,6,75,49,1,0,0,2,1,7,1,0,0,0,0,1,0,0,0,0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.996081,False,6.0,1
3,Yair Ezequiel Arismendi,M,45,0,0,0,0,2,1,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,112,36,3,4,3,7,1,3,0.0,0.0,0,0,1,0,1,15,12,3,0,0,0,4,2,1,0,0,0,0,0,2,0,2,0,0,0,0,2,6,12,1,0,0,4,88,1,0,0,0,0,0,9,3,1,0,0,1,1,0,0,0,0,3,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.837017,False,4.0,1
4,Manuel Mónaco,W,45,0,0,0,0,2,1,0.1,0.1,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,54,11,5,7,1,2,0,1,0.0,0.0,0,0,0,0,0,11,11,0,0,0,0,1,0,0,1,1,1,3,2,0,0,0,0,0,0,0,1,7,8,1,2,1,10,25,17,0,1,0,1,0,7,3,0,0,0,0,1,0,0,0,0,2,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.958793,False,6.0,1
5,Diego Calcaterra,CM,90,0,0,0,0,1,0,0.1,0.1,4,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,432,161,7,12,12,13,3,5,0.1,0.1,4,1,0,0,5,33,33,0,0,0,1,0,0,0,1,1,1,1,0,1,0,1,1,0,0,1,7,27,7,6,1,0,14,40,11,0,0,0,0,1,16,2,0,0,0,2,3,0,0,0,0,4,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.180746,False,6.0,1
6,José Mauri,CM,69,0,0,0,0,1,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,228,83,2,6,8,9,1,2,0.0,0.0,2,1,1,0,6,18,17,1,1,0,0,0,0,0,1,1,0,0,0,2,1,1,0,0,0,0,9,14,7,0,0,0,12,51,15,0,1,0,1,0,13,1,0,0,0,0,2,0,0,0,0,4,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.921443,False,6.0,1
7,Fernando Godoy,CM,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,97,10,2,5,3,3,0,2,0.0,0.0,0,0,0,0,0,10,7,3,1,0,0,0,2,0,0,0,0,1,1,0,0,0,0,0,0,0,2,6,2,0,0,0,5,17,11,0,1,0,0,0,3,0,0,0,0,1,0,0,0,0,0,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.595305,False,6.0,1
8,Gabriel Gudiño,M,69,0,0,0,0,0,0,0.0,0.0,3,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,145,48,7,7,0,3,2,5,0.0,0.0,1,0,0,0,0,17,12,5,1,0,0,3,2,2,4,0,1,1,0,2,0,2,0,0,0,1,3,13,17,2,3,2,12,90,47,2,0,1,3,1,14,5,0,0,0,2,0,0,0,0,0,1,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.016045,False,6.0,1
9,Iván Morales Bravo,FW,21,0,0,0,0,0,0,0.0,0.0,0,0,Sarmiento - Instituto,Sarmiento,1,2,False,False,11,3,1,1,0,0,0,0,0.0,0.0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,8,3,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.708672,False,6.0,1
