In [283]:
import numpy as np
import pandas as pd

In [284]:
# Input files, relative to the notebook's working directory.
LALIGA_CSV = '../scraping/tables/laliga/laliga_all_seasons.csv'
VARIABLES_CSV = '../scraping/tables/variable_data.csv'

# Match statistics; the first CSV column is used as the row index.
laliga = pd.read_csv(LALIGA_CSV, index_col=0)
# Variable table; the cleanup cell below reads its first row as the column
# names and its second row as keep/drop flags.
table = pd.read_csv(VARIABLES_CSV)

In [285]:
# NOTE(review): this cell rebinds `laliga` to a trimmed frame, so re-running it
# without re-running the loading cell first will likely fail on the column
# assignment below (the header length would no longer match).

# Rename the `laliga` columns with the first row of `table`, skipping that row's first entry.
laliga.columns = table.iloc[0, 1:]

# Variable names (first row of `table`) whose flag in the second row is the string '1'.
columns_to_keep = table.T.loc[table.iloc[1] == '1'].iloc[:, 0]

# Keep the first occurrence of every column name, restricted to the flagged variables.
laliga = laliga.loc[:, ~laliga.columns.duplicated() & laliga.columns.isin(columns_to_keep)]

# Move the key identifiers to the front; the other columns keep their current order.
columns_to_front = ['season_id', 'match_id', 'team_name', 'team_id']
laliga = laliga[columns_to_front + [col for col in laliga.columns if col not in columns_to_front]]

# `Position` may hold several comma-separated codes; keep only the first one, stripped.
laliga['Position'] = laliga['Position'].str.split(',').str[0].str.strip()

# Distinct position codes present in the data. Missing positions are dropped
# first because `str.split` is not defined on NaN and would raise here.
positions = laliga["Position"].dropna().unique()
unique_positions = np.unique(np.concatenate([pos.split(",") for pos in positions]))

# Position codes that make up each positional group.
off_pos = ['FW', 'LW', 'RW']
mid_pos = ['AM', 'CM', 'DM', 'LM', 'RM']
def_pos = ['CB', 'LB', 'RB', 'WB']
gk_pos = ['GK']


def define_group(position):
    """Map a single position code to its positional group.

    Parameters
    ----------
    position
        A position code such as ``'FW'`` or ``'GK'``.

    Returns
    -------
    str or None
        ``'off'``, ``'mid'``, ``'def'`` or ``'gk'`` when the code appears in the
        matching list above; ``None`` when it appears in none of them (this
        includes missing values such as NaN).
    """
    if position in off_pos:
        return "off"
    if position in mid_pos:
        return "mid"
    if position in def_pos:
        return "def"
    if position in gk_pos:
        return "gk"
    # Unrecognised or missing codes are reported as "no group" instead of
    # silently being counted as goalkeepers.
    return None

# Derive every row's positional group from its position code in a single pass.
# Writing cell by cell through `.loc` inside `iterrows()` is much slower on a
# large frame and raises if an index label is repeated (the lookup then yields
# a Series instead of a scalar).
laliga["Group"] = laliga["Position"].map(define_group)

# Columns carried forward: identifiers first, then the match statistics,
# and finally the positional group.
identifier_columns = ['Player Name', 'season_id', 'match_id', 'team_name', 'team_id']
stat_columns = [
    'Minutes Played',
    'Goals Scored',
    'Assists Provided',
    'Goals Against',
    'Saves',
    'Penalty Kicks Conceded',
    'Penalty Kicks Won',
    'Penalty Kicks Made',
    'Penalty Kicks Attempted',
    'Yellow Cards',
    'Red Cards',
    'Shots on Target',
    'Successful Take-Ons',
    'Carries into Penalty Area',
    'Ball Recoveries',
    'Clearances Made',
    'Miscontrols',
]
laliga = laliga[identifier_columns + stat_columns + ['Group']]


In [286]:
# Show the trimmed frame with its new `Group` column (rendered as a truncated table).
laliga

Unnamed: 0,Player Name,season_id,match_id,team_name,team_id,Minutes Played,Goals Scored,Assists Provided,Goals Against,Saves,...,Penalty Kicks Attempted,Yellow Cards,Red Cards,Shots on Target,Successful Take-Ons,Carries into Penalty Area,Ball Recoveries,Clearances Made,Miscontrols,Group
0,Adri Embarba,1,1,Almería,H,90,0,0,,,...,0,0,0,0,2,0,5,0,4,off
1,Alejandro Pozo Pozo,1,1,Almería,H,65,0,0,,,...,0,0,0,0,0,0,1,4,0,def
2,Diego Mariño,1,1,Almería,H,90,0,0,2.0,2.0,...,0,0,0,0,0,0,1,1,0,gk
3,Dion Lopy,1,1,Almería,H,13,0,0,,,...,0,0,0,0,0,0,1,0,0,mid
4,Gonzalo Melero,1,1,Almería,H,14,0,0,,,...,0,0,0,0,1,0,0,0,1,mid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79170,Raúl Navas,7,380,Real Sociedad,A,90,0,0,,,...,0,1,0,0,0,0,2,2,0,def
79171,Sergio Canales,7,380,Real Sociedad,A,28,0,0,,,...,0,0,0,0,0,0,3,0,0,off
79172,Willian José,7,380,Real Sociedad,A,90,0,0,,,...,0,0,0,1,2,0,1,0,1,off
79173,Xabi Prieto,7,380,Real Sociedad,A,2,0,0,,,...,0,0,0,0,0,0,0,0,0,mid


In [287]:
# Scoring tables and the function that turns one player-match row into points.

# Points per goal scored, by positional group.
_GOAL_POINTS = {'gk': 6, 'def': 6, 'mid': 5, 'off': 4}
# Clean-sheet bonus, by positional group.
_CLEAN_SHEET_POINTS = {'gk': 4, 'def': 3, 'mid': 2, 'off': 1}
# Number of miscontrols that cost one point, by positional group.
_MISCONTROLS_PER_POINT = {'gk': 8, 'def': 8, 'mid': 10, 'off': 12}
# Minutes that earn the higher appearance score and qualify for a clean sheet.
_MIN_MINUTES_FULL_CREDIT = 60


def _count(row, column):
    """Return ``row[column]``, reading a missing value (NaN/None) as 0."""
    value = row[column]
    return 0 if pd.isna(value) else value


def calculate_points(row):
    """Compute the fantasy points of one player in one match.

    Parameters
    ----------
    row
        A mapping-like record (e.g. a DataFrame row) with the keys
        'Group', 'Minutes Played', 'Goals Scored', 'Assists Provided',
        'Goals Against', 'Saves', 'Penalty Kicks Conceded', 'Penalty Kicks Won',
        'Penalty Kicks Made', 'Penalty Kicks Attempted', 'Yellow Cards',
        'Red Cards', 'Shots on Target', 'Successful Take-Ons',
        'Carries into Penalty Area', 'Ball Recoveries', 'Clearances Made'
        and 'Miscontrols'.

    Returns
    -------
    number
        The total points for the row. Missing counting statistics are read as
        0 so one absent value cannot turn the whole total into NaN.
        'Goals Against' is deliberately NOT defaulted: a missing value means
        "not recorded", so it triggers neither the clean-sheet bonus nor the
        goals-against deduction.
    """
    group = row['Group']
    minutes = row['Minutes Played']
    goals_against = row['Goals Against']

    points = 0

    # Appearance: 1 point below the threshold, 2 points from the threshold on.
    if minutes < _MIN_MINUTES_FULL_CREDIT:
        points += 1
    elif minutes >= _MIN_MINUTES_FULL_CREDIT:
        points += 2

    # Goals are worth more the further back the player's group is; a row
    # without a recognised group earns nothing for goals.
    points += _GOAL_POINTS.get(group, 0) * _count(row, 'Goals Scored')
    points += 3 * _count(row, 'Assists Provided')

    # Penalties
    penalties_made = _count(row, 'Penalty Kicks Made')
    points -= 2 * (_count(row, 'Penalty Kicks Attempted') - penalties_made)  # missed penalties
    points += 2 * _count(row, 'Penalty Kicks Won')
    points -= 2 * _count(row, 'Penalty Kicks Conceded')
    # NOTE(review): this bonus was labelled "penalties stopped (goalkeeper's
    # actions)" but it reads 'Penalty Kicks Made', i.e. 5 points per penalty
    # scored. Scoring is left unchanged here; confirm the intended column.
    points += 5 * penalties_made

    # Clean sheet: uses the same minutes threshold as the appearance score, so
    # a player with exactly the threshold minutes qualifies as well.
    if minutes >= _MIN_MINUTES_FULL_CREDIT and goals_against == 0:
        points += _CLEAN_SHEET_POINTS.get(group, 0)

    # Goals against: flat -2 for gk/def, -1 per two goals for mid/off.
    if goals_against > 0:
        if group in ('gk', 'def'):
            points -= 2
        elif group in ('mid', 'off'):
            points -= goals_against // 2

    # Cards
    points -= _count(row, 'Yellow Cards')
    points -= 3 * _count(row, 'Red Cards')

    # Saves (goalkeepers only): one point per save.
    if group == 'gk':
        points += _count(row, 'Saves') // 1

    # Offensive bonuses: one point per two actions.
    points += _count(row, 'Shots on Target') // 2
    points += _count(row, 'Successful Take-Ons') // 2
    points += _count(row, 'Carries into Penalty Area') // 2

    # Defensive bonuses
    points += _count(row, 'Ball Recoveries') // 5
    points += _count(row, 'Clearances Made') // 3

    # Miscontrols: the number needed to lose a point depends on the group;
    # rows without a recognised group are not penalised.
    miscontrols_per_point = _MISCONTROLS_PER_POINT.get(group)
    if miscontrols_per_point is not None:
        points -= _count(row, 'Miscontrols') // miscontrols_per_point

    return points

# Score every row with `calculate_points` and store the result in a new column.
laliga['Total Points'] = laliga.apply(calculate_points, axis='columns')

In [288]:
# Show the frame again, now with the `Total Points` column appended.
laliga

Unnamed: 0,Player Name,season_id,match_id,team_name,team_id,Minutes Played,Goals Scored,Assists Provided,Goals Against,Saves,...,Yellow Cards,Red Cards,Shots on Target,Successful Take-Ons,Carries into Penalty Area,Ball Recoveries,Clearances Made,Miscontrols,Group,Total Points
0,Adri Embarba,1,1,Almería,H,90,0,0,,,...,0,0,0,2,0,5,0,4,off,4.0
1,Alejandro Pozo Pozo,1,1,Almería,H,65,0,0,,,...,0,0,0,0,0,1,4,0,def,3.0
2,Diego Mariño,1,1,Almería,H,90,0,0,2.0,2.0,...,0,0,0,0,0,1,1,0,gk,2.0
3,Dion Lopy,1,1,Almería,H,13,0,0,,,...,0,0,0,0,0,1,0,0,mid,1.0
4,Gonzalo Melero,1,1,Almería,H,14,0,0,,,...,0,0,0,1,0,0,0,1,mid,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79170,Raúl Navas,7,380,Real Sociedad,A,90,0,0,,,...,1,0,0,0,0,2,2,0,def,1.0
79171,Sergio Canales,7,380,Real Sociedad,A,28,0,0,,,...,0,0,0,0,0,3,0,0,off,1.0
79172,Willian José,7,380,Real Sociedad,A,90,0,0,,,...,0,0,1,2,0,1,0,1,off,3.0
79173,Xabi Prieto,7,380,Real Sociedad,A,2,0,0,,,...,0,0,0,0,0,0,0,0,mid,1.0


In [289]:
# Total points accumulated by each positional group, highest first.
(
    laliga
    .groupby('Group')['Total Points']
    .sum()
    .sort_values(ascending=False)
)

Group
def    81233.0
mid    79598.0
off    63949.0
gk     23586.0
Name: Total Points, dtype: float64

In [292]:
# The 15 players with the most accumulated points over all rows.
(
    laliga
    .groupby('Player Name')['Total Points']
    .sum()
    .sort_values(ascending=False)
    .head(15)
)

Player Name
Lionel Messi             1528.0
Iago Aspas               1397.0
Karim Benzema            1356.0
Jan Oblak                1290.0
Marc-André ter Stegen    1243.0
Antoine Griezmann        1207.0
Daniel Parejo            1146.0
Gerard Moreno            1146.0
Iñaki Williams           1134.0
José Luis Morales        1113.0
Mikel Oyarzabal          1088.0
David Soria              1057.0
Vinicius Júnior          1018.0
Luis Suárez               947.0
Ángel Correa              906.0
Name: Total Points, dtype: float64

In [305]:
# Ranked per-player totals; after `reset_index` the positional index is the rank (0 = best).
x = (
    laliga
    .groupby('Player Name')['Total Points']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
# Look up a single player's row in that ranking.
x.loc[x['Player Name'] == 'Jesús Navas']

Unnamed: 0,Player Name,Total Points
44,Jesús Navas,678.0
