In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import re
from collections import defaultdict
import plotly.io as pio

players = pd.read_csv('data/Player.csv')
player_atts = pd.read_csv('data/Player_Attributes.csv')
teams = pd.read_csv('data/Team.csv')
matches = pd.read_csv('data/Match.csv')
leagues = pd.read_csv('data/League.csv')
positions = pd.read_csv('data/PositionReference.csv')

player_atts['potential_rating_ratio'] = ((player_atts['potential'] / player_atts['overall_rating']) * 100)

In [None]:
teams[teams['team_long_name']=='RCD Espanyol']

In [10]:
matches['date'] = pd.to_datetime(matches['date'])
rcde_matches = matches[matches['home_team_api_id']== 8558]

In [12]:
filtered_players_rcde = [col for col in rcde_matches.columns if re.match(r'home_player_\d+$', col)]

In [24]:
rcde_matches

Unnamed: 0,id,country_id,league_id,season,stage,date,match_api_id,home_team_api_id,away_team_api_id,home_team_goal,...,weight_home_player_10_info,home_player_10_position,id_home_player_11_info,player_api_id_home_player_11_info,player_name_home_player_11_info,player_fifa_api_id_home_player_11_info,birthday_home_player_11_info,height_home_player_11_info,weight_home_player_11_info,home_player_11_position
0,21525,21518,21518,2008/2009,1,2008-08-30,530090,8558,10281,1,...,163,10.0,6356,24852,Luis Garcia,16,1981-02-06 00:00:00,177.80,152,10.0
1,21540,21518,21518,2008/2009,11,2008-11-16,530245,8558,8388,3,...,154,8.0,8831,32764,Raul Tamudo,2264,1977-10-19 00:00:00,180.34,163,11.0
2,21560,21518,21518,2008/2009,13,2008-11-30,530325,8558,9869,0,...,152,10.0,8831,32764,Raul Tamudo,2264,1977-10-19 00:00:00,180.34,163,10.0
3,21595,21518,21518,2008/2009,16,2008-12-20,530360,8558,9906,2,...,154,8.0,5348,150330,Jose Maria Callejon,185020,1987-02-11 00:00:00,177.80,161,11.0
4,21611,21518,21518,2008/2009,18,2009-01-11,530376,8558,9865,2,...,154,8.0,6356,24852,Luis Garcia,16,1981-02-06 00:00:00,177.80,152,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,24454,21518,21518,2015/2016,34,2016-04-19,2030494,8558,9910,1,...,165,10.0,6680,498033,Marco Asensio,220834,1996-01-21 00:00:00,177.80,154,10.0
148,24473,21518,21518,2015/2016,36,2016-05-01,2030513,8558,8302,1,...,185,10.0,6680,498033,Marco Asensio,220834,1996-01-21 00:00:00,177.80,154,10.0
149,24492,21518,21518,2015/2016,38,2016-05-15,2030532,8558,8372,4,...,154,8.0,3774,246438,Gerard Moreno,208093,1992-04-07 00:00:00,177.80,165,11.0
150,24515,21518,21518,2015/2016,5,2015-09-22,2030129,8558,10267,1,...,152,8.0,3278,41622,Felipe Caicedo,175254,1988-09-05 00:00:00,182.88,185,11.0


In [15]:
for column in rcde_matches:
    if re.match(r'home_player_\d+$', column):
        rcde_matches[column] = rcde_matches[column].astype('Int64')
        rcde_matches = rcde_matches.merge(players, left_on=column, right_on='player_api_id', how='left', suffixes=('','_new'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rcde_matches[column] = rcde_matches[column].astype('Int64')


In [25]:
player_cols = [col for col in rcde_matches.columns if re.fullmatch(r'home_player_\d+', col)]

# Step 2: For each player column, find its corresponding position column and merge
for col in player_cols:
    # Find the corresponding Y column (e.g., 'home_player_Y1' for 'home_player_1')
    y_col = col.replace('home_player_', 'home_player_Y')

    # Cast player column to nullable int for merge
    rcde_matches[col] = rcde_matches[col].astype('Int64')

    # Merge with players table on player ID
    rcde_matches = rcde_matches.merge(
        players,
        left_on=col,
        right_on='player_api_id',
        how='left',
        suffixes=('', f'_{col}_info')
    )

    # Optional: Add position info from the Y column to the merged data
    rcde_matches[f'{col}_position'] = rcde_matches[y_col]

In [None]:
rcde_player_atts = player_atts[player_atts['player_api_id'].isin(rcde_players['player_api_id'])]
# add name into the player attributes df
rcde_player_atts = rcde_player_atts.merge(rcde_players[['player_api_id', 'player_name']], on='player_api_id', how='left')

In [68]:
# Extract relevant name and position columns again
home_players_df = pd.read_csv('data/Filtered_Home_Player_Data.csv')
name_cols = [col for col in home_players_df.columns if 'player_name_home_player_' in col]
pos_cols = [col for col in home_players_df.columns if re.fullmatch(r'home_player_\d+_position', col)]

player_position_dict = {}

for name_col in name_cols:
    player_number = re.search(r'\d+', name_col).group()
    pos_col = f'home_player_{player_number}_position'

    for name, pos in zip(home_players_df[name_col], home_players_df[pos_col]):
        if pd.notna(name) and pd.notna(pos):
            player_position_dict[name] = int(pos)


({'Carlos Kameni': 1,
  'Cristian Alvarez': 1,
  'Francisco Casilla': 1,
  'Pau Sabata': 1,
  'Giedrius Arlauskis': 1,
  'Sergio Sanchez': 3,
  'Steve Finnan': 3,
  'Francisco Chica': 3,
  'Ivan Pillud': 3,
  'Facundo Sebastian Roncaglia': 3,
  'Jordi Amat': 3,
  'Javi Lopez': 7,
  'Ernesto Galan': 3,
  'Victor Sanchez Mata': 8,
  'Raul Rodriguez': 7,
  'Felipe Mattioni Rohde': 3,
  'Anaitz Arbilla': 6,
  'Alvaro Gonzalez Soberon': 3,
  'Roberto Correa': 3,
  'Daniel Jarque': 3,
  'Marc Torrejon Moya': 3,
  'Nicolas Pareja': 3,
  'Victor Ruiz': 3,
  'Hurtado Perez Moises': 6,
  'Juan Forlin': 6,
  'Hector Moreno': 3,
  'Diego Daniel Colotto': 3,
  'Sidnei': 3,
  'Enzo Roco': 3,
  'Didac Vila': 3,
  'Eric Bertrand Bailly': 3,
  'Michael Ciani': 3,
  'Oscar Duarte': 7,
  'Gregory Beranger': 3,
  'David Garcia': 3,
  'Victor Alvarez': 8,
  'Joan Capdevila': 3,
  'Juan Rafael Fuentes': 3,
  'Ruben Duarte': 3,
  'Roman Martinez': 7,
  'Angel Cervara': 6,
  'Milan Smiljanic': 6,
  'Jose Mari

In [64]:
# not sure if still needed
player_info = []
for column in rcde_matches:
    if column in name_columns:
        player_info.append(pd.unique(rcde_matches[column].values.ravel()).tolist())

flat_list = [name for sublist in player_info for name in sublist if pd.notna(name)]
unique_rcde_names = pd.unique(flat_list).tolist()

In [None]:
# only get the role_y information since we did not explictly mention anything about left or right
pos_role_dict = {}
for position, role in zip(positions['player_pos_y'], positions['role_y']):
    pos_role_dict[position] = role

In [71]:
# map player position numbers and map to role
player_role_dict = {player: pos_role_dict.get(int(position), 'Unknown') for player, position in player_position_dict.items()}

({'Carlos Kameni': 'GK',
  'Cristian Alvarez': 'GK',
  'Francisco Casilla': 'GK',
  'Pau Sabata': 'GK',
  'Giedrius Arlauskis': 'GK',
  'Sergio Sanchez': 'BK',
  'Steve Finnan': 'BK',
  'Francisco Chica': 'BK',
  'Ivan Pillud': 'BK',
  'Facundo Sebastian Roncaglia': 'BK',
  'Jordi Amat': 'BK',
  'Javi Lopez': 'MF',
  'Ernesto Galan': 'BK',
  'Victor Sanchez Mata': 'MF',
  'Raul Rodriguez': 'MF',
  'Felipe Mattioni Rohde': 'BK',
  'Anaitz Arbilla': 'MF',
  'Alvaro Gonzalez Soberon': 'BK',
  'Roberto Correa': 'BK',
  'Daniel Jarque': 'BK',
  'Marc Torrejon Moya': 'BK',
  'Nicolas Pareja': 'BK',
  'Victor Ruiz': 'BK',
  'Hurtado Perez Moises': 'MF',
  'Juan Forlin': 'MF',
  'Hector Moreno': 'BK',
  'Diego Daniel Colotto': 'BK',
  'Sidnei': 'BK',
  'Enzo Roco': 'BK',
  'Didac Vila': 'BK',
  'Eric Bertrand Bailly': 'BK',
  'Michael Ciani': 'BK',
  'Oscar Duarte': 'MF',
  'Gregory Beranger': 'BK',
  'David Garcia': 'BK',
  'Victor Alvarez': 'MF',
  'Joan Capdevila': 'BK',
  'Juan Rafael Fuen

In [72]:
# sort player names per rol on the soccer pitch
roles = defaultdict(list)
for player, role in player_role_dict.items():
    roles[role].append(player)

unique_roles = sorted(roles.keys())
role_counts = {role: len(players) for role, players in roles.items()}

gk_player_names = roles['GK']
bk_player_names = roles['BK']
mf_player_names = roles['MF']
fw_player_names = roles['FW']

In [88]:
# initalize dataframes to base the heatmaps on

bk_players = rcde_player_atts[rcde_player_atts['player_name'].isin(bk_player_names)]
mf_players = rcde_player_atts[rcde_player_atts['player_name'].isin(mf_player_names)]
fw_players = rcde_player_atts[rcde_player_atts['player_name'].isin(fw_player_names)]

In [106]:
fw_players.to_csv("fw_players.csv", index=False)

In [108]:
mf_players.to_csv("mf_players.csv", index=False)

In [107]:
bk_players.to_csv("bk_players.csv", index=False)

In [44]:
rcde_players = players[players['player_name'].isin(unique_rcde_names)]

In [85]:
# retain only gk attributes here for GKs and remove gk attributes for non-GK players


101

In [92]:
# reorder columns in all dfs

cols = rcde_player_atts.columns.tolist()
cols_to_move = ['player_name', 'potential_rating_ratio']
target_col = 'player_api_id'

for i in cols_to_move:
    cols.remove(i)
cols.insert(cols.index(target_col) + 1, cols_to_move[0])
cols.insert(cols.index(target_col) + 2, cols_to_move[1])

rcde_player_atts = rcde_player_atts[cols]

In [93]:
rcde_player_atts.sort_values('potential_rating_ratio', ascending=False, inplace=True)

In [98]:
pio.renderers.default = "notebook_connected"

In [116]:
def create_heatmap(df):
    disposable_cols = ['player_api_id', 'player_fifa_api_id', 'date', 'id',
                       'gk_diving', 'gk_kicking', 'gk_handling', 'gk_positioning', 'gk_reflexes']
    attribute_cols = df.select_dtypes(include='number').columns.difference(disposable_cols + ['potential_rating_ratio']).tolist()
    attribute_cols = ['potential_rating_ratio'] + attribute_cols

    pivot_df = df.pivot_table(index=None, columns='player_name', values=attribute_cols, aggfunc='mean')
    pivot_df = pivot_df.T
    pio.renderers.default = "notebook_connected"

    fig = go.Figure(
        data=go.Heatmap(
            z=pivot_df.values,
            x=pivot_df.columns,
            y=pivot_df.index,
            colorscale='Greens',
            colorbar=dict(title='Attribute Value'),
            hovertemplate='Player: %{x}<br>Attribute: %{y}<br>Value: %{z}<extra></extra>'
        )
    )

    # Layout adjustments
    fig.update_layout(
        xaxis=dict(title='Attributes', tickangle=45),
        yaxis=dict(title='Player Names'),
        autosize=True,
        height=600,
        width=1200
    )

    return fig


In [112]:


dimensions = PitchDimensions()
fig = make_pitch_figure(dimensions)
fig.show()


In [122]:
from plotly_football_pitch import (
    make_pitch_figure,
    PitchDimensions,
    SingleColourBackground, add_heatmap, PitchOrientation
)


dimensions = PitchDimensions()
fig = make_pitch_figure(
    dimensions,
    pitch_background=SingleColourBackground("#81B622"),
)
fig.show()

In [126]:
pitch_figure = make_pitch_figure(
    dimensions=PitchDimensions(),
    marking_colour="black",
    marking_width=2,
    pitch_background=SingleColourBackground("#81B622"),
    figure_width_pixels=1200,
    figure_height_pixels=600,
    orientation=PitchOrientation.HORIZONTAL
)

In [124]:
fw_heatmap = create_heatmap(fw_players)
bk_heatmap = create_heatmap(bk_players)
mf_heatmap = create_heatmap(mf_players)
