In [25]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D

Read the CSV from GitHub and keep only the rows whose `Types` value is "Period".

In [26]:
url = "https://raw.githubusercontent.com/philippdrebes/sda-hockey-c2d/main/data/data.csv"

# The file is semicolon-separated; encoding_errors="ignore" skips bytes that
# cannot be decoded instead of raising.
data = pd.read_csv(url, sep=";", encoding_errors="ignore")

# Boolean mask selecting the rows whose 'Types' value is exactly "Period".
condition = data['Types'].eq("Period")
match_data = data.loc[condition]

# Keep the identifying columns plus the metric columns used further down.
match_data = match_data[
    [
        'Player ID',
        'Description',
        'Session ID',
        'Position',
        'Distance / min (m)',
        'High Metabolic Power Distance / min (m)',
        'Acceleration Load (max.)',
        'Speed (max.) (km/h)',
        'Speed (Ø) (km/h)',
        'Acceleration (max.) (m/s²)',
        'Deceleration (max.) (m/s²)',
        'Accelerations / min',
        'Decelerations / min',
    ]
]

Create the per-player DataFrame and the per-position average DataFrame (averaged per position, session and description).

In [27]:
# player_data is a second name for match_data (no copy is made).
player_data = match_data

# Average every remaining column per (Position, Session ID, Description) group,
# then replace the averaged 'Player ID' with a fixed label so these rows can be
# told apart from real players.
avg_position = (
    match_data
    .groupby(by=['Position', 'Session ID', 'Description'], as_index=False)
    .mean()
    .assign(**{"Player ID": "avg of Position"})
)

display(player_data, avg_position)

Unnamed: 0,Player ID,Description,Session ID,Position,Distance / min (m),High Metabolic Power Distance / min (m),Acceleration Load (max.),Speed (max.) (km/h),Speed (Ø) (km/h),Acceleration (max.) (m/s²),Deceleration (max.) (m/s²),Accelerations / min,Decelerations / min
42,1,Period 1,234,D,186.0,80.0,6.79,28.66,11.17,3.44,-5.72,5.23,5.66
43,2,Period 1,234,F,235.0,132.0,5.10,29.27,14.07,3.74,-4.54,2.50,3.12
44,3,Period 1,234,F,229.0,119.0,4.92,30.89,13.73,3.68,-6.30,4.53,5.05
45,4,Period 1,234,D,218.0,100.0,10.27,31.06,13.10,3.58,-5.91,3.92,4.88
46,5,Period 1,234,D,205.0,89.0,4.70,32.17,12.27,3.54,-6.92,4.98,4.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,38,Overtime,194,F,,,,,,,,0.00,0.00
267,39,Overtime,194,F,287.0,165.0,3.94,30.46,17.22,3.37,-3.61,3.77,3.77
268,40,Overtime,194,F,279.0,163.0,3.74,27.26,16.76,4.06,-3.87,4.32,4.32
269,41,Overtime,194,F,,,,,,,,0.00,0.00


Unnamed: 0,Position,Session ID,Description,Player ID,Distance / min (m),High Metabolic Power Distance / min (m),Acceleration Load (max.),Speed (max.) (km/h),Speed (Ø) (km/h),Acceleration (max.) (m/s²),Deceleration (max.) (m/s²),Accelerations / min,Decelerations / min
0,D,194,Overtime,avg of Position,274.75,150.25,3.5675,24.655,16.4925,2.2675,-2.735,3.17,0.933333
1,D,194,Period 1,avg of Position,215.333333,102.166667,5.663333,29.451667,12.913333,3.943333,-5.475,5.03,5.095
2,D,194,Period 2,avg of Position,200.6,85.6,6.07,29.0,12.044,3.89,-5.232,3.875,3.33
3,D,194,Period 3,avg of Position,219.4,95.0,6.476,29.3,13.164,3.812,-5.692,3.783333,3.606667
4,D,234,Period 1,avg of Position,214.923077,95.0,6.566154,28.84,12.9,3.548462,-5.423846,4.531538,4.775385
5,D,234,Period 2,avg of Position,203.615385,86.230769,6.826154,29.446154,12.213077,3.411538,-4.897692,4.462308,4.068462
6,D,234,Period 3,avg of Position,199.538462,79.461538,12.47,28.517692,11.973846,3.609231,-5.050769,3.473077,3.238462
7,F,194,Overtime,avg of Position,294.166667,186.833333,3.925,28.403333,17.665,2.838333,-3.325,2.349231,1.856154
8,F,194,Period 1,avg of Position,254.615385,151.923077,7.043077,31.464615,15.267692,3.804615,-6.005385,4.837143,4.822857
9,F,194,Period 2,avg of Position,234.615385,127.538462,6.188462,30.633077,14.073077,3.847692,-6.010769,4.844286,4.035


Define a function that builds the comparison DataFrame for a given player ID and session ID: the player's rows followed by the average rows of that player's position.

In [28]:
def prepare_for_visualization(player_id=2, session_id=234, metric="Speed (Ø) (km/h)"):
    """Build the frame that compares one player with the average of their position.

    Reads the notebook-level frames ``player_data`` and ``avg_position``.

    Parameters
    ----------
    player_id
        Value matched against ``player_data['Player ID']``.
    session_id
        Value matched against the ``'Session ID'`` column of both frames.
    metric
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        The player's rows for the session followed by the position-average
        rows for the same session, in the column order of ``player_data`` and
        with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If ``player_id`` does not occur in ``player_data`` at all.
    """
    is_player = player_data['Player ID'] == player_id
    in_session = player_data['Session ID'] == session_id
    filtered_player_df = player_data[is_player & in_session]

    # Prefer the position recorded in the requested session, so the comparison
    # average belongs to the position actually used in that session. Fall back
    # to any row of the player when they have no rows in this session.
    position_rows = filtered_player_df if not filtered_player_df.empty else player_data[is_player]
    if position_rows.empty:
        raise IndexError(f"Player ID {player_id!r} not found in player_data")
    position = position_rows['Position'].iloc[0]

    same_position = avg_position["Position"] == position
    same_session = avg_position["Session ID"] == session_id
    # Align the average rows to the player frame's column order before stacking.
    filtered_pos_df = avg_position.loc[same_position & same_session, filtered_player_df.columns]

    return pd.concat([filtered_player_df, filtered_pos_df], ignore_index=True)

In [29]:
# Build the comparison frame using the function's defaults (player_id=2, session_id=234).
visualization_df = prepare_for_visualization()

Define the function that draws one line-chart subplot per metric and saves the figure to disk.

In [30]:
def create_subplots_for_metrics(df, output_path='output/linechart.png'):
    """Draw one line-chart panel per metric column of ``df`` and save the figure.

    Every column of ``df`` other than "Player ID", "Description", "Session ID"
    and "Position" is treated as a metric. Each panel plots the metric against
    "Description" with one line per distinct "Player ID". The first ID that
    appears in ``df`` is highlighted (blue line with markers); all other IDs
    are drawn in grey.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding "Player ID", "Description" and the metric columns.
    output_path : str or path-like, optional
        Destination of the saved image. Missing parent directories are created.

    Returns
    -------
    None
        The figure is written to ``output_path`` and closed; it is not shown.

    Raises
    ------
    ValueError
        If ``df`` has no metric columns.
    """
    cols_not_needed = ["Player ID", "Description", "Session ID", "Position"]
    # Derive everything from the ``df`` argument rather than from a
    # notebook-level frame, so the function works for whatever frame is passed.
    metric_columns = [col for col in df.columns if col not in cols_not_needed]
    if not metric_columns:
        raise ValueError("df contains no metric columns to plot")

    unique_ids = df['Player ID'].unique()
    highlighted_id = unique_ids[0] if len(unique_ids) else None

    # Grid with at most three panels per row.
    num_metrics = len(metric_columns)
    num_rows = (num_metrics + 2) // 3
    num_cols = min(num_metrics, 3)

    # squeeze=False keeps ``axes`` two-dimensional even for a single row or a
    # single panel, so the panels can always be addressed uniformly.
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows), squeeze=False)
    try:
        panels = axes.ravel()  # row-major: panel i sits at (i // num_cols, i % num_cols)

        for ax, metric_col in zip(panels, metric_columns):
            for unique_id in unique_ids:
                series = df.loc[df['Player ID'] == unique_id, ['Description', metric_col]]

                if unique_id == highlighted_id:
                    # Highlighted ID: coloured line plus hollow markers drawn on top.
                    ax.plot(series['Description'], series[metric_col], label=f'{unique_id}', color="#0b53c1", lw=2.4, zorder=10)
                    ax.scatter(series['Description'], series[metric_col], fc="w", ec="#0b53c1", s=60, lw=2.4, zorder=12)
                else:
                    # Every other ID: plain grey line.
                    ax.plot(series['Description'], series[metric_col], label=f'{unique_id}', color="#BFBFBF", lw=1.5)

            # Panel decoration is needed once per panel, not once per line.
            ax.set_title(metric_col)
            ax.set_xlabel('Description')
            ax.set_ylabel('Value')
            if len(unique_ids):
                ax.legend()

        # Hide grid cells left over when the metric count is not a multiple of three.
        for spare_ax in panels[num_metrics:]:
            spare_ax.set_visible(False)

        fig.tight_layout()

        target = Path(output_path)
        # savefig does not create directories, so make sure the folder exists.
        target.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(target)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)



In [32]:
# Render the per-metric line charts for the comparison frame; the function saves
# the figure to 'output/linechart.png' and closes it, so no plot appears inline.
create_subplots_for_metrics(visualization_df)