<a href="https://colab.research.google.com/github/marclamberts/football-analysis/blob/main/Pressures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Step 1: Mount Google Drive
# Uses the Colab-only `google.colab` helper; the Drive contents are then
# reachable under /content/drive (the path prefix used by file_path below).
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Libraries
import pandas as pd
import numpy as np

# Step 3: Load the Excel File into a DataFrame
# Update the file path according to the location in your Google Drive
# `sheet_data` is the module-level frame that the function and the steps
# below in this cell operate on.
file_path = '/content/drive/My Drive/BM2425.xlsx'
sheet_data = pd.read_excel(file_path)

# Display the first few rows to verify data is loaded correctly
# (plain-text print; wide frames are wrapped and truncated with "...")
print(sheet_data.head())

# Step 4: Define Function to Calculate Pressures
def calculate_pressures(sheet_data, sample_size=50):
    """Pair a sample of possession events with every other-contestant row in the same period.

    For up to ``sample_size`` rows whose ``typeId`` is not NaN, every row of
    ``sheet_data`` that has the same ``periodId`` and a different
    ``contestantId`` is emitted together with its Euclidean distance (in the
    units of the ``x``/``y`` columns) to the sampled event's location.
    No time window and no distance threshold are applied here.

    NOTE(review): in the preview printed by this notebook only two distinct
    ``contestantId`` values alternate across events, so this column looks like
    a team identifier rather than a player identifier - confirm against the
    data provider's schema before interpreting the ``*_id`` columns.

    Parameters
    ----------
    sheet_data : pandas.DataFrame
        Event table. Must contain the columns ``typeId``, ``periodId``,
        ``contestantId``, ``playerName``, ``x``, ``y``, ``timeMin`` and
        ``timeSec``.
    sample_size : int, default 50
        Maximum number of possession events to sample. If fewer events are
        available, all of them are used (previously this raised
        ``ValueError``).

    Returns
    -------
    pandas.DataFrame
        One row per (sampled event, other-contestant row) pair with columns
        ``ball_handler_id``, ``ball_handler_name``, ``pressure_player_id``,
        ``pressure_player_name``, ``time`` (seconds, ``timeMin * 60 +
        timeSec`` of the sampled event) and ``distance_to_ball``. An empty
        frame with these columns is returned when there is nothing to pair.
    """
    output_columns = [
        'ball_handler_id',
        'ball_handler_name',
        'pressure_player_id',
        'pressure_player_name',
        'time',
        'distance_to_ball',
    ]

    # Identify events where players have the ball (typeId is not NaN)
    ball_possession = sheet_data.dropna(subset=['typeId'])
    if ball_possession.empty:
        return pd.DataFrame(columns=output_columns)

    # Clamp the sample size: DataFrame.sample raises when asked for more rows
    # than exist (sampling is without replacement).
    n_events = min(sample_size, len(ball_possession))
    sampled_possession = ball_possession.sample(n=n_events, random_state=42)

    # Work on narrow views: only these columns are read, and the source frame
    # may carry many unrelated columns.
    candidates = sheet_data[['periodId', 'contestantId', 'playerName', 'x', 'y']]
    events = sampled_possession[
        ['contestantId', 'playerName', 'x', 'y', 'periodId', 'timeMin', 'timeSec']
    ]

    frames = []
    for (handler_id, handler_name, ball_x, ball_y,
         period_id, time_min, time_sec) in events.itertuples(index=False, name=None):
        time_id = time_min * 60 + time_sec  # Convert time to seconds

        # Rows in the same period that belong to a different contestant
        others = candidates[
            (candidates['periodId'] == period_id) &
            (candidates['contestantId'] != handler_id)
        ]
        if others.empty:
            continue

        distance = np.sqrt((others['x'] - ball_x) ** 2 + (others['y'] - ball_y) ** 2)

        # Build the block column-wise; the scalar values are broadcast to the
        # length of the array-valued columns.
        frames.append(pd.DataFrame({
            'ball_handler_id': handler_id,
            'ball_handler_name': handler_name,
            'pressure_player_id': others['contestantId'].to_numpy(),
            'pressure_player_name': others['playerName'].to_numpy(),
            'time': time_id,
            'distance_to_ball': distance.to_numpy(),
        }))

    if not frames:
        return pd.DataFrame(columns=output_columns)

    pressure_df = pd.concat(frames, ignore_index=True)
    return pressure_df

# Step 5: Calculate Pressures
# Called with the default sample_size, so at most 50 possession events are
# paired with other-contestant rows.
pressure_df = calculate_pressures(sheet_data)

# Step 6: Display Results
print(pressure_df.head())

# Step 7 (Optional): Save the Results to a File in Google Drive
# index=False keeps the DataFrame's row index out of the spreadsheet.
output_path = '/content/drive/My Drive/PressureResults.xlsx'
pressure_df.to_excel(output_path, index=False)
print(f"Pressure results saved to: {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
           id  eventId  typeId  periodId  timeMin  timeSec  \
0  2713245567        1      34        16        0        0   
1  2713246813        1      34        16        0        0   
2  2713302913        2      32         1        0        0   
3  2713302931        2      32         1        0        0   
4  2713303059        3       1         1        0        0   

                contestantId  outcome     x     y  ... In-swinger Out-swinger  \
0  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
1  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
2  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
3  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
4  apoawtpvac4zqlancmvw4nk4o        1  50.0  50.1  ...          0           0   

   Straight

In [4]:
# Step 1: Mount Google Drive
# NOTE(review): this cell repeats the mount / import / load steps of the
# first code cell; the workbook is re-read from Drive every time it runs.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Libraries
import pandas as pd
import numpy as np

# Step 3: Load the Excel File into a DataFrame
# Update the file path according to the location in your Google Drive
file_path = '/content/drive/My Drive/BM2425.xlsx'
sheet_data = pd.read_excel(file_path)

# Display the first few rows to verify data is loaded correctly
# (plain-text print; wide frames are wrapped and truncated with "...")
print(sheet_data.head())

# Step 4: Calculate Pressures and Passes
def calculate_pressures_and_passes(sheet_data, pressure_radius=5):
    """Count pressures made/received and passes under pressure per ``contestantId``.

    Every row with a non-NaN ``typeId`` is treated as a possession event.
    A row of ``sheet_data`` counts as pressing that event when it has the same
    ``periodId``, a different ``contestantId``, and its ``x``/``y`` location
    lies within ``pressure_radius`` (Euclidean, in the units of ``x``/``y``)
    of the event's location. No time window is applied: every row of the
    same period is a candidate.

    A possession event with ``typeId == 1`` that has at least one presser is
    counted as a pass under pressure; it is successful when ``outcome == 1``.

    NOTE(review): statistics are keyed by ``contestantId`` and labelled with
    the ``playerName`` of the first row seen for that key. In the preview
    printed by this notebook only two distinct ``contestantId`` values
    alternate, so this looks like a team identifier - confirm whether a
    per-player key was intended.

    Parameters
    ----------
    sheet_data : pandas.DataFrame
        Event table with columns ``typeId``, ``periodId``, ``contestantId``,
        ``playerName``, ``x``, ``y`` and ``outcome``.
    pressure_radius : float, default 5
        Maximum distance at which a row counts as pressure. The default
        reproduces the previously hard-coded threshold.

    Returns
    -------
    pandas.DataFrame
        One row per key that was involved in at least one pressure, with
        columns ``player_id``, ``player_name``, ``pressures_made``,
        ``pressures_received``, ``passes_under_pressure`` and
        ``pass_success_ratio`` (0 when there were no passes under pressure).
        The columns are present even when no pressure was found.
    """
    output_columns = [
        'player_id',
        'player_name',
        'pressures_made',
        'pressures_received',
        'passes_under_pressure',
        'pass_success_ratio',
    ]

    def blank_stats(name):
        # Fresh counter record for a key seen for the first time.
        return {
            'player_name': name,
            'pressures_received': 0,
            'pressures_made': 0,
            'passes_under_pressure': 0,
            'successful_passes': 0,
        }

    # Identify events where players have the ball (typeId is not NaN)
    ball_possession = sheet_data.dropna(subset=['typeId'])

    # Narrow views built once: only these columns are read, and filtering or
    # iterating the full (potentially very wide) frame per event is costly.
    events = ball_possession[
        ['contestantId', 'playerName', 'x', 'y', 'periodId', 'typeId', 'outcome']
    ]
    candidates = sheet_data[['periodId', 'contestantId', 'playerName', 'x', 'y']]

    player_stats = {}

    for (handler_id, handler_name, ball_x, ball_y,
         period_id, event_type, event_outcome) in events.itertuples(index=False, name=None):
        # Rows in the same period that belong to a different contestant
        others = candidates[
            (candidates['periodId'] == period_id) &
            (candidates['contestantId'] != handler_id)
        ]
        distance = np.sqrt((others['x'] - ball_x) ** 2 + (others['y'] - ball_y) ** 2)
        pressers = others[distance <= pressure_radius]

        # Without pressers nothing is recorded for this event (neither
        # pressures nor a pass under pressure).
        if pressers.empty:
            continue

        # Register the ball handler before the pressers so that the row order
        # of the result follows first appearance.
        if handler_id not in player_stats:
            player_stats[handler_id] = blank_stats(handler_name)
        handler_stats = player_stats[handler_id]

        for presser_id, presser_name in zip(pressers['contestantId'], pressers['playerName']):
            if presser_id not in player_stats:
                player_stats[presser_id] = blank_stats(presser_name)
            handler_stats['pressures_received'] += 1
            player_stats[presser_id]['pressures_made'] += 1

        if event_type == 1:  # Pass event (made under pressure)
            handler_stats['passes_under_pressure'] += 1
            if event_outcome == 1:  # Successful pass
                handler_stats['successful_passes'] += 1

    # Convert player stats to a DataFrame
    stats_list = [
        {
            'player_id': player_id,
            'player_name': stats['player_name'],
            'pressures_made': stats['pressures_made'],
            'pressures_received': stats['pressures_received'],
            'passes_under_pressure': stats['passes_under_pressure'],
            'pass_success_ratio': (
                stats['successful_passes'] / stats['passes_under_pressure']
                if stats['passes_under_pressure'] > 0 else 0
            ),
        }
        for player_id, stats in player_stats.items()
    ]

    # Passing `columns` keeps the schema stable when stats_list is empty.
    stats_df = pd.DataFrame(stats_list, columns=output_columns)
    return stats_df

# Step 5: Calculate Stats
# Iterates over every possession event in sheet_data (no sampling).
player_stats_df = calculate_pressures_and_passes(sheet_data)

# Step 6: Display Results
print(player_stats_df.head())

# Step 7 (Optional): Save the Results to a File in Google Drive
# index=False keeps the DataFrame's row index out of the spreadsheet.
output_path = '/content/drive/My Drive/PlayerStats.xlsx'
player_stats_df.to_excel(output_path, index=False)
print(f"Player stats saved to: {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
           id  eventId  typeId  periodId  timeMin  timeSec  \
0  2713245567        1      34        16        0        0   
1  2713246813        1      34        16        0        0   
2  2713302913        2      32         1        0        0   
3  2713302931        2      32         1        0        0   
4  2713303059        3       1         1        0        0   

                contestantId  outcome     x     y  ... In-swinger Out-swinger  \
0  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
1  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
2  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
3  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
4  apoawtpvac4zqlancmvw4nk4o        1  50.0  50.1  ...          0           0   

   Straight

In [5]:
# Step 1: Mount Google Drive
# NOTE(review): this cell repeats the mount / import / load steps of the
# earlier code cells; the workbook is re-read from Drive every time it runs.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Libraries
import pandas as pd
import numpy as np

# Step 3: Load the Excel File into a DataFrame
# Update the file path according to the location in your Google Drive
file_path = '/content/drive/My Drive/BM2425.xlsx'
sheet_data = pd.read_excel(file_path)

# Display the first few rows to verify data is loaded correctly
# (plain-text print; wide frames are wrapped and truncated with "...")
print(sheet_data.head())

# Step 4: Calculate Pressures and Passes
def calculate_pressures_and_passes(sheet_data, pressure_radius=5):
    """Count pressures made/received and passes under pressure, reported by name only.

    Every row with a non-NaN ``typeId`` is treated as a possession event.
    A row of ``sheet_data`` counts as pressing that event when it has the same
    ``periodId``, a different ``contestantId``, and its ``x``/``y`` location
    lies within ``pressure_radius`` (Euclidean, in the units of ``x``/``y``)
    of the event's location. No time window is applied: every row of the
    same period is a candidate.

    A possession event with ``typeId == 1`` that has at least one presser is
    counted as a pass under pressure; it is successful when ``outcome == 1``.

    NOTE(review): statistics are accumulated per ``contestantId`` and labelled
    with the ``playerName`` of the first row seen for that key; the key itself
    is not part of the output. In the preview printed by this notebook only
    two distinct ``contestantId`` values alternate, so this looks like a team
    identifier - confirm whether a per-player key was intended.

    Parameters
    ----------
    sheet_data : pandas.DataFrame
        Event table with columns ``typeId``, ``periodId``, ``contestantId``,
        ``playerName``, ``x``, ``y`` and ``outcome``.
    pressure_radius : float, default 5
        Maximum distance at which a row counts as pressure. The default
        reproduces the previously hard-coded threshold.

    Returns
    -------
    pandas.DataFrame
        One row per key that was involved in at least one pressure, with
        columns ``player_name``, ``pressures_made``, ``pressures_received``,
        ``passes_under_pressure`` and ``pass_success_ratio`` (0 when there
        were no passes under pressure). The columns are present even when no
        pressure was found.
    """
    output_columns = [
        'player_name',
        'pressures_made',
        'pressures_received',
        'passes_under_pressure',
        'pass_success_ratio',
    ]

    def blank_stats(name):
        # Fresh counter record for a key seen for the first time.
        return {
            'player_name': name,
            'pressures_received': 0,
            'pressures_made': 0,
            'passes_under_pressure': 0,
            'successful_passes': 0,
        }

    # Identify events where players have the ball (typeId is not NaN)
    ball_possession = sheet_data.dropna(subset=['typeId'])

    # Narrow views built once: only these columns are read, and filtering or
    # iterating the full (potentially very wide) frame per event is costly.
    events = ball_possession[
        ['contestantId', 'playerName', 'x', 'y', 'periodId', 'typeId', 'outcome']
    ]
    candidates = sheet_data[['periodId', 'contestantId', 'playerName', 'x', 'y']]

    player_stats = {}

    for (handler_id, handler_name, ball_x, ball_y,
         period_id, event_type, event_outcome) in events.itertuples(index=False, name=None):
        # Rows in the same period that belong to a different contestant
        others = candidates[
            (candidates['periodId'] == period_id) &
            (candidates['contestantId'] != handler_id)
        ]
        distance = np.sqrt((others['x'] - ball_x) ** 2 + (others['y'] - ball_y) ** 2)
        pressers = others[distance <= pressure_radius]

        # Without pressers nothing is recorded for this event (neither
        # pressures nor a pass under pressure).
        if pressers.empty:
            continue

        # Register the ball handler before the pressers so that the row order
        # of the result follows first appearance.
        if handler_id not in player_stats:
            player_stats[handler_id] = blank_stats(handler_name)
        handler_stats = player_stats[handler_id]

        for presser_id, presser_name in zip(pressers['contestantId'], pressers['playerName']):
            if presser_id not in player_stats:
                player_stats[presser_id] = blank_stats(presser_name)
            handler_stats['pressures_received'] += 1
            player_stats[presser_id]['pressures_made'] += 1

        if event_type == 1:  # Pass event (made under pressure)
            handler_stats['passes_under_pressure'] += 1
            if event_outcome == 1:  # Successful pass
                handler_stats['successful_passes'] += 1

    # Convert player stats to a DataFrame
    stats_list = [
        {
            'player_name': stats['player_name'],  # Only include player_name
            'pressures_made': stats['pressures_made'],
            'pressures_received': stats['pressures_received'],
            'passes_under_pressure': stats['passes_under_pressure'],
            'pass_success_ratio': (
                stats['successful_passes'] / stats['passes_under_pressure']
                if stats['passes_under_pressure'] > 0 else 0
            ),
        }
        for stats in player_stats.values()
    ]

    # Passing `columns` keeps the schema stable when stats_list is empty.
    stats_df = pd.DataFrame(stats_list, columns=output_columns)
    return stats_df

# Step 5: Calculate Stats
# Iterates over every possession event in sheet_data (no sampling).
player_stats_df = calculate_pressures_and_passes(sheet_data)

# Step 6: Display Results
print(player_stats_df.head())

# Step 7 (Optional): Save the Results to a File in Google Drive
# NOTE(review): this is the same output path the previous code cell writes
# to, so both cells target one PlayerStats.xlsx file.
output_path = '/content/drive/My Drive/PlayerStats.xlsx'
player_stats_df.to_excel(output_path, index=False)
print(f"Player stats saved to: {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
           id  eventId  typeId  periodId  timeMin  timeSec  \
0  2713245567        1      34        16        0        0   
1  2713246813        1      34        16        0        0   
2  2713302913        2      32         1        0        0   
3  2713302931        2      32         1        0        0   
4  2713303059        3       1         1        0        0   

                contestantId  outcome     x     y  ... In-swinger Out-swinger  \
0  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
1  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
2  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...          0           0   
3  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...          0           0   
4  apoawtpvac4zqlancmvw4nk4o        1  50.0  50.1  ...          0           0   

   Straight