In [6]:
import pandas as pd

# Combine the event data with the xG values and save the result to
# 'totalxg.xlsx'.

# Load the original dataset
file_path = 'BM2425.xlsx'
data = pd.read_excel(file_path, sheet_name='Sheet1')

# Load the xG dataset
xg_file_path = 'BMxg.xlsx'  # Update this path to your xG data file
xg_data = pd.read_excel(xg_file_path)

# Preview the xG dataset
print(xg_data.head())

# Merge xG data with the original dataset on 'x' and 'y'.
# how='left' keeps every row of `data`; rows whose (x, y) pair does not occur
# in `xg_data` get NaN in the columns that come from `xg_data`.
# NOTE(review): (x, y) may not identify a single event — two events at the
# same coordinates would share an xG value, and duplicated coordinates in the
# xG file multiply rows of `data`. Both files appear to carry timeMin/timeSec;
# adding them to the join keys would be more specific, but it changes the
# suffixed column names ('timeMin_x', ...) of the saved file — confirm before
# changing.
merge_keys = ['x', 'y']
merged_data = pd.merge(data, xg_data, on=merge_keys, how='left')

# Rename the xG column if necessary
merged_data = merged_data.rename(columns={'xG': 'calculated_xG'})

# Sanity checks: report problems with the coordinate join instead of letting
# them pass silently. These only print; the merged data is not modified.
duplicate_key_rows = int(xg_data.duplicated(subset=merge_keys).sum())
if duplicate_key_rows:
    print(f"Warning: {duplicate_key_rows} xG rows repeat an (x, y) pair; matching events are duplicated by the merge")

extra_rows = len(merged_data) - len(data)
if extra_rows:
    print(f"Warning: merge added {extra_rows} rows to the original {len(data)} events")

if 'calculated_xG' in merged_data.columns:
    matched_rows = int(merged_data['calculated_xG'].notna().sum())
    print(f"Rows with an xG value after merge: {matched_rows} (xG dataset has {len(xg_data)} rows)")
else:
    # Happens when the xG file has no 'xG' column, or when both files have one
    # (pandas then suffixes the columns and the rename above matches nothing).
    print("Warning: no 'calculated_xG' column after merge; check the 'xG' column in both datasets")

# Save the updated dataset
output_file = 'totalxg.xlsx'
merged_data.to_excel(output_file, index=False)
print(f"Updated dataset with xG saved to {output_file}")


   Unnamed: 0        xG                      Date     PlayerId  \
0           1  0.347147  2024-10-06T15:31:13.155Z     M. Olise   
1           2  0.066620  2024-10-06T15:36:54.452Z     M. Olise   
2           3  0.078832  2024-10-06T15:37:46.947Z  A. Pavlović   
3           4  0.092364  2024-10-06T15:39:29.691Z    T. Müller   
4           5  0.032533  2024-10-06T15:44:02.894Z     M. Olise   

              TeamId             HomeTeam           AwayTeam  timeMin  \
0  FC Bayern München  Eintracht Frankfurt  FC Bayern München        1   
1  FC Bayern München  Eintracht Frankfurt  FC Bayern München        6   
2  FC Bayern München  Eintracht Frankfurt  FC Bayern München        7   
3  FC Bayern München  Eintracht Frankfurt  FC Bayern München        9   
4  FC Bayern München  Eintracht Frankfurt  FC Bayern München       13   

   timeSec     x  ...  isIntentionalAssist  isAssistedShot  isOwnGoal  \
0        4  88.3  ...                  Yes             Yes      False   
1       45  84.0  

In [10]:
import pandas as pd

# Load the merged event/xG dataset, keep the columns used for chain building
# and order the events by time.

# Load the dataset
file_path = 'totalxg.xlsx'
data = pd.read_excel(file_path, sheet_name='Sheet1')

# Preview the data
print(data.head())

# Preprocessing: Keep relevant columns
columns = ['id', 'eventId', 'typeId', 'contestantId', 'playerName', 'outcome', 'timeMin_x', 'timeSec_x', 'x', 'y', 'GoalMouthY', 'GoalMouthZ', 'calculated_xG']
# .copy() makes the subset an independent frame, so adding 'timestamp' below
# does not raise pandas' SettingWithCopyWarning.
data = data[columns].copy()

# Create a timestamp (in seconds) for sorting
data['timestamp'] = data['timeMin_x'] * 60 + data['timeSec_x']

# Sort data by timestamp. A stable sort keeps events that share the same
# second in their original file order; the default algorithm does not
# guarantee that, which can scramble the event sequence used for the chains.
# NOTE(review): the timestamp ignores 'periodId' (visible in the preview) and
# any match identifier — if the file holds several periods with overlapping
# clocks or several matches, their events are interleaved here. Confirm.
data = data.sort_values(by=['timestamp'], kind='mergesort')

# Define event types
SHOT_TYPE_IDS = [13, 14, 15, 16]  # IDs for shots
PASS_TYPE_ID = 2  # Replace with actual typeId for passes
CARRY_TYPE_ID = 3  # Replace with actual typeId for carries
# NOTE(review): the data preview shows a typeId 1 event at (50.0, 50.1) at
# 0:00, which looks like a kick-off pass — passes may be typeId 1 rather than
# 2 in this feed. Confirm against the data provider's event definitions.

# Event types that can be part of a chain; built once instead of on every row.
CHAIN_TYPE_IDS = set(SHOT_TYPE_IDS) | {PASS_TYPE_ID, CARRY_TYPE_ID}

# Build xG Chains
# A chain is a run of consecutive pass/carry/shot events by the same player.
# It ends when another player produces a chain event or when any other event
# type occurs. Each chain is stored as
# {'playerName': <player>, 'events': [<row Series>, ...]}.
chains = []
chain = []
last_player = None

for _, row in data.iterrows():
    if row['typeId'] in CHAIN_TYPE_IDS:
        # Close the running chain when the player changes. The `chain` check
        # avoids emitting an empty chain for the previous player when the
        # chain was already closed by a non-chain event.
        if chain and row['playerName'] != last_player:
            chains.append({'playerName': last_player, 'events': chain})
            chain = []
        chain.append(row)
        last_player = row['playerName']
    else:
        # Any other event type interrupts the running chain.
        if chain:
            chains.append({'playerName': last_player, 'events': chain})
            chain = []

# Flush the chain that is still open when the data ends; without this the
# final chain would be dropped.
if chain:
    chains.append({'playerName': last_player, 'events': chain})

# Summarize xG contributions per player: one record per chain first, then
# aggregated per player and written to Excel.
player_xg_summary = []

for player_chain in chains:
    chain_events = player_chain['events']
    # Events without an xG value hold NaN. Python's sum() would turn the whole
    # chain total into NaN, and the groupby sum below would then skip it,
    # losing the xG of any shot in the chain. Only add real values.
    total_xG = sum(
        event['calculated_xG']
        for event in chain_events
        if pd.notna(event['calculated_xG'])
    )
    player_xg_summary.append({
        'playerName': player_chain['playerName'],
        'chain_length': len(chain_events),
        'total_xG': total_xG,
    })

# Convert to DataFrame. Explicit columns keep the groupby below working even
# when no chains were found (an empty list would give a frame with no columns).
player_xg_summary_df = pd.DataFrame(
    player_xg_summary,
    columns=['playerName', 'chain_length', 'total_xG'],
)

# Group by playerName to aggregate xG contributions across chains
player_xg_summary_df = player_xg_summary_df.groupby('playerName').agg({
    'chain_length': 'sum',
    'total_xG': 'sum'
}).reset_index()

# Display summarized xG chain data per player
print(player_xg_summary_df)

# Optional: Save the output
output_path = 'xg_chain_per_player.xlsx'
player_xg_summary_df.to_excel(output_path, index=False)
print(f"xG chain summary per player saved to {output_path}")


           id  eventId  typeId  periodId  timeMin_x  timeSec_x  \
0  2713245567        1      34        16          0          0   
1  2713246813        1      34        16          0          0   
2  2713302913        2      32         1          0          0   
3  2713302931        2      32         1          0          0   
4  2713303059        3       1         1          0          0   

                contestantId  outcome     x     y  ... isIntentionalAssist  \
0  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...                 NaN   
1  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...                 NaN   
2  apoawtpvac4zqlancmvw4nk4o        1   0.0   0.0  ...                 NaN   
3  a8l3w3n0j99qjlsxj3jnmgkz1        1   0.0   0.0  ...                 NaN   
4  apoawtpvac4zqlancmvw4nk4o        1  50.0  50.1  ...                 NaN   

  isAssistedShot  isOwnGoal_y  expandedMinute Goal  Time_in_sec  Gamestate  \
0            NaN          NaN             NaN  NaN      