In [8]:
import pandas as pd

# Load your cleaned Transfermarkt data
df_tm = pd.read_csv('transfermarkt_history.csv', parse_dates=['date'])
df_tm = df_tm.sort_values('date')

# Set the date as the index, which is required for resampling
df_tm.set_index('date', inplace=True)

# Resample the data to a weekly frequency ('W').
# ffill() stands for 'forward fill' - it carries the last known value forward.
df_weekly = df_tm.resample('W').ffill()

# Now, let's create a placeholder for our other features
df_weekly['weekly_sentiment_score'] = 0.0  # Placeholder
df_weekly['goals_per_week'] = 0  # Placeholder

print("Weekly Resampled Market Value Data:")
df_weekly.info()
print(df_weekly.head(10))

Weekly Resampled Market Value Data:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 498 entries, 2015-12-06 to 2025-06-15
Freq: W-SUN
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   market_value_eur        498 non-null    int64  
 1   club                    498 non-null    object 
 2   weekly_sentiment_score  498 non-null    float64
 3   goals_per_week          498 non-null    int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 19.5+ KB
            market_value_eur           club  weekly_sentiment_score  \
date                                                                  
2015-12-06             50000  AS Monaco U19                     0.0   
2015-12-13             50000  AS Monaco U19                     0.0   
2015-12-20             50000  AS Monaco U19                     0.0   
2015-12-27             50000  AS Monaco U19                     0.0   
2016-01-03            

In [9]:
import pandas as pd

# --- Part 1: Load and Prepare the Market Value Data (from before) ---
# NOTE: This part uses Mbappé's data, but we'll use the structure for our new player.
df_tm = pd.read_csv('transfermarkt_history.csv', parse_dates=['date'])
df_tm = df_tm.sort_values('date')
df_tm.set_index('date', inplace=True)
df_weekly_base = df_tm.resample('W').ffill()
# We will just use this as a template for the date range, but we'll fill it with new data.


# --- Part 2: Load and Process the Performance Data ---
df_performance = pd.read_csv('statsbomb_match_performance.csv')

# Convert match_date to datetime objects
df_performance['match_date'] = pd.to_datetime(df_performance['match_date'])

print("Loaded performance data for all players.")


# --- Part 3: Create a Weekly Timeline for a Specific Player ---
# Let's choose a player from our performance data, e.g., Bethany Mead
player_name_to_analyze = "Ellen White"
df_player_performance = df_performance[df_performance['player_name'] == player_name_to_analyze].copy()

# Set date as the index for resampling
df_player_performance.set_index('match_date', inplace=True)

# Resample performance data to weekly sums
df_weekly_performance = df_player_performance.resample('W').sum()
print(f"\nGenerated weekly performance summary for {player_name_to_analyze}:")
print(df_weekly_performance.head())


# --- Part 4: Create the Master DataFrame ---
# We'll use the weekly performance data as our base
df_master = df_weekly_performance.copy()

# Now, we merge the market value. For a real project, we'd have this player's market value.
# For now, we'll merge the structure we have and keep the placeholder values.
# This step combines the two dataframes based on their weekly date index.
df_master = df_master.join(df_weekly_base[['market_value_eur']], how='left')

# Forward-fill the market value for weeks where the player played but value didn't change
df_master['market_value_eur'].ffill(inplace=True)

# For weeks the player didn't play, we can fill missing stats with 0
df_master.fillna(0, inplace=True)


print(f"\n--- Generated Master Dataset for {player_name_to_analyze} ---")
df_master.info()
print(df_master.head(15))

# Save our first complete, model-ready dataset!
df_master.to_csv(f'master_dataset_{player_name_to_analyze.replace(" ", "_")}.csv')
print(f"\n✅ Master dataset saved for {player_name_to_analyze}")

Loaded performance data for all players.

Generated weekly performance summary for Ellen White:
            shots  goals  assists  key_passes  match_id
match_date                                             
2025-08-31     52     10        5           9  83040591

--- Generated Master Dataset for Ellen White ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1 entries, 2025-08-31 to 2025-08-31
Freq: W-SUN
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   shots             1 non-null      int64  
 1   goals             1 non-null      int64  
 2   assists           1 non-null      int64  
 3   key_passes        1 non-null      int64  
 4   match_id          1 non-null      int64  
 5   market_value_eur  1 non-null      float64
dtypes: float64(1), int64(5)
memory usage: 164.0 bytes
            shots  goals  assists  key_passes  match_id  market_value_eur
match_date                                      

  df_weekly_performance = df_player_performance.resample('W').sum()


In [10]:
import pandas as pd

# Load the performance data
df_perf = pd.read_csv('statsbomb_match_performance.csv')

# Count the number of matches for each player
player_match_counts = df_perf['player_name'].value_counts()

print("Top 10 players by number of matches played:")
print(player_match_counts.head(10))

# Get the name of the player with the most matches
most_active_player = player_match_counts.index[0]
print(f"\nPlayer with the most matches: '{most_active_player}' ({player_match_counts.iloc[0]} matches)")

Top 10 players by number of matches played:
Inessa Kaagman               22
Mary Alexandra Earps         22
Vivianne Miedema             22
Amalie Vevle Eikeland        22
Samantha May Kerr            22
Natasha Harding              22
Pernille Mosegaard Harder    22
Grace Fisk                   22
Aileen Whelan                22
Yana Daniëls                 22
Name: player_name, dtype: int64

Player with the most matches: 'Inessa Kaagman' (22 matches)
