In [None]:
%run clean_data.ipynb

In [None]:
import statsmodels.formula.api as smf

In [None]:
# Show every column when a DataFrame is displayed (None removes the column limit)
pd.options.display.max_columns = None

In [None]:
# Rank the players who most often target Lionel Messi with a pass
messi_is_target = possession_df['passingEvent.targetPlayer.nickname'] == "Lionel Messi"
to_messi = possession_df[messi_is_target]

# Count passes per (passer id, passer nickname), most frequent supplier first
supplier_keys = ['passingEvent.passerPlayer.id', 'passingEvent.passerPlayer.nickname']
to_messi.groupby(supplier_keys).size().sort_values(ascending=False)

In [None]:
# Rank the players Rodrigo de Paul (RDP) targets most often with his passes
rdp_is_passer = possession_df['passingEvent.passerPlayer.nickname'] == "Rodrigo de Paul"
from_rdp = possession_df[rdp_is_passer]

# Count passes per (target id, target nickname), most frequent receiver first
receiver_keys = ['passingEvent.targetPlayer.id', 'passingEvent.targetPlayer.nickname']
from_rdp.groupby(receiver_keys).size().sort_values(ascending=False)

In [None]:
# Keep only pass events ("PA") that carry an event id plus both a passer id and a target id
required_pass_ids = ['id', 'passingEvent.passerPlayer.id', 'passingEvent.targetPlayer.id']
is_pass_event = possession_df['possessionEventType'] == "PA"

# .copy() gives an independent frame so later column assignments do not touch possession_df
passes = possession_df[is_pass_event].dropna(subset=required_pass_ids).copy()

In [None]:
# Cast the event id and both player ids to int; the event id and passer id are used as merge
# keys against the tracking data further down.
# The values are read from `passes` itself. The previous right-hand sides read from
# `arg_passes`, a name this notebook never defines, so the cell failed on a fresh kernel
# (or index-aligned against an unrelated frame if one happened to exist).
# astype(int) raises on missing values; the cell that builds `passes` already drops rows
# where any of these three ids is missing.
for id_column in ('id', 'passingEvent.passerPlayer.id', 'passingEvent.targetPlayer.id'):
    passes[id_column] = passes[id_column].astype(int)

In [None]:
# Attach the passer's tracking row to each pass:
#   pass event id  <-> possession_event_id
#   passer id      <-> player.id
# The inner join keeps only passes that have a matching row in the cleaned tracking data
# (per the original note, that leaves Argentina passes only).
pass_side_keys = ['id', 'passingEvent.passerPlayer.id']
tracking_side_keys = ['possession_event_id', 'player.id']

arg_passes_tracking = pd.merge(
    passes,
    clean_tracking_df,
    how='inner',
    left_on=pass_side_keys,
    right_on=tracking_side_keys,
)

In [None]:
# Second join: look up the receiver's normalized position for the same possession event.
# Columns present on both sides (the passer's values from the first join vs. the receiver's
# values here) are told apart with the '_passer' / '_receiver' suffixes.
# NOTE: this cell rebinds arg_passes_tracking to its own merge result, so it is meant to run
# once after the passer join.
receiver_positions = clean_tracking_df[
    ['possession_event_id', 'player.id', 'x_normalized', 'y_normalized']
]

arg_passes_tracking = pd.merge(
    arg_passes_tracking,
    receiver_positions,
    how='inner',
    left_on=['possession_event_id', 'passingEvent.targetPlayer.id'],
    right_on=['possession_event_id', 'player.id'],
    suffixes=('_passer', '_receiver'),
)

In [None]:
# Derive two geometric features from the passer -> receiver displacement
pass_dx = arg_passes_tracking['x_normalized_receiver'] - arg_passes_tracking['x_normalized_passer']
pass_dy = arg_passes_tracking['y_normalized_receiver'] - arg_passes_tracking['y_normalized_passer']

arg_passes_tracking = arg_passes_tracking.assign(
    # Euclidean length of the displacement, in the units of the normalized coordinates
    pass_distance=np.sqrt(pass_dx**2 + pass_dy**2),
    # Unsigned direction of the pass in degrees (0 to 180), measured from the positive x axis
    pass_angle=np.abs(np.arctan2(pass_dy, pass_dx) * (180 / np.pi)),
)

In [None]:
# Build the modelling frame: keep the raw pass descriptors and add coarser categorical versions

def _keep_or_other(column, kept_codes):
    """Return the column's values, with every value outside kept_codes replaced by 'Other'."""
    return np.where(column.isin(kept_codes), column, 'Other')


model_source_columns = [
    'passingEvent.ballHeightType', 'passingEvent.pressureType', 'passingEvent.passBodyType',
    'passingEvent.passType', 'pass_distance', 'pass_angle', 'passingEvent.passOutcomeType',
]

arg_passes_model_data = arg_passes_tracking[model_source_columns].assign(
    # A missing pressure type is recoded as 'NO'
    PressureType=lambda x: np.where(x['passingEvent.pressureType'].isna(), 'NO',
                                    x['passingEvent.pressureType']),
    # Only codes "L" and "R" are kept for the body type; everything else becomes 'Other'
    BodyType=lambda x: _keep_or_other(x['passingEvent.passBodyType'], ["L", "R"]),
    # Only codes "G" and "A" are kept for the ball height
    PassHeight=lambda x: _keep_or_other(x['passingEvent.ballHeightType'], ["G", "A"]),
    # Only codes "S", "T" and "O" are kept for the pass type
    PassType=lambda x: _keep_or_other(x['passingEvent.passType'], ["S", "T", "O"]),
    # Binary target: 1 when the outcome code is "C", 0 for anything else (including missing)
    PassOutcome=lambda x: np.where(x['passingEvent.passOutcomeType'] == "C", 1, 0),
).copy()

In [None]:
# Fix the level order of every categorical predictor before fitting.
# The categorical is assigned straight to the column, which is positional. Wrapping it in
# pd.Series(...) first would attach a fresh 0..n-1 index and make the assignment align on
# labels, silently producing NaN or misplaced values whenever the frame's index is not the
# default range.
# Values that are not listed in a column's categories become NaN.
# NOTE(review): the first listed level is presumably the reference level in the fitted
# model (default treatment coding) — confirm against the summary output.
category_levels = {
    'PassType': ["S", "T", "O", "Other"],
    'PressureType': ["NO", "L", "P", "A"],
    'PassHeight': ["G", "A", "Other"],
    'BodyType': ["R", "L", "Other"],
}
for predictor, levels in category_levels.items():
    arg_passes_model_data[predictor] = pd.Categorical(
        arg_passes_model_data[predictor], categories=levels
    )

# Logistic regression of the binary PassOutcome column on pressure, geometry (distance and
# angle), body part, ball height and pass type.
reg = smf.logit(
    'PassOutcome ~  PressureType + pass_distance + pass_angle + BodyType + PassHeight + PassType',
    data=arg_passes_model_data,
).fit()

In [None]:
# Print the plain-text summary table of the fitted logit model
model_summary = reg.summary()
print(model_summary)

In [None]:
# Exponentiate the fitted logit coefficients (log-odds) so they read as odds ratios
reg.params.pipe(np.exp)