In [2]:
import pandas as pd
import numpy as np
from math import sqrt
import os
import pysubgroup as ps

In [3]:
PATH: str = "../data/spadl_format/"
LEAGUES: list[str] = ["England", "Spain", "France", "Italy", "Germany"]

teams_df = pd.read_json("../data/wyscout/teams/teams.json")
players_df = pd.read_json("../data/wyscout/players/players.json")
ranking_df = pd.read_json('../data/wyscout/playeranks/playeranks.json')

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also creates
# missing parent directories and cannot fail if the directory appears between
# the check and the creation.
os.makedirs("../data/processed", exist_ok=True)

# One action frame per league, keyed by league name.
df_dict = {}
for league in LEAGUES:
    # Drop the columns that are not used afterwards; drop() returns a new
    # frame, so nothing is mutated in place.
    df = pd.read_csv(f"{PATH}{league}.csv", index_col=0).drop(
        columns=["original_event_id", "result_name", "bodypart_id", "type_id"]
    )
    df_dict[league] = df

# The frames are stacked as they are: every league keeps the index it was read
# with, so index labels are not unique across leagues in all_df.
all_df = pd.concat(list(df_dict.values()))
all_df

Unnamed: 0,game_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,result_id,action_id,type_name,bodypart_name,player_name
0,2499719,1,2.758649,1609,25413,51.45,34.68,32.55,14.96,1,0,pass,foot,A. Lacazette
1,2499719,1,4.946850,1609,370224,32.55,14.96,53.55,17.00,1,1,pass,foot,R. Holding
2,2499719,1,6.542188,1609,3319,53.55,17.00,36.75,19.72,1,2,pass,head,M. Özil
3,2499719,1,8.143395,1609,120339,36.75,19.72,43.05,3.40,1,3,pass,head,Mohamed Elneny
4,2499719,1,10.302366,1609,167145,43.05,3.40,75.60,8.16,1,4,pass,foot,Bellerín
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389146,2517044,2,2817.761761,2463,94831,87.15,36.72,87.15,36.72,0,1148,shot,foot_right,Y. Ōsako
389147,2517044,2,2818.280436,2451,14917,17.85,31.96,23.10,27.20,1,1149,interception,foot,R. Knoche
389148,2517044,2,2823.180681,2451,14804,23.10,27.20,0.00,3.40,1,1150,dribble,foot,J. Błaszczykowski
389149,2517044,2,2828.080925,2451,14804,0.00,3.40,5.25,4.76,0,1151,pass,foot,J. Błaszczykowski


In [4]:
# List the distinct action type names present in the concatenated league data.
print(all_df["type_name"].unique())

['pass' 'interception' 'dribble' 'take_on' 'tackle' 'foul'
 'freekick_short' 'cross' 'shot' 'clearance' 'throw_in' 'goalkick'
 'corner_short' 'corner_crossed' 'keeper_save' 'freekick_crossed'
 'shot_freekick' 'bad_touch' 'shot_penalty']


## Pre-process

Gerando as variáveis *shot_distance_from_goal* e *shot_angle_from_goal*, além de contar a quantidade de eventos, passes e dribles até o chute. Essa contagem é feita **a partir do momento em que o time que chuta obtém a posse de bola** (o que pode incluir mais eventos do que a jogada em si).

Ainda não foi feita discretização das coordenadas (conforme tabela 4.2), e nem algumas das variáveis da seção 3.3 (por não estarem na 4). Precisa ser discutido. 

In [5]:
# Centre of the attacked goal, used for the shot distance.
GOAL_CENTER_X: int = 105
GOAL_CENTER_Y: int = 34

# End points of the goal mouth used for the shot angle. Despite the "CROSSBAR"
# names, both points lie on the x = 105 line, 4 units either side of the centre.
UPPER_CROSSBAR_X: int = 105
UPPER_CROSSBAR_Y: int = 38

LOWER_CROSSBAR_X: int = 105
LOWER_CROSSBAR_Y: int = 30

def get_shot_angle(shot_pos_x: np.float64, shot_pos_y: np.float64) -> np.float64:
    """Return the angle, in radians, under which the goal mouth is seen from a shot.

    The angle is measured between the vectors going from the shot position to
    the two goal-mouth end points defined above.

    Args:
        shot_pos_x: x coordinate of the shot.
        shot_pos_y: y coordinate of the shot.

    Returns:
        A value in [0, pi]. It is 0.0 when the shot position coincides with one
        of the end points (the previous arccos formulation divided by a zero
        norm there and returned NaN).
    """
    to_upper = np.array([UPPER_CROSSBAR_X - shot_pos_x, UPPER_CROSSBAR_Y - shot_pos_y], dtype=float)
    to_lower = np.array([LOWER_CROSSBAR_X - shot_pos_x, LOWER_CROSSBAR_Y - shot_pos_y], dtype=float)

    # atan2(|cross|, dot) needs no normalisation, so it is defined for
    # zero-length vectors and cannot leave arccos' [-1, 1] domain through
    # rounding when the two vectors are (almost) collinear.
    cross = to_upper[0] * to_lower[1] - to_upper[1] * to_lower[0]
    return np.arctan2(abs(cross), np.dot(to_upper, to_lower))

def calcular_ranking_medio(player_id, rankings=None, default=-0.2):
    """Return a player's average playerank score weighted by minutes played.

    Args:
        player_id: Value matched against the ``playerId`` column.
        rankings: Frame with ``playerId``, ``playerankScore`` and
            ``minutesPlayed`` columns. Defaults to the notebook-level
            ``ranking_df``.
        default: Value returned when no weighted average can be computed.

    Returns:
        sum(score * minutes) / sum(minutes) over the player's rows, or
        ``default`` when the player has no rows or a total of zero minutes
        (the latter used to produce a 0/0 division).
    """
    if rankings is None:
        rankings = ranking_df

    player_rows = rankings[rankings['playerId'] == player_id]
    total_minutes = player_rows['minutesPlayed'].sum()
    if player_rows.empty or total_minutes == 0:
        return default

    weighted_scores = player_rows['playerankScore'] * player_rows['minutesPlayed']
    return weighted_scores.sum() / total_minutes

def generate_shots_with_counts_events(df):
    """Build one row per shot, with counts describing the play that led to it.

    Events are processed per (game_id, period_id) in ``time_seconds`` order.
    For every event whose ``type_name`` is exactly ``'shot'``, the play is the
    run of consecutive events by the shooting team that ends at the shot. It
    never reaches back past the previous shot of the same period.

    Args:
        df: Action frame with at least the columns game_id, period_id,
            time_seconds, team_id, player_id, start_x, start_y, end_x, end_y,
            type_name, bodypart_name and result_id.

    Returns:
        DataFrame with the shot's own fields plus num_events, num_passes,
        num_dribbles, play_duration, player_rank, shot_distance_from_goal,
        shot_angle_from_goal and result_id. When ``df`` contains no shot, an
        empty frame with the same columns is returned.
    """
    output_columns = [
        'game_id', 'period_id', 'team_id', 'player_id', 'time_seconds',
        'start_x', 'start_y', 'end_x', 'end_y',
        'num_events', 'num_passes', 'num_dribbles', 'play_duration',
        'player_rank', 'bodypart_name',
        'shot_distance_from_goal', 'shot_angle_from_goal', 'result_id',
    ]
    shot_data = []
    result_ids = []
    # The ranking lookup only depends on the player, so compute it once each.
    rank_by_player = {}

    for (game_id, period_id), group in df.groupby(['game_id', 'period_id']):
        # A stable sort keeps the incoming order of events sharing a timestamp.
        group = group.sort_values(by='time_seconds', kind='stable').reset_index(drop=True)

        # Plain arrays make the backwards walk below cheap.
        is_shot = group['type_name'].eq('shot').to_numpy()
        is_pass = group['type_name'].eq('pass').to_numpy()
        is_dribble = group['type_name'].eq('dribble').to_numpy()
        team_ids = group['team_id'].to_numpy()
        times = group['time_seconds'].to_numpy()

        # First position the current play may start at: just after the previous shot.
        window_start = 0
        for shot_pos in np.flatnonzero(is_shot):
            shot_pos = int(shot_pos)
            shot_row = group.iloc[shot_pos]

            # Walk back to where the shooting team took possession of the ball.
            play_start = shot_pos
            while play_start > window_start and team_ids[play_start - 1] == team_ids[shot_pos]:
                play_start -= 1
            play = slice(play_start, shot_pos + 1)

            player_id = shot_row['player_id']
            if player_id not in rank_by_player:
                rank_by_player[player_id] = calcular_ranking_medio(player_id)

            shot_data.append({
                'game_id': game_id,
                'period_id': period_id,
                'team_id': shot_row['team_id'],
                'player_id': player_id,
                'time_seconds': shot_row['time_seconds'],
                'start_x': shot_row['start_x'],
                'start_y': shot_row['start_y'],
                'end_x': shot_row['end_x'],
                'end_y': shot_row['end_y'],
                'num_events': shot_pos - play_start + 1,
                'num_passes': int(is_pass[play].sum()),
                'num_dribbles': int(is_dribble[play].sum()),
                'play_duration': times[shot_pos] - times[play_start],
                'player_rank': rank_by_player[player_id],
                'bodypart_name': shot_row['bodypart_name'],
            })
            result_ids.append(shot_row['result_id'])

            # The next play starts after this shot.
            window_start = shot_pos + 1

    if not shot_data:
        # Without this guard the column lookups below raise on an empty frame.
        return pd.DataFrame(columns=output_columns)

    shots_df = pd.DataFrame(shot_data)
    dx = shots_df['start_x'] - GOAL_CENTER_X
    dy = shots_df['start_y'] - GOAL_CENTER_Y
    shots_df['shot_distance_from_goal'] = np.sqrt(dx ** 2 + dy ** 2)
    shots_df['shot_angle_from_goal'] = [
        get_shot_angle(x, y) for x, y in zip(shots_df['start_x'], shots_df['start_y'])
    ]
    shots_df['result_id'] = result_ids
    return shots_df

# Build the per-shot table from the actions of all leagues.
shots_df = generate_shots_with_counts_events(all_df)

In [6]:
# Preview the first rows of the per-shot table.
shots_df.head()

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,end_x,end_y,num_events,num_passes,num_dribbles,play_duration,player_rank,bodypart_name,shot_distance_from_goal,shot_angle_from_goal,result_id
0,2499719,1,1609,25413,94.595788,92.4,40.12,105.0,37.4,7,4,0,19.858647,0.022208,foot_right,14.007655,0.509981,1
1,2499719,1,1631,26150,179.854785,89.25,32.64,105.0,40.8,2,0,0,4.546657,0.011654,foot_left,15.808608,0.494098,0
2,2499719,1,1631,14763,254.745027,100.8,32.64,105.0,34.0,7,3,0,18.935591,0.010892,head/other,4.414703,1.46731,1
3,2499719,1,1609,7868,425.824035,85.05,45.56,105.0,40.8,6,3,1,12.967477,0.016258,foot_left,23.057235,0.300168,0
4,2499719,1,1609,7868,815.462015,78.75,47.6,105.0,37.4,1,0,0,0.0,0.016258,foot_right,29.563872,0.24003,0


## Binary Goal/Not Goal

In [7]:
# Feature-only copy of the shots: identifiers and the timestamp are removed so
# they cannot be used as subgroup selectors.
shots_df_cp = shots_df.drop(columns=["game_id", "period_id", "team_id", "player_id", "time_seconds"])

# Subgroup discovery target: successful shots (result_id == 1).
target = ps.BinaryTarget('result_id', 1)

# Selectors are generated from every remaining column except the target.
# NOTE(review): end_x / end_y stay in the search space; they presumably describe
# where the shot ended up rather than something known before it - confirm that
# this is intended.
search_space = ps.create_selectors(shots_df_cp, ignore=['result_id'])

# Task: 10 best subgroups, at most 2 selectors each, scored with WRAcc.
task = ps.SubgroupDiscoveryTask(
    shots_df_cp,
    target,
    search_space,
    result_set_size=10,
    depth=2,
    qf=ps.WRAccQF(),
)

# Run the beam search.
search_algorithm = ps.BeamSearch()
result = search_algorithm.execute(task)

# Show quality and description of each subgroup found. The commented-out prints
# that used to follow referenced further result fields (size_sg, positives_sg,
# target_share_sg, lift, ...).
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(
        f"Quality: {sg_result.quality}",
        f"Subgroup: {sg_result.subgroup}",
        "-" * 40,
        sep="\n",
    )

Quality: 0.032996125330170024
Subgroup: end_x>=105.0 AND shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.03243715447894347
Subgroup: end_x>=105.0 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.031155481651607888
Subgroup: shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.030839137893783095
Subgroup: shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.029720454021914134
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.027978658146957595
Subgroup: end_x>=105.0 AND start_x>=96.60
----------------------------------------
Quality: 0.02758813287621146
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 0.0264073773750995
Subgroup: shot_angle_from_goal>=0.60 AND start_x>=96.60
----------------------------------------
Quality: 0.026308026

## XG

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

In [10]:
# constants
RANDOM_STATE: int = 123
TEST_SIZE: float  = 0.3

shots_df_cp = shots_df.copy()


# Random Forest Classifier
rfc = RandomForestClassifier(random_state=RANDOM_STATE)
# .copy() gives X its own data, so the column assignment below writes to X
# itself and no longer raises SettingWithCopyWarning.
X = shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]].copy()
# Integer-encode the body part: foot_right -> 0, foot_left -> 1, anything else -> 2.
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
y = shots_df_cp["result_id"]

# Hold out TEST_SIZE of the shots, fit on the rest and report per-class metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
rfc.fit(X=X_train, y=y_train)
y_pred = rfc.predict(X=X_test)
classification_report(y_test, y_pred, output_dict=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


{'0': {'precision': 0.9070434415858287,
  'recall': 0.9876010286554004,
  'f1-score': 0.9456096381304138,
  'support': 10888.0},
 '1': {'precision': 0.5246478873239436,
  'recall': 0.11910471622701839,
  'f1-score': 0.19413680781758957,
  'support': 1251.0},
 'accuracy': 0.8980970425899992,
 'macro avg': {'precision': 0.7158456644548862,
  'recall': 0.5533528724412095,
  'f1-score': 0.5698732229740017,
  'support': 12139.0},
 'weighted avg': {'precision': 0.8676351840372977,
  'recall': 0.8980970425899992,
  'f1-score': 0.8681656550410867,
  'support': 12139.0}}

In [11]:
# Store the classifier's prediction for every shot (rows used for fitting included).
# NOTE(review): predict() yields class labels, not probabilities; the later
# BinaryTarget('xg', 1) search depends on these 0/1 values.
shots_df_cp["xg"] = rfc.predict(X=X)


In [12]:
# Number of shots predicted as class 0 versus class 1.
print("Comparação 0/1: ", shots_df_cp[shots_df_cp["xg"]==0].shape[0], " VS " , shots_df_cp[shots_df_cp["xg"]==1].shape[0])


Comparação 0/1:  39526  VS  935


In [13]:
# Remove identifiers, the timestamp and the true outcome before the search.
# errors="ignore" keeps the cell re-runnable: the previous in-place drop raised
# KeyError on a second execution because the columns were already gone.
shots_df_cp = shots_df_cp.drop(
    columns=["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"],
    errors="ignore",
)

# Subgroup discovery target: shots the classifier predicted as class 1 (xg == 1).
target = ps.BinaryTarget('xg', 1)

# Selectors are generated from every remaining column except the target.
search_space = ps.create_selectors(shots_df_cp, ignore=['xg'])

# Task: 10 best subgroups, at most 2 selectors each, scored with WRAcc.
task = ps.SubgroupDiscoveryTask(shots_df_cp, target, search_space, result_set_size=10, depth=2, qf=ps.WRAccQF())

# Run the beam search.
search_algorithm = ps.BeamSearch()
result = search_algorithm.execute(task)

# Show quality and description of each subgroup found. The commented-out prints
# that used to follow referenced further result fields (size_sg, positives_sg,
# target_share_sg, lift, ...).
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print("-" * 40)

Quality: 0.018716662379767844
Subgroup: shot_angle_from_goal>=0.60 AND start_x>=96.60
----------------------------------------
Quality: 0.018398288803861684
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 0.018238065116663007
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.017846586326198236
Subgroup: shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.017443045908338128
Subgroup: shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.01716379535033011
Subgroup: end_x>=105.0 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.017079852018944016
Subgroup: start_x>=96.60
----------------------------------------
Quality: 0.016838085597729025
Subgroup: end_x>=105.0 AND shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.016540572174539914
Subg

## VAEP

In [14]:
from tqdm import tqdm
import socceraction.spadl as spd
from socceraction.vaep import features as ft
import socceraction.vaep.labels as lab
import socceraction.vaep.formula as fm
import xgboost as xgb
import sklearn.metrics as mt

In [15]:
def features_transform(spadl):
    """Compute the game-state feature table for every action in ``spadl``.

    Args:
        spadl: Action frame with at least game_id, result_id and result_name,
            plus whatever the feature functions listed below read.

    Returns:
        DataFrame with one row per action: the feature frames of all games,
        concatenated in ascending game_id order, with a fresh 0..n-1 index.

    Side effects:
        ``spadl`` is modified in place: result_id values 2 and 3 become 0, and
        the result_name values "offside" and "owngoal" become "fail". Code that
        later reuses the same frame sees these rewritten values.
    """
    # In-place normalisation of the caller's frame (see "Side effects").
    spadl.loc[spadl.result_id.isin([2, 3]), ["result_id"]] = 0
    spadl.loc[spadl.result_name.isin(["offside", "owngoal"]), ["result_name"]] = "fail"

    xfns = [
        ft.actiontype_onehot,
        ft.bodypart_onehot,
        ft.result_onehot,
        ft.goalscore,
        ft.startlocation,
        ft.endlocation,
        ft.team,
        ft.time,
        ft.time_delta
    ]

    per_game_features = []
    # A single groupby pass replaces one full boolean scan of the frame per
    # game; group keys come out sorted, the same order np.unique produced.
    for _, game_actions in tqdm(spadl.groupby("game_id")):
        match_actions = game_actions.reset_index(drop=True)
        match_states = ft.gamestates(actions=match_actions)
        per_game_features.append(pd.concat([fn(match_states) for fn in xfns], axis=1))

    return pd.concat(per_game_features).reset_index(drop=True)

def labels_transform(spadl):
    """Compute the label columns for every action in ``spadl``.

    Args:
        spadl: Action frame with a game_id column plus whatever ``lab.scores``
            and ``lab.concedes`` read.

    Returns:
        DataFrame with the outputs of ``lab.scores`` and ``lab.concedes`` side
        by side, one row per action, games concatenated in ascending game_id
        order, with a fresh 0..n-1 index.
    """
    yfns = [lab.scores, lab.concedes]

    per_game_labels = []
    # A single groupby pass replaces one full boolean scan of the frame per
    # game; group keys come out sorted, the same order np.unique produced.
    for _, game_actions in tqdm(spadl.groupby("game_id")):
        match_actions = game_actions.reset_index(drop=True)
        per_game_labels.append(pd.concat([fn(actions=match_actions) for fn in yfns], axis=1))

    return pd.concat(per_game_labels).reset_index(drop=True)

def _normalized_brier_score(model, X, y):
    """Brier score of ``model`` on (X, y) divided by that of a constant base-rate predictor."""
    predicted = model.predict_proba(X)[:, 1]
    # Baseline: predict the share of positive labels for every row.
    base_rate = np.full(len(y), np.mean(y))
    return mt.brier_score_loss(y, predicted) / mt.brier_score_loss(y, base_rate)


def train_vaep(X_train, y_train, X_test, y_test):
    """Fit one XGBoost classifier per label and print normalised Brier scores.

    Args:
        X_train: Feature frame used for fitting.
        y_train: Label frame with "scores" and "concedes" columns.
        X_test: Feature frame used only for evaluation.
        y_test: Label frame with "scores" and "concedes" columns.

    Returns:
        Dict mapping "scores" and "concedes" to the fitted classifier.

    The printed "NBS" is the model's Brier score divided by the Brier score of
    always predicting the positive share of the evaluated label set; values
    below 1 mean the model beats that constant baseline.
    """
    models = {}
    for m in ["scores", "concedes"]:
        models[m] = xgb.XGBClassifier(random_state=0, n_estimators=50, max_depth=3)

        print("training " + m + " model")
        models[m].fit(X_train, y_train[m])

        # Same report for both splits; only the label in the message differs.
        for split, X, y in (("Train", X_train, y_train[m]), ("Test", X_test, y_test[m])):
            print(m + " " + split + " NBS: " + str(_normalized_brier_score(models[m], X, y)))
            print()

        print("----------------------------------------")

    return models

def generate_predictions(features, models):
    """Return the positive-class probability of each model as a DataFrame.

    Args:
        features: Feature frame passed to ``predict_proba``.
        models: Mapping with "scores" and "concedes" entries, each exposing
            ``predict_proba``.

    Returns:
        DataFrame with a "scores" and a "concedes" column holding column 1 of
        the corresponding ``predict_proba`` output.
    """
    return pd.DataFrame({
        label: models[label].predict_proba(features)[:, 1]
        for label in ("scores", "concedes")
    })

def calculate_action_values(spadl, predictions):
    """Attach probabilities and action values to the identifying action columns.

    Args:
        spadl: Action frame passed to ``fm.value``; it must contain the context
            columns listed below.
        predictions: Frame with "scores" and "concedes" probability columns.

    Returns:
        The context columns of ``spadl``, the predictions renamed to
        "Pscores" / "Pconcedes", and the output of ``fm.value``, joined
        column-wise. The three parts are aligned on their index, so they are
        expected to share the same index.
    """
    context_columns = [
        "original_event_id", "player_id", "action_id", "game_id",
        "start_x", "start_y", "end_x", "end_y", "type_name", "result_name",
    ]
    probability_names = {"scores": "Pscores", "concedes": "Pconcedes"}

    values = fm.value(
        actions=spadl,
        Pscores=predictions["scores"],
        Pconcedes=predictions["concedes"],
    )
    parts = [
        spadl[context_columns],
        predictions.rename(columns=probability_names),
        values,
    ]
    return pd.concat(parts, axis=1)


In [16]:
# Reload the actions of every league with all columns kept.
spadl = {league: pd.read_csv(f"../data/spadl_format/{league}.csv") for league in LEAGUES}

# Features are built first for all leagues: features_transform rewrites
# result_id / result_name in place, so the labels below are computed from the
# rewritten frames, exactly as in the original loop order.
features = {league: features_transform(spadl[league]) for league in LEAGUES}

labels = {league: labels_transform(spadl[league]) for league in LEAGUES}

# Fit on England and evaluate on Spain.
models = train_vaep(
    X_train=features["England"],
    y_train=labels["England"],
    X_test=features["Spain"],
    y_test=labels["Spain"],
)


100%|██████████| 380/380 [00:06<00:00, 62.77it/s]
100%|██████████| 380/380 [00:06<00:00, 63.08it/s]
100%|██████████| 380/380 [00:06<00:00, 63.00it/s]
100%|██████████| 380/380 [00:06<00:00, 63.03it/s]
100%|██████████| 306/306 [00:04<00:00, 63.80it/s]
100%|██████████| 380/380 [00:06<00:00, 54.53it/s]
100%|██████████| 380/380 [00:07<00:00, 53.85it/s]
100%|██████████| 380/380 [00:06<00:00, 54.29it/s]
100%|██████████| 380/380 [00:07<00:00, 53.85it/s]
100%|██████████| 306/306 [00:05<00:00, 54.00it/s]


training scores model
scores Train NBS: 0.8452471194228581

scores Test NBS: 0.8503677630926355

----------------------------------------
training concedes model
concedes Train NBS: 0.9660641623881886

concedes Test NBS: 0.9766251611701147

----------------------------------------


In [17]:
# Score every league with the trained models ...
preds = {
    league: generate_predictions(features=features[league], models=models)
    for league in LEAGUES
}
# ... and turn the probabilities into per-action values.
action_values = {
    league: calculate_action_values(spadl=spadl[league], predictions=preds[league])
    for league in LEAGUES
}

# Stack the per-league tables; each keeps its own index.
all_action_values = pd.concat(list(action_values.values()))

In [18]:
# Show one shot row to inspect the columns available for the merge below.
shots_df.head(1)

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,end_x,end_y,num_events,num_passes,num_dribbles,play_duration,player_rank,bodypart_name,shot_distance_from_goal,shot_angle_from_goal,result_id
0,2499719,1,1609,25413,94.595788,92.4,40.12,105.0,37.4,7,4,0,19.858647,0.022208,foot_right,14.007655,0.509981,1


In [19]:
# Show one action-value row to inspect the columns available for the merge below.
all_action_values.head(1)

Unnamed: 0,original_event_id,player_id,action_id,game_id,start_x,start_y,end_x,end_y,type_name,result_name,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,177959171.0,25413,0,2499719,51.45,34.68,32.55,14.96,pass,success,0.003555,0.00056,0.0,-0.0,0.0


In [20]:
# Columns shared by both tables that identify an action.
merge_keys = ['game_id', 'player_id', 'start_x', 'start_y', 'end_x', 'end_y']

shots_df_cp = shots_df.copy()

# shots_df only holds type_name == 'shot' events, so only 'shot' actions can be
# their counterpart. Filtering first stops another action of the same player
# with identical coordinates from being matched to a shot.
all_action_values_cp = (
    all_action_values.loc[all_action_values["type_name"] == "shot"]
    .drop(columns=["original_event_id", "result_name", "action_id", "type_name"])
    .reset_index(drop=True)
)

# The key columns alone are not guaranteed to be unique (a player can repeat
# the same coordinates within a game), and a plain left merge then multiplies
# rows. Numbering repeats on both sides pairs the k-th occurrence with the k-th.
# NOTE(review): this assumes both tables list a game's shots in the same
# relative order - confirm for the source files.
shots_df_cp["_occurrence"] = shots_df_cp.groupby(merge_keys, dropna=False).cumcount()
all_action_values_cp["_occurrence"] = all_action_values_cp.groupby(merge_keys, dropna=False).cumcount()

shots_df_cp = shots_df_cp.merge(
    all_action_values_cp,
    on=merge_keys + ["_occurrence"],
    how='left',
    validate="one_to_one",
).drop(columns="_occurrence")

# Show the resulting dataframe
shots_df_cp.head()

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,end_x,end_y,num_events,...,player_rank,bodypart_name,shot_distance_from_goal,shot_angle_from_goal,result_id,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,2499719,1,1609,25413,94.595788,92.4,40.12,105.0,37.4,7,...,0.022208,foot_right,14.007655,0.509981,1,0.978135,0.002137,0.902766,-0.000387,0.902379
1,2499719,1,1631,26150,179.854785,89.25,32.64,105.0,40.8,2,...,0.011654,foot_left,15.808608,0.494098,0,0.018184,0.007322,-0.020396,-0.003519,-0.023916
2,2499719,1,1631,14763,254.745027,100.8,32.64,105.0,34.0,7,...,0.010892,head/other,4.414703,1.46731,1,0.977107,0.00229,0.88553,0.000666,0.886196
3,2499719,1,1609,7868,425.824035,85.05,45.56,105.0,40.8,6,...,0.016258,foot_left,23.057235,0.300168,0,0.021434,0.002819,-0.004685,-0.001744,-0.006429
4,2499719,1,1609,7868,815.462015,78.75,47.6,105.0,37.4,1,...,0.016258,foot_right,29.563872,0.24003,0,0.017245,0.005117,-0.019283,-0.002159,-0.021442


In [21]:
# Remove identifiers, the timestamp and the true outcome before the search.
# errors="ignore" keeps the cell re-runnable: the previous in-place drop raised
# KeyError on a second execution because the columns were already gone.
shots_df_cp = shots_df_cp.drop(
    columns=["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"],
    errors="ignore",
)

# Subgroup discovery target: the numeric Pscores column merged in from the action values.
target = ps.NumericTarget('Pscores')

# Selectors come from the shot features only; the action-value columns are excluded.
search_space = ps.create_selectors(shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Task: 10 best subgroups, at most 2 selectors each, scored with StandardQFNumeric(0).
task = ps.SubgroupDiscoveryTask(shots_df_cp, target, search_space, result_set_size=10, depth=2, qf=ps.StandardQFNumeric(0))
# Run the beam search.
search_algorithm = ps.BeamSearch()

result = search_algorithm.execute(task)

# Show quality and description of each subgroup found. The commented-out prints
# that used to follow referenced further result fields (size_sg, positives_sg,
# target_share_sg, lift, ...).
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print("-" * 40)

Quality: 0.3814171552658081
Subgroup: num_dribbles>=3 AND num_passes: [0:1[
----------------------------------------
Quality: 0.36441946029663086
Subgroup: end_y: [34.68:40.80[ AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.36410027742385864
Subgroup: end_y: [34.68:40.80[ AND start_y: [25.84:31.96[
----------------------------------------
Quality: 0.35834449529647827
Subgroup: end_y: [34.68:40.80[ AND shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.33168739080429077
Subgroup: end_y: [34.68:40.80[ AND start_x>=96.60
----------------------------------------
Quality: 0.279859721660614
Subgroup: shot_distance_from_goal>=26.29 AND start_x>=96.60
----------------------------------------
Quality: 0.2769468426704407
Subgroup: num_events<2 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.2766340374946594
Subgroup: num_events<2 AND shot_angle_from_goal>=0.60
--------------------------------