In [1]:
import pandas as pd
import numpy as np
from math import sqrt
import os
import pysubgroup as ps

In [2]:
PATH: str = "../data/spadl_format/"
LEAGUES: list[str] = ["England", "Spain", "France", "Italy", "Germany"]

teams_df = pd.read_json("../data/wyscout/teams/teams.json")
players_df = pd.read_json("../data/wyscout/players/players.json")
ranking_df = pd.read_json('../data/wyscout/playeranks/playeranks.json')

df_dict = {}
if not os.path.exists("../data/processed/"):
    os.mkdir("../data/processed")
for league in LEAGUES:

    df = pd.read_csv(f"{PATH}{league}.csv", index_col=0)

    # remove not used columns
    df.drop(["original_event_id", "result_name","bodypart_id", "type_id"], inplace=True, axis=1)

    df_dict[league] = df
all_df = pd.concat([df for df in df_dict.values()])
all_df

Unnamed: 0,game_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,result_id,action_id,type_name,bodypart_name,player_name
0,2499719,1,2.758649,1609,25413,51.45,34.68,32.55,14.96,1,0,pass,foot,A. Lacazette
1,2499719,1,4.946850,1609,370224,32.55,14.96,53.55,17.00,1,1,pass,foot,R. Holding
2,2499719,1,6.542188,1609,3319,53.55,17.00,36.75,19.72,1,2,pass,head,M. Özil
3,2499719,1,8.143395,1609,120339,36.75,19.72,43.05,3.40,1,3,pass,head,Mohamed Elneny
4,2499719,1,10.302366,1609,167145,43.05,3.40,75.60,8.16,1,4,pass,foot,Bellerín
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389146,2517044,2,2817.761761,2463,94831,87.15,36.72,87.15,36.72,0,1148,shot,foot_right,Y. Ōsako
389147,2517044,2,2818.280436,2451,14917,17.85,31.96,23.10,27.20,1,1149,interception,foot,R. Knoche
389148,2517044,2,2823.180681,2451,14804,23.10,27.20,0.00,3.40,1,1150,dribble,foot,J. Błaszczykowski
389149,2517044,2,2828.080925,2451,14804,0.00,3.40,5.25,4.76,0,1151,pass,foot,J. Błaszczykowski


In [3]:
print(all_df["type_name"].unique())

['pass' 'interception' 'dribble' 'take_on' 'tackle' 'foul'
 'freekick_short' 'cross' 'shot' 'clearance' 'throw_in' 'goalkick'
 'corner_short' 'corner_crossed' 'keeper_save' 'freekick_crossed'
 'shot_freekick' 'bad_touch' 'shot_penalty']


## Pre-process

Falta decidir se posições serão float ou discretizadas. 

In [4]:
GOAL_CENTER_X: int = 105
GOAL_CENTER_Y: int = 34

UPPER_CROSSBAR_X: int = 105
UPPER_CROSSBAR_Y: int = 38

LOWER_CROSSBAR_X: int = 105
LOWER_CROSSBAR_Y: int = 30


def get_shot_angle(shot_pos_x, shot_pos_y):
    v1 = np.array([UPPER_CROSSBAR_X - shot_pos_x, UPPER_CROSSBAR_Y - shot_pos_y])
    v2 = np.array([LOWER_CROSSBAR_X - shot_pos_x, LOWER_CROSSBAR_Y - shot_pos_y])
    return np.arccos(np.dot(v1 / np.linalg.norm(v1), v2 / np.linalg.norm(v2)))

def calcular_media_global():
    # Filtrar jogadores com dados válidos
    jogadores_validos = ranking_df[ranking_df['playerankScore'].notna()]
    media_global = np.sum(jogadores_validos['playerankScore'] * jogadores_validos['minutesPlayed']) / jogadores_validos['minutesPlayed'].sum()
    return media_global

media_rank_global = calcular_media_global()

def calcular_ranking_medio(player_id):
    jogador_rankings = ranking_df[ranking_df['playerId'] == player_id]
    if jogador_rankings.empty:
        return media_rank_global
    ranking_ponderado = np.sum(jogador_rankings['playerankScore'] * jogador_rankings['minutesPlayed']) / jogador_rankings['minutesPlayed'].sum()
    return ranking_ponderado

def calculate_distance(x1, y1, x2, y2):
    return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

def discretize_start_x(x):
    if x < 26.25:
        return 'defensive'
    elif x < 52.5:
        return 'pre-defensive'
    elif x < 78.75:
        return 'pre-attacking'
    else:
        return 'attacking'

def discretize_start_y(y):
    if y < 22.67:
        return 'left'
    elif y < 45.33:
        return 'center'
    else:
        return 'right'

def generate_shots_with_counts_events(df):
    shot_data = []
    result_ids = []
    grouped = df.groupby(['game_id', 'period_id'])
    
    for (game_id, period_id), group in grouped:
        group = group.sort_values(by='time_seconds').reset_index(drop=True)
        start_index = 0

        while start_index < len(group):
            shot_index = group[start_index:].index[group['type_name'][start_index:] == 'shot']
            if len(shot_index) == 0:
                break
            shot_index = shot_index[0]

            shot_row = group.loc[shot_index]
            play_events = group.loc[start_index:shot_index]
            
            for idx in play_events.index[::-1]:
                if play_events.loc[idx, 'team_id'] != shot_row['team_id']:
                    start_index = idx + 1
                    break
            else:
                start_index = play_events.index[0]

            play_events = group.loc[start_index:shot_index]

            player_rank = calcular_ranking_medio(shot_row['player_id'])
            
            play_distance = 0
            play_distance_towards_goal = 0
            play_distances_to_goal = []

            for i in range(1, len(play_events)):
                x1, y1 = play_events.iloc[i - 1][['start_x', 'start_y']]
                x2, y2 = play_events.iloc[i][['start_x', 'start_y']]
                dist = calculate_distance(x1, y1, x2, y2)
                play_distance += dist
                play_distance_towards_goal += x2 - x1
                play_distances_to_goal.append(sqrt((x2 - GOAL_CENTER_X) ** 2 + (y2 - GOAL_CENTER_Y) ** 2))

            if len(play_events) > 1:
                play_mean_distance_to_the_goal = np.mean(play_distances_to_goal)
                play_std_distance_to_the_goal = np.std(play_distances_to_goal)
            else:
                play_mean_distance_to_the_goal = 0
                play_std_distance_to_the_goal = 0

            play_duration = play_events['time_seconds'].iloc[-1] - play_events['time_seconds'].iloc[0]
            ratio_distance = play_distance_towards_goal / play_distance if play_distance != 0 else 0
            total_time_per_play = play_duration / len(play_events) if len(play_events) != 0 else 0
            play_speed = play_distance / play_duration if play_duration != 0 else 0
            play_speed_towards_goal = play_distance_towards_goal / play_duration if play_duration != 0 else 0

            shot_data.append({
                'game_id': game_id,
                'period_id': period_id,
                'team_id': shot_row['team_id'],
                'player_id': shot_row['player_id'],
                'time_seconds': shot_row['time_seconds'],
                'start_x': shot_row['start_x'],
                'start_y': shot_row['start_y'],
                # 'disc_start_x': discretize_start_x(shot_row['start_x']),
                # 'disc_start_y': discretize_start_y(shot_row['start_y']),
                'num_events': len(play_events),
                'num_passes': (play_events['type_name'] == 'pass').sum(),
                'num_dribbles': (play_events['type_name'] == 'dribble').sum(),
                'play_duration': play_duration,
                'player_rank': player_rank,
                'bodypart_name': shot_row['bodypart_name'],
                'play_distance': play_distance,
                'play_mean_distance_to_the_goal': play_mean_distance_to_the_goal,
                'play_std_distance_to_the_goal': play_std_distance_to_the_goal,
                'play_distance_towards_goal': play_distance_towards_goal,
                'ratio_distance': ratio_distance,
                'total_time_per_play': total_time_per_play,
                'play_speed': play_speed,
                'play_speed_towards_goal': play_speed_towards_goal,
            })

            result_ids.append(shot_row['result_id'])
            start_index = shot_index + 1

    shots_df = pd.DataFrame(shot_data)
    shots_df["shot_distance_from_goal"] = shots_df.apply(lambda x: sqrt((x["start_x"] - GOAL_CENTER_X)**2 + (x["start_y"] - GOAL_CENTER_Y)**2), axis=1)
    shots_df["shot_angle_from_goal"] = shots_df[["start_x", "start_y"]].apply(lambda pos: get_shot_angle(pos["start_x"], pos["start_y"]), axis=1)
    shots_df["result_id"] = result_ids
    return shots_df

shots_df = generate_shots_with_counts_events(all_df)

In [5]:
shots_df.head()

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,num_events,num_passes,num_dribbles,...,play_mean_distance_to_the_goal,play_std_distance_to_the_goal,play_distance_towards_goal,ratio_distance,total_time_per_play,play_speed,play_speed_towards_goal,shot_distance_from_goal,shot_angle_from_goal,result_id
0,2499719,1,1609,25413,94.595788,92.4,40.12,7,4,0,...,30.932864,8.413575,24.15,0.15487,2.83695,7.852377,1.216095,14.007655,0.509981,1
1,2499719,1,1631,26150,179.854785,89.25,32.64,2,0,0,...,15.808608,0.0,-9.45,-0.25819,2.273328,8.050087,-2.07845,15.808608,0.494098,0
2,2499719,1,1631,14763,254.745027,100.8,32.64,7,3,0,...,21.08692,12.262251,4.2,0.034728,2.705084,6.386817,0.221805,4.414703,1.46731,1
3,2499719,1,1609,7868,425.824035,85.05,45.56,6,3,1,...,57.591081,22.443334,75.6,0.783021,2.161246,7.44548,5.82997,23.057235,0.300168,0
4,2499719,1,1609,7868,815.462015,78.75,47.6,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.563872,0.24003,0


## Binary Goal/Not Goal

In [6]:
shots_df_cp = shots_df.copy()
shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 0.031155481651607888
Subgroup: shot_angle_from_goal>=0.60
Size of Subgroup: 8189
----------------------------------------
Quality: 0.030839137893783095
Subgroup: shot_distance_from_goal<11.26
Size of Subgroup: 8045
----------------------------------------
Quality: 0.029720454021914134
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26
Size of Subgroup: 6797
----------------------------------------
Quality: 0.02758813287621146
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 6127
----------------------------------------
Quality: 0.0264073773750995
Subgroup: shot_angle_from_goal>=0.60 AND start_x>=96.60
Size of Subgroup: 5007
----------------------------------------
Quality: 0.0264073773750995
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 5007
----------------------------------------
Quality: 0.025837991718490057
Subgroup: start_x>=96.60
Size of Subgroup: 8522
----------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.031155,shot_angle_from_goal>=0.60,8189,40461,2125,4271,32272,0.202392,0.797608,0.497542,0.502458,0.259494,0.066497,0.105558,2.458301
1,0.030839,shot_distance_from_goal<11.26,8045,40461,2097,4271,32416,0.198833,0.801167,0.490986,0.509014,0.260659,0.067066,0.105558,2.469332
2,0.029720,shot_angle_from_goal>=0.60 AND shot_distance_f...,6797,40461,1920,4271,33664,0.167989,0.832011,0.449543,0.550457,0.282478,0.069837,0.105558,2.676030
3,0.027588,shot_distance_from_goal<11.26 AND start_x>=96.60,6127,40461,1763,4271,34334,0.151430,0.848570,0.412784,0.587216,0.287743,0.073047,0.105558,2.725910
4,0.026407,shot_angle_from_goal>=0.60 AND start_x>=96.60,5007,40461,1597,4271,35454,0.123749,0.876251,0.373917,0.626083,0.318953,0.075422,0.105558,3.021582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.008752,shot_distance_from_goal<11.26 AND total_time_p...,1439,40461,506,4271,39022,0.035565,0.964435,0.118473,0.881527,0.351633,0.096484,0.105558,3.331170
96,0.008741,num_dribbles: [0:1[ AND shot_distance_from_goa...,1424,40461,504,4271,39037,0.035194,0.964806,0.118005,0.881995,0.353933,0.096498,0.105558,3.352954
97,0.008726,play_speed>=9.72 AND shot_angle_from_goal>=0.6...,2576,40461,625,4271,37885,0.063666,0.936334,0.146336,0.853664,0.242624,0.096239,0.105558,2.298482
98,0.008719,play_duration<1.34 AND shot_distance_from_goal...,1404,40461,501,4271,39057,0.034700,0.965300,0.117303,0.882697,0.356838,0.096526,0.105558,3.380474


In [7]:
import pandas as pd

# Supondo que `result_df` é o DataFrame contendo os resultados dos subgrupos
# E que `shots_df_cp` é o dataframe original

def get_covered_indices(subgroup, df):
    """
    Retorna os índices dos registros no dataframe que são cobertos pelo subgrupo.
    """
    condition = subgroup.covers(df)
    return df[condition].index

# Lista para armazenar todos os índices cobertos
covered_indices = set()

# Iterar sobre os subgrupos e adicionar os índices cobertos ao conjunto
for sg_result in result.to_dataframe().itertuples():
    subgroup = sg_result.subgroup  # Ajuste conforme a coluna correta
    indices = get_covered_indices(subgroup, shots_df_cp)
    covered_indices.update(indices)
    print(len(covered_indices) / len(shots_df_cp))

# Cobertura total (proporção de registros cobertos)
total_coverage = len(covered_indices) / len(shots_df_cp)

print(f"Total Coverage: {total_coverage:.4f}")


0.20239242727564816
0.23323694421788882
0.23323694421788882
0.23323694421788882
0.23323694421788882
0.23323694421788882
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.39677714

## XG

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier

In [9]:
# constans
RANDOM_STATE: int = 123
TEST_SIZE: float  = 0.3

shots_df_cp = shots_df.copy()


# Random Forest Classifier
rfc = RandomForestClassifier(random_state=RANDOM_STATE)
X = shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
y = shots_df_cp["result_id"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
rfc.fit(X=X_train, y=y_train)
y_pred = rfc.predict(X=X_test)
classification_report(y_test, y_pred, output_dict=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


{'0': {'precision': 0.9070434415858287,
  'recall': 0.9876010286554004,
  'f1-score': 0.9456096381304138,
  'support': 10888.0},
 '1': {'precision': 0.5246478873239436,
  'recall': 0.11910471622701839,
  'f1-score': 0.19413680781758957,
  'support': 1251.0},
 'accuracy': 0.8980970425899992,
 'macro avg': {'precision': 0.7158456644548862,
  'recall': 0.5533528724412095,
  'f1-score': 0.5698732229740017,
  'support': 12139.0},
 'weighted avg': {'precision': 0.8676351840372977,
  'recall': 0.8980970425899992,
  'f1-score': 0.8681656550410867,
  'support': 12139.0}}

In [10]:
shots_df_cp["xg"] = rfc.predict(X=X)


In [11]:
print("Comparação 0/1: ", shots_df_cp[shots_df_cp["xg"]==0].shape[0], " VS " , shots_df_cp[shots_df_cp["xg"]==1].shape[0])


Comparação 0/1:  39526  VS  935


In [12]:
shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(shots_df_cp, ignore=['xg'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 0.018716662379767844
Subgroup: shot_angle_from_goal>=0.60 AND start_x>=96.60
----------------------------------------
Quality: 0.018716662379767844
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 0.018398288803861684
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 0.018238065116663007
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.017846586326198236
Subgroup: shot_distance_from_goal<11.26
----------------------------------------
Quality: 0.017443045908338128
Subgroup: shot_angle_from_goal>=0.60
----------------------------------------
Quality: 0.017079852018944016
Subgroup: start_x>=96.60
----------------------------------------
Quality: 0.01321760482222617
Subgroup: start_x>=96.60 AND start_y: [31.96:37.40[
----------------------------------------
Qua

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.018717,shot_angle_from_goal>=0.60 AND start_x>=96.60,5007,40461,873,935,35454,0.123749,0.876251,0.933690,0.066310,0.174356,0.001749,0.023109,7.545042
1,0.018717,shot_angle_from_goal>=0.60 AND shot_distance_f...,5007,40461,873,935,35454,0.123749,0.876251,0.933690,0.066310,0.174356,0.001749,0.023109,7.545042
2,0.018398,shot_distance_from_goal<11.26 AND start_x>=96.60,6127,40461,886,935,34334,0.151430,0.848570,0.947594,0.052406,0.144606,0.001427,0.023109,6.257644
3,0.018238,shot_angle_from_goal>=0.60 AND shot_distance_f...,6797,40461,895,935,33664,0.167989,0.832011,0.957219,0.042781,0.131676,0.001188,0.023109,5.698109
4,0.017847,shot_distance_from_goal<11.26,8045,40461,908,935,32416,0.198833,0.801167,0.971123,0.028877,0.112865,0.000833,0.023109,4.884103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.006342,play_distance_towards_goal: [0.0:7.35[ AND sta...,1662,40461,295,935,38799,0.041077,0.958923,0.315508,0.684492,0.177497,0.016495,0.023109,7.680969
96,0.006332,shot_distance_from_goal<11.26 AND start_x>=96....,1160,40461,283,935,39301,0.028670,0.971330,0.302674,0.697326,0.243966,0.016590,0.023109,10.557314
97,0.006317,ratio_distance: [0.0:0.12[ AND shot_angle_from...,1533,40461,291,935,38928,0.037888,0.962112,0.311230,0.688770,0.189824,0.016543,0.023109,8.214400
98,0.006316,play_duration<1.34 AND shot_distance_from_goal...,1187,40461,283,935,39274,0.029337,0.970663,0.302674,0.697326,0.238416,0.016601,0.023109,10.317173


In [13]:
def get_covered_indices(subgroup, df):
    """
    Retorna os índices dos registros no dataframe que são cobertos pelo subgrupo.
    """
    condition = subgroup.covers(df)
    return df[condition].index

# Lista para armazenar todos os índices cobertos
covered_indices = set()

# Iterar sobre os subgrupos e adicionar os índices cobertos ao conjunto
for sg_result in result.to_dataframe().itertuples():
    subgroup = sg_result.subgroup  # Ajuste conforme a coluna correta
    indices = get_covered_indices(subgroup, shots_df_cp)
    covered_indices.update(indices)
    print(len(covered_indices) / len(shots_df_cp))

# Cobertura total (proporção de registros cobertos)
total_coverage = len(covered_indices) / len(shots_df_cp)

print(f"Total Coverage: {total_coverage:.4f}")


0.12374879513605694
0.12374879513605694
0.15142977187909346
0.1956699043523393
0.19883344455154345
0.23323694421788882
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.29242974716393566
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.3967771434220608
0.39677714342

## VAEP

In [14]:
from tqdm import tqdm
import socceraction.spadl as spd
from socceraction.vaep import features as ft
import socceraction.vaep.labels as lab
import socceraction.vaep.formula as fm
import xgboost as xgb
import sklearn.metrics as mt

In [15]:
def features_transform(spadl):
    spadl.loc[spadl.result_id.isin([2, 3]), ["result_id"]] = 0
    spadl.loc[spadl.result_name.isin(["offside", "owngoal"]), ["result_name"]] = "fail"

    xfns = [
        ft.actiontype_onehot,
        ft.bodypart_onehot,
        ft.result_onehot,
        ft.goalscore,
        ft.startlocation,
        ft.endlocation,
        ft.team,
        ft.time,
        ft.time_delta
    ]

    features = []
    for game in tqdm(np.unique(spadl.game_id).tolist()):
        match_actions = spadl.loc[spadl.game_id == game].reset_index(drop=True)
        match_states = ft.gamestates(actions=match_actions)
        match_feats = pd.concat([fn(match_states) for fn in xfns], axis=1)
        features.append(match_feats)
    features = pd.concat(features).reset_index(drop=True)

    return features

def labels_transform(spadl):
    yfns = [lab.scores, lab.concedes]

    labels = []
    for game in tqdm(np.unique(spadl.game_id).tolist()):
        match_actions = spadl.loc[spadl.game_id == game].reset_index(drop=True)
        labels.append(pd.concat([fn(actions=match_actions) for fn in yfns], axis=1))

    labels = pd.concat(labels).reset_index(drop=True)

    return labels

def train_vaep(X_train, y_train, X_test, y_test):
    models = {}
    for m in ["scores", "concedes"]:
        models[m] = xgb.XGBClassifier(random_state=0, n_estimators=50, max_depth=3)

        print("training " + m + " model")
        models[m].fit(X_train, y_train[m])

        p = sum(y_train[m]) / len(y_train[m])
        base = [p] * len(y_train[m])
        y_train_pred = models[m].predict_proba(X_train)[:, 1]
        train_brier = mt.brier_score_loss(y_train[m], y_train_pred) / mt.brier_score_loss(y_train[m], base)
        print(m + " Train NBS: " + str(train_brier))
        print()

        p = sum(y_test[m]) / len(y_test[m])
        base = [p] * len(y_test[m])
        y_test_pred = models[m].predict_proba(X_test)[:, 1]
        test_brier = mt.brier_score_loss(y_test[m], y_test_pred) / mt.brier_score_loss(y_test[m], base)
        print(m + " Test NBS: " + str(test_brier))
        print()

        print("----------------------------------------")

    return models

def generate_predictions(features, models):
    preds = {}
    for m in ["scores", "concedes"]:
        preds[m] = models[m].predict_proba(features)[:, 1]
    preds = pd.DataFrame(preds)

    return preds

def calculate_action_values(spadl, predictions):
    action_values = fm.value(actions=spadl, Pscores=predictions["scores"], Pconcedes=predictions["concedes"])
    action_values = pd.concat([
        spadl[["original_event_id", "player_id", "action_id", "game_id", "start_x", "start_y", "end_x", "end_y", "type_name", "result_name"]],
        predictions.rename(columns={"scores": "Pscores", "concedes": "Pconcedes"}),
        action_values
    ], axis=1)

    return action_values


In [16]:
spadl = {}
for league in LEAGUES:
    spadl[league] = pd.read_csv(f"../data/spadl_format/{league}.csv")

features = {}
for league in LEAGUES:
    features[league] = features_transform(spadl[league])

labels = {}
for league in LEAGUES:
    labels[league] = labels_transform(spadl[league])

models = train_vaep(X_train=features["England"], y_train=labels["England"], X_test=features["Spain"], y_test=labels["Spain"])


100%|██████████| 380/380 [00:06<00:00, 62.27it/s]
100%|██████████| 380/380 [00:06<00:00, 62.92it/s]
100%|██████████| 380/380 [00:06<00:00, 62.88it/s]
100%|██████████| 380/380 [00:06<00:00, 62.03it/s]
100%|██████████| 306/306 [00:04<00:00, 62.03it/s]
100%|██████████| 380/380 [00:07<00:00, 53.84it/s]
100%|██████████| 380/380 [00:07<00:00, 54.03it/s]
100%|██████████| 380/380 [00:07<00:00, 54.10it/s]
100%|██████████| 380/380 [00:07<00:00, 53.77it/s]
100%|██████████| 306/306 [00:05<00:00, 54.47it/s]


training scores model
scores Train NBS: 0.8452471194228581

scores Test NBS: 0.8503677630926355

----------------------------------------
training concedes model
concedes Train NBS: 0.9660641623881886

concedes Test NBS: 0.9766251611701147

----------------------------------------


In [17]:
preds = {}
action_values = {}

for league in LEAGUES:
    preds[league] = generate_predictions(features=features[league], models=models)
    action_values[league] = calculate_action_values(spadl=spadl[league], predictions=preds[league])

all_action_values = pd.concat([df for df in action_values.values()])

In [18]:
shots_df

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,num_events,num_passes,num_dribbles,...,play_mean_distance_to_the_goal,play_std_distance_to_the_goal,play_distance_towards_goal,ratio_distance,total_time_per_play,play_speed,play_speed_towards_goal,shot_distance_from_goal,shot_angle_from_goal,result_id
0,2499719,1,1609,25413,94.595788,92.40,40.12,7,4,0,...,30.932864,8.413575,24.15,0.154870,2.836950,7.852377,1.216095,14.007655,0.509981,1
1,2499719,1,1631,26150,179.854785,89.25,32.64,2,0,0,...,15.808608,0.000000,-9.45,-0.258190,2.273328,8.050087,-2.078450,15.808608,0.494098,0
2,2499719,1,1631,14763,254.745027,100.80,32.64,7,3,0,...,21.086920,12.262251,4.20,0.034728,2.705084,6.386817,0.221805,4.414703,1.467310,1
3,2499719,1,1609,7868,425.824035,85.05,45.56,6,3,1,...,57.591081,22.443334,75.60,0.783021,2.161246,7.445480,5.829970,23.057235,0.300168,0
4,2499719,1,1609,7868,815.462015,78.75,47.60,1,0,0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,29.563872,0.240030,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40456,2576338,2,3193,116269,1152.032980,99.75,37.40,9,5,2,...,25.291699,10.312188,28.35,0.174816,2.465061,7.309718,1.277859,6.254798,1.067542,0
40457,2576338,2,3193,3548,1251.730517,97.65,42.16,20,16,2,...,52.186433,19.477819,75.60,0.262006,2.510138,5.747561,1.505893,10.982172,0.512084,0
40458,2576338,2,3193,21177,2065.034482,94.50,36.72,1,0,0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,10.846585,0.690619,1
40459,2576338,2,3193,349102,2367.252041,82.95,46.24,3,2,0,...,27.824216,2.604777,6.30,0.133757,2.367156,6.632500,0.887140,25.219439,0.277183,0


In [19]:
all_action_values

Unnamed: 0,original_event_id,player_id,action_id,game_id,start_x,start_y,end_x,end_y,type_name,result_name,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,177959171.0,25413,0,2499719,51.45,34.68,32.55,14.96,pass,success,0.003555,0.000560,0.000000,-0.000000,0.000000
1,177959172.0,370224,1,2499719,32.55,14.96,53.55,17.00,pass,success,0.004460,0.000536,0.000905,0.000024,0.000928
2,177959173.0,3319,2,2499719,53.55,17.00,36.75,19.72,pass,success,0.005223,0.000446,0.000764,0.000090,0.000854
3,177959174.0,120339,3,2499719,36.75,19.72,43.05,3.40,pass,success,0.002345,0.000363,-0.002879,0.000083,-0.002795
4,177959175.0,167145,4,2499719,43.05,3.40,75.60,8.16,pass,success,0.005549,0.000346,0.003204,0.000017,0.003221
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389146,251206865.0,94831,1148,2517044,87.15,36.72,87.15,36.72,shot,fail,0.014503,0.009633,0.007984,-0.002125,0.005859
389147,251206783.0,14917,1149,2517044,17.85,31.96,23.10,27.20,interception,success,0.005530,0.017490,-0.004103,-0.002987,-0.007090
389148,,14804,1150,2517044,23.10,27.20,0.00,3.40,dribble,success,0.004298,0.008181,-0.001233,0.009310,0.008077
389149,251206790.0,14804,1151,2517044,0.00,3.40,5.25,4.76,pass,fail,0.002736,0.014004,-0.001561,-0.005823,-0.007385


In [20]:
shots_df_cp = shots_df.copy()
all_action_values_cp = all_action_values.copy()
all_action_values_cp.drop(["original_event_id", "result_name", "action_id", "type_name"], inplace=True, axis=1)

shots_df_cp = shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')

shots_df_cp

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,num_events,num_passes,num_dribbles,...,shot_distance_from_goal,shot_angle_from_goal,result_id,end_x,end_y,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,2499719,1,1609,25413,94.595788,92.40,40.12,7,4,0,...,14.007655,0.509981,1,105.0,37.4000,0.978135,0.002137,0.902766,-0.000387,0.902379
1,2499719,1,1631,26150,179.854785,89.25,32.64,2,0,0,...,15.808608,0.494098,0,105.0,40.8000,0.018184,0.007322,-0.020396,-0.003519,-0.023916
2,2499719,1,1631,14763,254.745027,100.80,32.64,7,3,0,...,4.414703,1.467310,1,105.0,34.0000,0.977107,0.002290,0.885530,0.000666,0.886196
3,2499719,1,1609,7868,425.824035,85.05,45.56,6,3,1,...,23.057235,0.300168,0,105.0,40.8000,0.021434,0.002819,-0.004685,-0.001744,-0.006429
4,2499719,1,1609,7868,815.462015,78.75,47.60,1,0,0,...,29.563872,0.240030,0,105.0,37.4000,0.017245,0.005117,-0.019283,-0.002159,-0.021442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40778,2576338,2,3193,116269,1152.032980,99.75,37.40,9,5,2,...,6.254798,1.067542,0,105.0,40.8000,0.034776,0.009745,-0.081321,-0.004355,-0.085676
40779,2576338,2,3193,3548,1251.730517,97.65,42.16,20,16,2,...,10.982172,0.512084,0,105.0,40.8000,0.029877,0.010725,-0.052057,-0.005569,-0.057625
40780,2576338,2,3193,21177,2065.034482,94.50,36.72,1,0,0,...,10.846585,0.690619,1,105.0,34.0000,0.977372,0.007182,0.950002,-0.002297,0.947704
40781,2576338,2,3193,349102,2367.252041,82.95,46.24,3,2,0,...,25.219439,0.277183,0,105.0,40.8000,0.015293,0.006782,-0.020930,-0.003574,-0.024504


In [21]:
print(min(np.unique(shots_df_cp['Pscores'].tolist())), max(np.unique(shots_df_cp['Pscores'].tolist())))

0.0024156407453119755 0.9968808889389038


In [22]:
shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id", "end_x", "end_y"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

print(result.to_dataframe().shape)

Quality: 1259.5690176784992
Subgroup: shot_angle_from_goal>=0.60
----------------------------------------
Quality: 1246.4595663398504
Subgroup: shot_distance_from_goal<11.26
----------------------------------------
Quality: 1193.3499402701855
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 1106.3482034951448
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 1053.5573259294033
Subgroup: start_x>=96.60
----------------------------------------
Quality: 1052.0391435921192
Subgroup: shot_angle_from_goal>=0.60 AND start_x>=96.60
----------------------------------------
Quality: 1052.0391435921192
Subgroup: shot_angle_from_goal>=0.60 AND shot_distance_from_goal<11.26 AND start_x>=96.60
----------------------------------------
Quality: 828.5282593220472
Subgroup: num_dribbles: [0:1[ AND shot_distance_from_goal<11.26
----------------------------------------
Quality

In [23]:
result.to_dataframe()

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,1259.569018,shot_angle_from_goal>=0.60,8242,40783,0.278054,0.125231,0.414101,0.292654,0.036029,0.026238,0.995717,0.996881,0.009808,0.002416,2.220332,1.373161
1,1246.459566,shot_distance_from_goal<11.26,8083,40783,0.279438,0.125231,0.414796,0.292654,0.036144,0.026238,0.995717,0.996881,0.012183,0.002416,2.231386,1.377538
2,1193.349940,shot_angle_from_goal>=0.60 AND shot_distance_f...,6834,40783,0.299850,0.125231,0.425310,0.292654,0.036719,0.026238,0.995717,0.996881,0.012183,0.002416,2.394381,1.399457
3,1106.348203,shot_distance_from_goal<11.26 AND start_x>=96.60,6147,40783,0.305213,0.125231,0.427922,0.292654,0.036634,0.026238,0.995717,0.996881,0.013894,0.002416,2.437200,1.396204
4,1053.557326,start_x>=96.60,8562,40783,0.248281,0.125231,0.396757,0.292654,0.034972,0.026238,0.995717,0.996881,0.007717,0.002416,1.982588,1.332886
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,350.894941,play_speed_towards_goal: [0.0:0.75[ AND ratio_...,1429,40783,0.370784,0.125231,0.452545,0.292654,0.040042,0.026238,0.990374,0.996881,0.016980,0.002416,2.960801,1.526107
96,350.812498,play_speed>=9.71 AND shot_angle_from_goal>=0.60,3033,40783,0.240896,0.125231,0.391269,0.292654,0.035133,0.026238,0.993340,0.996881,0.009808,0.002416,1.923616,1.338999
97,349.976713,play_duration<1.34 AND shot_distance_from_goal...,1418,40783,0.372041,0.125231,0.452245,0.292654,0.040868,0.026238,0.990374,0.996881,0.015709,0.002416,2.970841,1.557601
98,349.324554,play_duration<1.34 AND play_std_distance_to_th...,1445,40783,0.366978,0.125231,0.450838,0.292654,0.040675,0.026238,0.990374,0.996881,0.016980,0.002416,2.930412,1.550230


In [24]:
def get_covered_indices(subgroup, df):
    # Verifica se a condição cobre algum registro
    condition = subgroup.covers(df)
    return df[condition].index

# Lista para armazenar todos os índices cobertos
covered_indices = set()

# Iterar sobre os subgrupos e adicionar os índices cobertos ao conjunto
for sg_result in result.to_dataframe().itertuples():
    subgroup = sg_result.subgroup
    indices = get_covered_indices(subgroup, shots_df_cp)
    covered_indices.update(indices)
    print(len(covered_indices) / len(shots_df_cp))

# Cobertura total (proporção de registros cobertos)
total_coverage = len(covered_indices) / len(shots_df_cp)

print(f"Total Coverage: {total_coverage:.4f}")

0.20209400975896818
0.232719515484393
0.232719515484393
0.232719515484393
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.2919353652257068
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.39616997278277716
0.396

In [25]:
result.to_dataframe().head()

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,1259.569018,shot_angle_from_goal>=0.60,8242,40783,0.278054,0.125231,0.414101,0.292654,0.036029,0.026238,0.995717,0.996881,0.009808,0.002416,2.220332,1.373161
1,1246.459566,shot_distance_from_goal<11.26,8083,40783,0.279438,0.125231,0.414796,0.292654,0.036144,0.026238,0.995717,0.996881,0.012183,0.002416,2.231386,1.377538
2,1193.34994,shot_angle_from_goal>=0.60 AND shot_distance_f...,6834,40783,0.29985,0.125231,0.42531,0.292654,0.036719,0.026238,0.995717,0.996881,0.012183,0.002416,2.394381,1.399457
3,1106.348203,shot_distance_from_goal<11.26 AND start_x>=96.60,6147,40783,0.305213,0.125231,0.427922,0.292654,0.036634,0.026238,0.995717,0.996881,0.013894,0.002416,2.4372,1.396204
4,1053.557326,start_x>=96.60,8562,40783,0.248281,0.125231,0.396757,0.292654,0.034972,0.026238,0.995717,0.996881,0.007717,0.002416,1.982588,1.332886


## Análise Uma liga VS Outra Liga

#### Pre-process

In [26]:
England_df = pd.read_csv(f"{PATH}England.csv", index_col=0)
England_df.drop(["original_event_id", "result_name","bodypart_id", "type_id"], inplace=True, axis=1)
England_shots_df = generate_shots_with_counts_events(England_df)


Spain_df = pd.read_csv(f"{PATH}Spain.csv", index_col=0)
Spain_df.drop(["original_event_id", "result_name","bodypart_id", "type_id"], inplace=True, axis=1)
Spain_shots_df = generate_shots_with_counts_events(Spain_df)

### Binário

In [27]:
England_shots_df_cp = England_shots_df.copy()
England_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(England_shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(England_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# # Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
#     print(f"Size of Dataset: {sg_result.size_dataset}")
#     print(f"Positives in Subgroup: {sg_result.positives_sg}")
#     print(f"Positives in Dataset: {sg_result.positives_dataset}")
#     print(f"Size of Complement: {sg_result.size_complement}")
#     print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
#     print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
#     print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
#     print(f"Coverage of Complement: {sg_result.coverage_complement}")
#     print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
#     print(f"Target Share in Complement: {sg_result.target_share_complement}")
#     print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
#     print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 0.03192391658395455
Subgroup: shot_angle_from_goal>=0.61
Size of Subgroup: 1694
----------------------------------------
Quality: 0.03192076617948671
Subgroup: shot_distance_from_goal<11.26
Size of Subgroup: 1685
----------------------------------------
Quality: 0.030326171455848525
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.26
Size of Subgroup: 1449
----------------------------------------
Quality: 0.02862860751240409
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 1332
----------------------------------------
Quality: 0.027164565549912693
Subgroup: start_x>=96.60
Size of Subgroup: 1807
----------------------------------------
Quality: 0.0270340127887659
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
Size of Subgroup: 1096
----------------------------------------
Quality: 0.0270340127887659
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 1096
------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.031924,shot_angle_from_goal>=0.61,1694,8451,453,914,6757,0.200450,0.799550,0.495624,0.504376,0.267414,0.068226,0.108153,2.472559
1,0.031921,shot_distance_from_goal<11.26,1685,8451,452,914,6766,0.199385,0.800615,0.494530,0.505470,0.268249,0.068283,0.108153,2.480278
2,0.030326,shot_angle_from_goal>=0.61 AND shot_distance_f...,1449,8451,413,914,7002,0.171459,0.828541,0.451860,0.548140,0.285024,0.071551,0.108153,2.635382
3,0.028629,shot_distance_from_goal<11.26 AND start_x>=96.60,1332,8451,386,914,7119,0.157614,0.842386,0.422319,0.577681,0.289790,0.074168,0.108153,2.679446
4,0.027165,start_x>=96.60,1807,8451,425,914,6644,0.213821,0.786179,0.464989,0.535011,0.235196,0.073600,0.108153,2.174667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.009279,num_dribbles: [0:1[ AND shot_distance_from_goa...,292,8451,110,914,8159,0.034552,0.965448,0.120350,0.879650,0.376712,0.098541,0.108153,3.483146
96,0.009270,player_rank>=0.02 AND shot_distance_from_goal<...,302,8451,111,914,8149,0.035735,0.964265,0.121444,0.878556,0.367550,0.098540,0.108153,3.398427
97,0.009235,play_distance_towards_goal: [0.0:6.30[ AND pla...,277,8451,108,914,8174,0.032777,0.967223,0.118162,0.881838,0.389892,0.098605,0.108153,3.605005
98,0.009203,play_speed_towards_goal: [0.0:0.59[ AND ratio_...,298,8451,110,914,8153,0.035262,0.964738,0.120350,0.879650,0.369128,0.098614,0.108153,3.413016


In [28]:
def show_coverage(result, df):# Lista para armazenar todos os índices cobertos
    covered_indices = set()

    # Iterar sobre os subgrupos e adicionar os índices cobertos ao conjunto
    for sg_result in result.to_dataframe().itertuples():
        subgroup = sg_result.subgroup  # Ajuste conforme a coluna correta
        indices = get_covered_indices(subgroup, df)
        covered_indices.update(indices)
        print(len(covered_indices) / len(df))

    # Cobertura total (proporção de registros cobertos)
    total_coverage = len(covered_indices) / len(df)

    print(f"Total Coverage: {total_coverage:.4f}")

show_coverage(result, England_shots_df_cp)

0.20044965092888414
0.22837534019642647
0.22837534019642647
0.22837534019642647
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.284581

In [29]:
Spain_shots_df_cp = Spain_shots_df.copy()
Spain_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(Spain_shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(Spain_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# # Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
#     print(f"Size of Dataset: {sg_result.size_dataset}")
#     print(f"Positives in Subgroup: {sg_result.positives_sg}")
#     print(f"Positives in Dataset: {sg_result.positives_dataset}")
#     print(f"Size of Complement: {sg_result.size_complement}")
#     print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
#     print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
#     print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
#     print(f"Coverage of Complement: {sg_result.coverage_complement}")
#     print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
#     print(f"Target Share in Complement: {sg_result.target_share_complement}")
#     print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
#     print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 0.030671627196098374
Subgroup: shot_distance_from_goal<11.04
Size of Subgroup: 1582
----------------------------------------
Quality: 0.030533135443692964
Subgroup: shot_angle_from_goal>=0.61
Size of Subgroup: 1601
----------------------------------------
Quality: 0.029297500625192017
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04
Size of Subgroup: 1347
----------------------------------------
Quality: 0.02877760286939455
Subgroup: shot_distance_from_goal<11.04 AND start_x>=96.60
Size of Subgroup: 1231
----------------------------------------
Quality: 0.027403476298488192
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
Size of Subgroup: 996
----------------------------------------
Quality: 0.027403476298488192
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04 AND start_x>=96.60
Size of Subgroup: 996
----------------------------------------
Quality: 0.026542634686763367
Subgroup: start_x>=96.60
Size of Subgroup: 1753
--------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.030672,shot_distance_from_goal<11.04,1582,7979,420,884,6397,0.198270,0.801730,0.475113,0.524887,0.265487,0.072534,0.110791,2.396288
1,0.030533,shot_angle_from_goal>=0.61,1601,7979,421,884,6378,0.200652,0.799348,0.476244,0.523756,0.262961,0.072593,0.110791,2.373488
2,0.029298,shot_angle_from_goal>=0.61 AND shot_distance_f...,1347,7979,383,884,6632,0.168818,0.831182,0.433258,0.566742,0.284336,0.075543,0.110791,2.566418
3,0.028778,shot_distance_from_goal<11.04 AND start_x>=96.60,1231,7979,366,884,6748,0.154280,0.845720,0.414027,0.585973,0.297319,0.076763,0.110791,2.683609
4,0.027403,shot_angle_from_goal>=0.61 AND start_x>=96.60,996,7979,329,884,6983,0.124828,0.875172,0.372172,0.627828,0.330321,0.079479,0.110791,2.981486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.008452,num_events>=8 AND shot_angle_from_goal>=0.61,321,7979,103,884,7658,0.040231,0.959769,0.116516,0.883484,0.320872,0.101985,0.110791,2.896199
96,0.008448,play_duration<1.31 AND shot_distance_from_goal...,231,7979,93,884,7748,0.028951,0.971049,0.105204,0.894796,0.402597,0.102091,0.110791,3.633851
97,0.008422,play_distance_towards_goal: [0.0:7.35[ AND rat...,278,7979,98,884,7701,0.034841,0.965159,0.110860,0.889140,0.352518,0.102065,0.110791,3.181834
98,0.008421,play_speed_towards_goal: [0.0:0.76[ AND shot_a...,260,7979,96,884,7719,0.032586,0.967414,0.108597,0.891403,0.369231,0.102086,0.110791,3.332684


In [30]:
show_coverage(result, Spain_shots_df_cp)

0.19827045995738815
0.2301040230605339
0.2301040230605339
0.2301040230605339
0.2301040230605339
0.2301040230605339
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.5109662865020679
0.5109662865020679
0.5109662865020679
0.5109662865020679
0.5109662865020679
0.5109662865020679
0.5109662865020679
0.510966286

### xG

In [31]:
England_shots_df_cp = England_shots_df.copy()
England_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

X = England_shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
England_shots_df_cp["xg"] = rfc.predict(X=X)

target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(England_shots_df_cp, ignore=['xg', 'result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(England_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


Quality: 0.020278817516265642
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
Size of Subgroup: 1096
----------------------------------------
Quality: 0.020278817516265642
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 1096
----------------------------------------
Quality: 0.019811633536383207
Subgroup: shot_distance_from_goal<11.26 AND start_x>=96.60
Size of Subgroup: 1332
----------------------------------------
Quality: 0.01969935312114981
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.26
Size of Subgroup: 1449
----------------------------------------
Quality: 0.01923216914126737
Subgroup: shot_distance_from_goal<11.26
Size of Subgroup: 1685
----------------------------------------
Quality: 0.018968669311578236
Subgroup: shot_angle_from_goal>=0.61
Size of Subgroup: 1694
----------------------------------------
Quality: 0.018513330852494826
Subgroup: start_x>=96.60
Size of Subgroup: 1807
-------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.020279,shot_angle_from_goal>=0.61 AND start_x>=96.60,1096,8451,199,213,7355,0.129689,0.870311,0.934272,0.065728,0.181569,0.001903,0.025204,7.203955
1,0.020279,shot_angle_from_goal>=0.61 AND shot_distance_f...,1096,8451,199,213,7355,0.129689,0.870311,0.934272,0.065728,0.181569,0.001903,0.025204,7.203955
2,0.019812,shot_distance_from_goal<11.26 AND start_x>=96.60,1332,8451,201,213,7119,0.157614,0.842386,0.943662,0.056338,0.150901,0.001686,0.025204,5.987153
3,0.019699,shot_angle_from_goal>=0.61 AND shot_distance_f...,1449,8451,203,213,7002,0.171459,0.828541,0.953052,0.046948,0.140097,0.001428,0.025204,5.558481
4,0.019232,shot_distance_from_goal<11.26,1685,8451,205,213,6766,0.199385,0.800615,0.962441,0.037559,0.121662,0.001182,0.025204,4.827057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.006950,shot_angle_from_goal>=0.61 AND start_x>=96.60 ...,209,8451,64,213,8242,0.024731,0.975269,0.300469,0.699531,0.306220,0.018078,0.025204,12.149606
96,0.006948,bodypart_name=='foot_right' AND shot_distance_...,170,8451,63,213,8281,0.020116,0.979884,0.295775,0.704225,0.370588,0.018114,0.025204,14.703480
97,0.006937,play_mean_distance_to_the_goal<8.40 AND shot_a...,253,8451,65,213,8198,0.029937,0.970063,0.305164,0.694836,0.256917,0.018053,0.025204,10.193453
98,0.006934,shot_angle_from_goal>=0.61 AND shot_distance_f...,254,8451,65,213,8197,0.030056,0.969944,0.305164,0.694836,0.255906,0.018055,0.025204,10.153322


In [32]:
show_coverage(result, England_shots_df_cp)

0.12968879422553545
0.12968879422553545
0.15761448349307774
0.19938468820257957
0.19938468820257957
0.22837534019642647
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.2845817063069459
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807833392497929
0.3807

In [33]:
Spain_shots_df_cp = Spain_shots_df.copy()
Spain_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

X = Spain_shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
Spain_shots_df_cp["xg"] = rfc.predict(X=X)

target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(Spain_shots_df_cp, ignore=['xg', 'result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(Spain_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


Quality: 0.019539824436689862
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
Size of Subgroup: 996
----------------------------------------
Quality: 0.019539824436689862
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04 AND start_x>=96.60
Size of Subgroup: 996
----------------------------------------
Quality: 0.019353063981194782
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04
Size of Subgroup: 1347
----------------------------------------
Quality: 0.019328733287707654
Subgroup: shot_distance_from_goal<11.04 AND start_x>=96.60
Size of Subgroup: 1231
----------------------------------------
Quality: 0.01914197283221257
Subgroup: shot_distance_from_goal<11.04
Size of Subgroup: 1582
----------------------------------------
Quality: 0.01858305800564557
Subgroup: shot_angle_from_goal>=0.61
Size of Subgroup: 1601
----------------------------------------
Quality: 0.017746280062366365
Subgroup: start_x>=96.60
Size of Subgroup: 1753
---------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.019540,shot_angle_from_goal>=0.61 AND start_x>=96.60,996,7979,180,193,6983,0.124828,0.875172,0.932642,0.067358,0.180723,0.001862,0.024188,7.471440
1,0.019540,shot_angle_from_goal>=0.61 AND shot_distance_f...,996,7979,180,193,6983,0.124828,0.875172,0.932642,0.067358,0.180723,0.001862,0.024188,7.471440
2,0.019353,shot_angle_from_goal>=0.61 AND shot_distance_f...,1347,7979,187,193,6632,0.168818,0.831182,0.968912,0.031088,0.138827,0.000905,0.024188,5.739382
3,0.019329,shot_distance_from_goal<11.04 AND start_x>=96.60,1231,7979,184,193,6748,0.154280,0.845720,0.953368,0.046632,0.149472,0.001334,0.024188,6.179466
4,0.019142,shot_distance_from_goal<11.04,1582,7979,191,193,6397,0.198270,0.801730,0.989637,0.010363,0.120733,0.000313,0.024188,4.991350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.006467,play_distance_towards_goal: [0.0:7.35[ AND sho...,306,7979,59,193,7673,0.038351,0.961649,0.305699,0.694301,0.192810,0.017464,0.024188,7.971164
96,0.006458,play_speed_towards_goal: [0.0:0.76[ AND shot_a...,309,7979,59,193,7670,0.038727,0.961273,0.305699,0.694301,0.190939,0.017471,0.024188,7.893774
97,0.006437,player_rank>=0.02 AND shot_distance_from_goal<...,357,7979,60,193,7622,0.044742,0.955258,0.310881,0.689119,0.168067,0.017449,0.024188,6.948230
98,0.006415,num_dribbles: [0:1[ AND play_distance_towards_...,323,7979,59,193,7656,0.040481,0.959519,0.305699,0.694301,0.182663,0.017503,0.024188,7.551629


In [34]:
show_coverage(result, Spain_shots_df_cp)

0.12482767264068179
0.12482767264068179
0.16881814763754857
0.19827045995738815
0.19827045995738815
0.2301040230605339
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.2955257551071563
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.3982955257551072
0.39829

### VAEP

In [35]:
England_shots_df_cp = England_shots_df.copy()
Spain_shots_df_cp = Spain_shots_df.copy()
all_action_values_cp = all_action_values.copy()
all_action_values_cp.drop(["original_event_id", "result_name", "action_id", "type_name", "end_x", "end_y"], inplace=True, axis=1)

England_shots_df_cp = England_shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')
Spain_shots_df_cp = Spain_shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')

In [36]:
England_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(England_shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(England_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().tail(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

# Cálculo WRAcc=(size_sg/size_dataset)*(mean_sg−mean_dataset)

Quality: 78.1030622124672
Subgroup: play_duration<1.39 AND shot_angle_from_goal>=0.61
----------------------------------------
Quality: 78.09218579530716
Subgroup: play_speed_towards_goal: [0.0:0.61[ AND ratio_distance: [0.0:0.10[ AND shot_angle_from_goal>=0.61
----------------------------------------
Quality: 77.74706348776817
Subgroup: shot_distance_from_goal<11.26 AND total_time_per_play<0.65
----------------------------------------
Quality: 77.69067320227623
Subgroup: play_duration<1.39 AND shot_distance_from_goal<11.26 AND total_time_per_play<0.65
----------------------------------------
Quality: 77.60794448852539
Subgroup: play_distance_towards_goal: [0.0:6.30[ AND shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 77.53052169084549
Subgroup: play_speed_towards_goal: [0.0:0.61[ AND ratio_distance: [0.0:0.10[ AND shot_distance_from_goal<11.26
----------------------------------------
Quality: 77.31219002604485
Subgroup: pl

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,270.026178,shot_distance_from_goal<11.26,1687,8528,0.287464,0.127401,0.419320,0.295678,0.036383,0.026265,0.995717,0.996881,0.014080,0.002416,2.256369,1.385209
1,268.956481,shot_angle_from_goal>=0.61,1707,8528,0.284962,0.127401,0.418119,0.295678,0.035959,0.026265,0.995717,0.996881,0.014080,0.002416,2.236730,1.369085
2,254.301583,shot_angle_from_goal>=0.61 AND shot_distance_f...,1460,8528,0.301580,0.127401,0.426455,0.295678,0.036595,0.026265,0.995717,0.996881,0.014080,0.002416,2.367170,1.393301
3,240.865721,shot_distance_from_goal<11.26 AND start_x>=96.60,1332,8528,0.308231,0.127401,0.429490,0.295678,0.036623,0.026265,0.995717,0.996881,0.014080,0.002416,2.419375,1.394351
4,232.259416,start_x>=96.60,1813,8528,0.255509,0.127401,0.401293,0.295678,0.035342,0.026265,0.995717,0.996881,0.011900,0.002416,2.005546,1.345591
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,77.530522,play_speed_towards_goal: [0.0:0.61[ AND ratio_...,298,8528,0.387571,0.127401,0.457406,0.295678,0.040574,0.026265,0.990374,0.996881,0.019242,0.002416,3.042127,1.544783
96,77.312190,play_duration<1.39 AND play_std_distance_to_th...,293,8528,0.391265,0.127401,0.457657,0.295678,0.041428,0.026265,0.990374,0.996881,0.021422,0.002416,3.071126,1.577291
97,77.271016,player_rank>=0.02 AND shot_distance_from_goal<...,302,8528,0.383266,0.127401,0.456171,0.295678,0.041924,0.026265,0.994042,0.996881,0.018457,0.002416,3.008334,1.596163
98,77.238108,num_dribbles: [0:1[ AND shot_distance_from_goa...,294,8528,0.390116,0.127401,0.457272,0.295678,0.041245,0.026265,0.990374,0.996881,0.021422,0.002416,3.062104,1.570315


In [37]:
show_coverage(result, England_shots_df_cp)

0.1978189493433396
0.22678236397748594
0.22678236397748594
0.22678236397748594
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0.28318480300187615
0

In [38]:
Spain_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(Spain_shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(Spain_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().tail(10).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 67.78081412613392
Subgroup: play_distance_towards_goal>=53.55 AND shot_distance_from_goal<11.04
----------------------------------------
Quality: 67.75137293338776
Subgroup: bodypart_name=='foot_left' AND shot_angle_from_goal>=0.61 AND start_x>=96.60
----------------------------------------
Quality: 66.89733941853046
Subgroup: play_speed_towards_goal: [0.0:0.76[ AND shot_angle_from_goal>=0.61
----------------------------------------
Quality: 66.89585070312023
Subgroup: shot_distance_from_goal<11.04 AND total_time_per_play<0.63
----------------------------------------
Quality: 66.89585070312023
Subgroup: num_dribbles: [0:1[ AND shot_distance_from_goal<11.04 AND total_time_per_play<0.63
----------------------------------------
Quality: 66.35547921061516
Subgroup: ratio_distance: [0.0:0.12[ AND shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04
----------------------------------------
Quality: 66.21693554520607
Subgroup: play_distance_towards_goal: [0.0:7.35[ AND shot_a

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,243.790569,shot_distance_from_goal<11.04,1588,8043,0.284270,0.13075,0.417027,0.298985,0.036685,0.026685,0.995627,0.995627,0.015709,0.003558,2.174153,1.374755
1,243.062725,shot_angle_from_goal>=0.61,1611,8043,0.281627,0.13075,0.415756,0.298985,0.036599,0.026685,0.995627,0.995627,0.009808,0.003558,2.153935,1.371526
2,231.454275,shot_angle_from_goal>=0.61 AND shot_distance_f...,1353,8043,0.301817,0.13075,0.425935,0.298985,0.037228,0.026685,0.995627,0.995627,0.016668,0.003558,2.308356,1.395114
3,226.882050,shot_distance_from_goal<11.04 AND start_x>=96.60,1234,8043,0.314609,0.13075,0.431803,0.298985,0.037358,0.026685,0.995627,0.995627,0.015709,0.003558,2.406188,1.399988
4,214.545707,shot_angle_from_goal>=0.61 AND start_x>=96.60,999,8043,0.345510,0.13075,0.444272,0.298985,0.038502,0.026685,0.995627,0.995627,0.016668,0.003558,2.642528,1.442854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,66.355479,ratio_distance: [0.0:0.12[ AND shot_angle_from...,266,8043,0.380207,0.13075,0.454908,0.298985,0.040802,0.026685,0.989558,0.995627,0.019290,0.003558,2.907891,1.529055
96,66.216936,play_distance_towards_goal: [0.0:7.35[ AND sho...,306,8043,0.347145,0.13075,0.444545,0.298985,0.039875,0.026685,0.989558,0.995627,0.017399,0.003558,2.655031,1.494289
97,66.211374,play_duration<1.31 AND shot_distance_from_goal...,280,8043,0.367219,0.13075,0.450280,0.298985,0.041740,0.026685,0.990125,0.995627,0.015709,0.003558,2.808560,1.564181
98,66.211374,num_dribbles: [0:1[ AND play_duration<1.31 AND...,280,8043,0.367219,0.13075,0.450280,0.298985,0.041740,0.026685,0.990125,0.995627,0.015709,0.003558,2.808560,1.564181


In [39]:
show_coverage(result, Spain_shots_df_cp)

0.19743876662936716
0.22951634962078826
0.22951634962078826
0.22951634962078826
0.22951634962078826
0.22951634962078826
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.2950391644908616
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.3982344896183016
0.5119980106925277
0.5119980106925277
0.5119980106925277
0.5119980106925277
0.5119980106925277
0.5119980106925277
0.5119980106925277
0.5119

## Avaliação Times (Topo VS Meio VS Baixo da tabela)

### Pre-process

In [40]:
# 1º: Manchester City - 1625
# 10º: Newcastle - 1613
# 20º: West Bromwich - 1627

England_df = pd.read_csv(f"{PATH}England.csv", index_col=0)
England_df.drop(["original_event_id", "result_name","bodypart_id", "type_id"], inplace=True, axis=1)

MC_df = England_df[England_df["team_id"] == 1625]
MC_shots_df = generate_shots_with_counts_events(MC_df)

NC_df = England_df[England_df["team_id"] == 1613]
NC_shots_df = generate_shots_with_counts_events(NC_df)

WB_df = England_df[England_df["team_id"] == 1627]
WB_shots_df = generate_shots_with_counts_events(WB_df)

In [41]:
MC_shots_df

Unnamed: 0,game_id,period_id,team_id,player_id,time_seconds,start_x,start_y,num_events,num_passes,num_dribbles,...,play_mean_distance_to_the_goal,play_std_distance_to_the_goal,play_distance_towards_goal,ratio_distance,total_time_per_play,play_speed,play_speed_towards_goal,shot_distance_from_goal,shot_angle_from_goal,result_id
0,2499720,1,1625,340386,189.560864,93.45,42.84,47,39,3,...,53.868916,19.735366,42.00,0.044038,3.961749,5.122023,0.225561,14.544693,0.441421,0
1,2499720,1,1625,70083,534.526010,84.00,48.28,65,51,4,...,45.904966,13.137510,21.00,0.017086,5.201843,3.635010,0.062108,25.395244,0.261031,0
2,2499720,1,1625,105339,794.071176,93.45,27.88,53,46,2,...,51.809342,13.198073,55.65,0.057894,4.195384,4.322957,0.250275,13.071224,0.537975,0
3,2499720,1,1625,340386,1995.689167,96.60,29.92,145,119,3,...,50.788555,17.775042,52.50,0.017514,8.032031,2.573904,0.045078,9.338437,0.756460,0
4,2499720,1,1625,340386,1998.335772,97.65,39.44,1,0,0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,9.144184,0.715772,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598,2500095,2,1625,245364,896.992909,82.95,46.92,34,24,1,...,61.007314,24.876650,51.45,0.065115,7.045725,3.298353,0.214774,25.556387,0.270101,0
599,2500095,2,1625,11066,902.415248,92.40,27.88,2,0,0,...,14.007655,0.000000,7.35,0.803790,1.248029,3.663450,2.944643,14.007655,0.509981,0
600,2500095,2,1625,9380,926.967286,92.40,38.08,2,0,0,...,13.244108,0.000000,-12.60,-0.314133,1.021896,19.625494,-6.165011,13.244108,0.563864,0
601,2500095,2,1625,38021,978.686885,84.00,26.52,7,3,0,...,28.525122,6.073473,-21.00,-0.198078,3.090426,4.900801,-0.970740,22.292384,0.336059,0


### Binário

In [61]:
MC_shots_df_cp = MC_shots_df.copy()
MC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(MC_shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(MC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe().head(30)

Quality: 0.04832113616549646
Subgroup: shot_angle_from_goal>=0.62
Size of Subgroup: 121
----------------------------------------
Quality: 0.04717980027997106
Subgroup: shot_distance_from_goal<10.90
Size of Subgroup: 119
----------------------------------------
Quality: 0.044657860504002916
Subgroup: start_x>=96.60
Size of Subgroup: 148
----------------------------------------
Quality: 0.04405831538823297
Subgroup: shot_angle_from_goal>=0.62 AND shot_distance_from_goal<10.90
Size of Subgroup: 99
----------------------------------------
Quality: 0.04162438223476317
Subgroup: shot_distance_from_goal<10.90 AND start_x>=96.60
Size of Subgroup: 102
----------------------------------------
Quality: 0.03850289734302506
Subgroup: shot_angle_from_goal>=0.62 AND start_x>=96.60
Size of Subgroup: 82
----------------------------------------
Quality: 0.03850289734302506
Subgroup: shot_angle_from_goal>=0.62 AND shot_distance_from_goal<10.90 AND start_x>=96.60
Size of Subgroup: 82
---------------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.048321,shot_angle_from_goal>=0.62,121,603,48,94,482,0.200663,0.799337,0.510638,0.489362,0.396694,0.095436,0.155887,2.544751
1,0.04718,shot_distance_from_goal<10.90,119,603,47,94,484,0.197347,0.802653,0.5,0.5,0.394958,0.097107,0.155887,2.533613
2,0.044658,start_x>=96.60,148,603,50,94,455,0.245439,0.754561,0.531915,0.468085,0.337838,0.096703,0.155887,2.167194
3,0.044058,shot_angle_from_goal>=0.62 AND shot_distance_f...,99,603,42,94,504,0.164179,0.835821,0.446809,0.553191,0.424242,0.103175,0.155887,2.72147
4,0.041624,shot_distance_from_goal<10.90 AND start_x>=96.60,102,603,41,94,501,0.169154,0.830846,0.43617,0.56383,0.401961,0.105788,0.155887,2.578536
5,0.038503,shot_angle_from_goal>=0.62 AND start_x>=96.60,82,603,36,94,521,0.135987,0.864013,0.382979,0.617021,0.439024,0.111324,0.155887,2.816295
6,0.038503,shot_angle_from_goal>=0.62 AND shot_distance_f...,82,603,36,94,521,0.135987,0.864013,0.382979,0.617021,0.439024,0.111324,0.155887,2.816295
7,0.024183,shot_distance_from_goal<10.90 AND start_y: [31...,54,603,23,94,549,0.089552,0.910448,0.244681,0.755319,0.425926,0.129326,0.155887,2.73227
8,0.024183,shot_angle_from_goal>=0.62 AND shot_distance_f...,54,603,23,94,549,0.089552,0.910448,0.244681,0.755319,0.425926,0.129326,0.155887,2.73227
9,0.023602,player_rank: [0.02:0.03[ AND shot_angle_from_g...,37,603,20,94,566,0.06136,0.93864,0.212766,0.787234,0.540541,0.130742,0.155887,3.46751


In [43]:
show_coverage(result, MC_shots_df_cp)

0.20066334991708126
0.23383084577114427
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527363185
0.43781094527

In [72]:
NC_shots_df_cp = NC_shots_df.copy()
NC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(NC_shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(NC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(50).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 0.04048780487804878
Subgroup: shot_distance_from_goal<11.04
Size of Subgroup: 82
----------------------------------------
Quality: 0.038048780487804884
Subgroup: start_x>=96.60
Size of Subgroup: 82
----------------------------------------
Quality: 0.038048780487804884
Subgroup: shot_angle_from_goal>=0.61
Size of Subgroup: 82
----------------------------------------
Quality: 0.038030933967876254
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04
Size of Subgroup: 71
----------------------------------------
Quality: 0.036472337894110654
Subgroup: shot_distance_from_goal<11.04 AND start_x>=96.60
Size of Subgroup: 67
----------------------------------------
Quality: 0.03509815585960738
Subgroup: shot_angle_from_goal>=0.61 AND start_y: [33.32:38.08[
Size of Subgroup: 40
----------------------------------------
Quality: 0.03419988102320048
Subgroup: shot_distance_from_goal<11.04 AND start_y: [33.32:38.08[
Size of Subgroup: 33
-------------------------------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.040488,shot_distance_from_goal<11.04,82,410,24,37,328,0.2,0.8,0.648649,0.351351,0.292683,0.039634,0.090244,3.243243
1,0.038049,start_x>=96.60,82,410,23,37,328,0.2,0.8,0.621622,0.378378,0.280488,0.042683,0.090244,3.108108
2,0.038049,shot_angle_from_goal>=0.61,82,410,23,37,328,0.2,0.8,0.621622,0.378378,0.280488,0.042683,0.090244,3.108108
3,0.038031,shot_angle_from_goal>=0.61 AND shot_distance_f...,71,410,22,37,339,0.173171,0.826829,0.594595,0.405405,0.309859,0.044248,0.090244,3.433574
4,0.036472,shot_distance_from_goal<11.04 AND start_x>=96.60,67,410,21,37,343,0.163415,0.836585,0.567568,0.432432,0.313433,0.046647,0.090244,3.473175
5,0.035098,shot_angle_from_goal>=0.61 AND start_y: [33.32...,40,410,18,37,370,0.097561,0.902439,0.486486,0.513514,0.45,0.051351,0.090244,4.986486
6,0.0342,shot_distance_from_goal<11.04 AND start_y: [33...,33,410,17,37,377,0.080488,0.919512,0.459459,0.540541,0.515152,0.05305,0.090244,5.708436
7,0.0342,shot_angle_from_goal>=0.61 AND shot_distance_f...,33,410,17,37,377,0.080488,0.919512,0.459459,0.540541,0.515152,0.05305,0.090244,5.708436
8,0.034015,shot_angle_from_goal>=0.61 AND start_x>=96.60,56,410,19,37,354,0.136585,0.863415,0.513514,0.486486,0.339286,0.050847,0.090244,3.759653
9,0.034015,shot_angle_from_goal>=0.61 AND shot_distance_f...,56,410,19,37,354,0.136585,0.863415,0.513514,0.486486,0.339286,0.050847,0.090244,3.759653


In [45]:
show_coverage(result, NC_shots_df_cp)

0.2
0.23658536585365852
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.

In [71]:
WB_shots_df_cp = WB_shots_df.copy()
WB_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.BinaryTarget('result_id', 1)

search_space = ps.create_selectors(WB_shots_df_cp, ignore=['result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(WB_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(50).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe().head(30)

Quality: 0.029317943847858376
Subgroup: shot_distance_from_goal<10.52 AND start_x>=96.60
Size of Subgroup: 57
----------------------------------------
Quality: 0.029106906599784092
Subgroup: shot_distance_from_goal<10.52
Size of Subgroup: 70
----------------------------------------
Quality: 0.028871518900008934
Subgroup: shot_angle_from_goal>=0.63
Size of Subgroup: 71
----------------------------------------
Quality: 0.026517641902257288
Subgroup: start_x>=96.60
Size of Subgroup: 81
----------------------------------------
Quality: 0.025973815147604325
Subgroup: shot_angle_from_goal>=0.63 AND start_x>=96.60
Size of Subgroup: 47
----------------------------------------
Quality: 0.025973815147604325
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52 AND start_x>=96.60
Size of Subgroup: 47
----------------------------------------
Quality: 0.025762777899530038
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52
Size of Subgroup: 60
-------------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.029318,shot_distance_from_goal<10.52 AND start_x>=96.60,57,351,15,29,294,0.162393,0.837607,0.517241,0.482759,0.263158,0.047619,0.082621,3.185118
1,0.029107,shot_distance_from_goal<10.52,70,351,16,29,281,0.19943,0.80057,0.551724,0.448276,0.228571,0.046263,0.082621,2.766502
2,0.028872,shot_angle_from_goal>=0.63,71,351,16,29,280,0.202279,0.797721,0.551724,0.448276,0.225352,0.046429,0.082621,2.727538
3,0.026518,start_x>=96.60,81,351,16,29,270,0.230769,0.769231,0.551724,0.448276,0.197531,0.048148,0.082621,2.390805
4,0.025974,shot_angle_from_goal>=0.63 AND start_x>=96.60,47,351,13,29,304,0.133903,0.866097,0.448276,0.551724,0.276596,0.052632,0.082621,3.347762
5,0.025974,shot_angle_from_goal>=0.63 AND shot_distance_f...,47,351,13,29,304,0.133903,0.866097,0.448276,0.551724,0.276596,0.052632,0.082621,3.347762
6,0.025763,shot_angle_from_goal>=0.63 AND shot_distance_f...,60,351,14,29,291,0.17094,0.82906,0.482759,0.517241,0.233333,0.051546,0.082621,2.824138
7,0.020722,shot_distance_from_goal<10.52 AND start_y: [32...,33,351,10,29,318,0.094017,0.905983,0.344828,0.655172,0.30303,0.059748,0.082621,3.667712
8,0.020722,shot_angle_from_goal>=0.63 AND shot_distance_f...,33,351,10,29,318,0.094017,0.905983,0.344828,0.655172,0.30303,0.059748,0.082621,3.667712
9,0.019756,start_x>=96.60 AND start_y: [32.64:38.08[,25,351,9,29,326,0.071225,0.928775,0.310345,0.689655,0.36,0.06135,0.082621,4.357241


In [47]:
show_coverage(result, WB_shots_df_cp)

0.1623931623931624
0.19943019943019943
0.23076923076923078
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045584046
0.4045584045

### xG

In [64]:
MC_shots_df_cp = MC_shots_df.copy()
MC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

X = MC_shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
MC_shots_df_cp["xg"] = rfc.predict(X=X)

target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(MC_shots_df_cp, ignore=['xg', 'result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(MC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe().head(30)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


Quality: 0.03031278103677302
Subgroup: shot_distance_from_goal<10.90 AND start_x>=96.60
Size of Subgroup: 102
----------------------------------------
Quality: 0.029864497303422083
Subgroup: shot_angle_from_goal>=0.62 AND start_x>=96.60
Size of Subgroup: 82
----------------------------------------
Quality: 0.029864497303422083
Subgroup: shot_angle_from_goal>=0.62 AND shot_distance_from_goal<10.90 AND start_x>=96.60
Size of Subgroup: 82
----------------------------------------
Quality: 0.029284203636323634
Subgroup: shot_distance_from_goal<10.90
Size of Subgroup: 119
----------------------------------------
Quality: 0.0288359199029727
Subgroup: shot_angle_from_goal>=0.62 AND shot_distance_from_goal<10.90
Size of Subgroup: 99
----------------------------------------
Quality: 0.027529571600262922
Subgroup: start_x>=96.60
Size of Subgroup: 148
----------------------------------------
Quality: 0.027504819737685263
Subgroup: shot_angle_from_goal>=0.62
Size of Subgroup: 121
------------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.030313,shot_distance_from_goal<10.90 AND start_x>=96.60,102,603,22,22,501,0.169154,0.830846,1.0,0.0,0.215686,0.0,0.036484,5.911765
1,0.029864,shot_angle_from_goal>=0.62 AND start_x>=96.60,82,603,21,22,521,0.135987,0.864013,0.954545,0.045455,0.256098,0.001919,0.036484,7.019401
2,0.029864,shot_angle_from_goal>=0.62 AND shot_distance_f...,82,603,21,22,521,0.135987,0.864013,0.954545,0.045455,0.256098,0.001919,0.036484,7.019401
3,0.029284,shot_distance_from_goal<10.90,119,603,22,22,484,0.197347,0.802653,1.0,0.0,0.184874,0.0,0.036484,5.067227
4,0.028836,shot_angle_from_goal>=0.62 AND shot_distance_f...,99,603,21,22,504,0.164179,0.835821,0.954545,0.045455,0.212121,0.001984,0.036484,5.81405
5,0.02753,start_x>=96.60,148,603,22,22,455,0.245439,0.754561,1.0,0.0,0.148649,0.0,0.036484,4.074324
6,0.027505,shot_angle_from_goal>=0.62,121,603,21,22,482,0.200663,0.799337,0.954545,0.045455,0.173554,0.002075,0.036484,4.75695
7,0.022334,start_x>=96.60 AND start_y: [31.96:37.40[,42,603,15,22,561,0.069652,0.930348,0.681818,0.318182,0.357143,0.012478,0.036484,9.788961
8,0.022334,shot_distance_from_goal<10.90 AND start_x>=96....,42,603,15,22,561,0.069652,0.930348,0.681818,0.318182,0.357143,0.012478,0.036484,9.788961
9,0.022334,shot_angle_from_goal>=0.62 AND start_x>=96.60 ...,42,603,15,22,561,0.069652,0.930348,0.681818,0.318182,0.357143,0.012478,0.036484,9.788961


In [49]:
show_coverage(result, MC_shots_df_cp)

0.1691542288557214
0.1691542288557214
0.1691542288557214
0.19734660033167495
0.19734660033167495
0.2736318407960199
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.3101160862354892
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.4079601990049751
0.49585406301824214
0.49585406301824214
0.49585406301824214
0.49585406301824214
0.49585406301824214
0.495

In [65]:
NC_shots_df_cp = NC_shots_df.copy()
NC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

X = NC_shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
NC_shots_df_cp["xg"] = rfc.predict(X=X)

target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(NC_shots_df_cp, ignore=['xg', 'result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(NC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


Quality: 0.02466983938132064
Subgroup: shot_distance_from_goal<11.04 AND start_y: [33.32:38.08[
Size of Subgroup: 33
----------------------------------------
Quality: 0.02466983938132064
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04 AND start_y: [33.32:38.08[
Size of Subgroup: 33
----------------------------------------
Quality: 0.02421177870315289
Subgroup: shot_angle_from_goal>=0.61 AND start_y: [33.32:38.08[
Size of Subgroup: 40
----------------------------------------
Quality: 0.02275431290898275
Subgroup: start_x>=96.60 AND start_y: [33.32:38.08[
Size of Subgroup: 25
----------------------------------------
Quality: 0.02275431290898275
Subgroup: shot_distance_from_goal<11.04 AND start_x>=96.60 AND start_y: [33.32:38.08[
Size of Subgroup: 25
----------------------------------------
Quality: 0.02275431290898275
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60 AND start_y: [33.32:38.08[
Size of Subgroup: 25
----------------------------------------
Qual

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.024670,shot_distance_from_goal<11.04 AND start_y: [33...,33,410,11,11,377,0.080488,0.919512,1.000000,0.000000,0.333333,0.000000,0.026829,12.424242
1,0.024670,shot_angle_from_goal>=0.61 AND shot_distance_f...,33,410,11,11,377,0.080488,0.919512,1.000000,0.000000,0.333333,0.000000,0.026829,12.424242
2,0.024212,shot_angle_from_goal>=0.61 AND start_y: [33.32...,40,410,11,11,370,0.097561,0.902439,1.000000,0.000000,0.275000,0.000000,0.026829,10.250000
3,0.022754,start_x>=96.60 AND start_y: [33.32:38.08[,25,410,10,11,385,0.060976,0.939024,0.909091,0.090909,0.400000,0.002597,0.026829,14.909091
4,0.022754,shot_distance_from_goal<11.04 AND start_x>=96....,25,410,10,11,385,0.060976,0.939024,0.909091,0.090909,0.400000,0.002597,0.026829,14.909091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.009167,play_distance_towards_goal>=61.95 AND play_spe...,9,410,4,11,401,0.021951,0.978049,0.363636,0.636364,0.444444,0.017456,0.026829,16.565657
96,0.009167,play_distance: [452.59:864.79[ AND shot_angle_...,9,410,4,11,401,0.021951,0.978049,0.363636,0.636364,0.444444,0.017456,0.026829,16.565657
97,0.009167,num_passes<5 AND shot_angle_from_goal>=0.61 AN...,9,410,4,11,401,0.021951,0.978049,0.363636,0.636364,0.444444,0.017456,0.026829,16.565657
98,0.009167,num_events<10 AND shot_angle_from_goal>=0.61 A...,9,410,4,11,401,0.021951,0.978049,0.363636,0.636364,0.444444,0.017456,0.026829,16.565657


In [51]:
show_coverage(result, NC_shots_df_cp)

0.08048780487804878
0.08048780487804878
0.0975609756097561
0.0975609756097561
0.0975609756097561
0.0975609756097561
0.1902439024390244
0.21707317073170732
0.22682926829268293
0.35121951219512193
0.35121951219512193
0.35121951219512193
0.35121951219512193
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.38

In [66]:
WB_shots_df_cp = WB_shots_df.copy()
WB_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

X = WB_shots_df_cp[["bodypart_name", "shot_distance_from_goal", "shot_angle_from_goal"]]
X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)
WB_shots_df_cp["xg"] = rfc.predict(X=X)

target = ps.BinaryTarget('xg', 1)

search_space = ps.create_selectors(WB_shots_df_cp, ignore=['xg', 'result_id'])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(WB_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.WRAccQF())

# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)
result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["bodypart_name"] = X["bodypart_name"].apply(lambda val: 0 if val == "foot_right" else 1 if val == "foot_left" else 2)


Quality: 0.030705919594808485
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52
Size of Subgroup: 60
----------------------------------------
Quality: 0.02965073335443706
Subgroup: shot_distance_from_goal<10.52
Size of Subgroup: 70
----------------------------------------
Quality: 0.029545214730399918
Subgroup: shot_angle_from_goal>=0.63
Size of Subgroup: 71
----------------------------------------
Quality: 0.029228658858288487
Subgroup: shot_angle_from_goal>=0.63 AND start_x>=96.60
Size of Subgroup: 47
----------------------------------------
Quality: 0.029228658858288487
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52 AND start_x>=96.60
Size of Subgroup: 47
----------------------------------------
Quality: 0.028173472617917063
Subgroup: shot_distance_from_goal<10.52 AND start_x>=96.60
Size of Subgroup: 57
----------------------------------------
Quality: 0.02564102564102564
Subgroup: start_x>=96.60
Size of Subgroup: 81
---------------------

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.030706,shot_angle_from_goal>=0.63 AND shot_distance_f...,60,351,13,13,291,0.170940,0.829060,1.000000,0.000000,0.216667,0.000000,0.037037,5.850000
1,0.029651,shot_distance_from_goal<10.52,70,351,13,13,281,0.199430,0.800570,1.000000,0.000000,0.185714,0.000000,0.037037,5.014286
2,0.029545,shot_angle_from_goal>=0.63,71,351,13,13,280,0.202279,0.797721,1.000000,0.000000,0.183099,0.000000,0.037037,4.943662
3,0.029229,shot_angle_from_goal>=0.63 AND start_x>=96.60,47,351,12,13,304,0.133903,0.866097,0.923077,0.076923,0.255319,0.003289,0.037037,6.893617
4,0.029229,shot_angle_from_goal>=0.63 AND shot_distance_f...,47,351,12,13,304,0.133903,0.866097,0.923077,0.076923,0.255319,0.003289,0.037037,6.893617
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.014878,num_dribbles: [0:2[ AND play_duration<71.54 AN...,21,351,6,13,330,0.059829,0.940171,0.461538,0.538462,0.285714,0.021212,0.037037,7.714286
96,0.014773,num_events<10 AND play_mean_distance_to_the_go...,22,351,6,13,329,0.062678,0.937322,0.461538,0.538462,0.272727,0.021277,0.037037,7.363636
97,0.014773,num_events<10 AND num_passes<5 AND start_x>=96.60,22,351,6,13,329,0.062678,0.937322,0.461538,0.538462,0.272727,0.021277,0.037037,7.363636
98,0.014773,num_dribbles: [0:2[ AND play_mean_distance_to_...,22,351,6,13,329,0.062678,0.937322,0.461538,0.538462,0.272727,0.021277,0.037037,7.363636


In [53]:
show_coverage(result, WB_shots_df_cp)

0.17094017094017094
0.19943019943019943
0.23076923076923078
0.23076923076923078
0.23076923076923078
0.23076923076923078
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.29914529914529914
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887
0.39886039886039887


### VAEP

In [67]:
all_action_values_cp = all_action_values.copy()
all_action_values_cp.drop(["original_event_id", "result_name", "action_id", "type_name", "end_x", "end_y"], inplace=True, axis=1)

MC_shots_df_cp = MC_shots_df.copy()
NC_shots_df_cp = NC_shots_df.copy()
WB_shots_df_cp = WB_shots_df.copy()

MC_shots_df_cp = MC_shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')
NC_shots_df_cp = NC_shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')
WB_shots_df_cp = WB_shots_df_cp.merge(all_action_values_cp, on=['game_id', 'player_id', 'start_x', 'start_y'], how='left')


In [68]:
MC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(MC_shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(MC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 29.6915140748024
Subgroup: shot_distance_from_goal<11.03
----------------------------------------
Quality: 29.60809600353241
Subgroup: shot_angle_from_goal>=0.61
----------------------------------------
Quality: 26.88896682858467
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.03
----------------------------------------
Quality: 26.594013959169388
Subgroup: start_x>=96.60
----------------------------------------
Quality: 25.6968614757061
Subgroup: shot_distance_from_goal<11.03 AND start_x>=96.60
----------------------------------------
Quality: 22.89431345462799
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
----------------------------------------
Quality: 22.89431345462799
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.03 AND start_x>=96.60
----------------------------------------
Quality: 14.21689122915268
Subgroup: player_rank: [0.02:0.03[ AND shot_distance_from_goal<11.03
----------------------------------------
Quality: 1

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,29.691514,shot_distance_from_goal<11.03,122,610,0.417650,0.174277,0.463023,0.344937,0.046805,0.028802,0.994042,0.994042,0.020629,0.00368,2.396475,1.625026
1,29.608096,shot_angle_from_goal>=0.61,122,610,0.416966,0.174277,0.463104,0.344937,0.046295,0.028802,0.994042,0.994042,0.020629,0.00368,2.392551,1.607331
2,26.888967,shot_angle_from_goal>=0.61 AND shot_distance_f...,101,610,0.440504,0.174277,0.466797,0.344937,0.048291,0.028802,0.994042,0.994042,0.020629,0.00368,2.527613,1.676618
3,26.594014,start_x>=96.60,151,610,0.350396,0.174277,0.445330,0.344937,0.041428,0.028802,0.994042,0.994042,0.018457,0.00368,2.010572,1.438346
4,25.696861,shot_distance_from_goal<11.03 AND start_x>=96.60,103,610,0.423761,0.174277,0.464014,0.344937,0.048291,0.028802,0.994042,0.994042,0.020629,0.00368,2.431540,1.676618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,8.121586,num_dribbles: [2:3[ AND shot_distance_from_goa...,17,610,0.652017,0.174277,0.449189,0.344937,0.979673,0.028802,0.988301,0.994042,0.022437,0.00368,3.741274,34.013573
96,8.118326,num_passes: [6:18[ AND play_distance: [183.48:...,24,610,0.512540,0.174277,0.471692,0.344937,0.524886,0.028802,0.993361,0.994042,0.021840,0.00368,2.940956,18.223688
97,8.113190,shot_angle_from_goal>=0.61 AND total_time_per_...,30,610,0.444716,0.174277,0.470419,0.344937,0.042437,0.028802,0.994042,0.994042,0.021840,0.00368,2.551783,1.473369
98,8.112234,play_speed_towards_goal>=0.56,122,610,0.240770,0.174277,0.395328,0.344937,0.033571,0.028802,0.993361,0.994042,0.004829,0.00368,1.381541,1.165554


In [56]:
show_coverage(result, MC_shots_df_cp)

0.2
0.23442622950819672
0.23442622950819672
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.31311475409836065
0.44098360655737706
0.44098360655737706
0.44098360655737706
0.44098360655737706
0.44098360655737706
0.44098360655737706
0.44098360655737

In [69]:
NC_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(NC_shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(NC_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 16.405627071857452
Subgroup: shot_distance_from_goal<11.04
----------------------------------------
Quality: 15.50608491897583
Subgroup: start_x>=96.60
----------------------------------------
Quality: 15.489017486572266
Subgroup: shot_angle_from_goal>=0.61
----------------------------------------
Quality: 15.372848868370056
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04
----------------------------------------
Quality: 14.712131202220917
Subgroup: shot_distance_from_goal<11.04 AND start_x>=96.60
----------------------------------------
Quality: 13.986570835113525
Subgroup: shot_angle_from_goal>=0.61 AND start_y: [33.32:38.08[
----------------------------------------
Quality: 13.679353475570679
Subgroup: shot_angle_from_goal>=0.61 AND start_x>=96.60
----------------------------------------
Quality: 13.679353475570679
Subgroup: shot_angle_from_goal>=0.61 AND shot_distance_from_goal<11.04 AND start_x>=96.60
----------------------------------------
Quality

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,16.405627,shot_distance_from_goal<11.04,82,410,0.310007,0.109939,0.431152,0.274172,0.034518,0.024189,0.988178,0.988178,0.023678,0.003785,2.819821,1.427021
1,15.506085,start_x>=96.60,82,410,0.299037,0.109939,0.425354,0.274172,0.035051,0.024189,0.988178,0.988178,0.023678,0.003785,2.720038,1.449084
2,15.489017,shot_angle_from_goal>=0.61,82,410,0.298829,0.109939,0.425491,0.274172,0.034890,0.024189,0.988178,0.988178,0.023678,0.003785,2.718144,1.442411
3,15.372849,shot_angle_from_goal>=0.61 AND shot_distance_f...,71,410,0.326458,0.109939,0.438107,0.274172,0.034755,0.024189,0.988178,0.988178,0.023678,0.003785,2.969453,1.436836
4,14.712131,shot_distance_from_goal<11.04 AND start_x>=96.60,67,410,0.329523,0.109939,0.439657,0.274172,0.034755,0.024189,0.988178,0.988178,0.023678,0.003785,2.997333,1.436836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5.951542,play_std_distance_to_the_goal>=24.10 AND shot_...,9,410,0.771221,0.109939,0.389843,0.274172,0.977037,0.024189,0.983566,0.988178,0.031373,0.003785,7.015013,40.392460
96,5.893279,play_mean_distance_to_the_goal<41.87 AND shot_...,10,410,0.699267,0.109939,0.431853,0.274172,0.979014,0.024189,0.988178,0.988178,0.027732,0.003785,6.360517,40.474224
97,5.857538,play_std_distance_to_the_goal>=24.10 AND shot_...,10,410,0.695692,0.109939,0.432935,0.274172,0.976806,0.024189,0.985690,0.988178,0.030586,0.003785,6.328006,40.382912
98,5.822192,play_mean_distance_to_the_goal<41.87 AND shot_...,11,410,0.639229,0.109939,0.453418,0.274172,0.978259,0.024189,0.988178,0.988178,0.027732,0.003785,5.814414,40.442997


In [58]:
show_coverage(result, NC_shots_df_cp)

0.2
0.23658536585365852
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.2634146341463415
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.3878048780487805
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.43414634146341463
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.47560975609756095
0.5902439024390244
0.5902439024390244
0.59

In [70]:
WB_shots_df_cp.drop(["game_id", "period_id", "team_id", "player_id", "time_seconds", "result_id"], inplace=True, axis=1)

# Definir o alvo (target) da descoberta de subgrupos
# Aqui, vamos assumir que queremos encontrar subgrupos de chutes bem-sucedidos (result_name == 'Goal')
target = ps.NumericTarget('Pscores')

search_space = ps.create_selectors(WB_shots_df_cp, ignore=["Pscores", "Pconcedes", "offensive_value", "defensive_value", "vaep_value"])

# Configurar a tarefa de descoberta de subgrupos
task = ps.SubgroupDiscoveryTask(WB_shots_df_cp, target, search_space, result_set_size=100, depth=3, qf=ps.StandardQFNumeric(1.0))
# Configurar e executar o Beam Search
search_algorithm = ps.BeamSearch(250)

result = search_algorithm.execute(task)

# Exibir os resultados
for sg_result in result.to_dataframe().head(30).itertuples(index=False):
    print(f"Quality: {sg_result.quality}")
    print(f"Subgroup: {sg_result.subgroup}")
    # print(f"Size of Subgroup: {sg_result.size_sg}")
    # print(f"Size of Dataset: {sg_result.size_dataset}")
    # print(f"Positives in Subgroup: {sg_result.positives_sg}")
    # print(f"Positives in Dataset: {sg_result.positives_dataset}")
    # print(f"Size of Complement: {sg_result.size_complement}")
    # print(f"Relative Size of Subgroup: {sg_result.relative_size_sg}")
    # print(f"Relative Size of Complement: {sg_result.relative_size_complement}")
    # print(f"Coverage of Subgroup: {sg_result.coverage_sg}")
    # print(f"Coverage of Complement: {sg_result.coverage_complement}")
    # print(f"Target Share in Subgroup: {sg_result.target_share_sg}")
    # print(f"Target Share in Complement: {sg_result.target_share_complement}")
    # print(f"Target Share in Dataset: {sg_result.target_share_dataset}")
    # print(f"Lift: {sg_result.lift}")
    print("-" * 40)

result.to_dataframe()

Quality: 10.211431980133057
Subgroup: shot_distance_from_goal<10.52
----------------------------------------
Quality: 10.207541853189468
Subgroup: shot_distance_from_goal<10.52 AND start_x>=96.60
----------------------------------------
Quality: 10.141963601112366
Subgroup: shot_angle_from_goal>=0.63
----------------------------------------
Quality: 9.412968255579472
Subgroup: start_x>=96.60
----------------------------------------
Quality: 9.115802884101868
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52
----------------------------------------
Quality: 9.111915946006775
Subgroup: shot_angle_from_goal>=0.63 AND start_x>=96.60
----------------------------------------
Quality: 9.111915946006775
Subgroup: shot_angle_from_goal>=0.63 AND shot_distance_from_goal<10.52 AND start_x>=96.60
----------------------------------------
Quality: 7.19771146774292
Subgroup: shot_distance_from_goal<10.52 AND start_y: [32.64:38.08[
----------------------------------------
Quality: 

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,10.211432,shot_distance_from_goal<10.52,70,354,0.248672,0.102794,0.397485,0.261647,0.033162,0.025747,0.983416,0.983416,0.019465,0.004961,2.419124,1.287981
1,10.207542,shot_distance_from_goal<10.52 AND start_x>=96.60,57,354,0.281874,0.102794,0.416593,0.261647,0.033802,0.025747,0.983416,0.983416,0.019465,0.004961,2.742119,1.312838
2,10.141964,shot_angle_from_goal>=0.63,71,354,0.245639,0.102794,0.395517,0.261647,0.033536,0.025747,0.983416,0.983416,0.019465,0.004961,2.389617,1.302480
3,9.412968,start_x>=96.60,81,354,0.219004,0.102794,0.377130,0.261647,0.032830,0.025747,0.983416,0.983416,0.015445,0.004961,2.130507,1.275088
4,9.115803,shot_angle_from_goal>=0.63 AND shot_distance_f...,59,354,0.257299,0.102794,0.402549,0.261647,0.033802,0.025747,0.983416,0.983416,0.019465,0.004961,2.503054,1.312838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,3.737160,num_passes: [26:51[ AND play_speed_towards_goa...,25,354,0.252281,0.102794,0.408153,0.261647,0.028045,0.025747,0.981349,0.983416,0.008776,0.004961,2.454231,1.089232
96,3.735970,play_speed: [1.83:2.28[ AND shot_distance_from...,14,354,0.369649,0.102794,0.454085,0.261647,0.036641,0.025747,0.981349,0.983416,0.021926,0.004961,3.596013,1.423091
97,3.729418,play_duration: [415.49:762.68[ AND shot_angle_...,14,354,0.369181,0.102794,0.453762,0.261647,0.032905,0.025747,0.981349,0.983416,0.027506,0.004961,3.591461,1.278003
98,3.727112,play_duration: [415.49:762.68[ AND shot_distan...,14,354,0.369016,0.102794,0.453884,0.261647,0.031752,0.025747,0.981349,0.983416,0.027506,0.004961,3.589859,1.233219


In [60]:
show_coverage(result, WB_shots_df_cp)

0.1977401129943503
0.1977401129943503
0.23163841807909605
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.2994350282485876
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4011299435028249
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.4067796610169492
0.406779661