In [1]:
import os

import pandas as pd
import polars as pl
from pymongo import MongoClient
from statsbombpy import sb

# --- 1. Pull event data from the API ---
# Competition filter passed to statsbombpy as keyword arguments.
filtro_competicao = {
    "country": "Germany",
    "division": "1. Bundesliga",
    "season": "2023/2024",
    "gender": "male",
}
events_df = sb.competition_events(**filtro_competicao)



In [2]:
# Fetch the frames for the same competition/season and expose the
# event identifier under the column name `id`, which is the key name the
# later duplicate checks and the merge with events_df rely on.
frames_df = sb.competition_frames(
    country="Germany", division="1. Bundesliga", season="2023/2024", gender="male"
).rename(columns={"event_uuid": "id"})



In [3]:
# Count rows whose (match_id, id) pair already appeared earlier in each table.
chave_evento = ["match_id", "id"]

n_dup_frames = frames_df.duplicated(subset=chave_evento).sum()
n_dup_events = events_df.duplicated(subset=chave_evento).sum()

# Report both counts (frames first, then events).
print(f"Duplicados em frames_df: {n_dup_frames}")
print(f"Duplicados em events_df: {n_dup_events}")


Duplicados em frames_df: 1834601
Duplicados em events_df: 0


In [4]:
# Merge frames with events.
# frames_df had its `event_uuid` column renamed to `id` when it was loaded,
# so both tables are joined on the shared key (match_id, id).
# Left join: every frame row is kept and receives the columns of its event.
# validate="many_to_one" makes pandas raise a MergeError if the key is not
# unique in events_df, instead of silently multiplying frame rows.
merged_df = pd.merge(
    frames_df,
    events_df,
    how="left",
    on=["match_id", "id"],
    validate="many_to_one",
)


In [5]:
# Columns that identify one event within a match.
chave = ["match_id", "id"]

# Total rows vs. distinct (match_id, id) pairs; the difference is the
# number of rows that repeat an already-seen pair.
n_total = merged_df.shape[0]
n_unicos = len(merged_df[chave].drop_duplicates())
n_duplicados = n_total - n_unicos

# Show the three figures, one per line.
print(
    f"Linhas totais: {n_total}",
    f"Linhas únicas (match_id + id): {n_unicos}",
    f"Duplicados encontrados: {n_duplicados}",
    sep="\n",
)


Linhas totais: 1953182
Linhas únicas (match_id + id): 118581
Duplicados encontrados: 1834601


In [6]:
# Preview the first five rows of the merged table.
merged_df.head(n=5)

Unnamed: 0,actor,id,keeper,location_x,match_id,teammate,visible_area,50_50,bad_behaviour_card,ball_receipt_outcome,...,substitution_outcome,substitution_outcome_id,substitution_replacement,substitution_replacement_id,tactics,team,team_id,timestamp,type,under_pressure
0,False,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,False,"[33.623681824113135, 40.159018633164074]",3895302,True,"[16.638549335883955, 80.0, 42.634221931834276,...",,,,...,,,,,,Werder Bremen,176,00:00:04.732,Pass,
1,False,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,False,"[36.74041423873898, 53.63999223538578]",3895302,True,"[16.638549335883955, 80.0, 42.634221931834276,...",,,,...,,,,,,Werder Bremen,176,00:00:04.732,Pass,
2,False,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,False,"[41.68140190196648, 24.747150774999632]",3895302,True,"[16.638549335883955, 80.0, 42.634221931834276,...",,,,...,,,,,,Werder Bremen,176,00:00:04.732,Pass,
3,False,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,False,"[45.75852507051164, 34.30730339844553]",3895302,True,"[16.638549335883955, 80.0, 42.634221931834276,...",,,,...,,,,,,Werder Bremen,176,00:00:04.732,Pass,
4,False,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,False,"[49.77081997053183, 49.75472239512554]",3895302,True,"[16.638549335883955, 80.0, 42.634221931834276,...",,,,...,,,,,,Werder Bremen,176,00:00:04.732,Pass,


In [None]:
# ==========================
# 1. Output path (current working directory)
# ==========================
CAMINHO_OUT = os.path.join(os.getcwd(), "statsbomb_bundesliga_23_24.parquet")

# ==========================
# 2. Convert pandas -> polars
# ==========================
df_pl = pl.from_pandas(merged_df)

# ==========================
# 3. Save as Parquet (ZSTD compression)
# ==========================
df_pl.write_parquet(CAMINHO_OUT, compression="zstd")

# ==========================
# 4. Verify the round trip
# ==========================
df_lido = pl.read_parquet(CAMINHO_OUT)

# Fail loudly if the file read back does not have the same number of rows
# and columns as the frame that was written; eyeballing head() would not
# reveal a truncated or partial write.
assert df_lido.shape == df_pl.shape, (
    f"Parquet round trip changed shape: wrote {df_pl.shape}, read {df_lido.shape}"
)

df_lido.head()


actor,id,keeper,location_x,match_id,teammate,visible_area,50_50,bad_behaviour_card,ball_receipt_outcome,ball_recovery_offensive,ball_recovery_recovery_failure,block_deflection,block_offensive,block_save_block,carry_end_location,clearance_aerial_won,clearance_body_part,clearance_head,clearance_left_foot,clearance_other,clearance_right_foot,counterpress,dribble_no_touch,dribble_nutmeg,dribble_outcome,dribble_overrun,duel_outcome,duel_type,duration,foul_committed_advantage,foul_committed_card,foul_committed_offensive,foul_committed_penalty,foul_committed_type,foul_won_advantage,foul_won_defensive,…,pass_through_ball,pass_type,period,play_pattern,player,player_id,position,possession,possession_team,possession_team_id,related_events,second,shot_aerial_won,shot_body_part,shot_deflected,shot_end_location,shot_first_time,shot_freeze_frame,shot_key_pass_id,shot_one_on_one,shot_open_goal,shot_outcome,shot_saved_off_target,shot_saved_to_post,shot_statsbomb_xg,shot_technique,shot_type,substitution_outcome,substitution_outcome_id,substitution_replacement,substitution_replacement_id,tactics,team,team_id,timestamp,type,under_pressure
bool,str,bool,list[f64],i64,bool,list[f64],struct[1],null,str,bool,bool,bool,bool,bool,list[f64],bool,str,bool,bool,bool,bool,bool,bool,bool,str,bool,str,str,f64,bool,str,bool,bool,str,bool,bool,…,bool,str,i64,str,str,f64,str,i64,str,i64,list[str],i64,bool,str,bool,list[f64],bool,list[struct[4]],str,bool,bool,str,bool,bool,f64,str,str,null,f64,null,f64,null,str,i64,str,str,bool
False,"""ff56e821-21e9-4cef-ba2a-7eb5eb…",False,"[33.623682, 40.159019]",3895302,True,"[16.638549, 80.0, … 80.0]",,,,,,,,,,,,,,,,,,,,,,,1.995802,,,,,,,,…,,,1,"""From Kick Off""","""Marvin Ducksch""",12299.0,"""Left Center Forward""",2,"""Werder Bremen""",176,"[""ae74c490-d721-45ca-936f-3975b25d1c94""]",4,,,,,,,,,,,,,,,,,,,,,"""Werder Bremen""",176,"""00:00:04.732""","""Pass""",
False,"""ff56e821-21e9-4cef-ba2a-7eb5eb…",False,"[36.740414, 53.639992]",3895302,True,"[16.638549, 80.0, … 80.0]",,,,,,,,,,,,,,,,,,,,,,,1.995802,,,,,,,,…,,,1,"""From Kick Off""","""Marvin Ducksch""",12299.0,"""Left Center Forward""",2,"""Werder Bremen""",176,"[""ae74c490-d721-45ca-936f-3975b25d1c94""]",4,,,,,,,,,,,,,,,,,,,,,"""Werder Bremen""",176,"""00:00:04.732""","""Pass""",
False,"""ff56e821-21e9-4cef-ba2a-7eb5eb…",False,"[41.681402, 24.747151]",3895302,True,"[16.638549, 80.0, … 80.0]",,,,,,,,,,,,,,,,,,,,,,,1.995802,,,,,,,,…,,,1,"""From Kick Off""","""Marvin Ducksch""",12299.0,"""Left Center Forward""",2,"""Werder Bremen""",176,"[""ae74c490-d721-45ca-936f-3975b25d1c94""]",4,,,,,,,,,,,,,,,,,,,,,"""Werder Bremen""",176,"""00:00:04.732""","""Pass""",
False,"""ff56e821-21e9-4cef-ba2a-7eb5eb…",False,"[45.758525, 34.307303]",3895302,True,"[16.638549, 80.0, … 80.0]",,,,,,,,,,,,,,,,,,,,,,,1.995802,,,,,,,,…,,,1,"""From Kick Off""","""Marvin Ducksch""",12299.0,"""Left Center Forward""",2,"""Werder Bremen""",176,"[""ae74c490-d721-45ca-936f-3975b25d1c94""]",4,,,,,,,,,,,,,,,,,,,,,"""Werder Bremen""",176,"""00:00:04.732""","""Pass""",
False,"""ff56e821-21e9-4cef-ba2a-7eb5eb…",False,"[49.77082, 49.754722]",3895302,True,"[16.638549, 80.0, … 80.0]",,,,,,,,,,,,,,,,,,,,,,,1.995802,,,,,,,,…,,,1,"""From Kick Off""","""Marvin Ducksch""",12299.0,"""Left Center Forward""",2,"""Werder Bremen""",176,"[""ae74c490-d721-45ca-936f-3975b25d1c94""]",4,,,,,,,,,,,,,,,,,,,,,"""Werder Bremen""",176,"""00:00:04.732""","""Pass""",
