In [12]:
# (Gerekirse) kurulum
!pip install -q --upgrade kagglehub

import kagglehub
path = kagglehub.dataset_download("saurabhshahane/statsbomb-football-data")
print("Dataset root:", path)

Dataset root: C:\Users\stmus\.cache\kagglehub\datasets\saurabhshahane\statsbomb-football-data\versions\1181


In [14]:
import pandas as pd, json
from pandas import json_normalize
from pathlib import Path

# --- 0) KaggleHub'ın verdiği kök yolu kullan ---
DATA_DIR = Path(r"C:\Users\stmus\.cache\kagglehub\datasets\saurabhshahane\statsbomb-football-data\versions\1181") / "data"
print("DATA_DIR:", DATA_DIR)

# --- 1) competitions ---
comps = pd.read_json(DATA_DIR / "competitions.json")
print("competitions:", comps.shape)

# --- 2) matches dosyaları ve mevcut events dosyaları ---
match_files = sorted((DATA_DIR / "matches").rglob("*.json"))
events_dir = DATA_DIR / "events"
event_ids = {int(p.stem) for p in events_dir.glob("*.json")}
print("Bulunan match dosyası sayısı:", len(match_files), "| Bulunan events dosyası sayısı:", len(event_ids))

# Herhangi bir matches dosyasından, events'i gerçekten olan ilk match_id'yi seç
match_id = None
chosen_matches_file = None
for mf in match_files:
    m = pd.read_json(mf)
    ids = m["match_id"].astype(int)
    ok = [i for i in ids if i in event_ids]
    if ok:
        match_id = int(ok[0])
        chosen_matches_file = mf
        matches = m  # ilk bulunanı tut
        break

if match_id is None:
    raise FileNotFoundError("Hiçbir matches dosyasındaki match_id için events/*.json bulunamadı.")

print("Seçilen matches dosyası:", chosen_matches_file.name, "| Seçilen match_id:", match_id)

# --- 3) events'i ROBUST şekilde oku ---
events_path = events_dir / f"{match_id}.json"
print("events_path:", events_path)

def read_events_any(path: Path) -> pd.DataFrame:
    # önce NDJSON dene
    try:
        return pd.read_json(path, lines=True)
    except ValueError:
        # JSON dizi (list) formatına düş
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)  # list[dict]
        return pd.json_normalize(data)

events = read_events_any(events_path)
print("events satır:", len(events))

# --- 4) düzleştir (gerekirse) ---
# read_events_any zaten DataFrame döndürüyor; nested alanları normalize edelim
ev = json_normalize(events.to_dict(orient="records"))

# --- 5) işe yarar sütunlar ---
cols = [
    "id","index","period","timestamp","minute","second",
    "type.name","team.name","possession_team.name","player.name",
    "location","pass.end_location","pass.height.name","pass.outcome.name",
    "shot.outcome.name","shot.statsbomb_xg","shot.body_part.name","shot.technique.name"
]
ev = ev[[c for c in cols if c in ev.columns]].copy()

# --- 6) koordinatları ayır ---
for col in ["location", "pass.end_location"]:
    if col in ev.columns:
        xy = ev[col].apply(pd.Series)
        xy.columns = [f"{col}_x", f"{col}_y"]
        ev = pd.concat([ev, xy], axis=1)

# --- 7) hızlı alt tablolar ---
shots  = ev[ev.get("type.name").eq("Shot")] if "type.name" in ev.columns else pd.DataFrame()
passes = ev[ev.get("type.name").eq("Pass")] if "type.name" in ev.columns else pd.DataFrame()

print("events:", ev.shape, "| shots:", shots.shape, "| passes:", passes.shape)
ev.head(3)

DATA_DIR: C:\Users\stmus\.cache\kagglehub\datasets\saurabhshahane\statsbomb-football-data\versions\1181\data
competitions: (75, 12)
Bulunan match dosyası sayısı: 75 | Bulunan events dosyası sayısı: 3464
Seçilen matches dosyası: 1.json | Seçilen match_id: 9880
events_path: C:\Users\stmus\.cache\kagglehub\datasets\saurabhshahane\statsbomb-football-data\versions\1181\data\events\9880.json
events satır: 3947
events: (3947, 22) | shots: (29, 22) | passes: (1162, 22)


Unnamed: 0,id,index,period,timestamp,minute,second,type.name,team.name,possession_team.name,player.name,...,pass.height.name,pass.outcome.name,shot.outcome.name,shot.statsbomb_xg,shot.body_part.name,shot.technique.name,location_x,location_y,pass.end_location_x,pass.end_location_y
0,2f8319f2-f34c-47fd-977b-332324d7d86c,1,1,00:00:00.000,0,0,Starting XI,Barcelona,Barcelona,,...,,,,,,,,,,
1,43fae5ba-b810-4f74-a16d-cd176c51664f,2,1,00:00:00.000,0,0,Starting XI,Valencia,Barcelona,,...,,,,,,,,,,
2,67ef9f5e-fa09-47a3-ba34-9db104d76596,3,1,00:00:00.000,0,0,Half Start,Barcelona,Barcelona,,...,,,,,,,,,,


In [15]:
from pandas import json_normalize

# matches tablosunu düzleştir (takım adları kolay ulaşılır olsun)
mnorm = json_normalize(matches.to_dict(orient="records"))

cols = ["match_id",
        "home_team.home_team_name",
        "away_team.away_team_name",
        "match_date"]
print(mnorm[cols].head(10))

# Örn: Barcelona içeren maçları listele
m_barca = mnorm[mnorm["home_team.home_team_name"].str.contains("Barcelona", na=False) |
                mnorm["away_team.away_team_name"].str.contains("Barcelona", na=False)]
m_barca[cols].head(10)

   match_id home_team.home_team_name away_team.away_team_name  match_date
0      9880                Barcelona                 Valencia  2018-04-14
1      9912   RC Deportivo La Coruña                Barcelona  2018-04-29
2      9924                Barcelona              Real Madrid  2018-05-06
3      9855                Barcelona            Athletic Club  2018-03-18
4      9827               Las Palmas                Barcelona  2018-03-01
5      9799                    Eibar                Barcelona  2018-02-17
6      9636                Barcelona               Las Palmas  2017-10-01
7      9609                Barcelona                    Eibar  2017-09-19
8      9575                Barcelona               Real Betis  2017-08-20
9      9928                Barcelona               Villarreal  2018-05-09


Unnamed: 0,match_id,home_team.home_team_name,away_team.away_team_name,match_date
0,9880,Barcelona,Valencia,2018-04-14
1,9912,RC Deportivo La Coruña,Barcelona,2018-04-29
2,9924,Barcelona,Real Madrid,2018-05-06
3,9855,Barcelona,Athletic Club,2018-03-18
4,9827,Las Palmas,Barcelona,2018-03-01
5,9799,Eibar,Barcelona,2018-02-17
6,9636,Barcelona,Las Palmas,2017-10-01
7,9609,Barcelona,Eibar,2017-09-19
8,9575,Barcelona,Real Betis,2017-08-20
9,9928,Barcelona,Villarreal,2018-05-09


In [21]:
# Seçtiğin maçı yükle
MATCH_ID = 9924 # buraya istediğin id'yi yaz
events = read_events_any((DATA_DIR / "events" / f"{MATCH_ID}.json"))
ev = json_normalize(events.to_dict(orient="records"))

# alt tablolar
shots  = ev[ev.get("type.name").eq("Shot")].copy()
passes = ev[ev.get("type.name").eq("Pass")].copy()


In [34]:
shots.head()

Unnamed: 0,id,index,period,timestamp,minute,second,possession,duration,type.id,type.name,...,bad_behaviour.card.name,substitution.outcome.id,substitution.outcome.name,substitution.replacement.id,substitution.replacement.name,ball_recovery.offensive,pass.cut_back,dribble.nutmeg,block.offensive,pass.inswinging
184,b89e1c69-a2f3-4ba4-bb18-17ede738ce9d,185,1,00:03:03.474,3,3,6,0.056362,16,Shot,...,,,,,,,,,,
354,5acf5296-cca3-40a5-84d1-90ae01e6f558,355,1,00:06:58.473,6,58,15,0.791119,16,Shot,...,,,,,,,,,,
471,f38c74de-91ba-44b1-b95b-fc7e421536ea,472,1,00:09:22.688,9,22,25,0.451381,16,Shot,...,,,,,,,,,,
660,c1f8c418-e744-4dc8-8297-5b48c95c2c01,661,1,00:14:00.470,14,0,32,0.47498,16,Shot,...,,,,,,,,,,
823,f402b24b-32b1-488e-a14f-de82862adf59,824,1,00:18:46.543,18,46,41,0.872694,16,Shot,...,,,,,,,,,,


In [35]:
passes.head()

Unnamed: 0,id,index,period,timestamp,minute,second,possession,duration,type.id,type.name,...,bad_behaviour.card.name,substitution.outcome.id,substitution.outcome.name,substitution.replacement.id,substitution.replacement.name,ball_recovery.offensive,pass.cut_back,dribble.nutmeg,block.offensive,pass.inswinging
4,d40fa88a-4084-4a97-968d-9c02540537f5,5,1,00:00:00.867,0,0,2,0.922238,30,Pass,...,,,,,,,,,,
7,d8608e08-b199-4dbf-b09d-e880b501b4dd,8,1,00:00:03.113,0,3,2,1.2,30,Pass,...,,,,,,,,,,
10,0a579b70-76ca-450a-a705-bd45dc2a45c4,11,1,00:00:06.092,0,6,2,1.203526,30,Pass,...,,,,,,,,,,
13,9e00d2f8-28af-4fa9-b0a8-939628855ff4,14,1,00:00:07.742,0,7,2,1.85937,30,Pass,...,,,,,,,,,,
16,7ba572f6-415a-4bab-af5c-f857df6ae7e7,17,1,00:00:09.863,0,9,2,1.403779,30,Pass,...,,,,,,,,,,


In [25]:
passes.shape

(1161, 118)

In [26]:
shots.shape

(28, 118)

In [27]:
passes.info

<bound method DataFrame.info of                                         id  index  period     timestamp  \
4     d40fa88a-4084-4a97-968d-9c02540537f5      5       1  00:00:00.867   
7     d8608e08-b199-4dbf-b09d-e880b501b4dd      8       1  00:00:03.113   
10    0a579b70-76ca-450a-a705-bd45dc2a45c4     11       1  00:00:06.092   
13    9e00d2f8-28af-4fa9-b0a8-939628855ff4     14       1  00:00:07.742   
16    7ba572f6-415a-4bab-af5c-f857df6ae7e7     17       1  00:00:09.863   
...                                    ...    ...     ...           ...   
3813  69152c21-36fc-4465-a69e-ae3ba0cbdde6   3814       2  00:48:55.011   
3816  ade7c562-2e56-4a98-88bf-eb8706310d04   3817       2  00:48:58.363   
3822  76e1239c-e93c-4525-a610-5403f26a6bb7   3823       2  00:49:22.862   
3824  a7e10ee1-7425-47e7-8c0d-f818b32b2021   3825       2  00:49:26.134   
3826  ec3c1c6d-3242-477a-bb1b-c52e0b5c3ceb   3827       2  00:49:41.468   

      minute  second  possession  duration  type.id type.name  ... 

In [28]:
passes.describe

<bound method NDFrame.describe of                                         id  index  period     timestamp  \
4     d40fa88a-4084-4a97-968d-9c02540537f5      5       1  00:00:00.867   
7     d8608e08-b199-4dbf-b09d-e880b501b4dd      8       1  00:00:03.113   
10    0a579b70-76ca-450a-a705-bd45dc2a45c4     11       1  00:00:06.092   
13    9e00d2f8-28af-4fa9-b0a8-939628855ff4     14       1  00:00:07.742   
16    7ba572f6-415a-4bab-af5c-f857df6ae7e7     17       1  00:00:09.863   
...                                    ...    ...     ...           ...   
3813  69152c21-36fc-4465-a69e-ae3ba0cbdde6   3814       2  00:48:55.011   
3816  ade7c562-2e56-4a98-88bf-eb8706310d04   3817       2  00:48:58.363   
3822  76e1239c-e93c-4525-a610-5403f26a6bb7   3823       2  00:49:22.862   
3824  a7e10ee1-7425-47e7-8c0d-f818b32b2021   3825       2  00:49:26.134   
3826  ec3c1c6d-3242-477a-bb1b-c52e0b5c3ceb   3827       2  00:49:41.468   

      minute  second  possession  duration  type.id type.name  ..

In [30]:
shots.columns

Index(['id', 'index', 'period', 'timestamp', 'minute', 'second', 'possession',
       'duration', 'type.id', 'type.name',
       ...
       'bad_behaviour.card.name', 'substitution.outcome.id',
       'substitution.outcome.name', 'substitution.replacement.id',
       'substitution.replacement.name', 'ball_recovery.offensive',
       'pass.cut_back', 'dribble.nutmeg', 'block.offensive',
       'pass.inswinging'],
      dtype='object', length=118)

In [33]:
shots.describe

<bound method NDFrame.describe of                                         id  index  period     timestamp  \
184   b89e1c69-a2f3-4ba4-bb18-17ede738ce9d    185       1  00:03:03.474   
354   5acf5296-cca3-40a5-84d1-90ae01e6f558    355       1  00:06:58.473   
471   f38c74de-91ba-44b1-b95b-fc7e421536ea    472       1  00:09:22.688   
660   c1f8c418-e744-4dc8-8297-5b48c95c2c01    661       1  00:14:00.470   
823   f402b24b-32b1-488e-a14f-de82862adf59    824       1  00:18:46.543   
1065  df052d1d-6b66-42c7-a380-f76a0dc10c8e   1066       1  00:23:56.034   
1159  272f61f9-8802-496a-8f89-d91bdd648f53   1160       1  00:25:31.635   
1173  e31e92e2-4905-4ddc-b975-ae7e264b1bfc   1174       1  00:26:06.778   
1237  bc7fa36b-507c-4e91-a9b9-964b692068d8   1238       1  00:27:11.266   
1640  028ec253-7f76-4cec-87ec-ec30383f9766   1641       1  00:37:45.355   
1746  9d1fa265-8332-4dd4-b602-d2a37141bf35   1747       1  00:39:52.331   
1815  89d2458d-6668-4b4a-9c09-54459488c8cf   1816       1  00:41:3

In [29]:
shots.info

<bound method DataFrame.info of                                         id  index  period     timestamp  \
184   b89e1c69-a2f3-4ba4-bb18-17ede738ce9d    185       1  00:03:03.474   
354   5acf5296-cca3-40a5-84d1-90ae01e6f558    355       1  00:06:58.473   
471   f38c74de-91ba-44b1-b95b-fc7e421536ea    472       1  00:09:22.688   
660   c1f8c418-e744-4dc8-8297-5b48c95c2c01    661       1  00:14:00.470   
823   f402b24b-32b1-488e-a14f-de82862adf59    824       1  00:18:46.543   
1065  df052d1d-6b66-42c7-a380-f76a0dc10c8e   1066       1  00:23:56.034   
1159  272f61f9-8802-496a-8f89-d91bdd648f53   1160       1  00:25:31.635   
1173  e31e92e2-4905-4ddc-b975-ae7e264b1bfc   1174       1  00:26:06.778   
1237  bc7fa36b-507c-4e91-a9b9-964b692068d8   1238       1  00:27:11.266   
1640  028ec253-7f76-4cec-87ec-ec30383f9766   1641       1  00:37:45.355   
1746  9d1fa265-8332-4dd4-b602-d2a37141bf35   1747       1  00:39:52.331   
1815  89d2458d-6668-4b4a-9c09-54459488c8cf   1816       1  00:41:31.

In [32]:
#Tüm sütunları genişçe görebilmek için

with pd.option_context("display.max_columns", None, "display.width", None, "display.max_colwidth", None):
    display(passes.head(3))

Unnamed: 0,id,index,period,timestamp,minute,second,possession,duration,type.id,type.name,possession_team.id,possession_team.name,play_pattern.id,play_pattern.name,team.id,team.name,tactics.formation,tactics.lineup,related_events,location,player.id,player.name,position.id,position.name,pass.recipient.id,pass.recipient.name,pass.length,pass.angle,pass.height.id,pass.height.name,pass.end_location,pass.body_part.id,pass.body_part.name,pass.type.id,pass.type.name,carry.end_location,pass.switch,under_pressure,pass.outcome.id,pass.outcome.name,ball_receipt.outcome.id,ball_receipt.outcome.name,pass.aerial_won,duel.type.id,duel.type.name,duel.outcome.id,duel.outcome.name,ball_recovery.recovery_failure,pass.assisted_shot_id,pass.shot_assist,shot.statsbomb_xg,shot.end_location,shot.key_pass_id,shot.first_time,shot.technique.id,shot.technique.name,shot.body_part.id,shot.body_part.name,shot.type.id,shot.type.name,shot.outcome.id,shot.outcome.name,shot.freeze_frame,out,goalkeeper.end_location,goalkeeper.type.id,goalkeeper.type.name,goalkeeper.position.id,goalkeeper.position.name,pass.outswinging,pass.technique.id,pass.technique.name,clearance.aerial_won,clearance.head,clearance.body_part.id,clearance.body_part.name,pass.cross,goalkeeper.outcome.id,goalkeeper.outcome.name,interception.outcome.id,interception.outcome.name,pass.through_ball,off_camera,counterpress,goalkeeper.body_part.id,goalkeeper.body_part.name,goalkeeper.technique.id,goalkeeper.technique.name,dribble.outcome.id,dribble.outcome.name,foul_won.defensive,pass.deflected,block.deflection,pass.goal_assist,foul_committed.card.id,foul_committed.card.name,clearance.right_foot,foul_committed.advantage,foul_won.advantage,foul_committed.offensive,foul_committed.type.id,foul_committed.type.name,shot.one_on_one,dribble.overrun,shot.aerial_won,goalkeeper.success_in_play,clearance.left_foot,bad_behaviour.card.id,bad_behaviour.card.name,substitution.outcome.id,substitution.outcome.name,substitution.replacement.id,substitution.replacement.name,ball_recovery.offensive,pass.cut_back,dribble.nutmeg,block.offensive,pass.inswinging
4,d40fa88a-4084-4a97-968d-9c02540537f5,5,1,00:00:00.867,0,0,2,0.922238,30,Pass,220,Real Madrid,9,From Kick Off,220,Real Madrid,,,[6287d4b1-b61d-46ea-b1a0-3dad62c953cc],"[61.0, 40.1]",19677.0,Karim Benzema,23.0,Center Forward,5574.0,Toni Kroos,12.873616,-2.841952,1.0,Ground Pass,"[48.7, 36.3]",40.0,Right Foot,65.0,Kick Off,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,d8608e08-b199-4dbf-b09d-e880b501b4dd,8,1,00:00:03.113,0,3,2,1.2,30,Pass,220,Real Madrid,9,From Kick Off,220,Real Madrid,,,[1dc284ed-a045-4bde-97f2-a3361ce19945],"[48.7, 33.0]",5574.0,Toni Kroos,15.0,Left Center Midfield,5201.0,Sergio Ramos García,22.196396,-2.131306,1.0,Ground Pass,"[36.9, 14.2]",40.0,Right Foot,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10,0a579b70-76ca-450a-a705-bd45dc2a45c4,11,1,00:00:06.092,0,6,2,1.203526,30,Pass,220,Real Madrid,9,From Kick Off,220,Real Madrid,,,[8b448299-6910-4147-86c8-0a9b35c605c5],"[33.3, 15.0]",5201.0,Sergio Ramos García,5.0,Left Center Back,5485.0,Raphaël Varane,43.141857,1.614851,1.0,Ground Pass,"[31.4, 58.1]",40.0,Right Foot,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [31]:
# Yöntem 1: value_counts
shots_per_team = shots["team.name"].value_counts().rename("shots")
print(shots_per_team)

# En çok şut çeken takım:
print("Top team:", shots_per_team.idxmax(), "→", shots_per_team.max())

team.name
Real Madrid    17
Barcelona      11
Name: shots, dtype: int64
Top team: Real Madrid → 17


In [36]:
oyuncu_pas_sayilari = passes["player.name"].value_counts()
print(oyuncu_pas_sayilari)

print("En Pasör Oyuncu:", oyuncu_pas_sayilari.idxmax(), "→", oyuncu_pas_sayilari.max() )

player.name
Ivan Rakitić                           86
Sergio Busquets i Burgos               83
Luka Modrić                            74
Toni Kroos                             72
Marcelo Vieira da Silva Júnior         72
Lionel Andrés Messi Cuccittini         61
Andrés Iniesta Luján                   58
Sergio Ramos García                    57
Jordi Alba Ramos                       48
José Ignacio Fernández Iglesias        46
Gerard Piqué Bernabéu                  45
Karim Benzema                          43
Sergi Roberto Carnicer                 41
Carlos Henrique Casimiro               39
Samuel Yves Umtiti                     36
Marco Asensio Willemsen                36
Raphaël Varane                         35
Marc-André ter Stegen                  34
Gareth Frank Bale                      34
Luis Alberto Suárez Díaz               29
Philippe Coutinho Correia              25
Lucas Vázquez Iglesias                 24
Keylor Navas Gamboa                    20
Nélson Cabral Semedo  

In [37]:
en_cok_pas_alan_oyuncular = passes["pass.recipient.name"].value_counts()
print(en_cok_pas_alan_oyuncular)

print("En Çok Pas Alan Oyuncu:", en_cok_pas_alan_oyuncular.idxmax(), "→", en_cok_pas_alan_oyuncular.max() )

pass.recipient.name
Ivan Rakitić                           79
Luka Modrić                            76
Marcelo Vieira da Silva Júnior         71
Sergio Busquets i Burgos               71
Lionel Andrés Messi Cuccittini         70
Toni Kroos                             68
Andrés Iniesta Luján                   60
Karim Benzema                          49
Sergio Ramos García                    45
José Ignacio Fernández Iglesias        40
Jordi Alba Ramos                       40
Gareth Frank Bale                      38
Marco Asensio Willemsen                37
Luis Alberto Suárez Díaz               37
Gerard Piqué Bernabéu                  36
Sergi Roberto Carnicer                 36
Samuel Yves Umtiti                     35
Carlos Henrique Casimiro               31
Philippe Coutinho Correia              29
Cristiano Ronaldo dos Santos Aveiro    25
Raphaël Varane                         23
Lucas Vázquez Iglesias                 22
José Paulo Bezzera Maciel Júnior       18
Nélson Cabral 