# Flow Recipe Book: Real Examples Using StatsBomb Open Data

This page provides a collection of practical Flow recipes using real football data. Each example demonstrates a common task you might want to perform when analysing event data - all using the `penaltyblog.matchflow` tools and StatsBomb open data.

## Find All Shots in the Match

In [None]:
from penaltyblog.matchflow import Flow

# Pull the event feed for a single StatsBomb open-data match and materialize it.
match_id = 266191
event_source = Flow.statsbomb.from_github_file(match_id, type="events")
events = event_source.materialize()


def _is_shot(record):
    """Return True when the record's nested ``type`` name is "Shot"."""
    event_type = record.get("type", {})
    return event_type.get("name") == "Shot"


# Keep only the shot events and gather them into a list-like result.
shots = events.filter(_is_shot).collect()

# Show just the first five shot records.
for shot in shots[:5]:
    print(shot)

{'id': '8da37d1a-8477-436a-babf-426a96c9fe78', 'index': 70, 'period': 1, 'timestamp': '00:01:27.683', 'minute': 1, 'second': 27, 'type': {'id': 16, 'name': 'Shot'}, 'possession': 6, 'possession_team': {'id': 210, 'name': 'Real Sociedad'}, 'play_pattern': {'id': 3, 'name': 'From Free Kick'}, 'team': {'id': 210, 'name': 'Real Sociedad'}, 'player': {'id': 6669, 'name': 'David Zurutuza Veillet'}, 'position': {'id': 15, 'name': 'Left Center Midfield'}, 'location': [110.0, 38.2], 'duration': 0.8123, 'under_pressure': True, 'related_events': ['2cff7b1f-e203-4055-ba78-e8a9ee428d3a', 'e66fdb14-3432-474a-bf98-af8289f2af95'], 'shot': {'statsbomb_xg': 0.13791905, 'end_location': [119.3, 37.5, 2.0], 'key_pass_id': 'b9240206-5a95-41b2-9353-94c65c504c27', 'outcome': {'id': 100, 'name': 'Saved'}, 'technique': {'id': 93, 'name': 'Normal'}, 'aerial_won': True, 'body_part': {'id': 37, 'name': 'Head'}, 'type': {'id': 87, 'name': 'Open Play'}, 'freeze_frame': [{'location': [110.0, 37.3], 'player': {'id': 5

## Sum xG Per Player

In [4]:
# Build the per-player xG table one step at a time.

# 1. Restrict the events to shots.
shot_events = events.filter(lambda rec: rec.get("type", {}).get("name") == "Shot")

# 2. Lift the nested player name and xG value into flat fields
#    (a shot record without an xG value contributes 0).
shots_with_fields = shot_events.assign(
    player_name=lambda rec: rec.get("player", {}).get("name"),
    xg=lambda rec: rec.get("shot", {}).get("statsbomb_xg", 0),
)

# 3. Sum xG within each player and order the totals from highest to lowest.
xg_by_player = shots_with_fields.group_by("player_name").summary(
    total_xg=("xg", "sum")
)
results = xg_by_player.sort(by="total_xg", reverse=True)

# Report the five largest totals; names are padded to 25 characters.
top_rows = results.collect()[:5]
for row in top_rows:
    print(f"{row['player_name']:25} {row['total_xg']:.2f}")

Carlos Alberto Vela Garrido 0.57
Willian José da Silva     0.40
Xabier Prieto Argarate    0.33
Lionel Andrés Messi Cuccittini 0.20
Javier Alejandro Mascherano 0.19


## Calculate Shot Accuracy Per Team

In [5]:
def accuracy(group):
    """Return the percentage of records in ``group`` that count as on target.

    A record counts as on target when its ``shot_type`` value is exactly
    "Goal" or "Saved"; records with any other value (or no ``shot_type``
    key) count only towards the total.

    Args:
        group: A sized collection of dict-like records.

    Returns:
        The on-target share as a percentage rounded to one decimal place,
        or ``0`` when ``group`` is empty.
    """
    # NOTE(review): only these two outcome names are treated as on target;
    # confirm against the data provider's outcome list if others should count.
    hits = 0
    for record in group:
        if record.get("shot_type") in ("Goal", "Saved"):
            hits += 1

    n_shots = len(group)
    if not n_shots:
        # Avoid dividing by zero for an empty group.
        return 0
    return round(100 * hits / n_shots, 1)


# Start from a fresh event flow for the same match (no materialize() call here).
events = Flow.statsbomb.from_github_file(match_id, type="events")

# Keep shots only, then expose the outcome name and team name as flat fields.
shot_events = events.filter(lambda rec: rec.get("type", {}).get("name") == "Shot")
shots_with_fields = shot_events.assign(
    shot_type=lambda rec: rec.get("shot", {}).get("outcome", {}).get("name"),
    team_name=lambda rec: rec.get("team", {}).get("name"),
)

# Per team: number of shots plus the custom accuracy() aggregate.
shots_by_team = shots_with_fields.group_by("team_name")
results = shots_by_team.summary(shots="count", shot_accuracy=accuracy)

# Print one summary record per team.
team_rows = results.collect()
for row in team_rows:
    print(row)

{'team_name': 'Real Sociedad', 'shots': 18, 'shot_accuracy': 33.3}
{'team_name': 'Barcelona', 'shots': 10, 'shot_accuracy': 20.0}
