# Interactive dashboard in Jupyter (Plotly + ipywidgets)

Goal:
- Load analysis-ready CSV outputs from the StatsBomb JSON task
- Build a small interactive dashboard (team and player filters)
- Keep it simple and show how this sits at the end of a pipeline

In [37]:
# If plotly is not installed, install it once
import sys

try:
    import plotly
except ImportError:
    !{sys.executable} -m pip install -q plotly

# Imports
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display

## 1. Load pipeline outputs

These CSVs are assumed to be in the `outputs` folder under `BASE_DIR`, which is set below to the local copy of the repo.

In [38]:
from pathlib import Path

# NOTE: machine-specific absolute path. Point this at the local copy of the
# repo before running the notebook on another machine.
BASE_DIR = Path("/Users/robparkes/Library/Mobile Documents/com~apple~CloudDocs/TUD/Thesis/Practice Task - Statsbomb Pipeline")

# Pipeline outputs consumed by the dashboard
events_path = BASE_DIR / "outputs" / "events_basic_15946.csv"
passes_path = BASE_DIR / "outputs" / "passes_15946.csv"

print("Using BASE_DIR:", BASE_DIR)

# Fail fast, and name the file(s) that are missing so the problem can be
# fixed without re-checking each path by hand.
missing_paths = [path for path in (events_path, passes_path) if not path.exists()]
if missing_paths:
    raise FileNotFoundError(
        "Could not find CSVs in outputs folder. Missing: "
        + ", ".join(str(path) for path in missing_paths)
    )

print("Files found successfully.")

Using BASE_DIR: /Users/robparkes/Library/Mobile Documents/com~apple~CloudDocs/TUD/Thesis/Practice Task - Statsbomb Pipeline
Files found successfully.


In [39]:
# Read both pipeline outputs into DataFrames (events first, then passes)
events, passes = (
    pd.read_csv(csv_path) for csv_path in (events_path, passes_path)
)

# Quick sanity check: table dimensions
print("events shape:", events.shape)
print("passes shape:", passes.shape)

# Preview the first three rows of each table
display(events.head(3), passes.head(3))

events shape: (3762, 17)
passes shape: (1163, 20)


Unnamed: 0,match_id,event_id,index,period,timestamp,minute,second,type,team,team_id,player,player_id,possession,possession_team,possession_team_id,x,y
0,15946,9f6e2ecf-6685-45df-a62e-c2db3090f6c1,1,1,00:00:00.000,0,0,Starting XI,Barcelona,217,,,1,Barcelona,217,,
1,15946,0300039d-150d-41e4-b29a-76602ef002e6,2,1,00:00:00.000,0,0,Starting XI,Deportivo Alavés,206,,,1,Barcelona,217,,
2,15946,491e8901-7630-4cc8-b57b-937dddff2eaa,3,1,00:00:00.000,0,0,Half Start,Barcelona,217,,,1,Barcelona,217,,


Unnamed: 0,match_id,event_id,minute,second,team,team_id,player,player_id,recipient,recipient_id,start_x,start_y,end_x,end_y,outcome,length,angle,height,body_part,pass_type
0,15946,549567bd-36de-4ac8-b8dc-6b5d3f1e4be8,0,0,Deportivo Alavés,206,Jonathan Rodríguez Menéndez,6581,Guillermo Alfonso Maripán Loaysa,6855.0,61.0,40.1,33.8,28.0,,29.76995,-2.723027,Ground Pass,Left Foot,Kick Off
1,15946,4e4e4cad-9897-43ec-842d-585a4077f6ce,0,3,Deportivo Alavés,206,Guillermo Alfonso Maripán Loaysa,6855,Rubén Sobrino Pozuelo,6613.0,36.8,27.3,86.5,74.2,Incomplete,68.335205,0.756421,High Pass,Right Foot,
2,15946,be27cc25-92b5-4696-b43c-aad957a6119a,0,7,Barcelona,217,Sergio Busquets i Burgos,5203,Ivan Rakitić,5470.0,33.6,5.9,35.1,18.3,,12.490396,1.450414,High Pass,,Recovery


## 2. Filters

- Team dropdown controls the whole dashboard
- Player dropdown updates based on the chosen team

In [43]:
# Team names that appear in the passes table, in sorted order
teams = sorted(passes["team"].dropna().unique())

# Team selector: starts on the first team, or unset when there are none
team_dropdown = widgets.Dropdown(
    description="Team:",
    options=teams,
    value=next(iter(teams), None),
)

# Player selector: created empty; update_players fills it per team
player_dropdown = widgets.Dropdown(description="Player:", options=[])

def update_players(*args):
    """Repopulate the player dropdown for the currently selected team.

    Accepts and ignores any positional arguments so it can be called
    directly or registered as a widget observer. With no team selected
    the dropdown is emptied; otherwise it lists the team's passers in
    sorted order and selects the first one.
    """
    selected_team = team_dropdown.value

    if selected_team is None:
        names = []
    else:
        on_team = passes["team"] == selected_team
        names = sorted(passes.loc[on_team, "player"].dropna().unique())

    player_dropdown.options = names
    player_dropdown.value = names[0] if names else None

# Rebuild the player list whenever the selected team changes
team_dropdown.observe(update_players, names="value")
# Fill the player dropdown once for the initially selected team
update_players()

# Show both dropdowns in the cell output
display(team_dropdown, player_dropdown)

Dropdown(description='Team:', options=('Barcelona', 'Deportivo Alavés'), value='Barcelona')

Dropdown(description='Player:', options=('Arthur Henrique Ramos de Oliveira Melo', 'Arturo Erasmo Vidal Pardo'…

## 3. Build dashboard figure

Dashboard includes:
1. Events over time (events per minute)  
2. Pass count by player (top 15)  
3. Pass start locations for selected player with useful hover fields

In [41]:
def build_dashboard(team, player):
    """Build the three-panel Plotly figure for one team and one player.

    Panels:
      1. Events per minute for ``team`` (line chart).
      2. The 15 players of ``team`` with the most passes (bar chart).
      3. Pass start locations for ``player`` (scatter) on fixed,
         pitch-sized axes so positions are comparable between players.

    Reads the notebook-level ``events`` and ``passes`` DataFrames.

    Args:
        team: Team name matched against the ``team`` column of both tables.
        player: Player name matched against ``passes["player"]``. ``None``
            leaves the location panel without markers.

    Returns:
        A ``plotly.graph_objects.Figure`` laid out as a 2x2 grid whose
        bottom-right cell is unused.
    """
    # StatsBomb pitch coordinates run x 0-120 and y 0-80 with the origin in
    # the top-left corner (see the StatsBomb open-data specification).
    pitch_length, pitch_width = 120, 80
    # Small margin so markers sitting on a touchline are not clipped
    axis_pad = 2

    # Filter both tables down to the chosen team
    events_team = events[events["team"] == team]
    passes_team = passes[passes["team"] == team]

    # Aggregations
    events_per_min = (
        events_team.groupby("minute")["event_id"]
        .count()
        .reset_index(name="event_count")
    )

    # value_counts sorts descending, so head(15) keeps the busiest passers
    passes_by_player = (
        passes_team["player"]
        .value_counts()
        .head(15)
        .reset_index()
    )
    passes_by_player.columns = ["player", "pass_count"]

    # Player passes for scatter (empty frame when no player is selected)
    if player is not None:
        passes_player = passes_team[passes_team["player"] == player]
    else:
        passes_player = passes_team.iloc[0:0]

    # Layout: 2x2 with bottom right empty
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=(
            "Events per minute",
            "Top 15 passers",
            "Pass start locations",
            ""
        ),
        specs=[
            [{}, {}],
            [{}, None]
        ]
    )

    # Plot 1: events over time
    fig.add_trace(
        go.Scatter(
            x=events_per_min["minute"],
            y=events_per_min["event_count"],
            mode="lines"
        ),
        row=1, col=1
    )

    # Plot 2: passes by player
    fig.add_trace(
        go.Bar(
            x=passes_by_player["player"],
            y=passes_by_player["pass_count"]
        ),
        row=1, col=2
    )

    # Plot 3: pass locations
    if not passes_player.empty:
        # A missing recipient is shown as "unknown"; a missing outcome is
        # shown as "Complete" (presumably the pipeline leaves outcome empty
        # for completed passes - confirm against the upstream export).
        hover_text = (
            "min: " + passes_player["minute"].astype(str) +
            " | sec: " + passes_player["second"].astype(str) +
            " | to: " + passes_player["recipient"].fillna("unknown").astype(str) +
            " | outcome: " + passes_player["outcome"].fillna("Complete").astype(str) +
            " | length: " + passes_player["length"].round(2).astype(str)
        )

        fig.add_trace(
            go.Scatter(
                x=passes_player["start_x"],
                y=passes_player["start_y"],
                mode="markers",
                text=hover_text,
                hovertemplate="%{text}<extra></extra>"
            ),
            row=2, col=1
        )

    # Pin the location panel to the full pitch instead of autoranging to the
    # selected player's passes, and do so even when there are no passes so
    # the panel looks the same for every selection. The y range is given
    # high-to-low, which draws the axis reversed (origin at the top).
    fig.update_xaxes(
        range=[-axis_pad, pitch_length + axis_pad],
        row=2, col=1
    )
    fig.update_yaxes(
        range=[pitch_width + axis_pad, -axis_pad],
        row=2, col=1
    )

    fig.update_layout(
        height=750,
        width=1100,
        title_text=f"Interactive dashboard (Jupyter) — {team}",
        showlegend=False
    )

    return fig

## 4. Render dashboard and make it interactive

Changing team or player updates the dashboard.

In [42]:
# Output area that holds the rendered dashboard
dashboard_output = widgets.Output()


def refresh_dashboard(*args):
    """Redraw the dashboard for the current dropdown selections.

    Accepts and ignores any positional arguments so it can be called
    directly or registered as a widget observer. The previous contents of
    the output area are cleared, then either the new figure or a short
    message (when no team is selected) is written into it.
    """
    dashboard_output.clear_output(wait=True)
    with dashboard_output:
        selected_team = team_dropdown.value
        selected_player = player_dropdown.value
        if selected_team is not None:
            build_dashboard(selected_team, selected_player).show()
        else:
            print("No team available in passes table.")

# Redraw whenever either dropdown's selected value changes
team_dropdown.observe(refresh_dashboard, names="value")
player_dropdown.observe(refresh_dashboard, names="value")

# Show the output area first, then draw the initial dashboard into it
display(dashboard_output)
refresh_dashboard()

Output()

## Reflection (pipeline relevance)

What worked well because the data was structured properly
- The CSV outputs loaded directly with no cleaning
- One row per event and one row per pass made filtering and grouping easy

What would have been painful if the pipeline was not clean
- Nested JSON would force extra parsing inside the visual layer
- Inconsistent ids or missing key fields would break interactivity

Why this reinforces upstream engineering
- Dashboards are only smooth when upstream tables are tidy and consistent
- This is why pipeline design and data modelling matter more than visual polish