# Team Tactical Profiles & Families

Visualize individual team profiles and discover tactical families via K-Means clustering.

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from IPython.display import display, Image
import warnings
warnings.filterwarnings('ignore')

# -- Paths --
# The project folders live in the first sub-directory of `docs` whose name
# contains both "Jorge" and "MacBook".
docs = Path("/Users/jorgepadilla/Documents")
for d in docs.iterdir():
    if "Jorge" in d.name and "MacBook" in d.name and d.is_dir():
        RAW = d / "thesis_data" / "raw_data"
        PROCESSED = d / "thesis_data" / "processed_data"
        REPO = d / "twelve-transfer-modelling"
        break
else:
    # Without this guard a missing folder only shows up further down as a
    # NameError on REPO / PROCESSED, which hides the real cause.
    raise FileNotFoundError(
        f"No directory with 'Jorge' and 'MacBook' in its name found under {docs}"
    )

# All figures produced by this notebook are written here.
FIG_DIR = REPO / "clustering" / "team_dna" / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# One colour per tactical family, indexed by family id.
FAMILY_COLORS = ['#1B4F72', '#922B21', '#1E8449', '#B7950B', '#6C3483']

# Bipolar labels: what negative (left) and positive (right) z-scores mean
BIPOLAR = {
    "Defence":         ("Low Block",      "High Press"),
    "Def. Transition": ("Regroup",        "Counter-Press"),
    "Att. Transition": ("Build-Up",       "Counter-Attack"),
    "Attack":          ("Short Passing",  "Direct / Long"),
    "Penetration":     ("Crosses",        "Carries"),
    "Chance Creation": ("Sustained",      "Direct Chances"),
    "Outcome":         ("Worse",          "Better"),
}

# One row per team-season; read from the processed team-style folder.
qdf = pd.read_parquet(PROCESSED / "team_styles" / "team_qualities.parquet")

# Style dimensions used for profiling and clustering ("Outcome" is kept separate).
style_dims = ["Defence", "Def. Transition", "Att. Transition", "Attack", "Penetration", "Chance Creation"]
print(f"Loaded: {len(qdf):,} team-seasons")

def save_and_show(fig, filename):
    """Write *fig* to ``FIG_DIR / filename`` and display the saved image inline.

    The figure is exported with ``scale=2`` and then shown from the file on
    disk rather than as an interactive figure.
    """
    png_path = str(FIG_DIR / filename)
    fig.write_image(png_path, scale=2)
    saved_image = Image(filename=png_path)
    display(saved_image)


def twelve_dot_strip(team_row, all_data, dims, team_name, subtitle, filename,
                     bg='#1a2332', dot_bg='rgba(74,124,89,0.25)', dot_hl='#6ee06e'):
    """Draw a Twelve Football style dot-strip with bipolar labels per dimension.

    One horizontal strip is drawn per entry of ``dims``. Every non-null value
    of that column in ``all_data`` becomes a small, vertically jittered dot,
    and the value taken from ``team_row`` is drawn on top as a large
    highlighted dot. The figure is saved and displayed via ``save_and_show``.

    Args:
        team_row: Row of the focus team, indexable by each name in ``dims``;
            values must be numeric (they are formatted with ``+.2f``).
        all_data: Table providing the background distribution; indexed by
            each name in ``dims``.
        dims: Ordered dimension names; the first is drawn at the top.
        team_name: Shown in the title and in the hover label.
        subtitle: Shown in smaller text under the title.
        filename: File name of the PNG, passed to ``save_and_show``.
        bg: Paper and plot background colour.
        dot_bg: Colour of the background dots.
        dot_hl: Fill colour of the highlighted dot.
    """
    fig = go.Figure()
    n_dims = len(dims)

    for i, dim in enumerate(dims):
        all_vals = all_data[dim].dropna().values
        team_val = team_row[dim]
        # Per-dimension seed keeps the jitter reproducible between runs.
        rng = np.random.default_rng(42 + i)
        jitter = rng.uniform(-0.18, 0.18, len(all_vals))

        # Background swarm
        fig.add_trace(go.Scatter(
            x=all_vals, y=i + jitter,
            mode='markers',
            marker=dict(color=dot_bg, size=4),
            showlegend=False, hoverinfo='skip',
        ))
        # Highlighted team
        fig.add_trace(go.Scatter(
            x=[team_val], y=[i],
            mode='markers',
            marker=dict(color=dot_hl, size=14,
                        line=dict(color='white', width=2)),
            showlegend=False,
            hovertemplate=f'{dim}: {team_val:+.2f}<extra>{team_name}</extra>',
        ))

    # Pole labels sit just outside the dot area on either side of each strip.
    label_font = dict(color='#999', size=9)
    left_annots = []
    right_annots = []
    for i, dim in enumerate(dims):
        # Dimensions without a BIPOLAR entry fall back to a minus / plus sign.
        # The original fallback was a mis-encoded U+2212 ("âˆ’").
        left, right = BIPOLAR.get(dim, ("\u2212", "+"))
        left_annots.append(
            dict(x=-3.3, y=i, text=f"<i>{left}</i>", showarrow=False,
                 font=label_font, xanchor='right', yanchor='middle',
                 xref='x', yref='y')
        )
        right_annots.append(
            dict(x=3.3, y=i, text=f"<i>{right}</i>", showarrow=False,
                 font=label_font, xanchor='left', yanchor='middle',
                 xref='x', yref='y')
        )

    fig.update_layout(
        height=55 * n_dims + 120, width=750,
        template='plotly_dark',
        paper_bgcolor=bg, plot_bgcolor=bg,
        title=dict(
            text=(f"<b>{team_name}</b>"
                  f"<br><span style='font-size:12px;color:#aaa'>{subtitle}</span>"),
            font=dict(size=16, color='white'),
        ),
        yaxis=dict(
            tickvals=list(range(n_dims)), ticktext=list(dims),
            tickfont=dict(color='white', size=11), showgrid=False,
            # Reversed so the first dimension appears at the top.
            autorange='reversed',
        ),
        xaxis=dict(
            showgrid=False, zeroline=True,
            zerolinecolor='rgba(255,255,255,0.2)', zerolinewidth=1,
            # Fixed range: values beyond +/-3.5 fall outside the visible area.
            showticklabels=False, range=[-3.5, 3.5],
        ),
        annotations=left_annots + right_annots,
        margin=dict(t=80, b=30, l=140, r=120),
    )
    save_and_show(fig, filename)


## Reading a Tactical Profile

Each dot is one team-season from across the world. The highlighted dot is the focus team.

Left and right labels show what each pole of the dimension means.

In [None]:
# -- Three contrasting EPL teams --
# Each entry: (team_id, season, display name, subtitle shown under the title).
examples = [
    (1625, 2024, 'Man City',     'English Premier League 2024/2025'),
    (1646, 2023, 'Burnley',      'English Premier League 2023/2024'),
    (1614, 2024, 'Aston Villa',  'English Premier League 2024/2025'),
]

# Profiles show the style dimensions plus the outcome strip.
all_dims_with_outcome = [*style_dims, 'Outcome']

for tid, season, tname, subtitle in examples:
    is_match = (qdf['team_id'] == tid) & (qdf['season'] == season)
    row = qdf[is_match]
    if row.empty:
        print(f'WARNING: no data for {tname}')
        continue
    slug = tname.lower().replace(' ', '_')
    fname = f"profile_{slug}.png"
    # Only the first matching row is plotted.
    twelve_dot_strip(row.iloc[0], qdf, all_dims_with_outcome, tname, subtitle, fname)


## Finding Tactical Families

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Keep only team-seasons with every style dimension present, then standardise
# each dimension before clustering.
qdf_clean = qdf.dropna(subset=style_dims).copy()
X_raw = qdf_clean[style_dims].values
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

print(f"Clustering {len(qdf_clean):,} team-seasons...\n")
# Print cluster sizes for a range of k as a quick check for lopsided splits.
for k in range(3, 9):
    km = KMeans(n_clusters=k, n_init=20, random_state=42)
    labels = km.fit_predict(X)
    sizes = np.bincount(labels)
    # .tolist() yields plain Python ints; under NumPy >= 2 the list would
    # otherwise print every size as "np.int64(...)".
    print(f"  k={k}: sizes = {sorted(sizes.tolist(), reverse=True)}")

# Number of families used for the rest of the notebook (set by hand).
BEST_K = 5
km_final = KMeans(n_clusters=BEST_K, n_init=20, random_state=42)
qdf_clean["family_id"] = km_final.fit_predict(X)

def name_family(profile, dims):
    """Return a descriptive label for a family from its style profile.

    ``profile`` is indexed by each name in ``dims``. The rules below are
    checked in order and the first one that matches decides the label;
    profiles that match none are labelled "Pragmatic Mid-Block".
    """
    scores = {}
    for dim in dims:
        scores[dim] = profile[dim]

    # High press combined with counter-pressing.
    if scores['Defence'] > 0.4 and scores['Def. Transition'] > 0.3:
        return "Possession & Press" if scores['Attack'] < -0.3 else "Total Football"

    # Direct attacking play combined with direct chance creation.
    if scores['Attack'] > 0.3 and scores['Chance Creation'] > 0.3:
        return "Direct & Reactive" if scores['Defence'] < -0.3 else "All-Action Intensity"

    # Strong lean towards either pole of the penetration dimension.
    if scores['Penetration'] > 0.4:
        return "Progressive Carriers"
    if scores['Penetration'] < -0.4:
        return "Wing & Cross"

    # Low block paired with counter-attacking transitions.
    if scores['Att. Transition'] > 0.3 and scores['Defence'] < -0.3:
        return "Counter-Attack"

    return "Pragmatic Mid-Block"

# Mean style profile of every cluster, keyed by family id.
family_profiles = {
    fid: qdf_clean.loc[qdf_clean["family_id"] == fid, style_dims].mean()
    for fid in range(BEST_K)
}
# Rule-based label for each cluster, derived from its mean profile.
FAMILY_NAMES = {
    fid: name_family(profile, style_dims)
    for fid, profile in family_profiles.items()
}

qdf_clean["family_name"] = qdf_clean["family_id"].map(FAMILY_NAMES)

# Summary table: one line per family with size, share, profile and mean outcome.
print(f"\n{'Family':<25s} {'N':>6s} {'%':>6s}  Profile")
print("-" * 90)
total_rows = len(qdf_clean)
for fid in range(BEST_K):
    mask = qdf_clean["family_id"] == fid
    n = mask.sum()
    pct = n / total_rows * 100
    centroid = family_profiles[fid]
    prof = " | ".join(f"{d}: {centroid[d]:+.2f}" for d in style_dims)
    outcome = qdf_clean.loc[mask, "Outcome"].mean()
    print(f"{FAMILY_NAMES[fid]:<25s} {n:>6,d} {pct:>5.1f}%  {prof}  | Outcome: {outcome:+.2f}")


## The Tactical Families

Faint dots = all team-seasons. Bright dots = team-seasons in this family. Large dot = family centroid (average profile).

In [None]:
# Bipolar pole labels are the same for every family, so build them once.
left_annots = []
right_annots = []
for i, dim in enumerate(style_dims):
    left, right = BIPOLAR.get(dim, ("-", "+"))
    left_annots.append(
        dict(x=-3.3, y=i, text=f"<i>{left}</i>", showarrow=False,
             font=dict(color='#999', size=9), xanchor='right', yanchor='middle',
             xref='x', yref='y'))
    right_annots.append(
        dict(x=3.3, y=i, text=f"<i>{right}</i>", showarrow=False,
             font=dict(color='#999', size=9), xanchor='left', yanchor='middle',
             xref='x', yref='y'))

n_dims = len(style_dims)

# One dot-strip figure per family: all team-seasons in the background, the
# family's members on top, and the family centroid as a large dot.
for fid in range(BEST_K):
    fname_fam = FAMILY_NAMES.get(fid, f'Family {fid}')
    in_family = qdf_clean['family_id'] == fid
    n_teams = in_family.sum()
    pct = n_teams / len(qdf_clean) * 100
    outcome = qdf_clean.loc[in_family, 'Outcome'].mean()
    # Wrap around the palette so that raising BEST_K beyond the number of
    # colours reuses colours instead of raising IndexError.
    fam_color = FAMILY_COLORS[fid % len(FAMILY_COLORS)]

    fig = go.Figure()

    for i, dim in enumerate(style_dims):
        all_vals = qdf_clean[dim].dropna().values
        # Per-dimension seed keeps the jitter reproducible; the background
        # draw must come before the family draw to keep both sequences stable.
        rng = np.random.default_rng(42 + i)

        # Background: every team-season.
        jit = rng.uniform(-0.18, 0.18, len(all_vals))
        fig.add_trace(go.Scatter(
            x=all_vals, y=i + jit,
            mode='markers',
            marker=dict(color='rgba(74,124,89,0.12)', size=3),
            showlegend=False, hoverinfo='skip',
        ))

        # Members of this family.
        fam_vals = qdf_clean.loc[in_family, dim].dropna().values
        jit_f = rng.uniform(-0.18, 0.18, len(fam_vals))
        fig.add_trace(go.Scatter(
            x=fam_vals, y=i + jit_f,
            mode='markers',
            marker=dict(color=fam_color, size=6, opacity=0.6),
            showlegend=False, hoverinfo='skip',
        ))

        # Family centroid (mean profile).
        centroid_val = family_profiles[fid][dim]
        fig.add_trace(go.Scatter(
            x=[centroid_val], y=[i],
            mode='markers',
            marker=dict(color=fam_color, size=16,
                        line=dict(color='white', width=2.5)),
            showlegend=False,
            hovertemplate=f'{dim}: {centroid_val:+.2f}<extra>{fname_fam}</extra>',
        ))

    fig.update_layout(
        height=55 * n_dims + 120, width=750,
        template='plotly_dark',
        paper_bgcolor='#1a2332', plot_bgcolor='#1a2332',
        title=dict(
            text=(f"<b>{fname_fam}</b>"
                  f"<br><span style='font-size:12px;color:#aaa'>"
                  f"{n_teams:,} team-seasons ({pct:.0f}%) \u00b7 Avg outcome {outcome:+.2f}"
                  f"</span>"),
            font=dict(size=16, color='white'),
        ),
        yaxis=dict(
            tickvals=list(range(n_dims)), ticktext=list(style_dims),
            tickfont=dict(color='white', size=11), showgrid=False,
            # Reversed so the first dimension appears at the top.
            autorange='reversed',
        ),
        xaxis=dict(
            showgrid=False, zeroline=True,
            zerolinecolor='rgba(255,255,255,0.2)', zerolinewidth=1,
            showticklabels=False, range=[-3.5, 3.5],
        ),
        annotations=left_annots + right_annots,
        margin=dict(t=80, b=30, l=140, r=120),
    )
    # The family id in the file name keeps files distinct even if two
    # families end up with the same label.
    save_and_show(fig, f"family_{fid}_{fname_fam.lower().replace(' ', '_').replace('&', 'and')}.png")


## Export

In [None]:
# Cluster assignments: identifiers, style dimensions, outcome and family labels.
styles_dir = PROCESSED / "team_styles"
out_cols = ["team_id", "competition_id", "season", *style_dims, "Outcome", "family_id", "family_name"]
out_df_export = qdf_clean[out_cols].copy()
out_path = styles_dir / "team_style_clusters.parquet"
out_path.parent.mkdir(parents=True, exist_ok=True)
out_df_export.to_parquet(out_path, index=False)
print(f"\u2705 Saved: {out_path}  ({len(out_df_export):,} rows)")

# Also update full qualities
# NOTE(review): qdf is not modified in any of the cells visible here, so this
# rewrites the input file loaded at the top with the same rows (without the
# index, because index=False) — confirm this write is still wanted.
qdf.to_parquet(styles_dir / "team_qualities.parquet", index=False)
print(f"\u2705 Saved: team_qualities.parquet  ({len(qdf):,} rows)")
