---
### Load Games
---

In [1]:
from pinsdb.models import Game
import attrs
import polars as pl

from pinsdb.namespace.expressions import Bowling

In [2]:
# Load every recorded game and order them chronologically; games on the same
# date are ordered by game id, then by bowler id.
all_games = list(Game.load_games())
all_games.sort(key=lambda game: (game.date, game.game_id, game.bowler.bowler_id))

# One row per game: flatten each attrs instance to a dict, then keep only the
# "bowler_id" field of the nested "bowler" struct.
game_records = list(map(attrs.asdict, all_games))
bowler_id_expr = pl.col("bowler").struct.field("bowler_id")
bowler_frame = pl.DataFrame(game_records).with_columns(bowler_id_expr).drop("bowler")

[32m2025-02-22 10:23:59.546[0m | [32m[1mSUCCESS [0m | [36mpinsdb.models[0m:[36mload_games[0m:[36m125[0m - [32m[1mLoaded 491 games from the database[0m


In [3]:
import attrs
import polars as pl
from polars import selectors as cs

from pinsdb.namespace.expressions import Bowling

# Column groups, reused when selecting and labelling the summary table.
scoring_columns = ["Pins", "Points", "Lowest", "Highest"]
rates_columns = ["Per Game", "Per Frame", "Per Pin"]
frequency_columns = ["Strikes", "Spares", "Wombats", "Gutters"]

# Size of the "recent games" window and the column label derived from it.
n_recent_games: int = 50
col_recent_games: str = f"Recent Games ({n_recent_games:,})"


# Rebuild the per-game frame from the loaded games: flatten each game to a
# dict, lift "bowler_id" out of the nested "bowler" struct, order the rows by
# date and game id, and drop the struct column.
game_records = [attrs.asdict(game) for game in all_games]
bowler_frame = pl.DataFrame(game_records)
bowler_frame = bowler_frame.with_columns(pl.col("bowler").struct.field("bowler_id"))
bowler_frame = bowler_frame.sort("date", "game_id").drop("bowler")

# One row per frame: turn each game's throws into frames, explode them into
# separate rows, and drop the raw throws.
frames_exploded = (
    bowler_frame.with_columns(pl.col("throws").bowling.construct_frames())
    .explode("frames")
    .drop("throws")
)

# Add the frame-outcome columns provided by the project's `bowling`
# expression namespace.
frame_col = pl.col("frames")
frames_data = frames_exploded.with_columns(
    frame_col.bowling.is_gutter(),
    frame_col.bowling.is_strike(),
    frame_col.bowling.is_spare(),
    frame_col.bowling.is_wombat(),
)

# Output column title -> per-frame flag column that is summed to produce it.
flag_totals = {
    "Strikes": "is_strike",
    "Spares": "is_spare",
    "Wombats": "is_wombat",
    "Gutters": "is_gutter",
}

# Per-bowler frame count plus the total of each outcome flag, ordered by
# strikes, then spares, then wombats (all descending).
summary_detection_table = (
    frames_data.group_by("bowler_id")
    .agg(
        pl.col("frames").count().alias("Frames"),
        *[pl.col(flag).sum().alias(title) for title, flag in flag_totals.items()],
    )
    .sort(["Strikes", "Spares", "Wombats"], descending=True)
)

# Attach a "score" column (from the `bowling` namespace) and the raw pin
# total of each game.
scored_games = bowler_frame.with_columns(
    pl.col("throws").bowling.compute_score(),
    pl.col("throws").list.sum().alias("pins"),
)

pins_col = pl.col("pins")
score_col = pl.col("score")

# Per-bowler totals and extremes, plus the last `n_recent_games` scores as a
# list column.
summary_statistics_table = scored_games.group_by("bowler_id").agg(
    Games=pins_col.count(),
    Pins=pins_col.sum(),
    Points=score_col.sum(),
    Lowest=score_col.min(),
    Highest=score_col.max(),
    **{col_recent_games: score_col.tail(n_recent_games)},
)

# Combine the score statistics with the frame-outcome counts per bowler.
joined_summary = summary_statistics_table.join(summary_detection_table, on="bowler_id")

# Average points per game, per frame, and per pin knocked down.
points_col = pl.col("Points")
rate_exprs = [
    (points_col / pl.col("Games")).round(2).alias("Per Game"),
    (points_col / pl.col("Frames")).round(2).alias("Per Frame"),
    (points_col / pl.col("Pins")).round(3).alias("Per Pin"),
]

# Final column order; the recent-games list column (col_recent_games) is
# deliberately left out of the displayed table.
display_columns = [
    "bowler_id",
    "Games",
    "Frames",
    *scoring_columns,
    *rates_columns,
    *frequency_columns,
]

summary_table = (
    joined_summary.with_columns(rate_exprs)
    .sort("Points", descending=True)
    .select(display_columns)
)
summary_table

bowler_id,Games,Frames,Pins,Points,Lowest,Highest,Per Game,Per Frame,Per Pin,Strikes,Spares,Wombats,Gutters
str,u32,u32,i64,i64,i64,i64,f64,f64,f64,u32,u32,u32,u32
"""Spencer""",103,1060,9507,13192,61,189,128.08,12.45,1.388,238,249,11,4
"""Lucas""",103,1064,9176,12783,28,220,124.11,12.01,1.393,247,221,21,22
"""Cam""",110,1130,9536,12474,45,177,113.4,11.04,1.308,204,209,12,23
"""Ryley""",83,840,7621,10373,51,189,124.98,12.35,1.361,160,228,5,8
"""Jake""",37,375,2885,3580,67,174,96.76,9.55,1.241,55,46,10,20
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Sara""",7,71,533,642,68,122,91.71,9.04,1.205,9,6,2,4
"""Tristan""",5,51,429,522,91,117,104.4,10.24,1.217,6,9,0,0
"""Ryan""",2,22,204,305,112,193,152.5,13.86,1.495,8,6,0,0
"""Karly""",2,20,170,209,104,105,104.5,10.45,1.229,2,3,0,0


In [4]:
from great_tables import GT, style, loc


# Palette name used for each column group.
scoring_palette = "GnBu"
rates_pallete = "Purples"
frequency_palette = "Reds"

# (label, columns) pairs for the spanners drawn above the column groups.
spanner_groups = [
    ("Scoring", scoring_columns),
    ("Rates", rates_columns),
    ("Frequency", frequency_columns),
]

# (columns, palette) pairs; each entry becomes its own data_color call and
# the calls are applied in this order.
color_rules = [
    (["Pins"], scoring_palette),
    (["Points"], scoring_palette),
    (["Lowest", "Highest"], scoring_palette),
    (["Per Game"], rates_pallete),
    (["Per Frame"], rates_pallete),
    (["Per Pin"], rates_pallete),
    (["Strikes", "Spares"], frequency_palette),
    (["Wombats"], frequency_palette),
    (["Gutters"], frequency_palette),
]

# Header and row stub; the subtitle spans the first to the last loaded game.
gt_table = GT(summary_table)
gt_table = gt_table.tab_header(
    title="Bowling Statistics",
    subtitle=f"Overall scoring statistics from {all_games[0].date} to {all_games[-1].date}",
)
gt_table = gt_table.tab_stub(rowname_col="bowler_id")

for spanner_label, spanner_columns in spanner_groups:
    gt_table = gt_table.tab_spanner(label=spanner_label, columns=spanner_columns)

# Disabled: .fmt_nanoplot(col_recent_games, reference_line=200)
for colored_columns, palette_name in color_rules:
    gt_table = gt_table.data_color(columns=colored_columns, palette=palette_name)

# Number formatting: whole numbers for the count columns, three decimals for
# the per-pin rate.
whole_number_columns = ["Pins", "Points", "Frames", "Strikes", "Spares", "Wombats"]
gt_table = gt_table.fmt_number(columns=whole_number_columns, decimals=0)
gt_table = gt_table.fmt_number(columns=["Per Pin"], decimals=3)
gt_table

Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics
Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20,Overall scoring statistics from 2024-07-23 to 2025-02-20
Unnamed: 0_level_2,Games,Frames,Scoring,Scoring,Scoring,Scoring,Rates,Rates,Rates,Frequency,Frequency,Frequency,Frequency
Unnamed: 0_level_3,Games,Frames,Pins,Points,Lowest,Highest,Per Game,Per Frame,Per Pin,Strikes,Spares,Wombats,Gutters
Spencer,103,1060,9507,13192,61,189,128.08,12.45,1.388,238,249,11,4
Lucas,103,1064,9176,12783,28,220,124.11,12.01,1.393,247,221,21,22
Cam,110,1130,9536,12474,45,177,113.4,11.04,1.308,204,209,12,23
Ryley,83,840,7621,10373,51,189,124.98,12.35,1.361,160,228,5,8
Jake,37,375,2885,3580,67,174,96.76,9.55,1.241,55,46,10,20
Alek,31,313,2528,3286,43,175,106.0,10.5,1.3,39,71,4,12
Drew,6,60,562,945,131,197,157.5,15.75,1.681,17,21,0,0
Sara,7,71,533,642,68,122,91.71,9.04,1.205,9,6,2,4
Tristan,5,51,429,522,91,117,104.4,10.24,1.217,6,9,0,0
Ryan,2,22,204,305,112,193,152.5,13.86,1.495,8,6,0,0


In [20]:
# Spot-check: every game bowled by "Lucas" with its computed score attached.
(
    bowler_frame
    .filter(pl.col("bowler_id") == "Lucas")
    .with_columns(pl.col("throws").bowling.compute_score())
)

game_id,throws,date,bowler_id,score
str,list[i64],date,str,i64
"""1""","[0, 0, … 0]",2024-07-23,"""Lucas""",105
"""3""","[8, 0, … 7]",2024-07-23,"""Lucas""",88
"""1""","[0, 8, … 3]",2024-07-30,"""Lucas""",84
"""2""","[10, 7, … 0]",2024-07-30,"""Lucas""",74
"""3""","[7, 0, … 0]",2024-07-30,"""Lucas""",112
…,…,…,…,…
"""1""","[9, 0, … 0]",2025-02-18,"""Lucas""",159
"""2""","[10, 10, … 0]",2025-02-18,"""Lucas""",190
"""3""","[10, 0, … 0]",2025-02-18,"""Lucas""",144
"""1""","[9, 0, … 0]",2025-02-20,"""Lucas""",103


---
### Plot Visuals
---

In [None]:
# Display the per-bowler frame-outcome counts (Frames, Strikes, Spares,
# Wombats, Gutters) aggregated earlier in the notebook.
summary_detection_table

#### Summarize DataFrame

In [None]:
from pinsdb.namespace.compute import score_game, score_pins

import seaborn as sns
import polars as pl
import matplotlib.pyplot as plt

# White theme with a fully transparent axes background.
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})


# One record per game: identifiers, date, and the score / pin totals computed
# from the game's throws.
game_rows = []
for game in all_games:
    game_rows.append(
        {
            "game_id": game.game_id,
            "bowler_id": game.bowler.bowler_id,
            "score": score_game(game.throws),
            "pins": score_pins(game.throws),
            "date": game.date,
        }
    )

sample_data = pl.DataFrame(game_rows)

#### Score Dominance

In [None]:
# Filled, stacked KDE of scores, split by bowler.
sns.displot(
    data=sample_data,
    x="score",
    hue="bowler_id",
    kind="kde",
    multiple="fill",
)

In [None]:
# Empirical cumulative distribution of scores per bowler, as counts.
sns.displot(
    data=sample_data,
    x="score",
    hue="bowler_id",
    kind="ecdf",
    stat="count",
)

#### Score Variance
---

In [None]:
from pinsdb.bowlers import registered_bowlers

# One "magma" colour per registered bowler; also installed as the default
# palette of the dark-grid theme.
bowler_count = len(registered_bowlers)
palette = sns.color_palette("magma", n_colors=bowler_count)
sns.set_theme(style="darkgrid", palette=palette)

# Box plot of scores per bowler, with the individual games overlaid as
# un-jittered grey points.
sns.boxplot(
    data=sample_data,
    x="score",
    y="bowler_id",
    hue="bowler_id",
    palette=palette,
)
sns.stripplot(
    data=sample_data,
    x="score",
    y="bowler_id",
    color=".3",
    jitter=0,
)

#### Pins Ordering

In [None]:
import pandas as pd


# Only one bowler is plotted: the last of the first six registered bowlers.
bowler = registered_bowlers[:6][-1]
print(f"HEATMAP FOR {bowler.bowler_id.upper()}")

# Collect the first nine frames of every game this bowler played.
bowler_rows = []
for game in all_games:
    if game.bowler.bowler_id != bowler.bowler_id:
        continue
    bowler_rows.append(
        {
            "game_id": game.game_id,
            "bowler_id": game.bowler.bowler_id,
            "frames": game.construct_frames()[:9],
        }
    )

# One row per frame.
throws_data = pd.DataFrame(bowler_rows).explode("frames")

# Split each frame into (first, second) throw; a lone [10] frame gets a 0 as
# its second throw, any other frame contributes its first two throws.
throw_pairs = []
for throw in throws_data["frames"].to_list():
    if throw == [10]:
        throw_pairs.append(throw + [0])
    else:
        throw_pairs.append(throw[:2])
throws_data[["first_throw", "second_throw"]] = throw_pairs
# throws_frequency = pd.DataFrame(throws_data['frames'].value_counts()).reset_index()

# Frequency of each (second throw, first throw) combination, drawn as a heatmap.
throws_crosstab = pd.crosstab(
    index=throws_data["second_throw"],
    columns=throws_data["first_throw"],
)
# Assigned so the cell, like the original loop, shows no Axes repr.
heatmap_axes = sns.heatmap(throws_crosstab)

#### Ridge Graph

In [None]:
# Size the palette from the data instead of hard-coding 10 colours: when the
# palette list is shorter than the number of hue levels, seaborn cycles it and
# two bowlers end up sharing a colour.
n_ridge_bowlers = sample_data["bowler_id"].n_unique()
pal = sns.cubehelix_palette(n_ridge_bowlers, rot=-0.25, light=0.7)

# One short, wide facet row per bowler, each coloured by its own hue level.
ridge_graph = sns.FacetGrid(
    sample_data, row="bowler_id", hue="bowler_id", aspect=15, height=0.5, palette=pal
)

# Filled score density for each bowler.
ridge_graph.map(
    sns.kdeplot,
    "score",
    bw_adjust=0.5,
    clip_on=False,
    fill=True,
    alpha=1,
    linewidth=1.5,
)
# White outline drawn over the same density.
ridge_graph.map(sns.kdeplot, "score", clip_on=False, color="w", lw=2, bw_adjust=0.5)
# Baseline at y=0 for every row; color=None is passed so no fixed colour is forced.
ridge_graph.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold on the current axes.

    The text is anchored at (0, 0.2) in axes coordinates, left-aligned and
    vertically centred, and drawn in ``color``. ``x`` is accepted but not used.
    """
    axes = plt.gca()
    text_style = dict(
        fontweight="bold",
        color=color,
        ha="left",
        va="center",
        transform=axes.transAxes,
    )
    axes.text(0, 0.2, label, **text_style)


# Label every facet using the `label` helper.
ridge_graph.map(label, "score")

# Negative spacing makes the facet rows overlap.
ridge_graph.figure.subplots_adjust(hspace=-0.05)

# Strip the titles, y ticks/labels and spines that clash with the overlap.
(
    ridge_graph
    .set_titles("")
    .set(yticks=[], ylabel="")
    .despine(bottom=True, left=True)
)

In [None]:
import itertools
import statistics

# groupby only merges adjacent items, so sort by bowler first (then by date
# and game id).
games_sorted = sorted(all_games, key=lambda g: (g.bowler.bowler_id, g.date, g.game_id))
games_stats = {}

for bowler_name, grouped_games in itertools.groupby(
    games_sorted, key=lambda g: g.bowler.bowler_id
):
    bowler_games = list(grouped_games)
    pins_per_game = [game.score_pins() for game in bowler_games]
    points_per_game = [game.score_game() for game in bowler_games]
    # Ten pins are counted for every frame of every game.
    available_pins = sum(len(game.construct_frames()) * 10 for game in bowler_games)
    pins_knocked = sum(pins_per_game)

    stats = {
        "games": len(bowler_games),
        "total_pins": available_pins,
        "pins": pins_knocked,
        "points": sum(points_per_game),
        "mean_pins": round(statistics.mean(pins_per_game), 1),
        "mean_points": round(statistics.mean(points_per_game), 1),
        "median_pins": statistics.median(pins_per_game),
        "median_points": statistics.median(points_per_game),
        "pct_pins": round((pins_knocked / available_pins) * 100, 1),
    }

    # Long format: one (bowler, variable, value) row per statistic.
    long_stats = pl.DataFrame(stats).unpivot()
    long_stats = long_stats.with_columns(bowler=pl.lit(bowler_name))
    games_stats[bowler_name] = long_stats.select("bowler", "variable", "value")

games_data = pl.concat(games_stats.values())

In [None]:
# Bar chart of the three total-style statistics, one bar per bowler.
total_variables = ["total_pins", "pins", "points"]
totals_only = games_data.filter(pl.col("variable").is_in(total_variables))

g = sns.barplot(data=totals_only, x="variable", y="value", hue="bowler")

# Legend above the plot, with one column per registered bowler.
sns.move_legend(
    g,
    loc="lower center",
    bbox_to_anchor=(0.5, 1),
    ncol=len(registered_bowlers),
    title=None,
)

In [None]:
# Bar chart of every statistic except the three totals, one bar per bowler.
excluded_variables = ["total_pins", "pins", "points"]
non_totals = games_data.filter(~pl.col("variable").is_in(excluded_variables))

g = sns.barplot(data=non_totals, x="variable", y="value", hue="bowler")

# Legend above the plot, with one column per registered bowler.
sns.move_legend(
    g,
    loc="lower center",
    bbox_to_anchor=(0.5, 1),
    ncol=len(registered_bowlers),
    title=None,
)

#### Score Over Time

In [None]:
# Best and worst score per bowler, reshaped to long format and ordered from
# the highest value down. (Max/min pins were considered but are disabled.)
score_col = pl.col("score")
score_extremes = sample_data.group_by("bowler_id").agg(
    score_col.max().alias("max_score"),
    score_col.min().alias("min_score"),
)
strip_data = score_extremes.unpivot(index="bowler_id").sort("value", descending=True)

# Point per bowler at the larger of the two values, with both extremes
# overlaid as un-jittered strip points.
sns.pointplot(
    data=strip_data,
    x="value",
    y="bowler_id",
    hue="bowler_id",
    estimator="max",
)
sns.stripplot(data=strip_data, x="value", y="bowler_id", hue="bowler_id", jitter=0)

In [None]:
# Line plot of score against date, one line per bowler.
g = sns.relplot(
    data=sample_data,
    x="date",
    y="score",
    hue="bowler_id",
    kind="line",
)

# Legend above the plot, with one column per registered bowler.
sns.move_legend(
    g,
    loc="lower center",
    bbox_to_anchor=(0.5, 1),
    ncol=len(registered_bowlers),
    title=None,
)