---
### Load Games
---

In [1]:
from pinsdb.models import Game

In [4]:
all_games = Game.load_games()

[32m2025-02-17 08:52:26.162[0m | [32m[1mSUCCESS [0m | [36mpinsdb.models[0m:[36mload_games[0m:[36m144[0m - [32m[1mLoaded 471 games from the database[0m


In [21]:
(
    bowler_frame
    .with_columns(
        pl.col("throws").bowling.construct_frames()
    )
    .explode("frames")
    .drop("throws")
    .with_columns(
        pl.col("frames").bowling.is_strike(),
        pl.col("frames").bowling.is_spare(),
        pl.col("frames").bowling.is_wombat(),
    )
    .filter(pl.col("is_wombat"))
    .sort(pl.col("frames").list.len(), descending=True)
)

game_id,date,bowler_id,frames,is_strike,is_spare,is_wombat
str,date,str,list[i8],bool,bool,bool
"""2""",2024-08-29,"""Ryley""","[9, 1, 10]",false,false,true
"""2""",2024-08-27,"""Lucas""","[6, 4, 10]",false,false,true
"""2""",2025-02-16,"""Lucas""","[0, 10, 10]",false,false,true
"""3""",2025-02-16,"""Cam""","[7, 3, 10]",false,false,true
"""1""",2025-02-16,"""Spencer""","[3, 7, 10]",false,false,true
…,…,…,…,…,…,…
"""4""",2024-09-10,"""Cam""","[0, 10]",false,true,true
"""4""",2024-09-10,"""Spencer""","[0, 10]",false,true,true
"""1""",2025-01-13,"""Spencer""","[0, 10]",false,true,true
"""2""",2025-01-15,"""Ryley""","[8, 10]",false,false,true


In [None]:
import attrs
import polars as pl

from pinsdb.namespace.expressions import Bowling


bowler_frame = (
    pl.DataFrame([attrs.asdict(game) for game in all_games])
    .with_columns(pl.col("bowler").struct.field("bowler_id"))
    .drop("bowler")
)

summary_detection_table = (
    bowler_frame
    .with_columns(
        pl.col("throws").bowling.construct_frames()
    )
    .explode("frames")
    .drop("throws")
    .with_columns(
        pl.col("frames").bowling.is_strike(),
        pl.col("frames").bowling.is_spare(),
        pl.col("frames").bowling.is_wombat(),
    )
    .group_by("bowler_id")
    .agg(
        pl.col("frames").count().alias("Frames"),
        pl.col("is_strike").sum().alias("Strikes"),
        pl.col("is_spare").sum().alias("Spares"),
        pl.col("is_wombat").sum().alias("Wombats"),
    )
    .sort("Strikes", "Spares", "Wombats", descending=True)
)

summary_statistics_table = (
    bowler_frame
    .with_columns(
        pl.col("throws").bowling.compute_score(),
        pl.col("throws").list.sum().alias("pins"),
    )
    .group_by("bowler_id")
    .agg(
        pl.col("pins").count().alias("Games"),
        pl.col("pins").sum().alias("Pins"),
        pl.col("score").sum().alias("Points"),
    )
    .with_columns(
        (pl.col("Points") / pl.col("Pins")).round(3).alias("Points / Pin")
    )
    .sort("Points / Pin", descending=True)
)

summary_table = summary_statistics_table.join(summary_detection_table, on="bowler_id").sort("Points", descending=True)
summary_table

bowler_id,Games,Pins,Points,Points / Pin,Frames,Strikes,Spares,Wombats
str,u32,i64,i64,f64,u32,u32,u32,u32
"""Spencer""",98,9042,12648,1.399,977,214,212,16
"""Lucas""",98,8732,12215,1.399,980,216,191,25
"""Cam""",105,9041,11935,1.32,1050,184,176,13
"""Ryley""",78,7172,9985,1.392,780,145,181,8
"""Jake""",37,2885,3580,1.241,370,55,46,10
…,…,…,…,…,…,…,…,…
"""Sara""",7,533,642,1.205,70,9,6,2
"""Tristan""",5,429,522,1.217,50,6,9,0
"""Ryan""",2,204,305,1.495,20,6,4,2
"""Karly""",2,170,209,1.229,20,2,3,0


In [6]:
from great_tables import GT, style, loc

gt_table = (
    GT(summary_table)
    .tab_header(
        title="Bowling Statistics",
        subtitle="Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025"
    )
    .tab_stub(rowname_col="bowler_id")
    .tab_spanner(label="Scoring", columns=["Pins", "Points", "Points / Pin"])
    .tab_spanner(label="Frequency", columns=["Strikes", "Spares", "Wombats"])
    .data_color(
        columns=["Pins"],
        palette="GnBu",
    )
    .data_color(
        columns=["Points"],
        palette="GnBu",
    )
    .data_color(
        columns=["Points / Pin"],
        palette="GnBu",
    )
    .data_color(
        columns=["Strikes", "Spares"],
        palette="Oranges",
    )
    .data_color(
        columns=["Wombats"],
        palette="Oranges",
        na_color="white"
    )
)

gt_table = gt_table.fmt_number(columns=["Pins", "Points", "Strikes", "Spares", "Wombats"], decimals=0)
gt_table = gt_table.fmt_number(columns=["Points / Pin"], decimals=3)
gt_table

Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics,Bowling Statistics
"Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025","Overall scoring statistics from Aug 07, 2024 to Feb 10, 2025"
Unnamed: 0_level_2,Games,Scoring,Scoring,Scoring,Frames,Frequency,Frequency,Frequency
Unnamed: 0_level_3,Games,Pins,Points,Points / Pin,Frames,Strikes,Spares,Wombats
Spencer,98,9042,12648,1.399,977,214,212,16
Lucas,98,8732,12215,1.399,980,216,191,25
Cam,105,9041,11935,1.32,1050,184,176,13
Ryley,78,7172,9985,1.392,780,145,181,8
Jake,37,2885,3580,1.241,370,55,46,10
Alek,31,2528,3317,1.312,310,39,64,4
Drew,6,562,945,1.681,60,17,21,0
Sara,7,533,642,1.205,70,9,6,2
Tristan,5,429,522,1.217,50,6,9,0
Ryan,2,204,305,1.495,20,6,4,2


In [None]:
# for game in games:
#     print(f"{game.bowler.bowler_id=}  {game.game_id=} | {game.score_game()}")
#     print(game.construct_frames())

---
### Reconcile Bowler IDs
---

---
### Plot Visuals
---

#### Summarize DataFrame

In [None]:
from pinsdb.namespace.compute import score_game, score_pins

import seaborn as sns
import polars as pl
import matplotlib.pyplot as plt

sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})


sample_data = pl.DataFrame(
    [
        {
            "game_id": game.game_id,
            "bowler_id": game.bowler.bowler_id,
            "score": score_game(game.throws),
            "pins": score_pins(game.throws),
            "date": game.date,
        }
        for game in all_games
    ]
)

#### Score Dominance

In [None]:
sns.displot(sample_data, x="score", hue="bowler_id", kind="kde", multiple="fill")

In [None]:
sns.displot(sample_data, x="score", hue="bowler_id", stat="count", kind="ecdf")

#### Score Variance
---

In [None]:
from pinsdb.bowlers import registered_bowlers

palette = sns.color_palette("magma", n_colors=len(registered_bowlers))
sns.set_theme(style="darkgrid", palette=palette)

sns.boxplot(sample_data, x="score", y="bowler_id", hue="bowler_id", palette=palette)
sns.stripplot(sample_data, x="score", y="bowler_id", color=".3", jitter=0)

#### Pins Ordering

In [None]:
import pandas as pd


for bowler in [registered_bowlers[:6][-1]]:
    print(f"HEATMAP FOR {bowler.bowler_id.upper()}")
    throws_data = pd.DataFrame(
        [
            {
                "game_id": game.game_id,
                "bowler_id": game.bowler.bowler_id,
                "frames": game.construct_frames()[:9],
            }
            for game in all_games
            if game.bowler.bowler_id == bowler.bowler_id
        ]
    )
    throws_data = throws_data.explode("frames")
    throws_data[["first_throw", "second_throw"]] = [
        throw + [0] if throw == [10] else throw[:2]
        for throw in throws_data["frames"].to_list()
    ]
    # throws_frequency = pd.DataFrame(throws_data['frames'].value_counts()).reset_index()

    throws_crosstab = pd.crosstab(
        throws_data["second_throw"], throws_data["first_throw"]
    )
    sns.heatmap(throws_crosstab)
    break

#### Ridge Graph

In [None]:
pal = sns.cubehelix_palette(10, rot=-0.25, light=0.7)
ridge_graph = sns.FacetGrid(
    sample_data, row="bowler_id", hue="bowler_id", aspect=15, height=0.5, palette=pal
)
ridge_graph.map(
    sns.kdeplot,
    "score",
    bw_adjust=0.5,
    clip_on=False,
    fill=True,
    alpha=1,
    linewidth=1.5,
)
ridge_graph.map(sns.kdeplot, "score", clip_on=False, color="w", lw=2, bw_adjust=0.5)
ridge_graph.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    ax = plt.gca()
    ax.text(
        0,
        0.2,
        label,
        fontweight="bold",
        color=color,
        ha="left",
        va="center",
        transform=ax.transAxes,
    )


ridge_graph.map(label, "score")

# Set the subplots to overlap
ridge_graph.figure.subplots_adjust(hspace=-0.05)

# Remove axes details that don't play well with overlap
ridge_graph.set_titles("")
ridge_graph.set(yticks=[], ylabel="")
ridge_graph.despine(bottom=True, left=True)

In [None]:
import itertools
import statistics

games_sorted = sorted(all_games, key=lambda g: (g.bowler.bowler_id, g.date, g.game_id))
games_stats = dict()

for group, games in itertools.groupby(games_sorted, key=lambda g: g.bowler.bowler_id):
    games = list(games)
    total_pins = [game.score_pins() for game in games]
    total_points = [game.score_game() for game in games]
    stats = {
        "games": len(games),
        "total_pins": sum([len(game.construct_frames()) * 10 for game in games]),
        "pins": sum(total_pins),
        "points": sum(total_points),
        "mean_pins": round(statistics.mean(total_pins), 1),
        "mean_points": round(statistics.mean(total_points), 1),
        "median_pins": statistics.median(total_pins),
        "median_points": statistics.median(total_points),
    }
    stats["pct_pins"] = round((stats["pins"] / stats["total_pins"]) * 100, 1)
    games_stats[group] = (
        pl.DataFrame(stats)
        .unpivot()
        .with_columns(bowler=pl.lit(group))
        .select("bowler", "variable", "value")
    )

games_data = pl.concat(games_stats.values())

In [None]:
g = sns.barplot(
    games_data.filter(pl.col("variable").is_in(["total_pins", "pins", "points"])),
    x="variable",
    y="value",
    hue="bowler",
)
sns.move_legend(
    g, "lower center", bbox_to_anchor=(0.5, 1), ncol=len(registered_bowlers), title=None
)

In [None]:
g = sns.barplot(
    games_data.filter(~pl.col("variable").is_in(["total_pins", "pins", "points"])),
    x="variable",
    y="value",
    hue="bowler",
)
sns.move_legend(
    g, "lower center", bbox_to_anchor=(0.5, 1), ncol=len(registered_bowlers), title=None
)

#### Score Over Time

In [None]:
strip_data = (
    sample_data.group_by("bowler_id")
    .agg(
        max_score=pl.col("score").max(),
        min_score=pl.col("score").min(),
        # max_pins=pl.col("pins").max(),
        # min_pins=pl.col("pins").min(),
    )
    .unpivot(index="bowler_id")
    .sort("value", descending=True)
)

sns.pointplot(strip_data, x="value", y="bowler_id", hue="bowler_id", estimator="max")
sns.stripplot(
    strip_data,
    x="value",
    y="bowler_id",
    hue="bowler_id",
    jitter=0,
)

In [None]:
g = sns.relplot(sample_data, x="date", y="score", hue="bowler_id", kind="line")
sns.move_legend(
    g, "lower center", bbox_to_anchor=(0.5, 1), ncol=len(registered_bowlers), title=None
)