## Collect data

In [1]:
import csv
import polars as pl
from great_tables import GT, html, style, loc
import polars.selectors as cs


# Path of the CSV file that `_collect_data` writes the menu records to.
input_filename = "comebuy.csv"
# Header names of that CSV. Every record in `_collect_data` lists its values
# in exactly this order, so the two must be kept in sync.
columns = [
    "類別",
    "品名",
    "總熱量(Kcal)",
    "標準糖量(g)",
    "咖啡因總含量(mg)",
    "過敏原_麩質製品",
    "過敏原_牛奶製品",
    "售價(大杯)",
]


def _collect_data(filename) -> None:
    """Write the hard-coded drink records to ``filename`` as a CSV file.

    The file gets one header row taken from the module-level ``columns``
    list, followed by one row per record defined below.

    Args:
        filename: Path of the CSV file to create; an existing file is
            overwritten.
    """
    # One tuple per drink; values are positionally aligned with `columns`.
    records = (
        ("原葉茶", "鮮萃大麥紅茶", 91.8, 21.5, 118.1, "Y", "N", 40),
        ("原葉茶", "海神", 183.6, 43.1, 148.8, "N", "N", 45),
        ("奶茶", "鮮萃大麥奶茶", 525.4, 50.1, 177.1, "Y", "Y", 60),
        ("奶茶", "海神奶茶", 525.4, 50.1, 223.2, "N", "Y", 65),
        ("鮮奶茶", "四季春拿鐵", 298.1, 52.3, 216.0, "N", "Y", 65),
        ("鮮奶茶", "玫瑰普洱拿鐵", 298.1, 52.3, 294.8, "N", "Y", 70),
    )
    rows = [dict(zip(columns, record)) for record in records]

    # `encoding` and `newline` are passed explicitly so the result does not
    # depend on platform defaults (recommended on Windows).
    with open(filename, "w", encoding="utf-8", newline="\n") as csv_file:
        menu_writer = csv.DictWriter(csv_file, fieldnames=columns)
        menu_writer.writeheader()
        for row in rows:
            menu_writer.writerow(row)


# Create the CSV on disk so the later cells can read it back.
_collect_data(input_filename)

## Generate df

In [2]:
# Column selectors shared by the data preparation and the table styling.
# Selects every column whose name contains "過敏原" (the allergen flags).
allergy_cols = cs.contains("過敏原")
# Selects every column whose name contains "咖啡因": the raw caffeine amount
# and the "咖啡因佔每日建議攝取量(%)" column that `create_bar` adds.
caffeine_cols = cs.contains("咖啡因")


def create_bar(
    df_: pl.DataFrame,
    max_width: int,
    height: int,
    daily_limit: float = 300,
) -> pl.DataFrame:
    """Append the caffeine share of the daily limit, as a number and a bar.

    Two columns are added after the existing ones:

    * ``caff_perc`` -- ``咖啡因總含量(mg)`` expressed as an integer percentage
      of ``daily_limit`` (the value is cast to ``Int64``, so 49.6 becomes 49).
    * ``咖啡因佔每日建議攝取量(%)`` -- the percentage followed by ``%`` and an
      HTML snippet: an outer ``<div>`` that is ``max_width`` px wide wrapping
      an inner ``<div>`` whose width is the same ratio scaled to
      ``max_width`` and rounded to two decimals.

    Args:
        df_: Frame containing a numeric ``咖啡因總含量(mg)`` column.
        max_width: Width in pixels of the outer (100 %) bar.
        height: Height in pixels of the inner bar.
        daily_limit: Caffeine amount in mg that corresponds to 100 %.
            Defaults to 300, the value previously hard-coded here.

    Returns:
        A new frame with the original columns, then ``caff_perc``, then the
        HTML bar column.

    Raises:
        ValueError: If ``daily_limit`` is not positive.
    """
    if daily_limit <= 0:
        raise ValueError("daily_limit must be positive")

    # Fraction of the daily limit; reused for both the number and the bar.
    share = pl.col("咖啡因總含量(mg)").truediv(daily_limit)

    # The bar is assembled from three string pieces: the opening markup up to
    # the inner div's `width:`, the computed width, and the closing markup.
    bar_open = pl.lit(f"""\
            <div style="width: {max_width}px; background-color: #434343;">\
                <div style="height:{height}px;width:
            """)
    bar_width = share.mul(max_width).round(2).cast(pl.Utf8).add("px")
    bar_close = pl.lit("""\
            ;background-color:lightblue;"></div>\
            </div>       
            """)
    # Built as an expression rather than via temporary helper columns, so
    # columns of `df_` that happen to share a helper's name are never touched.
    bar_html = pl.concat_str([bar_open, bar_width, bar_close])

    return df_.with_columns(
        caff_perc=share.mul(100).cast(pl.Int64),
    ).select(
        *df_.columns,
        "caff_perc",
        pl.col("caff_perc")
        .cast(pl.Utf8)
        .add("%")
        .add(bar_html)
        .alias("咖啡因佔每日建議攝取量(%)"),
    )


def tweak_df(filename: str) -> pl.DataFrame:
    """Load the menu CSV and prepare it for display.

    Steps, in order: read ``filename`` with polars, replace ``"N"`` with an
    empty string and ``"Y"`` with ``"🈶"`` in the columns selected by
    ``allergy_cols``, then add the caffeine columns via ``create_bar`` using
    a 100 px wide, 20 px high bar.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The prepared frame.
    """
    menu_raw = pl.read_csv(filename)
    menu_flagged = menu_raw.with_columns(
        allergy_cols.str.replace_many(["N", "Y"], ["", "🈶"])
    )
    return create_bar(menu_flagged, max_width=100, height=20)


# Read the same path the CSV was written to (`input_filename`) instead of
# repeating the literal, so the writer and reader cannot drift apart.
df = tweak_df(input_filename)
df

類別,品名,總熱量(Kcal),標準糖量(g),咖啡因總含量(mg),過敏原_麩質製品,過敏原_牛奶製品,售價(大杯),caff_perc,咖啡因佔每日建議攝取量(%)
str,str,f64,f64,f64,str,str,i64,i64,str
"""原葉茶""","""鮮萃大麥紅茶""",91.8,21.5,118.1,"""🈶""","""""",40,39,"""39% <div style=""wid…"
"""原葉茶""","""海神""",183.6,43.1,148.8,"""""","""""",45,49,"""49% <div style=""wid…"
"""奶茶""","""鮮萃大麥奶茶""",525.4,50.1,177.1,"""🈶""","""🈶""",60,59,"""59% <div style=""wid…"
"""奶茶""","""海神奶茶""",525.4,50.1,223.2,"""""","""🈶""",65,74,"""74% <div style=""wid…"
"""鮮奶茶""","""四季春拿鐵""",298.1,52.3,216.0,"""""","""🈶""",65,72,"""72% <div style=""wid…"
"""鮮奶茶""","""玫瑰普洱拿鐵""",298.1,52.3,294.8,"""""","""🈶""",70,98,"""98% <div style=""wid…"


## Make table

In [3]:
def make_gt(df: pl.DataFrame) -> GT:
    """Build the styled Comebuy menu table from the prepared frame.

    The table uses ``品名`` as row names grouped by ``類別``, puts the
    allergen and caffeine columns under their own spanners, hides the
    numeric ``caff_perc`` helper column and applies these highlights:

    * rows where every allergen column is ``🈶`` get a dashed green border
      around the allergen pair (left cell: top/left/bottom, right cell:
      top/right/bottom);
    * caffeine cells turn bold red where ``caff_perc`` exceeds 80;
    * the calorie cell is filled and bold where it exceeds 500.

    Args:
        df: Frame holding the menu columns plus ``caff_perc`` and the
            caffeine bar column.

    Returns:
        The configured ``GT`` object.
    """
    # Header labels, each broken over two lines.
    header_labels = {
        "總熱量(Kcal)": html("總熱量<br>(Kcal)"),
        "標準糖量(g)": html("標準糖量<br>(g)"),
        "咖啡因總含量(mg)": html("總含量<br>(mg)"),
        "咖啡因佔每日建議攝取量(%)": html(
            "佔每日建議攝取量<br>300mg百分比"
        ),
        "過敏原_麩質製品": html("麩質<br></br>"),
        "過敏原_牛奶製品": html("牛奶<br></br>"),
        "售價(大杯)": html("大杯<br>售價"),
    }

    # True only for rows in which all allergen columns carry the marker.
    every_allergen = pl.all_horizontal(allergy_cols.eq("🈶"))

    def dashed_green(sides):
        # Border style shared by both halves of the allergen box.
        return style.borders(
            sides=sides, color="green", style="dashed", weight="3px"
        )

    source_note = html("""\
             資料來源：
            <I>
            <br>
            1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/
            <br>
            2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/
            </i>\
            """)

    # Stage 1: structure, labels and column layout.
    layout = (
        GT(df)
        .tab_header("Comebuy", "2024 July 18")
        .tab_stub(rowname_col="品名", groupname_col="類別")
        .tab_stubhead("茶種")
        .tab_spanner(label="過敏原", columns=allergy_cols)
        .tab_spanner(label="咖啡因資訊", columns=caffeine_cols)
        .tab_options(table_background_color="#F1F1F1")
        .cols_label(**header_labels)
        .cols_move_to_start(
            ["售價(大杯)", "總熱量(Kcal)", "標準糖量(g)", allergy_cols]
        )
        .cols_align(align="center", columns=[allergy_cols, caffeine_cols])
        .cols_hide("caff_perc")
    )

    # Stage 2: conditional cell highlights, in the original order.
    highlighted = (
        layout.tab_style(
            style=dashed_green(["top", "left", "bottom"]),
            locations=loc.body(columns="過敏原_麩質製品", rows=every_allergen),
        )
        .tab_style(
            style=dashed_green(["top", "right", "bottom"]),
            locations=loc.body(columns="過敏原_牛奶製品", rows=every_allergen),
        )
        .tab_style(
            style=[style.text(color="red"), style.text(weight="Bold")],
            locations=loc.body(
                columns=caffeine_cols, rows=pl.col("caff_perc").gt(80)
            ),
        )
        .tab_style(
            style=[style.fill(color="papayawhip"), style.text(weight="Bold")],
            locations=loc.body(
                columns="總熱量(Kcal)", rows=pl.col("總熱量(Kcal)").gt(500)
            ),
        )
    )

    # Stage 3: footer and overall theme.
    return highlighted.tab_source_note(source_note).opt_stylize(
        style=1, color="pink"
    )


# Build the table from the prepared frame; the bare name on the last line
# makes the notebook render it.
gtbl = make_gt(df)
gtbl

Comebuy,Comebuy,Comebuy,Comebuy,Comebuy,Comebuy,Comebuy,Comebuy
2024 July 18,2024 July 18,2024 July 18,2024 July 18,2024 July 18,2024 July 18,2024 July 18,2024 July 18
茶種,大杯 售價,總熱量 (Kcal),標準糖量 (g),過敏原,過敏原,咖啡因資訊,咖啡因資訊
茶種,大杯 售價,總熱量 (Kcal),標準糖量 (g),麩質,牛奶,總含量 (mg),佔每日建議攝取量 300mg百分比
原葉茶,原葉茶,原葉茶,原葉茶,原葉茶,原葉茶,原葉茶,原葉茶
鮮萃大麥紅茶,40,91.8,21.5,🈶,,118.1,39%
海神,45,183.6,43.1,,,148.8,49%
奶茶,奶茶,奶茶,奶茶,奶茶,奶茶,奶茶,奶茶
鮮萃大麥奶茶,60,525.4,50.1,🈶,🈶,177.1,59%
海神奶茶,65,525.4,50.1,,🈶,223.2,74%
鮮奶茶,鮮奶茶,鮮奶茶,鮮奶茶,鮮奶茶,鮮奶茶,鮮奶茶,鮮奶茶
四季春拿鐵,65,298.1,52.3,,🈶,216.0,72%
玫瑰普洱拿鐵,70,298.1,52.3,,🈶,294.8,98%
資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/,資料來源：  1: https://www.comebuy2002.com.tw/safety_promise-detail/calorie01/  2: https://www.comebuy2002.com.tw/products-detail/comebuy_menu/


## Output HTML

In [4]:
# Path of the HTML file the rendered table is written to.
output_filename = "comebuy.html"


def _write_html(gtbl: GT, filename: str) -> None:
    """Write the table's raw HTML to ``filename``.

    Args:
        gtbl: Table whose ``as_raw_html()`` output is saved.
        filename: Path of the HTML file to create; an existing file is
            overwritten.
    """
    # Encode explicitly as UTF-8: the table contains CJK text and an emoji,
    # which the platform default encoding (e.g. on Windows) may be unable to
    # represent, causing a UnicodeEncodeError or mis-encoded output.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(gtbl.as_raw_html())


# Save the rendered table next to the notebook.
_write_html(gtbl, output_filename)