In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt

## ALL

In [10]:
#!/usr/bin/env python3
# UNIQUE BOOKS ONLY — single folder layout (no primary/enhanced)
# Files like:
#   ORIGINAL_<K>recommendation.csv
#   enhanced_<Genre>_<RUN>_<K>recommendation.csv
# Output per genre under: <BASE_DIR>/figure/<GENRE>/
#   - <GENRE>_unique_totals.txt
#   - <GENRE>_unique_totals.png
# Plus a master file with all genres:
#   - <BASE_DIR>/figure/ALL_unique_totals.txt

import re
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# ====================== CONFIG ======================
BASE_DIR = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift")

GENRE_COL = "genres_all"
BOOK_COL  = "book_id"

K_LIST = [15, 25, 35]
RUNS   = [25, 400, 1000, 5000, 10000]   # match what's actually present in your folder listing
# Genres as they appear in filenames (underscores ok)
GENRES = [
    "Adult", "Adventure", "Children_s", "Classics", "Drama", "Fantasy",
    "Historical", "Horror", "Mystery", "Nonfiction", "Romance",
    "Science_Fiction", "Thriller"
]
# ====================================================

def ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)

def _normalize_genre_for_match(g: str) -> str:
    x = g.strip().lower().replace("_", " ")
    x = re.sub(r"\bchildren s\b", "children's", x)
    return x

def _split_genres_cell(cell):
    if pd.isna(cell):
        return []
    parts = re.split(r"[;,]", str(cell))
    return [_normalize_genre_for_match(p) for p in parts]

def count_unique_books_for_genre(csv_path: Path, target_genre_token: str) -> int:
    """Count UNIQUE book_id values in this file whose genres include the target genre."""
    if not csv_path.exists():
        raise FileNotFoundError(csv_path)
    df = pd.read_csv(csv_path, usecols=lambda c: c in {BOOK_COL, GENRE_COL})
    missing = {BOOK_COL, GENRE_COL} - set(df.columns)
    if missing:
        raise ValueError(f"{csv_path} missing columns: {missing}")
    tgt = _normalize_genre_for_match(target_genre_token)
    mask = df[GENRE_COL].apply(lambda cell: tgt in _split_genres_cell(cell))
    return df.loc[mask, BOOK_COL].nunique()

def build_unique_df_for_folder(genre: str) -> pd.DataFrame:
    """
    Returns tidy DF for this folder:
      columns = ['genre','K','label','unique_books']
      label ∈ {'ORIGINAL', f'n{run}' for run in RUNS}
    If ORIGINAL_<K> is missing, that K is skipped entirely.
    Missing variants are included with 0 to keep bar alignment.
    """
    rows = []
    for K in K_LIST:
        # ORIGINAL
        orig_path = BASE_DIR / f"ORIGINAL_{K}recommendation.csv"
        try:
            tot_orig = int(count_unique_books_for_genre(orig_path, genre))
        except Exception as e:
            print(f"[WARN] {genre} | K={K}: ORIGINAL missing/invalid -> {e}; skipping this K")
            continue
        rows.append({"genre": genre, "K": K, "label": "ORIGINAL", "unique_books": tot_orig})

        # Variants enhanced_<Genre>_<RUN>_<K>
        for n in RUNS:
            var_path = BASE_DIR / f"enhanced_{genre}_{n}_{K}recommendation.csv"
            try:
                tot_var = int(count_unique_books_for_genre(var_path, genre))
            except Exception as e:
                print(f"[WARN] {genre} | K={K} | n={n}: variant missing/invalid -> {e}; using 0")
                tot_var = 0
            rows.append({"genre": genre, "K": K, "label": f"n{n}", "unique_books": tot_var})

    return pd.DataFrame(rows, columns=["genre","K","label","unique_books"])

def _labels():
    # dynamic label order for plotting/printing
    return ["ORIGINAL"] + [f"n{n}" for n in RUNS]

def make_genre_summary_lines(genre: str, df_uni: pd.DataFrame, include_header: bool) -> list[str]:
    """Build the lines that describe this genre's unique-book totals."""
    labels = _labels()
    lines = []
    if include_header:
        lines.append(f"[{genre}]")
    for K in sorted(df_uni["K"].unique()):
        sub = df_uni[df_uni["K"] == K]
        for lab in labels:
            v = sub[sub["label"] == lab]["unique_books"]
            if v.empty:
                continue
            lines.append(f"K={K} | {lab} unique_books: {int(v.iloc[0])}")
        lines.append("")
    return lines

def write_txt_unique_per_genre(df_uni: pd.DataFrame, out_txt: Path, genre: str):
    """Write the per-genre TXT (no header to match your original style)."""
    lines = make_genre_summary_lines(genre, df_uni, include_header=False)
    ensure_dir(out_txt.parent)
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

def plot_grouped_unique(df_uni: pd.DataFrame, title: str, out_png: Path):
    """Grouped bar chart: x=K, bars=ORIGINAL + dynamic RUN labels; y=unique book count."""
    if df_uni.empty:
        print(f"[INFO] Nothing to plot for {title}")
        return
    labels = _labels()
    K_vals = sorted(df_uni["K"].unique().tolist())
    series = {lab: [] for lab in labels}
    for K in K_vals:
        sub = df_uni[df_uni["K"] == K]
        for lab in labels:
            row = sub[sub["label"] == lab]
            series[lab].append(int(row["unique_books"].iloc[0]) if not row.empty else 0)

    x = list(range(len(K_vals)))
    n_series = len(labels)
    width = 0.8 / n_series

    fig, ax = plt.subplots(figsize=(12, 6))
    for i, lab in enumerate(labels):
        xs = [xx + (i - (n_series-1)/2.0)*width for xx in x]
        ax.bar(xs, series[lab], width, label=lab)

    ax.set_xticks(x)
    ax.set_xticklabels([f"K={K}" for K in K_vals])
    ax.set_xlabel("K")
    ax.set_ylabel("Unique books with target genre")
    ax.set_title(title)
    ax.legend()
    ax.grid(axis="y", alpha=0.2)

    ensure_dir(out_png.parent)
    plt.tight_layout()
    plt.savefig(out_png, dpi=160)
    plt.close(fig)

def main():
    all_lines = []  # accumulate for master file
    master_txt = BASE_DIR / "figure" / "ALL_unique_totals.txt"
    ensure_dir(master_txt.parent)

    for g in GENRES:
        df_uni = build_unique_df_for_folder(g)

        # save per-genre outputs under figure/<GENRE>/
        out_dir = BASE_DIR / "figure" / g
        txt_path = out_dir / f"{g}_unique_totals.txt"
        png_path = out_dir / f"{g}_unique_totals.png"

        # Write individual TXT (no header) and PNG
        write_txt_unique_per_genre(df_uni, txt_path, g)
        plot_grouped_unique(df_uni, title=f"{g} – UNIQUE books (no clustering)", out_png=png_path)
        print(f"[OK] Wrote {txt_path} and {png_path}")

        # Append this genre's block (with header) to the master list
        all_lines.extend(make_genre_summary_lines(g, df_uni, include_header=True))

    # Write the combined master TXT once at the end
    with open(master_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(all_lines))
    print(f"[OK] Wrote master summary → {master_txt}")

if __name__ == "__main__":
    main()


[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Adult/Adult_unique_totals.txt and /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Adult/Adult_unique_totals.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Adventure/Adventure_unique_totals.txt and /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Adventure/Adventure_unique_totals.png
[WARN] Children_s | K=15 | n=400: variant missing/invalid -> /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/enhanced_Children_s_400_15recommendation.csv; using 0
[WARN] Children_s | K=15 | n=5000: variant missing/invalid -> /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/enhanced_Children_s_5000_15recommendation.csv; using 0
[WARN] Children_s | K=25 | n=400: variant missing/invalid -> /home/mos

In [5]:
#!/usr/bin/env python3
# UNIQUE BOOKS ONLY — single folder layout (no primary/enhanced)
# Files like:
#   ORIGINAL_<K>recommendation.csv
#   enhanced_<Genre>_<RUN>_<K>recommendation.csv
# Output per genre under: <BASE_DIR>/figure/<GENRE>/
#   - <GENRE>_unique_totals.txt
#   - <GENRE>_unique_totals.png
# Plus a master file with all genres:
#   - <BASE_DIR>/figure/ALL_unique_totals.txt

import re
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# ====================== CONFIG ======================
BASE_DIR = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack")

GENRE_COL = "genres_all"
BOOK_COL  = "book_id"

K_LIST = [15, 25, 35]
RUNS   = [25, 400, 1000, 5000, 10000]   # match what's actually present in your folder listing
# Genres as they appear in filenames (underscores ok)
GENRES = [
    "Adult", "Adventure", "Children_s", "Classics", "Drama", "Fantasy",
    "Historical", "Horror", "Mystery", "Nonfiction", "Romance",
    "Science_Fiction", "Thriller"
]
# ====================================================

def ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)

def _normalize_genre_for_match(g: str) -> str:
    x = g.strip().lower().replace("_", " ")
    x = re.sub(r"\bchildren s\b", "children's", x)
    return x

def _split_genres_cell(cell):
    if pd.isna(cell):
        return []
    parts = re.split(r"[;,]", str(cell))
    return [_normalize_genre_for_match(p) for p in parts]

def count_unique_books_for_genre(csv_path: Path, target_genre_token: str) -> int:
    """Count UNIQUE book_id values in this file whose genres include the target genre."""
    if not csv_path.exists():
        raise FileNotFoundError(csv_path)
    df = pd.read_csv(csv_path, usecols=lambda c: c in {BOOK_COL, GENRE_COL})
    missing = {BOOK_COL, GENRE_COL} - set(df.columns)
    if missing:
        raise ValueError(f"{csv_path} missing columns: {missing}")
    tgt = _normalize_genre_for_match(target_genre_token)
    mask = df[GENRE_COL].apply(lambda cell: tgt in _split_genres_cell(cell))
    return df.loc[mask, BOOK_COL].nunique()

def build_unique_df_for_folder(genre: str) -> pd.DataFrame:
    """
    Returns tidy DF for this folder:
      columns = ['genre','K','label','unique_books']
      label ∈ {'ORIGINAL', f'n{run}' for run in RUNS}
    If ORIGINAL_<K> is missing, that K is skipped entirely.
    Missing variants are included with 0 to keep bar alignment.
    """
    rows = []
    for K in K_LIST:
        # ORIGINAL
        orig_path = BASE_DIR / f"ORIGINAL_{K}recommendation.csv"
        try:
            tot_orig = int(count_unique_books_for_genre(orig_path, genre))
        except Exception as e:
            print(f"[WARN] {genre} | K={K}: ORIGINAL missing/invalid -> {e}; skipping this K")
            continue
        rows.append({"genre": genre, "K": K, "label": "ORIGINAL", "unique_books": tot_orig})

        # Variants enhanced_<Genre>_<RUN>_<K>
        for n in RUNS:
            var_path = BASE_DIR / f"enhanced_{genre}_{n}_{K}recommendation.csv"
            try:
                tot_var = int(count_unique_books_for_genre(var_path, genre))
            except Exception as e:
                print(f"[WARN] {genre} | K={K} | n={n}: variant missing/invalid -> {e}; using 0")
                tot_var = 0
            rows.append({"genre": genre, "K": K, "label": f"n{n}", "unique_books": tot_var})

    return pd.DataFrame(rows, columns=["genre","K","label","unique_books"])

def _labels():
    # dynamic label order for plotting/printing
    return ["ORIGINAL"] + [f"n{n}" for n in RUNS]

def make_genre_summary_lines(genre: str, df_uni: pd.DataFrame, include_header: bool) -> list[str]:
    """Build the lines that describe this genre's unique-book totals."""
    labels = _labels()
    lines = []
    if include_header:
        lines.append(f"[{genre}]")
    for K in sorted(df_uni["K"].unique()):
        sub = df_uni[df_uni["K"] == K]
        for lab in labels:
            v = sub[sub["label"] == lab]["unique_books"]
            if v.empty:
                continue
            lines.append(f"K={K} | {lab} unique_books: {int(v.iloc[0])}")
        lines.append("")
    return lines

def write_txt_unique_per_genre(df_uni: pd.DataFrame, out_txt: Path, genre: str):
    """Write the per-genre TXT (no header to match your original style)."""
    lines = make_genre_summary_lines(genre, df_uni, include_header=False)
    ensure_dir(out_txt.parent)
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

def plot_grouped_unique(df_uni: pd.DataFrame, title: str, out_png: Path):
    """Grouped bar chart: x=K, bars=ORIGINAL + dynamic RUN labels; y=unique book count."""
    if df_uni.empty:
        print(f"[INFO] Nothing to plot for {title}")
        return
    labels = _labels()
    K_vals = sorted(df_uni["K"].unique().tolist())
    series = {lab: [] for lab in labels}
    for K in K_vals:
        sub = df_uni[df_uni["K"] == K]
        for lab in labels:
            row = sub[sub["label"] == lab]
            series[lab].append(int(row["unique_books"].iloc[0]) if not row.empty else 0)

    x = list(range(len(K_vals)))
    n_series = len(labels)
    width = 0.8 / n_series

    fig, ax = plt.subplots(figsize=(12, 6))
    for i, lab in enumerate(labels):
        xs = [xx + (i - (n_series-1)/2.0)*width for xx in x]
        ax.bar(xs, series[lab], width, label=lab)

    ax.set_xticks(x)
    ax.set_xticklabels([f"K={K}" for K in K_vals])
    ax.set_xlabel("K")
    ax.set_ylabel("Unique books with target genre")
    ax.set_title(title)
    ax.legend()
    ax.grid(axis="y", alpha=0.2)

    ensure_dir(out_png.parent)
    plt.tight_layout()
    plt.savefig(out_png, dpi=160)
    plt.close(fig)

def main():
    all_lines = []  # accumulate for master file
    master_txt = BASE_DIR / "figure" / "ALL_unique_totals.txt"
    ensure_dir(master_txt.parent)

    for g in GENRES:
        df_uni = build_unique_df_for_folder(g)

        # save per-genre outputs under figure/<GENRE>/
        out_dir = BASE_DIR / "figure" / g
        txt_path = out_dir / f"{g}_unique_totals.txt"
        png_path = out_dir / f"{g}_unique_totals.png"

        # Write individual TXT (no header) and PNG
        write_txt_unique_per_genre(df_uni, txt_path, g)
        plot_grouped_unique(df_uni, title=f"{g} – UNIQUE books (no clustering)", out_png=png_path)
        print(f"[OK] Wrote {txt_path} and {png_path}")

        # Append this genre's block (with header) to the master list
        all_lines.extend(make_genre_summary_lines(g, df_uni, include_header=True))

    # Write the combined master TXT once at the end
    with open(master_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(all_lines))
    print(f"[OK] Wrote master summary → {master_txt}")

if __name__ == "__main__":
    main()


[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/figure/Adult/Adult_unique_totals.txt and /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/figure/Adult/Adult_unique_totals.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/figure/Adventure/Adventure_unique_totals.txt and /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/figure/Adventure/Adventure_unique_totals.png
[WARN] Children_s | K=15 | n=25: variant missing/invalid -> /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/enhanced_Children_s_25_15recommendation.csv; using 0
[WARN] Children_s | K=15 | n=400: variant missing/invalid -> /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/attack/enhanced_Children_s_400_15recommendation.csv; using 0
[WARN] Children_s | K=15 | n=5000: variant missing/invalid -> /home/moshtasa/Research/

In [11]:
#!/usr/bin/env python3
# plot_from_all_txt_sections.py
# Parses all.txt with possibly escaped headers like \[Adult] and fields like unique\_books

import re
from pathlib import Path
import matplotlib.pyplot as plt

ALL_TXT = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/all.txt")
ROOT    = ALL_TXT.parent
OUT_DIR = ROOT / "Z_M0928"
OUT_DIR.mkdir(parents=True, exist_ok=True)

EXPECTED_KS = [15, 25, 35]
BAR_ORDER   = ["ORIGINAL", "n25", "n400", "n1000", "n5000", "n10000"]

# Allow optional backslashes before [ and ]  e.g., \[Adult]
header_re = re.compile(r"^\s*\\?\[(?P<genre>[^\]]+)\]\\?\s*$")

# Allow optional backslash in 'unique_books' e.g., unique\_books
line_re   = re.compile(r"K=(\d+)\s*\|\s*([A-Za-z0-9_]+)\s+unique\\?_books:\s*(\d+)")

def tidy_genre(raw: str) -> str:
    # Remove literal backslash before underscores (e.g., Children\_s -> Children_s)
    g = raw.strip().replace("\\_", "_").replace("_", " ")
    # Specific nice names
    g = g.replace("Children s", "Children's")
    g = g.replace("Science Fiction", "Science Fiction")  # idempotent
    return g

def ensure_all_labels(kmap: dict) -> dict:
    for k in EXPECTED_KS:
        kmap.setdefault(k, {})
        for lab in BAR_ORDER:
            kmap[k].setdefault(lab, 0)
    return kmap

def plot_grouped(genre: str, kmap: dict, out_path: Path):
    series = {lab: [kmap.get(k, {}).get(lab, 0) for k in EXPECTED_KS] for lab in BAR_ORDER}

    num_groups = len(EXPECTED_KS)
    bars_per_group = len(BAR_ORDER)
    group_width = 0.8
    bar_width = group_width / bars_per_group
    group_centers = list(range(num_groups))

    plt.figure(figsize=(10, 6))
    for bi, lab in enumerate(BAR_ORDER):
        xs = [gi - group_width/2 + (bi + 0.5) * bar_width for gi in group_centers]
        plt.bar(xs, series[lab], width=bar_width, label=lab)

    plt.xticks(group_centers, [f"K={k}" for k in EXPECTED_KS])
    plt.ylabel("Unique Books")
    plt.title(f"{tidy_genre(genre)} — Unique Books per Top-K (post-lift)")
    plt.legend(ncol=3, fontsize=9)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(out_path, dpi=200)
    plt.close()

def main():
    text_lines = ALL_TXT.read_text(encoding="utf-8", errors="ignore").splitlines()

    sections = {}
    current_genre = None

    for ln in text_lines:
        # Normalize common stray characters
        raw = ln.strip("\ufeff").rstrip()

        # Header?
        mh = header_re.match(raw)
        if mh:
            current_genre = mh.group("genre").strip()
            sections.setdefault(current_genre, {})
            continue

        # Data line?
        md = line_re.search(raw)
        if md and current_genre is not None:
            k = int(md.group(1))
            label = md.group(2)
            val = int(md.group(3))
            sections[current_genre].setdefault(k, {})[label] = val

    if not sections:
        print("[WARN] No sections parsed from all.txt")
        # Optional: print a few sample lines for quick debugging
        for i, l in enumerate(text_lines[:10]):
            print(f"{i:02d}: {l}")
        return

    for genre, kmap in sections.items():
        kmap = ensure_all_labels(kmap)
        out_path = OUT_DIR / f"{genre}_unique_totals_grouped.png"
        plot_grouped(genre, kmap, out_path)
        print(f"[OK] Wrote {out_path}")

if __name__ == "__main__":
    main()


[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Adult_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Adventure_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Children\_s_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Classics_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Drama_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Fantasy_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/Z_M0928/Historical_unique_total

In [14]:
#!/usr/bin/env python3
# plot_unique_totals_by_genre.py
# Reads each <Genre>/<Genre>_unique_totals.txt and creates grouped bar charts in one output folder.

import os
import re
from pathlib import Path
import matplotlib.pyplot as plt

# ---------- CONFIG ----------
ROOT = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure")
OUT_DIR = ROOT / "RAJ"
OUT_DIR.mkdir(parents=True, exist_ok=True)

EXPECTED_KS = [15, 25, 35]
BAR_ORDER   = ["ORIGINAL", "n25", "n400", "n1000", "n5000", "n10000"]
TXT_SUFFIX  = "_unique_totals.txt"
# ----------------------------

def nice_genre_name(folder: str) -> str:
    # Tidy display names
    name = folder.replace("_", " ")
    name = name.replace("Children s", "Children's")
    return name

def parse_unique_totals(txt_path: Path):
    """
    Parse lines like:
      K=15 | ORIGINAL unique_books: 86
      K=15 | n25 unique_books: 89
    Return dict: {15: {"ORIGINAL":86, "n25":89, ...}, 25: {...}, 35: {...}}
    """
    data = {}
    line_re = re.compile(r"K=(\d+)\s*\|\s*([A-Za-z0-9_]+)\s+unique_books:\s*(\d+)")
    with txt_path.open("r", encoding="utf-8") as f:
        for line in f:
            m = line_re.search(line)
            if not m:
                continue
            k = int(m.group(1))
            label = m.group(2)
            val = int(m.group(3))
            data.setdefault(k, {})[label] = val
    return data

def ensure_all_labels(kmap: dict):
    for k in EXPECTED_KS:
        kmap.setdefault(k, {})
        for lab in BAR_ORDER:
            kmap[k].setdefault(lab, 0)
    return kmap

def plot_grouped(genre: str, kmap: dict, out_path: Path):
    num_groups = len(EXPECTED_KS)
    bars_per_group = len(BAR_ORDER)
    group_width = 0.8
    bar_width = group_width / bars_per_group
    group_centers = list(range(num_groups))

    # Prepare series per label
    series = {lab: [] for lab in BAR_ORDER}
    for k in EXPECTED_KS:
        for lab in BAR_ORDER:
            series[lab].append(kmap[k][lab])

    plt.figure(figsize=(10, 6))
    for bi, lab in enumerate(BAR_ORDER):
        lab_xs = [gi - group_width/2 + (bi + 0.5) * bar_width for gi in group_centers]
        plt.bar(lab_xs, series[lab], width=bar_width, label=lab)

    plt.xticks(group_centers, [f"K={k}" for k in EXPECTED_KS])
    plt.ylabel("Unique Books")
    plt.title(f"{nice_genre_name(genre)} — Unique Books per Top-K (post-lift)")
    plt.legend(ncol=3, fontsize=9)
    plt.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.savefig(out_path, dpi=200)
    plt.close()

def main():
    for entry in sorted(ROOT.iterdir()):
        if not entry.is_dir():
            continue
        genre = entry.name
        txt_path = entry / f"{genre}{TXT_SUFFIX}"
        if not txt_path.exists():
            candidates = list(entry.glob(f"*{TXT_SUFFIX}"))
            if not candidates:
                continue
            txt_path = candidates[0]

        kmap = parse_unique_totals(txt_path)
        if not kmap:
            continue

        kmap = ensure_all_labels(kmap)
        out_path = OUT_DIR / f"{genre}_unique_totals_grouped.png"
        plot_grouped(genre, kmap, out_path)
        print(f"[OK] Wrote {out_path}")

if __name__ == "__main__":
    main()


[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Adult_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Adventure_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Children_s_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Classics_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Drama_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Fantasy_unique_totals_grouped.png
[OK] Wrote /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0928/SVD/postlift/figure/RAJ/Historical_unique_totals_grouped.png
[OK] Wrote /hom