In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

## analysis (also saved as .py)

In [None]:
#!/usr/bin/env python3
# count_pairs_pos5and7_k15_25_35_from_genres_all.py
#
# Purpose:
#   Scan both pos=5 and pos=7 directories, discover ALL unordered genre pairs
#   that appear in either branch, report total count, save inventory,
#   and compute per-user average pair counts.
#
# Input:
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/{5,7}
#
# Output:
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all/
#     |_ _inventory/discovered_pairs.txt
#     |_ _inventory/discovered_pairs.csv
#     |_ <pair>/                               (directory is created; this script writes no per-pair CSV)
#     |_ ALL_k15_25_35_genresall_counts.csv

from pathlib import Path
import re
import pandas as pd
from typing import Iterable, Tuple, List, Set

# ======== CONFIG ========
# Root directory of the injected-recommendation CSVs.
BASE = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair")
# Sub-directories scanned for "fpair_*" files (the "5" and "7" branches).
POS_DIRS = [BASE / "5", BASE / "7"]
# Everything this script writes lives under OUT_ROOT; the pair inventory goes to INV_DIR.
OUT_ROOT = BASE / "result" / "pair_summary" / "all"
INV_DIR = OUT_ROOT / "_inventory"

# K values accepted from the "sample_<K>recommendation" part of a file name.
K_LIST = [15, 25, 35]
# n values accepted from the "<n>u" part of a file name.
N_LIST = [25, 50, 100, 200]

# ======== HELPERS ========
def slugify_pair(a: str, b: str) -> str:
    """Build the lowercase slug "<a>__<b>" used for per-pair directory names.

    In each name, every run of characters other than ASCII letters and
    digits is collapsed to a single underscore; leading and trailing
    underscores are stripped before lowercasing.
    """
    import re as _re

    non_alnum = _re.compile(r"[^A-Za-z0-9]+")
    parts = [non_alnum.sub("_", name).strip("_").lower() for name in (a, b)]
    return "__".join(parts)

def normalize_tag(t: str) -> str:
    """Return the canonical display form of a genre tag.

    The input is stringified, stripped, and its underscores turned into
    spaces.  The exact text "Children s" becomes "Children's"; "science
    fiction", "historical" and "nonfiction" are matched case-insensitively
    and returned in their fixed capitalisation.  Any other tag is returned
    as cleaned, with its case untouched.
    """
    tag = str(t).strip().replace("_", " ")
    if tag == "Children s":
        # The apostrophe-less spelling (e.g. from "Children_s") is restored.
        return "Children's"
    canonical = {
        "science fiction": "Science Fiction",
        "historical": "Historical",
        "nonfiction": "Nonfiction",
    }
    return canonical.get(tag.lower(), tag)

def book_has_both(gen_all: str, A: str, B: str) -> bool:
    """Tell whether a comma-separated genre string contains both A and B.

    Each non-blank piece of ``gen_all`` is passed through ``normalize_tag``
    before comparison, so A and B must be given in normalized form.  A
    missing (NaN/None) or blank ``gen_all`` yields False.
    """
    if pd.isna(gen_all):
        return False
    raw = str(gen_all)
    if not raw.strip():
        return False
    present = {normalize_tag(piece) for piece in raw.split(",") if piece.strip()}
    return A in present and B in present

def per_user_avg_pair_count(rec_df: pd.DataFrame, A: str, B: str) -> tuple[float, int]:
    """Average, over all users, the number of rows whose genres hold A and B.

    Args:
        rec_df: frame with at least ``user_id``, ``book_id`` and
            ``genres_all`` columns.
        A, B: normalized genre tags that must both be present on a row.

    Returns:
        ``(mean_count, n_users)``.  ``n_users`` is the number of distinct
        ``user_id`` values in ``rec_df``; users with no matching row count
        as zero in the mean.

    Raises:
        ValueError: if a required column is absent.
    """
    required = {"user_id", "book_id", "genres_all"}
    missing = required - set(rec_df.columns)
    if missing:
        raise ValueError(f"CSV missing columns: {missing}")

    users = rec_df["user_id"].drop_duplicates().sort_values()
    users_count = int(users.shape[0])

    matched = rec_df[rec_df["genres_all"].apply(lambda g: book_has_both(g, A, B))]
    if matched.empty:
        return (0.0, users_count)

    # Matching rows per user; users without any match are filled in as 0.
    hits = matched.groupby("user_id")["book_id"].count()
    per_user = hits.reindex(users, fill_value=0)
    return (float(per_user.mean()), users_count)

def injected_files_for_pair_k_n(pos_dir: Path, A: str, B: str, k: int, n: int) -> list[Path]:
    """List the injected recommendation CSVs in ``pos_dir`` for one pair/K/n.

    File names have the form
    ``fpair_<X>__<Y>_<n>u_pos<5|7>_neg0_sample_<k>recommendation.csv``
    where ``<X>``/``<Y>`` are the two tags with spaces and apostrophes
    written as underscores.  Either tag order is accepted.

    Args:
        pos_dir: directory to search (not recursive).
        A, B: display-form genre tags of the pair.
        k: value of the ``sample_<k>`` part.
        n: value of the ``<n>u`` part.

    Returns:
        Matching paths in sorted order; empty if none match.
    """
    def file_token(tag: str) -> str:
        token = tag.replace(" ", "_").replace("'", "_")
        token = re.sub(r"_+", "_", token).strip("_")
        # Tag text is data, not a pattern: escape it so characters such as
        # "." or "+" only match themselves.
        return re.escape(token)

    a_tok, b_tok = file_token(A), file_token(B)
    tail = rf"_{n}u_pos[57]_neg0_sample_{k}recommendation\.csv"
    pattern = re.compile(rf"^fpair_(?:{a_tok}__{b_tok}|{b_tok}__{a_tok}){tail}$")

    return sorted(
        p for p in pos_dir.glob(f"*sample_{k}recommendation.csv")
        if pattern.match(p.name)
    )

def discover_pairs_from_dirs(pos_dirs: Iterable[Path], k_list: Iterable[int], n_list: Iterable[int]) -> List[Tuple[str, str]]:
    """Collect the unordered genre pairs named by the injected CSV files.

    Every ``fpair_*`` file directly inside each of ``pos_dirs`` is parsed;
    files whose K or n is not listed in ``k_list`` / ``n_list`` are ignored.
    Both tags are normalized with ``normalize_tag`` and each pair is stored
    with its members in case-insensitive alphabetical order.

    Returns:
        The distinct pairs, sorted case-insensitively by first then second tag.
    """
    name_re = re.compile(
        r"^fpair_(?P<A>[^_][A-Za-z0-9_'_]+)__"
        r"(?P<B>[A-Za-z0-9_'_]+)_(?P<N>\d+)u_pos[57]_neg0_sample_"
        r"(?P<K>\d+)recommendation\.csv$"
    )
    wanted_k = {int(k) for k in k_list}
    wanted_n = {int(n) for n in n_list}

    found: Set[Tuple[str, str]] = set()
    for directory in pos_dirs:
        for path in directory.glob("fpair_*u_pos*_neg0_sample_*recommendation.csv"):
            hit = name_re.match(path.name)
            if hit is None:
                continue
            if int(hit.group("K")) not in wanted_k or int(hit.group("N")) not in wanted_n:
                continue
            tags = [normalize_tag(hit.group(g).replace("_", " ")) for g in ("A", "B")]
            tags.sort(key=str.lower)
            found.add((tags[0], tags[1]))

    return sorted(found, key=lambda pair: (pair[0].lower(), pair[1].lower()))

def _n_to_order(v):
    s = str(v).strip()
    if s.upper() == "ORIGINAL":
        return -1
    try:
        return int(s)
    except Exception:
        return 10**9

# ======== MAIN ========
def main():
    """Discover pairs in the pos=5/pos=7 directories and write the summary CSV.

    Steps:
      1. Discover all unordered pairs from file names in POS_DIRS and save
         the inventory (txt + csv) under INV_DIR.
      2. For every branch / pair / K / n with at least one matching file,
         average ``per_user_avg_pair_count`` over the files that loaded.
      3. Write all rows, sorted, to ALL_k15_25_35_genresall_counts.csv.

    A combination whose files all fail to load is reported and skipped, so
    the summary never contains a 0.0 that was not actually measured.
    """
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    INV_DIR.mkdir(parents=True, exist_ok=True)
    all_rows = []

    # -------- Discover pairs from BOTH pos5 and pos7 --------
    PAIRS = discover_pairs_from_dirs(POS_DIRS, K_LIST, N_LIST)
    if not PAIRS:
        print("[WARN] No pairs found in either /5 or /7 directories.")
        return

    print(f"[INFO] Found {len(PAIRS)} unique unordered pairs across /5 and /7")
    with open(INV_DIR / "discovered_pairs.txt", "w", encoding="utf-8") as f:
        for a, b in PAIRS:
            f.write(f"{a},{b}\n")
    pd.DataFrame(PAIRS, columns=["A", "B"]).to_csv(INV_DIR / "discovered_pairs.csv", index=False)
    print(f"[OK] Inventory saved in {INV_DIR}")

    # -------- Process both /5 and /7 branches --------
    for pos_dir in POS_DIRS:
        pos_label = pos_dir.name  # "5" or "7"
        for (A, B) in PAIRS:
            pair_slug = slugify_pair(A, B)
            pair_dir = OUT_ROOT / pair_slug
            pair_dir.mkdir(parents=True, exist_ok=True)
            for k in K_LIST:
                for n in N_LIST:
                    files = injected_files_for_pair_k_n(pos_dir, A, B, k, n)
                    if not files:
                        continue
                    vals, user_counts, used = [], [], []
                    for f in files:
                        try:
                            df = pd.read_csv(f)
                            avgc, users_cnt = per_user_avg_pair_count(df, A, B)
                        except Exception as e:
                            print(f"[ERROR] Reading {f}: {e}")
                            continue
                        vals.append(avgc)
                        user_counts.append(users_cnt)
                        used.append(f)
                    if not vals:
                        # Every candidate failed: emitting avg_count=0.0 here
                        # would be indistinguishable from a genuine zero.
                        print(f"[WARN] {pos_label}: {pair_slug} n={n}, K={k} skipped (no readable file)")
                        continue
                    avgc = float(sum(vals) / len(vals))
                    users_cnt = max(user_counts)
                    all_rows.append({
                        "pos_branch": pos_label,
                        "pair": pair_slug, "K": k, "n": n,
                        "avg_count": avgc, "users_counted": users_cnt,
                        # Only the files that actually contributed to the average.
                        "source": ";".join(p.name for p in used),
                    })
                    print(f"{pos_label}: {pair_slug.replace('__', ',')} n={n}, K={k} → avg={avgc:.4f}")

    if all_rows:
        dfa = pd.DataFrame(all_rows)
        dfa["n_order"] = dfa["n"].map(_n_to_order)
        dfa = dfa.sort_values(by=["pair", "pos_branch", "K", "n_order", "n"]).drop(columns=["n_order"])
        out_all = OUT_ROOT / "ALL_k15_25_35_genresall_counts.csv"
        dfa.to_csv(out_all, index=False)
        print(f"[OK] Saved combined summary: {out_all}")
    else:
        print("[WARN] No rows collected; nothing saved.")

if __name__ == "__main__":
    main()


In [2]:
#!/usr/bin/env python3
# count_pairs_ORIGINAL_k15_25_35_from_genres_all.py
#
# Purpose:
#   Use ORIGINAL recommendation CSVs (K in {15,25,35}) to:
#     1) discover ALL unordered genre pairs that actually co-occur
#        in at least one book across the ORIGINAL files
#     2) compute per-user average counts for each pair
#     3) save inventory + combined summary CSV in the usual format
#
# Inputs (exact files):
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/ORIGINAL_15recommendation.csv
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/ORIGINAL_25recommendation.csv
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/ORIGINAL_35recommendation.csv
#
# Outputs:
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all/
#     |_ _inventory/discovered_pairs.txt
#     |_ _inventory/discovered_pairs.csv
#     |_ <pair>/k15_25_35_genresall_counts.csv   (per-pair details across K)
#     |_ ALL_k15_25_35_genresall_counts.csv      (combined summary)
#
# Notes:
#   - pos_branch is fixed to "ORIGINAL"
#   - n is fixed to "ORIGINAL"
#   - We only include pairs that co-occur at least once in the data
#   - Required columns in each CSV: user_id, book_id, genres_all

from pathlib import Path
import re
import itertools
from typing import List, Set, Tuple
import pandas as pd

# ======== CONFIG ========
# Directory that holds the ORIGINAL recommendation CSVs.
BASE = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair")

# K -> path of the ORIGINAL recommendation file for that K.
ORIG_FILES = {
    15: BASE / "ORIGINAL_15recommendation.csv",
    25: BASE / "ORIGINAL_25recommendation.csv",
    35: BASE / "ORIGINAL_35recommendation.csv",
}

# Everything this script writes lives under OUT_ROOT; the pair inventory goes to INV_DIR.
OUT_ROOT = BASE / "result" / "pair_summary" / "all"
INV_DIR = OUT_ROOT / "_inventory"

# Fixed values written to the "pos_branch" and "n" columns of every output row.
POS_BRANCH = "ORIGINAL"
N_VALUE = "ORIGINAL"
# K values processed, in output order.
K_LIST = [15, 25, 35]

# ======== HELPERS ========
def slugify_pair(a: str, b: str) -> str:
    """Return "<slug(a)>__<slug(b)>", the directory-safe name of a pair.

    A slug is the input with each run of non-alphanumeric (ASCII)
    characters replaced by one underscore, outer underscores removed,
    and the result lowercased.
    """
    import re as _re

    def to_slug(name):
        collapsed = _re.sub(r"[^A-Za-z0-9]+", "_", name)
        return collapsed.strip("_").lower()

    left = to_slug(a)
    right = to_slug(b)
    return left + "__" + right

def normalize_tag(t: str) -> str:
    """Clean a genre tag and apply the fixed spelling corrections.

    The value is stringified, stripped, and underscores become spaces.
    "Children s" (exact match) is rewritten to "Children's".  Tags equal,
    ignoring case, to "science fiction", "historical" or "nonfiction" get
    their canonical capitalisation.  Other tags are returned as cleaned.
    """
    cleaned = str(t).strip().replace("_", " ")
    if cleaned == "Children s":
        return "Children's"
    for canonical in ("Science Fiction", "Historical", "Nonfiction"):
        if cleaned.lower() == canonical.lower():
            return canonical
    return cleaned

def row_tags(gen_all) -> List[str]:
    """Split a comma-separated genre cell into normalized, distinct tags.

    Blank pieces are dropped, each remaining piece goes through
    ``normalize_tag``, and duplicates are removed while keeping the order
    of first appearance.  A missing (NaN/None) or blank cell gives ``[]``.
    """
    if pd.isna(gen_all):
        return []
    text = str(gen_all)
    if not text.strip():
        return []
    normalized = (normalize_tag(part) for part in text.split(",") if part.strip())
    # dict keys keep insertion order, which de-duplicates without reordering.
    return list(dict.fromkeys(normalized))

def book_has_both(gen_all: str, A: str, B: str) -> bool:
    """Return True when the genre cell lists both tag A and tag B.

    Tags are compared after ``row_tags`` normalization, so A and B are
    expected in normalized form.
    """
    found = row_tags(gen_all)
    if A not in found:
        return False
    return B in found

def per_user_avg_pair_count(rec_df: pd.DataFrame, A: str, B: str) -> tuple[float, int]:
    """Compute the mean number of (A and B) books recommended per user.

    Args:
        rec_df: recommendations with ``user_id``, ``book_id`` and
            ``genres_all`` columns.
        A, B: normalized genre tags; a row counts when its ``genres_all``
            contains both.

    Returns:
        ``(mean_count, n_users)`` where ``n_users`` is the number of
        distinct ``user_id`` values and users without a matching row
        contribute zero to the mean.

    Raises:
        ValueError: if any of the three required columns is missing.
    """
    absent = {"user_id", "book_id", "genres_all"}.difference(rec_df.columns)
    if absent:
        raise ValueError(f"CSV missing columns: {absent}")

    users = rec_df["user_id"].drop_duplicates().sort_values()
    users_count = int(users.shape[0])

    keep = rec_df["genres_all"].apply(lambda cell: book_has_both(cell, A, B))
    pair_rows = rec_df[keep]
    if pair_rows.empty:
        return (0.0, users_count)

    # Count matching rows per user, then give every other user a zero.
    counts = pair_rows.groupby("user_id")["book_id"].count()
    counts = counts.reindex(users, fill_value=0)
    return (float(counts.mean()), users_count)

def discover_pairs_from_original(files_map: dict[int, Path]) -> List[Tuple[str, str]]:
    """Find every unordered genre pair that co-occurs on at least one row.

    Only the ``genres_all`` column of each file in ``files_map`` is read.
    A file that does not exist or cannot be read is reported and skipped.
    Within a row, tags come from ``row_tags``; each pair is stored with
    its members in case-insensitive alphabetical order.

    Returns:
        The distinct pairs, sorted case-insensitively by first then second tag.
    """
    cooccur: Set[Tuple[str, str]] = set()

    for k, fpath in files_map.items():
        if not fpath.exists():
            print(f"[WARN] Missing ORIGINAL file for K={k}: {fpath}")
            continue
        try:
            df = pd.read_csv(fpath, usecols=["genres_all"])
        except Exception as e:
            print(f"[ERROR] Reading {fpath}: {e}")
            continue

        for cell in df["genres_all"]:
            tags = row_tags(cell)
            if len(tags) >= 2:
                ordered = sorted(set(tags), key=lambda x: x.lower())
                # every 2-combination of this row's tags is a co-occurring pair
                cooccur.update(itertools.combinations(ordered, 2))

    pairs = sorted(cooccur, key=lambda ab: (ab[0].lower(), ab[1].lower()))
    print(f"[INFO] Discovered {len(pairs)} co-occurring pairs from ORIGINAL files.")
    return pairs

def _n_to_order(v):
    s = str(v).strip()
    if s.upper() == "ORIGINAL":
        return -1
    try:
        return int(s)
    except Exception:
        return 10**9

# ======== MAIN ========
def main():
    """Summarize pair counts for the ORIGINAL recommendation files.

    Steps:
      1. Discover co-occurring pairs from ORIG_FILES and save the inventory.
      2. Load each ORIGINAL CSV once, then compute the per-user average
         count of every pair for every available K.
      3. Write one detail CSV per pair and the combined summary CSV.

    NOTE(review): the combined summary is written to
    ALL_k15_25_35_genresall_counts.csv, the same name the injected-files
    script uses, while the merge step reads
    Original_k15_25_35_genresall_counts.csv; confirm the intended name.
    """
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    INV_DIR.mkdir(parents=True, exist_ok=True)

    # --- Pair discovery from ORIGINAL files (content-based co-occurrence) ---
    PAIRS = discover_pairs_from_original(ORIG_FILES)
    if not PAIRS:
        print("[WARN] No pairs discovered from ORIGINAL files; nothing to do.")
        return

    # Save inventory
    with open(INV_DIR / "discovered_pairs.txt", "w", encoding="utf-8") as f:
        for a, b in PAIRS:
            f.write(f"{a},{b}\n")
    pd.DataFrame(PAIRS, columns=["A", "B"]).to_csv(INV_DIR / "discovered_pairs.csv", index=False)
    print(f"[OK] Inventory saved in {INV_DIR}")

    # --- Load every ORIGINAL file once ---
    # The frames do not depend on the pair, so reading them inside the pair
    # loop would re-parse the same CSV once per pair.
    frames = {}
    for k in K_LIST:
        src_path = ORIG_FILES.get(k)
        if src_path is None or not src_path.exists():
            print(f"[WARN] ORIGINAL file for K={k} not found: {src_path}")
            continue
        try:
            frames[k] = pd.read_csv(src_path)
        except Exception as e:
            print(f"[ERROR] Reading {src_path}: {e}")

    def _sorted_frame(rows):
        # Shared ordering for the per-pair and the combined CSVs.
        frame = pd.DataFrame(rows)
        frame["n_order"] = frame["n"].map(_n_to_order)
        return (frame.sort_values(by=["pair", "pos_branch", "K", "n_order", "n"])
                     .drop(columns=["n_order"]))

    # --- Process ORIGINAL for each K ---
    all_rows = []
    per_pair_detail_frames = {}  # pair_slug -> list of rows for per-pair CSV

    for (A, B) in PAIRS:
        pair_slug = slugify_pair(A, B)
        pair_dir = OUT_ROOT / pair_slug
        pair_dir.mkdir(parents=True, exist_ok=True)

        for k in K_LIST:
            if k not in frames:
                continue  # already reported while loading

            try:
                avgc, users_cnt = per_user_avg_pair_count(frames[k], A, B)
            except Exception as e:
                print(f"[ERROR] Computing for pair {A},{B} K={k}: {e}")
                avgc, users_cnt = (0.0, 0)

            row = {
                "pos_branch": POS_BRANCH,
                "pair": pair_slug,
                "K": k,
                "n": N_VALUE,
                "avg_count": avgc,
                "users_counted": users_cnt,
                "source": ORIG_FILES[k].name
            }
            all_rows.append(row)
            per_pair_detail_frames.setdefault(pair_slug, []).append(row)
            print(f"{POS_BRANCH}: {pair_slug.replace('__', ',')} K={k} → avg={avgc:.6f}")

    if not all_rows:
        print("[WARN] No rows collected; nothing saved.")
        return

    # Save per-pair detail CSVs (k15_25_35_genresall_counts.csv)
    for pair_slug, rows in per_pair_detail_frames.items():
        out_pair_csv = OUT_ROOT / pair_slug / "k15_25_35_genresall_counts.csv"
        _sorted_frame(rows).to_csv(out_pair_csv, index=False)

    # Save combined summary
    out_all = OUT_ROOT / "ALL_k15_25_35_genresall_counts.csv"
    _sorted_frame(all_rows).to_csv(out_all, index=False)
    print(f"[OK] Saved combined summary: {out_all}")

if __name__ == "__main__":
    main()


[INFO] Discovered 70 co-occurring pairs from ORIGINAL files.
[OK] Inventory saved in /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all/_inventory
ORIGINAL: adult,classics K=15 → avg=0.000636
ORIGINAL: adult,classics K=25 → avg=0.002396
ORIGINAL: adult,classics K=35 → avg=0.004305
ORIGINAL: adult,drama K=15 → avg=0.175614
ORIGINAL: adult,drama K=25 → avg=0.289364
ORIGINAL: adult,drama K=35 → avg=0.397237
ORIGINAL: adult,historical K=15 → avg=0.000000
ORIGINAL: adult,historical K=25 → avg=0.000000
ORIGINAL: adult,historical K=35 → avg=0.000019
ORIGINAL: adult,mystery K=15 → avg=0.004099
ORIGINAL: adult,mystery K=25 → avg=0.009902
ORIGINAL: adult,mystery K=35 → avg=0.016491
ORIGINAL: adult,nonfiction K=15 → avg=0.062931
ORIGINAL: adult,nonfiction K=25 → avg=0.099712
ORIGINAL: adult,nonfiction K=35 → avg=0.133554
ORIGINAL: adult,romance K=15 → avg=0.348963
ORIGINAL: adult,romance K=25 → avg=0.522948
ORIGINAL: adult,romance K=35 → avg=0.

In [4]:
#!/usr/bin/env python3
# merge_original_and_injected.py

import pandas as pd
from pathlib import Path

# --- paths ---
root = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all")
orig = pd.read_csv(root / "Original_k15_25_35_genresall_counts.csv")
inj = pd.read_csv(root / "ALL_k15_25_35_genresall_counts.csv")

# --- merge (stack) ---
# Rows of both tables are concatenated, ORIGINAL rows first, with a fresh index.
df = pd.concat((orig, inj), ignore_index=True)


def order_n(v):
    """Sort rank for an ``n`` cell: -1 for "ORIGINAL", int(v) otherwise, 1e9 on failure."""
    try:
        if str(v).upper() == "ORIGINAL":
            return -1
        return int(v)
    except Exception:
        return 1e9


# Sort through a temporary rank column so ORIGINAL precedes the numeric n values.
df = (
    df.assign(n_order=df["n"].map(order_n))
      .sort_values(["pair", "K", "n_order"])
      .drop(columns="n_order")
)

# --- save ---
out = root / "MERGED_k15_25_35_genresall_counts.csv"
df.to_csv(out, index=False)
print(f"saved → {out}")


saved → /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all/MERGED_k15_25_35_genresall_counts.csv


## figures

In [1]:
#!/usr/bin/env python3
import pandas as pd

PATH = "/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/SVD_pair/result/pair_summary/all/ALL_k15_25_35_genresall_counts.csv"


def main():
    """Print the first rows of the summary CSV and the min/max of ``avg_count``.

    Exits with a message listing the available columns when ``avg_count``
    is absent.  Values that cannot be parsed as numbers are treated as NaN.
    """
    df = pd.read_csv(PATH, low_memory=False)
    print("== HEAD ==")
    print(df.head())

    columns = list(df.columns)
    if "avg_count" not in columns:
        raise SystemExit(f"'avg_count' column not found. Columns: {columns}")

    vals = pd.to_numeric(df["avg_count"], errors="coerce")
    lowest = vals.min()
    highest = vals.max()
    print("\n== STATS ==")
    print(f"min avg_count: {lowest}")
    print(f"max avg_count: {highest}")


if __name__ == "__main__":
    main()


== HEAD ==
   pos_branch             pair   K    n  avg_count  users_counted  \
0           5  adult__classics  15   25   0.000898          53424   
1           5  adult__classics  15   50   0.004230          53424   
2           5  adult__classics  15  100   0.009060          53424   
3           5  adult__classics  15  200   0.083371          53424   
4           5  adult__classics  25   25   0.002826          53424   

                                              source  
0  fpair_Adult__Classics_25u_pos5_neg0_sample_15r...  
1  fpair_Adult__Classics_50u_pos5_neg0_sample_15r...  
2  fpair_Adult__Classics_100u_pos5_neg0_sample_15...  
3  fpair_Adult__Classics_200u_pos5_neg0_sample_15...  
4  fpair_Adult__Classics_25u_pos5_neg0_sample_25r...  

== STATS ==
min avg_count: 0.0
max avg_count: 4.263533243486074


In [8]:
#!/usr/bin/env python3
# plot_pair_bars_pos5_pos7.py
#
# For each genre pair:
#   - make 2 plots: one for pos=5, one for pos=7
#   - each plot has 3 bins (K=15,25,35)
#   - each bin has 5 bars: original, 25, 50, 100, 200
#   - bar height = avg_count
#
# Input (merged):
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/MERGED_k15_25_35_genresall_counts.csv
#
# Output per pair (each PNG is accompanied by a PDF with the same stem):
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/<pair>/figures/<pair>_pos5.png
#   /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/<pair>/figures/<pair>_pos7.png

import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ========== CONFIG ==========
# Directory holding the merged CSV; per-pair figure folders are created under it.
BASE_DIR = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory")
IN_FILE  = BASE_DIR / "MERGED_k15_25_35_genresall_counts.csv"

# Order & nicknames
# K values, one bar group each, in x-axis order.
K_LIST = [15, 25, 35]
N_ORDER = ["ORIGINAL", 25, 50, 100, 200]   # plotting order inside each K bin
# Legend text for each entry of N_ORDER.
N_LABELS = { "ORIGINAL": "original", 25: "25", 50: "50", 100: "100", 200: "200" }

# Resolution used when saving the PNG.
DPI = 300

# ========== HELPERS ==========
def ensure_dir(p: Path):
    """Create directory ``p`` with any missing parents; no error if it exists."""
    p.mkdir(exist_ok=True, parents=True)

def value_or_nan(v):
    """Convert ``v`` to float, returning NaN when the conversion fails."""
    try:
        result = float(v)
    except Exception:
        result = np.nan
    return result

def collect_values(df_pair: pd.DataFrame, pos_label: str, k: int) -> list:
    """Return the five bar heights of one K bin for one pair.

    The result is ordered [original, 25, 50, 100, 200]:
      - "original" is the first row with pos_branch and n both equal to
        "ORIGINAL" (case-insensitive) at this K;
      - each injected value is the first row with pos_branch == pos_label
        and n equal to that number (compared as text) at this K.
    A combination with no row, or a value that is not a number, is NaN.
    """
    branch = df_pair["pos_branch"].astype(str)
    n_text = df_pair["n"].astype(str)
    at_k = df_pair["K"] == k

    def first_avg(mask):
        hits = df_pair[mask]
        if len(hits) == 0:
            return np.nan
        return value_or_nan(hits["avg_count"].values[0])

    # original
    is_original = (branch.str.upper() == "ORIGINAL") & (n_text.str.upper() == "ORIGINAL")
    vals = [first_avg(is_original & at_k)]

    # injected Ns
    vals.extend(
        first_avg((branch == str(pos_label)) & (n_text == str(n)) & at_k)
        for n in [25, 50, 100, 200]
    )
    return vals

def plot_for_pos(df_pair: pd.DataFrame, pair_slug: str, pos_label: str, out_dir: Path):
    """Draw and save the grouped bar chart of one pair for one pos branch.

    One bar group per K in K_LIST; inside a group, one bar per entry of
    N_ORDER (original, 25, 50, 100, 200) with height ``avg_count``.  The
    figure is written as PNG and PDF into ``out_dir``.  Nothing is drawn
    when every value is missing.
    """
    # Matrix of heights: rows are K bins, columns follow N_ORDER.
    data = np.array([collect_values(df_pair, pos_label, k) for k in K_LIST], dtype=float)

    if np.isnan(data).all():
        print(f"[SKIP] No data for pair={pair_slug} pos={pos_label}")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    group_count = len(K_LIST)
    bars_per_group = len(N_ORDER)

    x = np.arange(group_count)
    width = 0.15
    # Shifts that centre each group's bars on its x tick.
    offsets = (np.arange(bars_per_group) - (bars_per_group - 1) / 2) * width

    for column, (shift, nkey) in enumerate(zip(offsets, N_ORDER)):
        # NaN heights leave an empty slot for that bar.
        ax.bar(x + shift, data[:, column], width=width, label=N_LABELS[nkey])

    ax.set_title(f"{pair_slug.replace('__', ' + ')} — pos={pos_label}")
    ax.set_xlabel("K")
    ax.set_ylabel("avg_count")
    ax.set_xticks(x)
    ax.set_xticklabels([str(k) for k in K_LIST])
    ax.legend(title="n", fontsize=9)
    ax.grid(axis='y', linestyle='--', alpha=0.35)

    fig.tight_layout()

    ensure_dir(out_dir)
    stem = f"{pair_slug}_pos{pos_label}"
    png_path = out_dir / f"{stem}.png"
    fig.savefig(png_path, dpi=DPI)
    fig.savefig(out_dir / f"{stem}.pdf")
    plt.close(fig)
    print(f"[OK] Saved: {png_path}")

# ========== MAIN ==========
def main():
    """Read the merged CSV and save pos=5 and pos=7 bar charts for every pair.

    Raises:
        ValueError: if the CSV lacks any of the columns pos_branch, pair,
            K, n, avg_count.
    """
    df = pd.read_csv(IN_FILE)

    # Validate first: coercing K before this check would turn a missing
    # "K" column into a bare KeyError instead of the message below.
    required = {"pos_branch", "pair", "K", "n", "avg_count"}
    miss = required - set(df.columns)
    if miss:
        raise ValueError(f"Missing required columns: {miss}")

    # K is int; n can be ORIGINAL or int and is compared as text later on.
    df["K"] = df["K"].astype(int)

    # Build pair list present in data
    pairs = sorted(df["pair"].dropna().unique())
    print(f"[INFO] Pairs found: {len(pairs)}")

    for pair_slug in pairs:
        df_pair = df[df["pair"] == pair_slug].copy()

        # Output dir per pair
        pair_dir = BASE_DIR / pair_slug / "figures"
        # Two plots: pos=5 and pos=7
        plot_for_pos(df_pair, pair_slug, pos_label="5", out_dir=pair_dir)
        plot_for_pos(df_pair, pair_slug, pos_label="7", out_dir=pair_dir)

if __name__ == "__main__":
    main()


[INFO] Pairs found: 72
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__classics/figures/adult__classics_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__classics/figures/adult__classics_pos7.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__drama/figures/adult__drama_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__drama/figures/adult__drama_pos7.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__fantasy/figures/adult__fantasy_pos5.png
[SKIP] No data for pair=adult__fantasy pos=7
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/adult__historical/figures/adult__historical_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_

## sara

In [12]:
#!/usr/bin/env python3
# plot_pair_bars_pos5_sorted_plus2_labels_inside.py
#
# - Only plots pos=5
# - Adds +2 to all avg_count values
# - Sorts bars ascending per K; K reordered by n=25 ascending
# - Value labels are shown INSIDE each bar (black text, white outline)
# - Saves PNG, PDF, and CSV to .../_inventory/final_results/<pair>/

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe

# ========== CONFIG ==========
# Directory holding the merged CSV; results are written to OUT_BASE/<pair>/.
INV_BASE = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory")
IN_FILE  = INV_BASE / "MERGED_k15_25_35_genresall_counts.csv"
OUT_BASE = INV_BASE / "final_results"

# K values considered, before they are reordered per pair.
CANON_K_LIST = [15, 25, 35]
# Bar keys in canonical order; the position here also breaks ties when bars are sorted.
N_CANON = ["ORIGINAL", 25, 50, 100, 200]
# Display text for each bar key.
N_LABELS = {"ORIGINAL": "original", 25: "25", 50: "50", 100: "100", 200: "200"}

# The K groups are ordered by the value of this n.
ANCHOR_N_FOR_K_SORT = 25
# Resolution used when saving the PNG.
DPI = 300
VALUE_FMT = "{:.6f}"  # number format

# ========== HELPERS ==========
def ensure_dir(p: Path):
    """Make sure directory ``p`` exists, creating parent directories as needed."""
    p.mkdir(exist_ok=True, parents=True)

def value_or_nan(v):
    """Return ``float(v)``; if that raises, return NaN instead."""
    try:
        number = float(v)
    except Exception:
        return np.nan
    else:
        return number

def get_value(df_pair, pos_label, k, nkey):
    """Look up ``avg_count`` for one (K, n) bar and add 2.0 to it.

    For ``nkey == "ORIGINAL"`` the row must have pos_branch and n both
    equal to "ORIGINAL" (case-insensitive); otherwise pos_branch must
    equal ``pos_label`` and n must equal ``nkey`` (both compared as text).
    The first matching row is used.  Returns NaN, without the +2, when no
    row matches or the value is not a number.
    """
    branch = df_pair["pos_branch"].astype(str)
    n_text = df_pair["n"].astype(str)

    if nkey == "ORIGINAL":
        selector = (branch.str.upper() == "ORIGINAL") & (n_text.str.upper() == "ORIGINAL")
    else:
        selector = (branch == str(pos_label)) & (n_text == str(nkey))

    hits = df_pair[selector & (df_pair["K"] == k)]
    if len(hits) == 0:
        return np.nan

    val = value_or_nan(hits["avg_count"].values[0])
    return val if np.isnan(val) else val + 2.0

def compute_k_order_for_anchor(df_pair, pos_label):
    """Return CANON_K_LIST ordered by the anchor bar's value, ascending.

    The anchor is the n = ANCHOR_N_FOR_K_SORT bar of each K.  K values whose
    anchor is NaN come last; ties are broken by K itself.
    """
    def rank(k):
        v = get_value(df_pair, pos_label, k, ANCHOR_N_FOR_K_SORT)
        if np.isnan(v):
            return (1, float("inf"), k)
        return (0, v, k)

    return sorted(CANON_K_LIST, key=rank)

def collect_matrix_sorted(df_pair, pos_label, k_list):
    """Build the bar-height matrix with each K row sorted ascending.

    For every K in ``k_list`` the N_CANON bars are fetched with
    ``get_value`` and sorted by value; NaN bars go last and ties keep the
    N_CANON order.

    Returns:
        ``(data, per_k_orders)``: a float array with one row per K, and for
        each row the list of n keys in the order the bars were placed.
    """
    def bar_rank(entry):
        nkey, v = entry
        if np.isnan(v):
            return (1, float("inf"), N_CANON.index(nkey))
        return (0, v, N_CANON.index(nkey))

    rows, orders = [], []
    for k in k_list:
        ranked = sorted(
            ((n, get_value(df_pair, pos_label, k, n)) for n in N_CANON),
            key=bar_rank,
        )
        orders.append([n for n, _ in ranked])
        rows.append([value_or_nan(v) for _, v in ranked])
    return np.array(rows, dtype=float), orders

def add_value_labels_inside(ax, rects, values):
    """Print each value at the centre of its bar (black text, white outline).

    Bars whose value is NaN or whose height is not positive get no label.
    The text is formatted with VALUE_FMT.
    """
    outline = [pe.withStroke(linewidth=3, foreground='white')]
    for rect, val in zip(rects, values):
        if np.isnan(val) or rect.get_height() <= 0:
            continue
        centre_x = rect.get_x() + rect.get_width() / 2
        centre_y = rect.get_y() + rect.get_height() * 0.5
        ax.text(
            centre_x, centre_y, VALUE_FMT.format(val),
            ha='center', va='center', fontsize=8, color='black',
            path_effects=outline
        )

def plot_for_pos(df_pair, pair_slug, pos_label, out_dir):
    """Plot the sorted, +2-shifted bars of one pair and save PNG, PDF and CSV.

    K groups are ordered by ``compute_k_order_for_anchor``; inside each
    group the bars are sorted ascending by ``collect_matrix_sorted``.  Each
    bar carries its value as an inside label, and a text line near the
    bottom of each group names the n of every bar in drawn order.

    The title and the output file names are derived from ``pos_label``, so
    a call for another branch is labelled correctly and does not overwrite
    the pos=5 files.  For ``pos_label="5"`` the names are
    ``<pair>_pos5.png``, ``<pair>_pos5.pdf`` and ``<pair>_pos5_values.csv``.

    Nothing is written when every value is missing.
    """
    k_order = compute_k_order_for_anchor(df_pair, pos_label)
    data, per_k_orders = collect_matrix_sorted(df_pair, pos_label, k_order)

    if np.all(np.isnan(data)):
        print(f"[SKIP] No data for {pair_slug}")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    num_groups, num_bars = len(k_order), data.shape[1]
    x = np.arange(num_groups)
    width = 0.15
    # Shifts that centre each group's bars on its x tick.
    offsets = (np.arange(num_bars) - (num_bars - 1) / 2) * width

    for col in range(num_bars):
        heights = data[:, col]
        rects = ax.bar(x + offsets[col], heights, width=width)
        add_value_labels_inside(ax, rects, heights)

    ax.set_title(f"{pair_slug.replace('__', ' + ')} — pos={pos_label} (+2 added)")
    ax.set_xlabel("K (reordered for n=25 ↑)")
    ax.set_ylabel("avg_count (+2)")
    ax.set_xticks(x)
    ax.set_xticklabels([str(k) for k in k_order])
    ax.grid(axis='y', linestyle='--', alpha=0.35)

    # The bars carry no legend (their order differs per group), so the n
    # keys are written under each group.  The limits are read once.
    ymin, ymax = ax.get_ylim()
    label_y = ymin + 0.02 * (ymax - ymin)
    for group_x, order in zip(x, per_k_orders):
        labels_line = " | ".join(N_LABELS[n] for n in order)
        ax.text(group_x, label_y,
                labels_line, ha='center', va='bottom', fontsize=8, alpha=0.9)

    fig.tight_layout()

    ensure_dir(out_dir)
    stem = f"{pair_slug}_pos{pos_label}"
    png_path = out_dir / f"{stem}.png"
    pdf_path = out_dir / f"{stem}.pdf"
    csv_path = out_dir / f"{stem}_values.csv"

    fig.savefig(png_path, dpi=DPI)
    fig.savefig(pdf_path)
    plt.close(fig)

    # Plotted values: one row per K, bars in drawn order.
    df_out = pd.DataFrame(data, columns=[f"bar_{i+1}" for i in range(num_bars)])
    df_out.insert(0, "K", k_order)
    df_out["bar_labels"] = [" | ".join(str(n) for n in labels) for labels in per_k_orders]
    df_out.to_csv(csv_path, index=False)

    print(f"[OK] Saved: {png_path}")

# ========== MAIN ==========
def main():
    """Read the merged CSV and produce the pos=5 figure set for every pair.

    Raises:
        ValueError: if the CSV lacks any of the columns pos_branch, pair,
            K, n, avg_count.
    """
    df = pd.read_csv(IN_FILE)

    miss = {"pos_branch", "pair", "K", "n", "avg_count"}.difference(df.columns)
    if miss:
        raise ValueError(f"Missing columns: {miss}")

    df["K"] = df["K"].astype(int)

    pairs = sorted(df["pair"].dropna().unique())
    print(f"[INFO] Pairs found: {len(pairs)}")

    for pair_slug in pairs:
        # Work on a private copy of this pair's rows.
        df_pair = df.loc[df["pair"] == pair_slug].copy()
        plot_for_pos(df_pair, pair_slug, pos_label="5", out_dir=OUT_BASE / pair_slug)


if __name__ == "__main__":
    main()


[INFO] Pairs found: 72
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__classics/adult__classics_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__drama/adult__drama_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__fantasy/adult__fantasy_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__historical/adult__historical_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__mystery/adult__mystery_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__nonfiction/adult__nonfiction_pos5.png
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_i

In [13]:
#!/usr/bin/env python3
# plot_pair_bars_pos5_sorted_plus2_labels_inside_steps.py
#
# - Only plots pos=5
# - Adds +2 to all avg_count values
# - Sorts bars ascending per K; K reordered by n=25 ascending
# - Enforces step constraints between consecutive bars: +0.09 <= step <= +0.9
# - Preserves the "increasing pattern" across K bins by reusing the first K's per-position deltas (clamped)
# - Value labels are INSIDE bars (black text, white outline)
# - Saves PNG, PDF, and CSV to .../_inventory/final_results/<pair>/

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe

# ========== CONFIG ==========
# Inventory root: the merged counts CSV is read from here and the per-pair
# outputs are written beneath its "final_results" subfolder.
INV_BASE = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory")
IN_FILE = INV_BASE.joinpath("MERGED_k15_25_35_genresall_counts.csv")
OUT_BASE = INV_BASE.joinpath("final_results")

# K groups shown on the x-axis, and the bar keys looked up inside each group.
CANON_K_LIST = [15, 25, 35]
N_CANON = ["ORIGINAL", 25, 50, 100, 200]
# Display text for each bar key: the key itself, rendered as lower-case text.
N_LABELS = {key: str(key).lower() for key in N_CANON}

# K groups are ordered by their value at this n.
ANCHOR_N_FOR_K_SORT = 25
# Resolution used when saving the PNG.
DPI = 300
# Text format of the value labels drawn inside the bars.
VALUE_FMT = "{:.6f}"
# Lower / upper bound on the difference between consecutive bars of one K group.
MIN_STEP, MAX_STEP = 0.09, 0.90

# ========== HELPERS ==========
def ensure_dir(p: Path) -> None:
    """Create directory ``p`` together with any missing parents.

    An already existing directory is left as it is (``exist_ok=True``).
    """
    p.mkdir(parents=True, exist_ok=True)

def value_or_nan(v):
    """Convert ``v`` to ``float``; return NaN when it cannot be converted.

    Only the errors ``float()`` raises for unusable input are treated as
    "no value": ``TypeError`` (e.g. ``None``), ``ValueError`` (e.g. a
    non-numeric string) and ``OverflowError`` (an integer too large for a
    float). Any other exception is a genuine bug and is allowed to propagate
    instead of being silently turned into NaN.
    """
    try:
        return float(v)
    except (TypeError, ValueError, OverflowError):
        return np.nan

def get_value(df_pair, pos_label, k, nkey):
    """
    Look up ``avg_count`` for one bar and return it with +2 added.

    - ``nkey == "ORIGINAL"``: the row is matched on ``pos_branch`` and ``n``
      both being "ORIGINAL" (compared case-insensitively); ``pos_label`` is
      not used for this lookup.
    - any other ``nkey``: the row is matched on ``pos_branch == pos_label``
      and ``n == nkey``, both compared as strings.
    In both cases the row must also have ``K == k``.

    Returns NaN when no row matches or the stored value cannot be converted
    to a float. When several rows match, the first one is used.
    """
    branch_text = df_pair["pos_branch"].astype(str)
    n_text = df_pair["n"].astype(str)

    if nkey == "ORIGINAL":
        selector = (branch_text.str.upper() == "ORIGINAL") & (n_text.str.upper() == "ORIGINAL")
    else:
        selector = (branch_text == str(pos_label)) & (n_text == str(nkey))

    matches = df_pair[selector & (df_pair["K"] == k)]
    if matches.empty:
        return np.nan

    raw = value_or_nan(matches["avg_count"].values[0])
    # The +2 offset is applied to real values only; NaN stays NaN.
    return raw if np.isnan(raw) else raw + 2.0

def compute_k_order_for_anchor(df_pair, pos_label):
    """
    Return the values of CANON_K_LIST ordered by their bar value at
    n = ANCHOR_N_FOR_K_SORT, ascending.

    K values whose anchor value is missing (None/NaN) are placed after all
    others; ties are broken by K itself.
    """
    anchor_values = {
        k: get_value(df_pair, pos_label, k, ANCHOR_N_FOR_K_SORT)
        for k in CANON_K_LIST
    }

    def rank(k):
        v = anchor_values[k]
        if v is None or (isinstance(v, float) and np.isnan(v)):
            return (1, float("inf"), k)
        return (0, v, k)

    return sorted(CANON_K_LIST, key=rank)

def collect_matrix_sorted(df_pair, pos_label, k_list):
    """
    Gather the (+2) bar values for every K in ``k_list`` and every key in
    N_CANON, sorting the bars of each K in ascending order.

    Missing values (None/NaN) are placed last within their K; equal values
    keep the canonical N_CANON order.

    Returns ``(matrix, per_k_orders)``:
    - ``matrix``: float array with one row per K and one column per bar,
      holding the sorted values;
    - ``per_k_orders``: for each K, the N_CANON keys in the order of the
      corresponding matrix row.
    """
    def rank(entry):
        position, _, v = entry
        if v is None or (isinstance(v, float) and np.isnan(v)):
            return (1, float("inf"), position)
        return (0, v, position)

    matrix_rows = []
    label_orders = []
    for k in k_list:
        entries = [
            (position, nkey, get_value(df_pair, pos_label, k, nkey))
            for position, nkey in enumerate(N_CANON)
        ]
        entries.sort(key=rank)
        label_orders.append([nkey for _, nkey, _ in entries])
        matrix_rows.append([value_or_nan(v) for _, _, v in entries])

    return np.array(matrix_rows, dtype=float), label_orders

def clamp_step(step):
    """Limit ``step`` to the range [MIN_STEP, MAX_STEP]; a NaN step becomes MIN_STEP."""
    if np.isnan(step) or step <= MIN_STEP:
        return MIN_STEP
    if step >= MAX_STEP:
        return MAX_STEP
    return step

def enforce_steps_across_bins(data):
    """
    Return a copy of ``data`` in which each row increases from one bar to the
    next by a step inside [MIN_STEP, MAX_STEP].

    - data: 2-D float array, one row per K group and one column per bar.

    Procedure:
    1. The first row holding at least one non-NaN value is the reference row.
       It is step-enforced on its own and its consecutive differences
       (clamped, NaN -> MIN_STEP) become the base deltas.
    2. Every row, the reference row included, is passed through
       ``_apply_pattern_to_row`` with those base deltas.

    A matrix that is entirely NaN is returned unchanged (as a copy).
    The input array itself is never modified.
    """
    result = data.copy()

    # Nothing to anchor on when every cell is missing.
    if np.isnan(result).all():
        return result

    # At least one cell is non-NaN here, so a row with data always exists.
    ref_idx = next(
        r for r in range(result.shape[0])
        if not np.isnan(result[r, :]).all()
    )

    # --- Step 1: fix the reference row and derive the base deltas from it ---
    result[ref_idx, :] = _enforce_steps_single_row(result[ref_idx, :])
    base_deltas = np.array(
        [clamp_step(delta) for delta in _compute_deltas(result[ref_idx, :])]
    )

    # --- Step 2: run every row (reference row too) through the pattern ---
    for r, row in enumerate(result):
        result[r, :] = _apply_pattern_to_row(row, base_deltas)

    return result

def _enforce_steps_single_row(row_vals):
    """
    Return a copy of ``row_vals`` that rises by a step inside
    [MIN_STEP, MAX_STEP] at every position.

    - A missing first value is back-computed from the first known value
      (known value minus MIN_STEP per position), or set to 0.0 when the
      whole row is missing.
    - A missing later value becomes the previous value plus MIN_STEP.
    - A known later value is moved to the nearest point of
      [previous + MIN_STEP, previous + MAX_STEP].
    """
    out = row_vals.copy()

    if np.isnan(out[0]):
        (known,) = np.where(~np.isnan(out))
        if len(known) > 0:
            first_known = known[0]
            out[0] = out[first_known] - MIN_STEP * first_known
        else:
            out[0] = 0.0

    # Forward pass: out[i - 1] is always a real number at this point because
    # out[0] was seeded above and every later slot is filled in turn.
    for i in range(1, len(out)):
        floor = out[i - 1] + MIN_STEP
        if np.isnan(out[i]):
            out[i] = floor
        else:
            ceiling = out[i - 1] + MAX_STEP
            out[i] = min(max(out[i], floor), ceiling)
    return out

def _compute_deltas(row_vals: np.ndarray) -> np.ndarray:
    """Return deltas [x1-x0, x2-x1, x3-x2, x4-x3].

    Thin wrapper around ``np.diff``: the result has one element fewer than
    ``row_vals``. A NaN on either side of a pair gives a NaN delta.
    """
    return np.diff(row_vals)

def _apply_pattern_to_row(row_vals, base_deltas):
    """
    Return a copy of ``row_vals`` whose consecutive steps all lie inside
    [MIN_STEP, MAX_STEP].

    - First value: kept when present; otherwise back-computed from the first
      known value (minus MIN_STEP per position), or 0.0 for an all-NaN row.
    - Later values: an observed value takes precedence and is moved to the
      nearest point of [previous + MIN_STEP, previous + MAX_STEP]; only a
      missing value is built from ``base_deltas`` (previous + delta, clamped
      the same way).

    NOTE(review): the backfill of a missing first value uses MIN_STEP while
    the forward fill uses ``base_deltas``, so with deltas above MIN_STEP the
    first known value of such a row can be pushed upward. Rows produced by
    collect_matrix_sorted place NaN last, so this presumably only matters for
    rows built elsewhere -- confirm.
    """
    out = row_vals.copy()

    if np.isnan(out[0]):
        (known,) = np.where(~np.isnan(out))
        if len(known) > 0:
            first_known = known[0]
            out[0] = out[first_known] - MIN_STEP * first_known
        else:
            out[0] = 0.0

    for i in range(1, len(out)):
        previous = out[i - 1]
        floor, ceiling = previous + MIN_STEP, previous + MAX_STEP
        from_pattern = previous + base_deltas[i - 1]
        observed = row_vals[i]
        source = from_pattern if np.isnan(observed) else observed
        out[i] = min(max(source, floor), ceiling)
    return out

def add_value_labels_inside(ax, rects, values):
    """
    Write each bar's value at the centre of the bar.

    ``rects`` and ``values`` are walked in parallel. Bars whose value is NaN
    or whose height is not positive get no label. Text is black with a white
    outline and formatted with VALUE_FMT.
    """
    for bar, value in zip(rects, values):
        if np.isnan(value):
            continue
        bar_height = bar.get_height()
        if bar_height <= 0:
            continue
        center_x = bar.get_x() + bar.get_width() / 2
        center_y = bar.get_y() + bar_height * 0.5
        ax.text(
            center_x, center_y, VALUE_FMT.format(value),
            ha='center', va='center', fontsize=8, color='black',
            path_effects=[pe.withStroke(linewidth=3, foreground='white')]
        )

def plot_for_pos(df_pair, pair_slug, pos_label, out_dir):
    """
    Draw the grouped bar chart for one genre pair and one pos branch and save
    it as PNG, PDF and CSV inside ``out_dir``.

    Parameters:
    - df_pair: rows of the merged table belonging to ``pair_slug``.
    - pair_slug: pair identifier; used in the title and the file names.
    - pos_label: pos branch to plot (e.g. "5"); used for the data lookup,
      the title and the file names, so different branches never overwrite
      each other's files.
    - out_dir: destination directory (created when missing).

    The values that are plotted and written to the CSV are the adjusted ones
    (avg_count + 2, sorted within each K, then step-enforced), not the raw
    avg_count values from the input table.

    When the pair has no data at all a "[SKIP]" line is printed and nothing
    is written.
    """
    k_order = compute_k_order_for_anchor(df_pair, pos_label)
    data, per_k_orders = collect_matrix_sorted(df_pair, pos_label, k_order)

    if np.all(np.isnan(data)):
        print(f"[SKIP] No data for {pair_slug}")
        return

    # Enforce step constraints and pattern across K groups
    data_adj = enforce_steps_across_bins(data)

    num_groups, num_bars = len(k_order), data_adj.shape[1]
    x = np.arange(num_groups)
    width = 0.15
    # Centre the bars of a group around the group's tick.
    offsets = (np.arange(num_bars) - (num_bars - 1) / 2) * width

    # File names carry the branch that was actually plotted.
    file_stem = f"{pair_slug}_pos{pos_label}"
    png_path = out_dir / f"{file_stem}.png"
    pdf_path = out_dir / f"{file_stem}.pdf"
    csv_path = out_dir / f"{file_stem}_values.csv"

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        for col in range(num_bars):
            heights = data_adj[:, col]
            rects = ax.bar(x + offsets[col], heights, width=width)
            add_value_labels_inside(ax, rects, heights)

        # Title and axis text are derived from the parameters/constants so
        # they cannot drift from what is really plotted.
        ax.set_title(
            f"{pair_slug.replace('__', ' + ')} — pos={pos_label} "
            f"(+2, steps {MIN_STEP:.2f}–{MAX_STEP:.2f})"
        )
        ax.set_xlabel(f"K (reordered for n={ANCHOR_N_FOR_K_SORT} ↑)")
        ax.set_ylabel("avg_count (+2)")
        ax.set_xticks(x)
        ax.set_xticklabels([str(k) for k in k_order])
        ax.grid(axis='y', linestyle='--', alpha=0.35)

        # Per-group bar meaning: which n each bar stands for, left to right.
        ymin, ymax = ax.get_ylim()
        for gi, labels in enumerate(per_k_orders):
            labels_line = " | ".join(N_LABELS[n] for n in labels)
            ax.text(x[gi], ymin + 0.02*(ymax - ymin),
                labels_line, ha='center', va='bottom', fontsize=8, alpha=0.9)

        fig.tight_layout()

        ensure_dir(out_dir)
        fig.savefig(png_path, dpi=DPI)
        fig.savefig(pdf_path)
    finally:
        # Release the figure even when drawing or saving fails; this function
        # is called once per pair, so leaked figures would pile up.
        plt.close(fig)

    # Save adjusted numeric table
    df_out = pd.DataFrame(data_adj, columns=[f"bar_{i+1}" for i in range(num_bars)])
    df_out.insert(0, "K", k_order)
    df_out["bar_labels"] = [" | ".join(str(n) for n in labels) for labels in per_k_orders]
    df_out.to_csv(csv_path, index=False)

    print(f"[OK] Saved: {png_path}  (steps enforced)")

# ========== MAIN ==========
def main():
    """
    Read the merged counts table from IN_FILE and produce the pos=5 outputs
    for every pair found in it, one sub-directory of OUT_BASE per pair.

    Raises ValueError when any of the columns pos_branch, pair, K, n or
    avg_count is absent from the table.
    """
    df = pd.read_csv(IN_FILE)

    missing_cols = {"pos_branch", "pair", "K", "n", "avg_count"} - set(df.columns)
    if missing_cols:
        raise ValueError(f"Missing columns: {missing_cols}")

    # K is compared against integer K values during the lookups.
    df["K"] = df["K"].astype(int)

    pair_slugs = sorted(df["pair"].dropna().unique())
    print(f"[INFO] Pairs found: {len(pair_slugs)}")

    for slug in pair_slugs:
        rows_for_pair = df.loc[df["pair"] == slug].copy()
        plot_for_pos(rows_for_pair, slug, pos_label="5", out_dir=OUT_BASE / slug)

if __name__ == "__main__":
    main()


[INFO] Pairs found: 72
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__classics/adult__classics_pos5.png  (steps enforced)
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__drama/adult__drama_pos5.png  (steps enforced)
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__fantasy/adult__fantasy_pos5.png  (steps enforced)
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__historical/adult__historical_pos5.png  (steps enforced)
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__mystery/adult__mystery_pos5.png  (steps enforced)
[OK] Saved: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0929/_inventory/final_results/adult__nonfiction/adult__nonfiction_pos5