In [2]:
import os
import pandas as pd

In [33]:
# Load the user-book ratings table. The preview printed below shows the columns
# user_id, book_id, rating, decade, original_title, authors and genres
# (plus an unnamed index column).
df = pd.read_csv('/home/moshtasa/Research/phd-svd-recsys/SVD/Book/data/df_final_with_genres.csv')

In [34]:
# Rich-display the ratings frame; the rendered output shows only the first and
# last rows with an elided middle.
df

Unnamed: 0,user_id,book_id,rating,decade,original_title,authors,genres
0,1,258,5,2000,La sombra del viento,"Carlos Ruiz Zafón, Lucia Graves","Mystery, Historical"
1,2,4081,4,2000,,,
2,2,260,5,1930,How to Win Friends and Influence People,Dale Carnegie,"Nonfiction, Drama"
3,2,9296,5,1970,Das Drama des begabten Kindes und die Suche na...,"Alice Miller, Ruth Ward","Horror, Mystery"
4,2,2318,3,1990,The Millionaire Next Door: The Surprising Secr...,"Thomas J. Stanley, William D. Danko","Nonfiction, Drama"
...,...,...,...,...,...,...,...
5976474,49925,510,5,1990,The Great Hunt,Robert Jordan,"Fantasy, Adventure"
5976475,49925,528,4,1990,The Dragon Reborn,Robert Jordan,"Classics, Drama"
5976476,49925,722,4,1990,The Shadow Rising,Robert Jordan,"Adventure, Drama"
5976477,49925,949,5,1990,The Fires of Heaven,Robert Jordan,"Fantasy, Adventure"


In [36]:
import pandas as pd
import numpy as np

# Canonical genre vocabulary (13 entries). The list order is significant:
# it fixes the row/column order of the co-occurrence matrix built below.
GENRES_13 = [
    "Adult",
    "Adventure",
    "Children's",
    "Classics",
    "Drama",
    "Fantasy",
    "Historical",
    "Horror",
    "Mystery",
    "Nonfiction",
    "Romance",
    "Science Fiction",
    "Thriller",
]

def parse_genres(s):
    """Split a comma-separated genres cell into a list of trimmed names.

    Missing values (anything ``pd.isna`` reports as NA) yield an empty list.
    Empty fragments between commas are skipped; order and duplicates are kept.
    """
    if pd.isna(s):
        return []
    trimmed = (piece.strip() for piece in str(s).split(","))
    return [name for name in trimmed if name]

# 1) Collapse to UNIQUE books and keep one ordered genre list per book:
#    the first NON-NULL genres string per book_id, preserving its order.
#    Rows without genres are dropped BEFORE de-duplicating; otherwise a book
#    whose first row has no genres would be discarded even when a later row
#    carries them. The stable (merge) sort keeps file order among rows of the
#    same book, so "first" is well defined.
books = (
    df[['book_id', 'genres']]
      .dropna(subset=['book_id', 'genres'])
      .sort_values('book_id', kind='mergesort')
      .drop_duplicates(subset=['book_id'], keep='first')
      .copy()
)

# Parse + filter to canonical 13, preserving order within each book
books['genre_list'] = books['genres'].apply(parse_genres).apply(
    lambda gl: [g for g in gl if g in GENRES_13]
)
books = books[books['genre_list'].map(len) > 0].copy()

# 2) Count and print how many UNIQUE canonical genres are actually present in df
present = []
for gl in books['genre_list']:
    present.extend(gl)
present = sorted(set(present), key=lambda g: GENRES_13.index(g))

print(f"Unique genres present (from canonical 13): {len(present)} / 13")
print(present)

# 3) Build an ORDER-SENSITIVE co-occurrence matrix (Gi -> Gj), Gi != Gj only.
#    Cell (Gi, Gj) = number of unique books in which Gi is listed BEFORE Gj.
idx = {g: i for i, g in enumerate(GENRES_13)}
n = len(GENRES_13)
co_mat = np.zeros((n, n), dtype=int)

for gl in books['genre_list']:
    # Genres of this book in first-occurrence order, duplicates removed.
    ordered = list(dict.fromkeys(gl))

    # Every genre is paired with each genre listed after it, so only
    # "Gi before Gj" pairs are counted and the diagonal is never touched.
    for k, gi in enumerate(ordered):
        for gj in ordered[k + 1:]:
            co_mat[idx[gi], idx[gj]] += 1

# Wrap into a 13x13 DataFrame with fixed order (diagonal is guaranteed 0)
co_df = pd.DataFrame(co_mat, index=GENRES_13, columns=GENRES_13)

# Show a quick peek (optional)
display(co_df)

# Optional: save to CSV
# co_df.to_csv("ordered_genre_cooccurrence_unique_books_13x13.csv", index=True)


Unique genres present (from canonical 13): 13 / 13
['Adult', 'Adventure', "Children's", 'Classics', 'Drama', 'Fantasy', 'Historical', 'Horror', 'Mystery', 'Nonfiction', 'Romance', 'Science Fiction', 'Thriller']


Unnamed: 0,Adult,Adventure,Children's,Classics,Drama,Fantasy,Historical,Horror,Mystery,Nonfiction,Romance,Science Fiction,Thriller
Adult,0,0,0,4,91,0,2,0,3,6,0,0,0
Adventure,0,0,9,46,35,6,17,1,27,14,1,6,23
Children's,0,284,0,137,93,66,30,4,56,11,13,0,0
Classics,1,17,5,0,286,11,10,6,23,25,7,1,0
Drama,53,16,1,83,0,1,21,0,26,18,8,0,2
Fantasy,2,812,99,14,98,0,35,193,236,16,232,17,40
Historical,3,31,9,14,286,4,0,1,38,44,59,1,7
Horror,0,6,8,15,33,42,0,0,194,3,23,6,97
Mystery,5,45,11,37,246,14,67,38,0,3,50,0,799
Nonfiction,18,62,6,88,585,7,35,1,19,0,10,41,6


In [37]:
import pandas as pd
import numpy as np
from itertools import combinations

# Fixed genre vocabulary (13 entries); matrix rows and columns follow this
# exact order.
GENRES_13 = [
    "Adult",
    "Adventure",
    "Children's",
    "Classics",
    "Drama",
    "Fantasy",
    "Historical",
    "Horror",
    "Mystery",
    "Nonfiction",
    "Romance",
    "Science Fiction",
    "Thriller",
]

def parse_genres(s: str):
    """Turn a comma-separated genres cell into a list of stripped names.

    NA input (per ``pd.isna``) gives ``[]``. Blank fragments are dropped;
    the original order and any repeated names are preserved.
    """
    if pd.isna(s):
        return []
    names = []
    for fragment in str(s).split(","):
        fragment = fragment.strip()
        if fragment:
            names.append(fragment)
    return names

# 1) One row per book_id: drop rows with no book_id, order by book_id and keep
#    the first row of every book together with its genres string.
books = df.loc[df['book_id'].notna(), ['book_id', 'genres']]
books = (
    books.sort_values(by='book_id')
         .drop_duplicates(subset=['book_id'], keep='first')
         .copy()
)

# 2) Parse & filter to canonical 13; also de-duplicate within a book
def to_canonical_set(gstr):
    """Return the canonical genres found in ``gstr``, without repeats.

    The cell is parsed with ``parse_genres``; names not in ``GENRES_13`` are
    ignored and each remaining name is kept once, in first-seen order.
    """
    kept = {}
    for name in parse_genres(gstr):
        if name in GENRES_13:
            # dict keys remember insertion order, so the first sighting wins
            kept.setdefault(name, None)
    return list(kept)

books['genre_list'] = books['genres'].apply(to_canonical_set)
has_any_genre = books['genre_list'].map(len) > 0
books = books[has_any_genre].copy()

# 3) Report how many of the canonical 13 actually appear
present = set()
for gl in books['genre_list']:
    present.update(gl)
present = sorted(present, key=GENRES_13.index)
print(f"Unique genres present (from canonical 13): {len(present)} / 13")
print(present)

# 4) Build a SYMMETRIC co-occurrence matrix over UNIQUE books
#    Cell (Gi, Gj) = number of UNIQUE books that contain BOTH Gi and Gj (order-agnostic)
idx = dict(zip(GENRES_13, range(len(GENRES_13))))
n = len(GENRES_13)
co_mat = np.zeros((n, n), dtype=int)

for gl in books['genre_list']:
    # matrix positions of this book's distinct genres
    slots = [idx[g] for g in set(gl)]
    for i, j in combinations(slots, 2):
        # count the pair in both directions so the matrix stays symmetric
        co_mat[i, j] += 1
        co_mat[j, i] += 1

# Zero the diagonal (no GiGi counts)
np.fill_diagonal(co_mat, 0)

# 5) Wrap in DataFrame
co_df = pd.DataFrame(co_mat, index=GENRES_13, columns=GENRES_13)

# Peek / use
display(co_df)

# Optional: save
# co_df.to_csv("symmetric_genre_cooccurrence_unique_books_13x13.csv", index=True)


Unique genres present (from canonical 13): 13 / 13
['Adult', 'Adventure', "Children's", 'Classics', 'Drama', 'Fantasy', 'Historical', 'Horror', 'Mystery', 'Nonfiction', 'Romance', 'Science Fiction', 'Thriller']


Unnamed: 0,Adult,Adventure,Children's,Classics,Drama,Fantasy,Historical,Horror,Mystery,Nonfiction,Romance,Science Fiction,Thriller
Adult,0,0,0,5,144,2,5,0,8,24,139,0,4
Adventure,0,0,293,63,51,818,48,7,72,76,19,264,78
Children's,0,293,0,142,94,165,39,12,67,17,22,12,0
Classics,5,63,142,0,369,25,24,21,60,113,44,35,0
Drama,144,51,94,369,0,99,307,33,272,603,941,65,28
Fantasy,2,818,165,25,99,0,39,235,250,23,351,41,40
Historical,5,48,39,24,307,39,0,1,105,79,189,8,13
Horror,0,7,12,21,33,235,1,0,232,4,55,46,123
Mystery,8,72,67,60,272,250,105,232,0,22,257,130,1088
Nonfiction,24,76,17,113,603,23,79,4,22,0,30,69,11


In [40]:
import re
import pandas as pd

# One row per book_id (first row after ordering by book_id), keeping the
# title, authors and raw genres string for inspection.
books_raw = df.loc[
    df['book_id'].notna(),
    ['book_id', 'original_title', 'authors', 'genres'],
]
books_raw = (
    books_raw.sort_values(by='book_id')
             .drop_duplicates(subset=['book_id'], keep='first')
             .copy()
)

def split_genres(s):
    """Split a comma-separated genres cell into stripped, non-empty names.

    Returns ``[]`` for NA input; order and duplicates are left as they are.
    """
    if pd.isna(s):
        return []
    stripped = map(str.strip, str(s).split(","))
    return [name for name in stripped if name]

# Keep only books that have a genres string and attach the parsed list.
tmp = books_raw[books_raw['genres'].notna()].copy()
tmp['genre_list_raw'] = [split_genres(cell) for cell in tmp['genres']]

def has_both(gl, a, b):
    """Return True when both ``a`` and ``b`` occur in the genre list ``gl``."""
    return {a, b} <= set(gl)

# Genre pair to inspect: list the unique books tagged with both genres.
pair = ("Adventure", "Fantasy")
mask_pair = tmp['genre_list_raw'].apply(lambda genres: has_both(genres, pair[0], pair[1]))
subset = tmp.loc[mask_pair]

print(f"Books with BOTH {pair[0]} and {pair[1]}:", len(subset))
preview_cols = ['book_id', 'original_title', 'authors', 'genres']
display(subset[preview_cols].head(20))


Books with BOTH Adventure and Fantasy: 818


Unnamed: 0,book_id,original_title,authors,genres
3246005,6,The Fault in Our Stars,John Green,"Fantasy, Adventure"
1635898,19,The Fellowship of the Ring,J.R.R. Tolkien,"Fantasy, Adventure"
123194,21,Harry Potter and the Order of the Phoenix,"J.K. Rowling, Mary GrandPré","Fantasy, Adventure"
3940906,23,Harry Potter and the Chamber of Secrets,"J.K. Rowling, Mary GrandPré","Fantasy, Adventure"
2917592,24,Harry Potter and the Goblet of Fire,"J.K. Rowling, Mary GrandPré","Fantasy, Adventure"
2740016,25,Harry Potter and the Deathly Hallows,"J.K. Rowling, Mary GrandPré","Fantasy, Adventure"
3034951,27,Harry Potter and the Half-Blood Prince,"J.K. Rowling, Mary GrandPré","Fantasy, Adventure"
3679511,36,The Giver,Lois Lowry,"Fantasy, Adventure"
576279,38,The Time Traveler's Wife,Audrey Niffenegger,"Fantasy, Adventure"
427492,41,The Lightning Thief,Rick Riordan,"Fantasy, Adventure"


In [46]:
import pandas as pd
from itertools import combinations

# --- Parse genres, preserving order and de-duping per book ---
def parse_genres(s):
    """Split a genres cell into unique, stripped names in first-seen order.

    NA input (per ``pd.isna``) returns ``[]``; blank fragments are ignored.
    """
    if pd.isna(s):
        return []
    names = (piece.strip() for piece in str(s).split(","))
    # dict keys keep insertion order, so repeats collapse onto their first sighting
    return list(dict.fromkeys(name for name in names if name))

# 1) Unique books with ordered genre lists (first row per book_id in file order)
books = (
    df[['book_id', 'genres']]
      .dropna(subset=['book_id'])
      .drop_duplicates(subset=['book_id'], keep='first')
      .copy()
)
books['genre_list'] = books['genres'].apply(parse_genres)
books = books[books['genre_list'].map(len) > 0].copy()

# 2) Discover all genres from the dataset
GENRES = sorted({g for gl in books['genre_list'] for g in gl})

# 3) Build a per-book position map for quick order comparisons
#    pos_maps[i] is a dict: {genre: first_position_in_list}
#    The loop variable is deliberately NOT called `idx`: other cells of this
#    notebook keep a genre -> matrix-index dict under that name, and a
#    module-level loop variable would silently overwrite it.
pos_maps = []
for gl in books['genre_list']:
    pos = {}
    for position, g in enumerate(gl):
        # setdefault keeps the first position if a genre were ever repeated
        pos.setdefault(g, position)
    pos_maps.append(pos)

# 4) Count for every pair:
#    - total_both: books containing BOTH genres (order-agnostic)
#    - g1_before_g2: Gi appears before Gj
#    - g2_before_g1: Gj appears before Gi
def count_pair(g1, g2, maps=None):
    """Count books listing both genres, split by which genre comes first.

    Parameters
    ----------
    g1, g2 : genre names to compare.
    maps : optional iterable of ``{genre: position}`` dicts, one per book.
        When omitted, the notebook-level ``pos_maps`` is used, so existing
        two-argument calls behave exactly as before.

    Returns
    -------
    (total_both, g1_before_g2, g2_before_g1) : tuple of ints. Books where the
    two positions are equal (only possible when ``g1 == g2``) count towards
    ``total_both`` but towards neither ordered count.
    """
    if maps is None:
        maps = pos_maps
    total_both = 0
    g1_before_g2 = 0
    g2_before_g1 = 0
    for pos in maps:
        if g1 in pos and g2 in pos:
            total_both += 1
            if pos[g1] < pos[g2]:
                g1_before_g2 += 1
            elif pos[g2] < pos[g1]:
                g2_before_g1 += 1
    return total_both, g1_before_g2, g2_before_g1

# 5) Report every unordered genre pair once: the number of books carrying
#    both genres, then the count for each of the two listing orders.
for g1, g2 in combinations(GENRES, 2):
    counts = count_pair(g1, g2)
    total_both, g1_before_g2, g2_before_g1 = counts
    print(f"Total books in {g1}, {g2}: {total_both}")
    print(f"{g1.lower()}, {g2.lower()}: {g1_before_g2}")
    print(f"{g2.lower()}, {g1.lower()}: {g2_before_g1}")


Total books in Adult, Adventure: 0
adult, adventure: 0
adventure, adult: 0
Total books in Adult, Children's: 0
adult, children's: 0
children's, adult: 0
Total books in Adult, Classics: 5
adult, classics: 4
classics, adult: 1
Total books in Adult, Drama: 144
adult, drama: 91
drama, adult: 53
Total books in Adult, Fantasy: 2
adult, fantasy: 0
fantasy, adult: 2
Total books in Adult, Historical: 5
adult, historical: 2
historical, adult: 3
Total books in Adult, Horror: 0
adult, horror: 0
horror, adult: 0
Total books in Adult, Mystery: 8
adult, mystery: 3
mystery, adult: 5
Total books in Adult, Nonfiction: 24
adult, nonfiction: 6
nonfiction, adult: 18
Total books in Adult, Romance: 139
adult, romance: 0
romance, adult: 139
Total books in Adult, Science Fiction: 0
adult, science fiction: 0
science fiction, adult: 0
Total books in Adult, Thriller: 4
adult, thriller: 0
thriller, adult: 4
Total books in Adventure, Children's: 293
adventure, children's: 9
children's, adventure: 284
Total books in

## Adding synthetic users per genre pair (pair injection)

In [None]:
#!/usr/bin/env python3
# build_pair_bias_pos5and7_neg0.py
# For each unordered genre pair (G1, G2):
#   - positives: all UNIQUE books containing BOTH G1 and G2, rated POS_RATING ∈ {5, 7}
#   - negatives: optional (all or sampled from remaining books)
#   - add RUN_USERS fictitious users per pair
# Writes two parallel trees:
#   .../0929/PAIR_INJECTION/5/...
#   .../0929/PAIR_INJECTION/7/...

import os
import re
import random
import pandas as pd
from itertools import combinations
from pathlib import Path

# ========= CONFIG =========
# Project root; the input CSV and the output tree are resolved against it.
BASE_DIR      = Path("/home/moshtasa/Research/phd-svd-recsys/SVD/Book")
# Input must provide the columns: user_id, book_id, rating, genres
INPUT_CSV     = BASE_DIR / "data/df_final_with_genres.csv"
# Output root; one subfolder per positive rating ("5", "7") is created under it
BASE_OUT_DIR  = BASE_DIR / "result/rec/top_re/0929/PAIR_INJECTION"

# Column names used throughout the script
GENRE_COL, USER_COL, BOOK_COL, RATING_COL = "genres", "user_id", "book_id", "rating"

# Synthetic-user counts; one dataset is produced per (pair, count, rating)
RUN_USERS     = [25, 50, 100, 200]
# Rating written for the negative (non-pair) books
NEG_RATING    = 0

# ---- How negatives are assigned ----
#   "none"   -> positives only
#   "all"    -> every non-pair book is rated NEG_RATING (very large files)
#   "sample" -> a sampled subset of non-pair books per pair
ZERO_MODE     = "sample"
# With ZERO_MODE="sample": negatives per user ~= NEG_RATIO * (#positives)
NEG_RATIO     = 4
# Seed that makes the negative sampling deterministic
RNG_SEED      = 42
# ================================

def sanitize_fn(s: str) -> str:
    """Make a string safe to embed in a file name.

    Surrounding whitespace is trimmed, every space becomes ``_`` and each run
    of characters outside ``[0-9A-Za-z_]`` collapses into one ``_``. A falsy
    input or an empty result is reported as ``"UNK"``.
    """
    text = s if s else ""
    underscored = text.strip().replace(" ", "_")
    cleaned = re.sub(r"[^0-9A-Za-z_]+", "_", underscored)
    if cleaned:
        return cleaned
    return "UNK"

def parse_genres(cell: str):
    """Parse a comma-separated genres cell into unique, stripped names.

    Anything that is not a non-blank string returns ``[]``. Names keep the
    order of their first occurrence; repeated names are dropped.
    """
    if not (isinstance(cell, str) and cell.strip()):
        return []
    names = (piece.strip() for piece in cell.split(","))
    # dict keys preserve insertion order, which gives order-preserving de-duplication
    return list(dict.fromkeys(name for name in names if name))

def prepare_books(df: pd.DataFrame):
    """Build the per-book genre table.

    Keeps the first row of each BOOK_COL value, parses its GENRE_COL cell with
    ``parse_genres`` into a ``genre_list`` column, and returns only the books
    whose parsed list is non-empty.
    """
    per_book = df.loc[:, [BOOK_COL, GENRE_COL]].drop_duplicates(subset=[BOOK_COL]).copy()
    per_book["genre_list"] = per_book[GENRE_COL].apply(parse_genres)
    has_genres = per_book["genre_list"].map(len) > 0
    return per_book[has_genres].copy()

def run_for_pos(df: pd.DataFrame, pos_rating: int, base_start_uid: int):
    """
    Generate the pair-injection datasets for one positive rating (5 or 7).

    For every unordered pair of genres found in ``df`` and every size in
    RUN_USERS, synthetic users are created that rate every book tagged with
    BOTH genres as ``pos_rating`` and, depending on ZERO_MODE, a set of other
    books as NEG_RATING. The synthetic rows are appended to ``df`` and the
    combined frame is written to ``BASE_OUT_DIR/<pos_rating>/``. A text log
    (summary.txt) and a per-dataset table (summary.csv) go to the same folder.

    Parameters
    ----------
    df : ratings frame with USER_COL, BOOK_COL, RATING_COL and GENRE_COL.
    pos_rating : rating given to the pair's books; also names the subfolder.
    base_start_uid : first user id that is free for synthetic users.

    Returns
    -------
    None. Results are written to disk and progress is printed.
    """
    # Output dirs & logs
    out_dir = BASE_OUT_DIR / f"{pos_rating}"
    out_dir.mkdir(parents=True, exist_ok=True)
    summary_txt = out_dir / "summary.txt"
    summary_csv = out_dir / "summary.csv"

    # Build per-book genre info
    books = prepare_books(df)
    GENRES = sorted({g for gl in books["genre_list"] for g in gl})
    all_books = sorted(books[BOOK_COL].astype(int).unique().tolist())
    book_to_genres = dict(books[[BOOK_COL, GENRE_COL]].values)
    book_to_set = dict(zip(books[BOOK_COL].astype(int), books["genre_list"].apply(set)))

    baseline_users = df[USER_COL].nunique()
    baseline_rows  = len(df)

    # user id allocation:
    # - BLOCK separates different (pair, run_size) buckets
    # - POS_OFFSET separates pos=5 vs pos=7 spaces
    # The pos=5 space spans n_pairs * len(RUN_USERS) blocks, so the offset of the
    # other space must be at least that large. A fixed 10_000_000 is not enough
    # once there are more than two pairs (each pair uses len(RUN_USERS) * BLOCK
    # ids), which made ids of the two rating variants overlap.
    BLOCK = 1_000_000
    n_pairs = len(GENRES) * (len(GENRES) - 1) // 2
    pair_space = n_pairs * len(RUN_USERS) * BLOCK
    POS_OFFSET = 0 if pos_rating == 5 else max(10_000_000, pair_space)

    rows_summary = []
    with open(summary_txt, "w", encoding="utf-8") as log:
        log.write("=== BASELINE ===\n")
        log.write(f"👤 Unique users: {baseline_users:,}\n")
        log.write(f"🧾 Rows: {baseline_rows:,}\n")
        log.write(f"🔢 Synthetic user_id base start: {base_start_uid + POS_OFFSET}\n")
        log.write(f"Discovered genres ({len(GENRES)}): {GENRES}\n")
        log.write(f"POS_RATING={pos_rating} | ZERO_MODE={ZERO_MODE} | NEG_RATIO={NEG_RATIO} | RNG_SEED={RNG_SEED}\n")
        log.write("="*80 + "\n\n")

    grand_added = 0
    made_any = False
    pair_index = 0  # increments per unordered pair (g1,g2), including empty pairs

    # Iterate per unordered pair and per RUN size
    for g1, g2 in combinations(GENRES, 2):
        # books that have BOTH genres
        pos_books = [int(b) for b in all_books if g1 in book_to_set[b] and g2 in book_to_set[b]]
        n_pos = len(pos_books)

        if n_pos == 0:
            msg = f"okay! we dont have any pair of {g1.lower()}, {g2.lower()}"
            print(msg)
            with open(summary_txt, "a", encoding="utf-8") as log:
                log.write(msg + "\n")
            pair_index += 1
            continue

        pos_set = set(pos_books)
        neg_pool = [b for b in all_books if b not in pos_set]

        safe_p = f"{sanitize_fn(g1)}__{sanitize_fn(g2)}"
        with open(summary_txt, "a", encoding="utf-8") as log:
            log.write(f"🔗 Pair: {g1} + {g2} | positives (pair-books) = {n_pos} | neg_pool = {len(neg_pool)}\n")

        for run_idx, run_users in enumerate(RUN_USERS):
            # Allocate user ids uniquely for (pos_rating, pair_index, run_idx)
            start_uid = base_start_uid + POS_OFFSET + pair_index * (len(RUN_USERS) * BLOCK) + run_idx * BLOCK
            new_uids = list(range(start_uid, start_uid + run_users))

            # negatives set (once per (pair, run_idx)); every synthetic user of
            # this run shares the same negative books
            if ZERO_MODE == "all":
                neg_books_for_all_users = neg_pool
            elif ZERO_MODE == "sample":
                target_neg = min(len(neg_pool), NEG_RATIO * n_pos)
                # seed derived from (rating, pair, run) so each dataset is reproducible
                rng = random.Random(RNG_SEED + pos_rating * 1_000_000 + pair_index * 1000 + run_idx)
                neg_books_for_all_users = rng.sample(neg_pool, target_neg) if target_neg > 0 else []
            else:  # "none"
                neg_books_for_all_users = []

            n_neg = len(neg_books_for_all_users)

            # build synthetic rows: every new user rates every pair-book
            pos_rows = {
                USER_COL:   [uid for uid in new_uids for _ in range(n_pos)],
                BOOK_COL:   [b for _ in new_uids for b in pos_books],
                RATING_COL: [pos_rating] * (run_users * n_pos),
                GENRE_COL:  [book_to_genres.get(b, "") for _ in new_uids for b in pos_books],
            }

            parts = [pd.DataFrame(pos_rows)]
            rows_added = run_users * n_pos
            rows_pos = rows_added
            rows_neg = 0

            if ZERO_MODE in {"all", "sample"} and n_neg > 0:
                neg_rows = {
                    USER_COL:   [uid for uid in new_uids for _ in range(n_neg)],
                    BOOK_COL:   [b for _ in new_uids for b in neg_books_for_all_users],
                    RATING_COL: [NEG_RATING] * (run_users * n_neg),
                    GENRE_COL:  [book_to_genres.get(b, "") for _ in new_uids for b in neg_books_for_all_users],
                }
                parts.append(pd.DataFrame(neg_rows))
                rows_added += run_users * n_neg
                rows_neg = run_users * n_neg

            synth_df = pd.concat(parts, ignore_index=True)

            # combine and save; columns of df that the synthetic rows do not
            # provide are left missing for those rows
            combined = pd.concat([df, synth_df], ignore_index=True)
            new_users_total = combined[USER_COL].nunique()

            out_path = out_dir / f"fpair_{safe_p}_{run_users}u_pos{pos_rating}_neg{NEG_RATING if ZERO_MODE!='none' else 'NA'}_{ZERO_MODE}.csv"
            combined.to_csv(out_path, index=False)

            with open(summary_txt, "a", encoding="utf-8") as log:
                log.write(
                    f"  users={str(run_users):>5} → +rows={rows_added:>12,} "
                    f"(pos={rows_pos:,}, neg={rows_neg:,}) | "
                    f"new_rows={len(combined):,} | new_users={new_users_total:,} | "
                    f"outfile={out_path.name}\n"
                )

            rows_summary.append({
                "pos_rating": pos_rating,
                "pair": f"{g1} + {g2}",
                "g1": g1,
                "g2": g2,
                "run_users": run_users,
                "n_pos_books": n_pos,
                "n_neg_books_per_user": len(neg_books_for_all_users),
                "rows_added": rows_added,
                "rows_pos": rows_pos,
                "rows_neg": rows_neg,
                "zero_mode": ZERO_MODE,
                "neg_ratio": NEG_RATIO if ZERO_MODE=="sample" else None,
                "output_csv": str(out_path)
            })

            grand_added += rows_added
            made_any = True

        with open(summary_txt, "a", encoding="utf-8") as log:
            log.write("\n")

        pair_index += 1

    # write summary
    if rows_summary:
        pd.DataFrame(rows_summary).to_csv(summary_csv, index=False)

    with open(summary_txt, "a", encoding="utf-8") as log:
        log.write("="*80 + "\n")
        log.write(f"Grand total injected rows (all pairs, pos={pos_rating}): {grand_added:,}\n")
        log.write(f"Outputs folder: {out_dir}\n")
        log.write(f"Per-pair summary CSV: {summary_csv}\n")

    if not made_any:
        print(f"⚠️ No datasets were produced for pos={pos_rating}. Check genre names / columns.")
    else:
        print(f"\n✅ Done for pos={pos_rating}.")
        print("  • Datasets:", out_dir)
        print("  • Summary:", summary_txt)
        print("  • Summary CSV:", summary_csv)

def main():
    """Load INPUT_CSV, validate and normalise it, then build both variants.

    Raises ``ValueError`` when a required column is missing. Numeric columns
    are coerced strictly (bad values raise), missing genres become empty
    strings, and ``run_for_pos`` is called for positive ratings 5 and 7 with
    synthetic user ids starting right after the largest existing user id.
    """
    # ---------- Load once ----------
    df = pd.read_csv(INPUT_CSV)

    required = {USER_COL, BOOK_COL, RATING_COL, GENRE_COL}
    missing = required.difference(df.columns)
    if missing:
        raise ValueError(f"Input must contain columns {required}. Missing: {missing}")

    # hygiene: strict numeric coercion (user ids additionally downcast), text genres
    df[USER_COL] = pd.to_numeric(df[USER_COL], errors="raise", downcast="integer")
    for numeric_col in (BOOK_COL, RATING_COL):
        df[numeric_col] = pd.to_numeric(df[numeric_col], errors="raise")
    df[GENRE_COL] = df[GENRE_COL].fillna("").astype(str)

    first_free_uid = int(df[USER_COL].max()) + 1

    # Both variants (POS=5 and POS=7) are written to separate subfolders
    for rating in (5, 7):
        run_for_pos(df, pos_rating=rating, base_start_uid=first_free_uid)

# Run the full generation when this code is executed directly (as a script or
# as a notebook cell); importing it as a module does not trigger it.
if __name__ == "__main__":
    main()
