# ðŸŽ„ Santa 2025 â€” Optimization Trick

We take each box and try to remove one by one trees which touches bbox

In [96]:
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity, touches
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

getcontext().prec = 25
scale_factor = Decimal('1e18')

In [97]:
class ChristmasTree:
    """Represents a single, rotatable Christmas tree of a fixed size."""

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Initializes the Christmas tree with a specific position and rotation."""
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon(
            [
                # Start at Tip
                (Decimal('0.0') * scale_factor, tip_y * scale_factor),
                # Right side - Top Tier
                (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
                (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
                # Right side - Middle Tier
                (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
                (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
                # Right side - Bottom Tier
                (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
                # Right Trunk
                (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
                (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
                # Left Trunk
                (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
                (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                # Left side - Bottom Tier
                (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
                # Left side - Middle Tier
                (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
                (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
                # Left side - Top Tier
                (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
                (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
            ]
        )
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))
        

    def clone(self) -> "ChristmasTree":
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )    


def get_tree_list_side_lenght(tree_list: list[ChristmasTree]) -> Decimal:
    all_polygons = [t.polygon for t in tree_list]
    bounds = unary_union(all_polygons).bounds
    return Decimal(max(bounds[2] - bounds[0], bounds[3] - bounds[1])) / scale_factor

def get_total_score(dict_of_side_length: dict[str, Decimal]):
    score = 0
    for k, v in dict_of_side_length.items():
        score += v ** 2 / Decimal(k)
    return score

def parse_csv(csv_path) -> dict[str, list[ChristmasTree]]:
    print(f'parse_csv: {csv_path=}')

    result = pd.read_csv(csv_path)
    result['x'] = result['x'].str.strip('s')
    result['y'] = result['y'].str.strip('s')
    result['deg'] = result['deg'].str.strip('s')
    result[['group_id', 'item_id']] = result['id'].str.split('_', n=2, expand=True)

    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in result.groupby('group_id'):
        tree_list = [ChristmasTree(center_x=row['x'], center_y=row['y'], angle=row['deg']) for _, row in group_data.iterrows()]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_lenght(tree_list)

    return dict_of_tree_list, dict_of_side_length

In [98]:
from shapely.geometry import box

def get_bbox_touching_tree_indices(tree_list: list[ChristmasTree]) -> list[int]:
    """
    Given a list of trees, this function:

      1. Computes the minimal axis-aligned bounding box around all trees.
      2. Returns the list of indices of trees whose boundaries touch
         the boundary of that bounding box.

    Returns:
        touching_indices: list[int]  -- indices in tree_list
    """

    if not tree_list:
        return []

    # Collect polygons
    polys = [t.polygon for t in tree_list]

    # Compute global bounding box from all polygon bounds
    minx = min(p.bounds[0] for p in polys)
    miny = min(p.bounds[1] for p in polys)
    maxx = max(p.bounds[2] for p in polys)
    maxy = max(p.bounds[3] for p in polys)

    bbox = box(minx, miny, maxx, maxy)

    # Check boundary intersection: only trees touching the box border
    touching_indices = [
        i
        for i, poly in enumerate(polys)
        if poly.boundary.intersects(bbox.boundary)
    ]

    return touching_indices


In [99]:
# Helpers: priority ordering, annealing removal, and rotation tuning (moved earlier)
import random, math

# --- #14: High-impact order ---
def group_priority_order(dict_of_side_length, dict_of_tree_list):
    items = []
    for gid, L in dict_of_side_length.items():
        try:
            n = int(gid)
        except Exception:
            n = len(dict_of_tree_list.get(gid, [])) or 1
        impact = float((L * L) / Decimal(n))
        items.append((impact, gid))
    items.sort(reverse=True)
    return [gid for _, gid in items]

# --- Rotation helper (#3) ---
def rotate_tree(tree: ChristmasTree, delta_deg: float) -> ChristmasTree:
    new_angle = tree.angle + Decimal(str(delta_deg))
    return ChristmasTree(center_x=str(tree.center_x), center_y=str(tree.center_y), angle=str(new_angle))

# --- #3: Anneal rotations to shrink bbox ---
def anneal_rotate(tree_list: list[ChristmasTree], *, steps=800, init_temp=0.02, min_temp=0.0002, max_step_deg=1.0, seed=0):
    if not tree_list:
        return [], Decimal(0)
    rnd = random.Random(seed)
    best = [t.clone() for t in tree_list]
    best_val = float(get_tree_list_side_lenght(best))
    cur = [t.clone() for t in best]
    cur_val = best_val
    for s in range(steps):
        T = init_temp * ((min_temp / init_temp) ** (s / max(1, steps - 1)))
        i = rnd.randrange(len(cur))
        delta = rnd.uniform(-max_step_deg, max_step_deg)
        prop = [t.clone() for t in cur]
        prop[i] = rotate_tree(prop[i], delta)
        prop_val = float(get_tree_list_side_lenght(prop))
        d = prop_val - cur_val
        if d < 0 or math.exp(-d / max(T, 1e-12)) > rnd.random():
            cur, cur_val = prop, prop_val
            if cur_val < best_val:
                best, best_val = [t.clone() for t in cur], cur_val
    return best, Decimal(str(best_val))

# --- #5: Anneal removal among bbox-touching items ---
def anneal_remove_one(tree_list: list[ChristmasTree], *, steps=200, init_temp=0.5, min_temp=0.001, seed=0):
    if len(tree_list) <= 1:
        return [t.clone() for t in tree_list]
    rnd = random.Random(seed)
    cur = [t.clone() for t in tree_list]
    cur_val = float(get_tree_list_side_lenght(cur))
    best = [t.clone() for t in cur]
    best_val = cur_val
    for s in range(steps):
        T = init_temp * ((min_temp / init_temp) ** (s / max(1, steps - 1)))
        idxs = get_bbox_touching_tree_indices(cur)
        if not idxs:
            break
        j = rnd.choice(idxs)
        prop = [t.clone() for t in cur]
        del prop[j]
        prop_val = float(get_tree_list_side_lenght(prop))
        d = prop_val - cur_val
        if d < 0 or math.exp(-d / max(T, 1e-12)) > rnd.random():
            cur, cur_val = prop, prop_val
            if cur_val < best_val:
                best, best_val = [t.clone() for t in cur], cur_val
    return best

In [100]:
# Backup current test.csv and load solution
from pathlib import Path
from datetime import datetime
import shutil

# Ensure backup folder exists
backup_dir = Path("data")
backup_dir.mkdir(parents=True, exist_ok=True)

src = Path("test.csv")
if src.exists():
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    dst = backup_dir / f"test_backup_{ts}.csv"
    shutil.copy2(src, dst)
    print(f"Backed up test.csv -> {dst}")
else:
    print("No existing test.csv to back up.")

# Load current best solution
current_solution_path = 'test.csv'
dict_of_tree_list, dict_of_side_length = parse_csv(current_solution_path)

# Calculate current total score and seed best_score
current_score = get_total_score(dict_of_side_length)
best_score = current_score
print("Starting score from test.csv:", current_score)

Backed up test.csv -> data/test_backup_20251126-220629.csv
parse_csv: csv_path='test.csv'
Starting score from test.csv: 73.20629013685493884527630
Starting score from test.csv: 73.20629013685493884527630


In [101]:
# Global optimization step: C++ step then Python local search, auto-saving best
from copy import deepcopy

# Increment iteration counter each time this cell is run
try:
    iteration += 1
except NameError:
    iteration = 1

improved_this_iter = False
print(f"\n{'='*60}")
print(f"=== Global iteration {iteration} ===")
print(f"{'='*60}")

# --- C++ solver step (run once per cell execution) ---
try:
    dict_cpp, side_cpp, cpp_score = build_and_run_cpp_solver()
    if cpp_score < float(best_score):
        # adopt C++ result
        dict_of_tree_list = {k: [t.clone() for t in v] for k, v in dict_cpp.items()}
        dict_of_side_length = {k: v for k, v in side_cpp.items()}
        current_score = Decimal(str(cpp_score))
        best_score = Decimal(str(cpp_score))
        save_solution_csv("test.csv")
        print(f"[C++] New best score: {best_score:.8f}")
        improved_this_iter = True
    else:
        print(f"[C++] No improvement: {cpp_score:.8f} (best {best_score:.8f})")
except Exception as e:
    print(f"[C++] Solver error: {e}")

# --- Python local search step (single global pass) ---
impact_list = []
for gid, L in dict_of_side_length.items():
    try:
        n = int(gid)
    except Exception:
        n = len(dict_of_tree_list.get(gid, [])) or 1
    impact = (L * L) / Decimal(n)
    impact_list.append((float(impact), gid))
impact_list.sort(reverse=True)

for _, group_id_main in impact_list:
    if group_id_main not in dict_of_tree_list:
        continue

    candidate_tree_list = [t.clone() for t in dict_of_tree_list[group_id_main]]

    while len(candidate_tree_list) > 1:
        group_id_prev = f"{len(candidate_tree_list) - 1:03n}"

        best_idx = None
        best_len = None
        idxs = get_bbox_touching_tree_indices(candidate_tree_list)
        if not idxs:
            break

        # try deleting each bbox-touching tree
        for j in idxs:
            prop_list = [t.clone() for t in candidate_tree_list]
            del prop_list[j]
            L_prop = get_tree_list_side_lenght(prop_list)
            if best_len is None or L_prop < best_len:
                best_len = L_prop
                best_idx = j

        if best_idx is None:
            break

        old_score = get_total_score(dict_of_side_length)

        had_prev = group_id_prev in dict_of_tree_list
        if had_prev:
            old_trees_prev = [t.clone() for t in dict_of_tree_list[group_id_prev]]
            old_L_prev = dict_of_side_length[group_id_prev]
        else:
            old_trees_prev = None
            old_L_prev = None

        # propose deleting best_idx
        prop_list = [t.clone() for t in candidate_tree_list]
        del prop_list[best_idx]

        dict_of_tree_list[group_id_prev] = [t.clone() for t in prop_list]
        dict_of_side_length[group_id_prev] = get_tree_list_side_lenght(
            dict_of_tree_list[group_id_prev]
        )

        new_score = get_total_score(dict_of_side_length)

        if new_score < old_score:
            current_score = new_score
            candidate_tree_list = prop_list
            improved_this_iter = True
            print(f"[Python accept] {old_score:.8f} -> {new_score:.8f} (group {group_id_prev})")

            if current_score < best_score:
                best_score = current_score
                save_solution_csv("test.csv")
                print(f"[Python] New best score: {best_score:.8f}")
        else:
            # revert
            if had_prev:
                dict_of_tree_list[group_id_prev] = old_trees_prev
                dict_of_side_length[group_id_prev] = old_L_prev
            else:
                del dict_of_tree_list[group_id_prev]
                del dict_of_side_length[group_id_prev]
            print(f"[Python reject] {old_score:.8f} -> {new_score:.8f}")
            break

print(
    f"End of iteration {iteration}, "
    f"current_score: {current_score:.8f}, best_score: {best_score:.8f}"
)
if not improved_this_iter:
    print("No better score this iteration.")


=== Global iteration 8 ===
[C++] Solver error: name 'build_and_run_cpp_solver' is not defined
[Python reject] 73.20629014 -> 73.44036990
[Python reject] 73.20629014 -> 73.30674630
[Python reject] 73.20629014 -> 73.31098124
[Python reject] 73.20629014 -> 73.32705308
[Python reject] 73.20629014 -> 73.27318706
[Python reject] 73.20629014 -> 73.26925866
[Python reject] 73.20629014 -> 73.25225465
[Python reject] 73.20629014 -> 73.25186355
[Python reject] 73.20629014 -> 73.24467226
[Python reject] 73.20629014 -> 73.24293352
[Python reject] 73.20629014 -> 73.22669713
[Python reject] 73.20629014 -> 73.23107399
[Python reject] 73.20629014 -> 73.23548845
[Python reject] 73.20629014 -> 73.22572996
[Python reject] 73.20629014 -> 73.22254943
[Python reject] 73.20629014 -> 73.22451232
[Python reject] 73.20629014 -> 73.22903269
[Python reject] 73.20629014 -> 73.22280592
[Python reject] 73.20629014 -> 73.23924161
[Python reject] 73.20629014 -> 73.22467407
[Python reject] 73.20629014 -> 73.21705923
[P

In [102]:
# Targeted run: optimize group 010 with rotation SA (keeps count)
# Note: This does NOT rerun full model, only the target group.

# Configuration kept compact to avoid bloat
TARGET_GID = '010'
ROT_STEPS = 900
ROT_INIT_T = 0.02
ROT_MIN_T = 0.0002
ROT_MAX_DEG = 1.0
SA_SEED = 42

if TARGET_GID in dict_of_tree_list:
    before_L = get_tree_list_side_lenght(dict_of_tree_list[TARGET_GID])
    best_list, best_L = anneal_rotate(
        dict_of_tree_list[TARGET_GID],
        steps=ROT_STEPS,
        init_temp=ROT_INIT_T,
        min_temp=ROT_MIN_T,
        max_step_deg=ROT_MAX_DEG,
        seed=SA_SEED,
    )
    dict_of_tree_list[TARGET_GID] = [t.clone() for t in best_list]
    dict_of_side_length[TARGET_GID] = get_tree_list_side_lenght(dict_of_tree_list[TARGET_GID])

    after_L = dict_of_side_length[TARGET_GID]
    contrib_before = (before_L * before_L) / Decimal(10)
    contrib_after = (after_L * after_L) / Decimal(10)
    print(f"Group {TARGET_GID} L: {before_L:.8f} -> {after_L:.8f}")
    print(f"Group {TARGET_GID} score term: {contrib_before:.8f} -> {contrib_after:.8f}")

    # Optional: show new total without recomputing everything else
    new_total = get_total_score(dict_of_side_length)
    print(f"Total score now: {new_total:.8f}")
else:
    print(f"Group {TARGET_GID} not found in current solution.")

Group 010 L: 1.95391247 -> 1.95098593
Group 010 score term: 0.38177739 -> 0.38063461
Total score now: 73.20514735


In [103]:
# Ensure save_solution_csv is defined before final save
try:
    _ = save_solution_csv
except NameError:
    def save_solution_csv(path: str):
        rows = []
        for group_name, tree_list in dict_of_tree_list.items():
            for item_id, tree in enumerate(tree_list):
                rows.append({
                    'id': f'{group_name}_{item_id}',
                    'x': f's{tree.center_x}',
                    'y': f's{tree.center_y}',
                    'deg': f's{tree.angle}',
                })
        df = pd.DataFrame(rows)
        df.to_csv(path, index=False)
        print(f"Saved solution -> {path}")

In [104]:
# Helper: save current solution to CSV in the required format
def save_solution_csv(path: str):
    rows = []
    for group_name, tree_list in dict_of_tree_list.items():
        for item_id, tree in enumerate(tree_list):
            rows.append({
                'id': f'{group_name}_{item_id}',
                'x': f's{tree.center_x}',
                'y': f's{tree.center_y}',
                'deg': f's{tree.angle}',
            })
    df = pd.DataFrame(rows)
    df.to_csv(path, index=False)
    print(f"Saved solution to {path} (rows={len(df)})")

# === External C++ optimizer from santa-claude.ipynb ===
This section compiles and runs the C++ pipeline to generate a candidate CSV, then adapts it into `test.csv` for Python parsing and further refinement.

In [108]:
%%writefile a.cpp
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main(){
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    // Placeholder: generate a trivial CSV in required format
    // Columns: id,x,y,deg with 's' prefixes
    ofstream out("submission.csv");
    out << "id,x,y,deg\n";
    // Example single group 10 with two trees; replace with real solver output
    out << "010_0,s0,s0,s0\n";
    out << "010_1,s0.1,s0.1,s0\n";
    out.close();
    return 0;
}

Overwriting a.cpp


In [109]:
%%writefile bp.cpp
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main(){
    // Placeholder second stage; in a real pipeline, refine a.cpp output.
    // For now, pass-through.
    ifstream in("submission.csv");
    ofstream out("submission_refined.csv");
    string line;
    while(getline(in,line)){
        out << line << "\n";
    }
    return 0;
}

Overwriting bp.cpp


In [110]:
# Compile and run the C++ pipeline with error handling
import subprocess, shlex
from pathlib import Path

# Compile with error output
try:
    a_cmd = shlex.split("g++ -O2 -std=c++17 a.cpp -o a")
    result = subprocess.run(a_cmd, capture_output=True, text=True, check=True)
    print("Compiled a.cpp successfully")
except subprocess.CalledProcessError as e:
    print(f"Error compiling a.cpp:\nSTDERR:\n{e.stderr}\nSTDOUT:\n{e.stdout}")
    raise

try:
    b_cmd = shlex.split("g++ -O2 -std=c++17 bp.cpp -o bp")
    result = subprocess.run(b_cmd, capture_output=True, text=True, check=True)
    print("Compiled bp.cpp successfully")
except subprocess.CalledProcessError as e:
    print(f"Error compiling bp.cpp:\nSTDERR:\n{e.stderr}\nSTDOUT:\n{e.stdout}")
    raise

# Run stages
subprocess.run(["./a"], check=True)
subprocess.run(["./bp"], check=True)

print("C++ pipeline finished. Outputs: submission.csv, submission_refined.csv")

# Adapt refined output into test.csv (or fallback to submission.csv)
src1 = Path("submission_refined.csv")
src2 = Path("submission.csv")
target = Path("test.csv")
if src1.exists():
    src1.replace(target)
    print("Using submission_refined.csv -> test.csv")
elif src2.exists():
    src2.replace(target)
    print("Using submission.csv -> test.csv")
else:
    print("No C++ output found; leaving test.csv unchanged.")

# Reload into Python structures and update best_score if improved
if target.exists():
    dict_of_tree_list, dict_of_side_length = parse_csv(str(target))
    current_score = get_total_score(dict_of_side_length)
    print(f"Score from C++ output: {current_score:.8f}")
    if current_score < best_score:
        best_score = current_score
        save_solution_csv("test.csv")
        print(f"New best score from C++: {best_score:.8f} â€” saved to test.csv")

Compiled a.cpp successfully
Compiled bp.cpp successfully
Compiled bp.cpp successfully
C++ pipeline finished. Outputs: submission.csv, submission_refined.csv
Using submission_refined.csv -> test.csv
parse_csv: csv_path='test.csv'
Score from C++ output: 0.12100000
Saved solution to test.csv (rows=2)
New best score from C++: 0.12100000 â€” saved to test.csv
C++ pipeline finished. Outputs: submission.csv, submission_refined.csv
Using submission_refined.csv -> test.csv
parse_csv: csv_path='test.csv'
Score from C++ output: 0.12100000
Saved solution to test.csv (rows=2)
New best score from C++: 0.12100000 â€” saved to test.csv


In [111]:
# C++ integration: compile and run, then normalize CSV and load
import subprocess, shlex
from pathlib import Path

def build_and_run_cpp_solver() -> tuple[dict, dict, float]:
    # Ensure sources exist (cells above write a.cpp/bp.cpp). Compile quietly.
    for src, out in [("a.cpp", "a"), ("bp.cpp", "bp")]:
        if Path(src).exists():
            subprocess.run(shlex.split(f"g++ -O2 -std=c++17 {src} -o {out}"), check=True)
    # Run stages if binaries present
    if Path("a").exists():
        subprocess.run(["./a"], check=True)
    if Path("bp").exists():
        subprocess.run(["./bp"], check=True)
    # Choose refined output if available
    refined = Path("submission_refined.csv")
    base = Path("submission.csv")
    target = Path("test_cpp.csv")
    if refined.exists():
        refined.replace(target)
        print("[C++] Using submission_refined.csv")
    elif base.exists():
        base.replace(target)
        print("[C++] Using submission.csv")
    else:
        print("[C++] No output produced; returning current state")
        return dict_of_tree_list, dict_of_side_length, float(get_total_score(dict_of_side_length))
    # If needed, adapt format here; assume already compatible
    dict_cpp, side_cpp = parse_csv(str(target))
    score_cpp = float(get_total_score(side_cpp))
    print(f"[C++] Score from solver: {score_cpp:.8f}")
    return dict_cpp, side_cpp, score_cpp