In [29]:
# Load and evaluate the existing test.csv to verify the score
try:
    test_df = pd.read_csv('test.csv')
    print("Loaded test.csv")
    
    # Parse the 's' prefix
    def parse_s(val):
        return float(val.replace('s', ''))
    
    test_df['x'] = test_df['x'].apply(parse_s)
    test_df['y'] = test_df['y'].apply(parse_s)
    test_df['deg'] = test_df['deg'].apply(parse_s)
    
    # Reconstruct trees and calculate score
    total_test_score = 0
    for n in range(1, 201):
        # Get rows for this N
        # The ID format is N_i, e.g., 001_0
        # We need to filter by the prefix
        prefix = f"{n:03d}_"
        rows = test_df[test_df['id'].str.startswith(prefix)]
        
        if len(rows) != n:
            print(f"Warning: N={n} has {len(rows)} rows, expected {n}")
            continue
            
        trees = []
        for _, row in rows.iterrows():
            trees.append(ChristmasTree(row['x'], row['y'], row['deg']))
            
        side = get_bounds(trees)
        score_n = (side ** 2) / n
        total_test_score += score_n
        
    print(f"Score of test.csv: {total_test_score:.4f}")
    
except Exception as e:
    print(f"Could not evaluate test.csv: {e}")

Loaded test.csv
Score of test.csv: 74.8729


In [30]:
## 2. Setup

import math
import random
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from tqdm.notebook import tqdm

# Set random seed
SEED = 2025
random.seed(SEED)
np.random.seed(SEED)

# High precision for coordinates
getcontext().prec = 50

print("Libraries imported and seed set.")

Libraries imported and seed set.


In [31]:
## 3. Load Data

# Load the sample submission to understand the required output format
try:
    sample_sub = pd.read_csv('test.csv')
    print("Sample Submission Shape:", sample_sub.shape)
    print(sample_sub.head())
except FileNotFoundError:
    print("Sample submission not found, proceeding without it.")

Sample Submission Shape: (20100, 4)
      id               x                y               deg
0  001_0  s78.5359867242  s-56.9603607071  s-134.9992255229
1  002_0   s0.3206749300    s0.2132944200    s23.6308512866
2  002_1   s0.6288588000    s0.7362252000  s-156.3702262414
3  003_0   s0.7246789300    s0.3388953800   s115.0631660249
4  003_1   s0.3593162779    s0.8427068208   s156.2378537977


## 4. Baseline Strategy

We define the `GreedyPacker` class here. It encapsulates the logic for placing a single tree into an existing configuration.

In [32]:
class GreedyPacker:
    def __init__(self, n_trials=100, step_size=0.2, fine_step=0.02):
        self.n_trials = n_trials
        self.step_size = step_size
        self.fine_step = fine_step

    def _generate_weighted_angle(self):
        """
        Generates a random angle with a distribution weighted by abs(sin(2*angle)).
        This helps place more trees in corners (diagonals).
        """
        while True:
            angle = random.uniform(0, 2 * math.pi)
            if random.uniform(0, 1) < abs(math.sin(2 * angle)):
                return angle

    def place_next_tree(self, existing_trees, tree_class):
        """Finds the best position for the next tree given existing trees."""
        if not existing_trees:
            return tree_class(0, 0, 0)

        existing_polys = [t.polygon for t in existing_trees]
        tree_index = STRtree(existing_polys)
        
        # Calculate current bounds and center
        minx, miny, maxx, maxy = unary_union(existing_polys).bounds
        center_x = (minx + maxx) / 2
        center_y = (miny + maxy) / 2
        
        best_tree = None
        min_metric = float('inf')

        for _ in range(self.n_trials):
            # Random angle for the tree itself
            angle = random.uniform(0, 360)
            
            # Weighted approach angle (bias towards diagonals)
            approach_angle = self._generate_weighted_angle()
            vx, vy = math.cos(approach_angle), math.sin(approach_angle)
            
            # Start far away
            radius = max(maxx - minx, maxy - miny) + 10.0
            candidate = tree_class(0, 0, angle)
            
            # Move in
            current_r = radius
            collision = False
            
            # Coarse search
            while current_r > 0:
                px, py = center_x + current_r * vx, center_y + current_r * vy
                candidate.update_position(px, py, angle)
                
                query_indices = tree_index.query(candidate.polygon)
                if any(candidate.polygon.intersects(existing_polys[i]) for i in query_indices):
                    collision = True
                    break
                current_r -= self.step_size
            
            # Fine tune
            if collision:
                current_r += self.step_size
                while True:
                    current_r -= self.fine_step
                    px, py = center_x + current_r * vx, center_y + current_r * vy
                    candidate.update_position(px, py, angle)
                    
                    query_indices = tree_index.query(candidate.polygon)
                    if any(candidate.polygon.intersects(existing_polys[i]) for i in query_indices):
                        # Collision found, step back once and stop
                        current_r += self.fine_step
                        px, py = center_x + current_r * vx, center_y + current_r * vy
                        candidate.update_position(px, py, angle)
                        break
            else:
                candidate.update_position(center_x, center_y, angle)

            # Metric: Minimize the side length of the new bounding box
            t_minx, t_miny, t_maxx, t_maxy = candidate.polygon.bounds
            new_minx = min(minx, t_minx)
            new_miny = min(miny, t_miny)
            new_maxx = max(maxx, t_maxx)
            new_maxy = max(maxy, t_maxy)
            
            new_side = max(new_maxx - new_minx, new_maxy - new_miny)
            
            # Tie-breaker: distance to center
            dist_sq = (px - center_x)**2 + (py - center_y)**2
            
            metric = new_side + (dist_sq * 1e-6)
            
            if metric < min_metric:
                min_metric = metric
                best_tree = tree_class(px, py, angle)
                
        return best_tree

## 5. Feature Engineering Module

Here we define the geometric features of the problem: the `ChristmasTree` class and helper functions for bounding boxes.

In [33]:
class ChristmasTree:
    """Represents a single, rotatable Christmas tree."""
    def __init__(self, center_x=0, center_y=0, angle=0):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)
        self.polygon = self._create_polygon()

    def _create_polygon(self):
        # Tree dimensions
        coords = [
            (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5), (0.2, 0.25), (0.1, 0.25),
            (0.35, 0.0), (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2), (-0.075, 0.0),
            (-0.35, 0.0), (-0.1, 0.25), (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5)
        ]
        poly = Polygon(coords)
        rotated = affinity.rotate(poly, self.angle, origin=(0, 0))
        return affinity.translate(rotated, xoff=self.center_x, yoff=self.center_y)

    def update_position(self, x, y, angle):
        self.center_x = x
        self.center_y = y
        self.angle = angle
        self.polygon = self._create_polygon()

def get_bounds(trees):
    if not trees: return 0
    minx, miny, maxx, maxy = unary_union([t.polygon for t in trees]).bounds
    return max(maxx - minx, maxy - miny)

## 6. Model Training (Optional)

For this geometric packing problem, standard supervised learning is less applicable than search algorithms. However, one could train a model to predict the optimal *order* of placement or the optimal *angle* given the current boundary shape. We skip this for the baseline.

In [34]:
# Placeholder for ML model training
# model = LGBMRegressor(...)
# model.fit(X_train, y_train)

## 7. Optimization Strategy

We define a modular optimization function. Currently, it's a placeholder for a more advanced local search (e.g., trying to wiggle trees after placement).

In [35]:
def optimize_packing(trees, params):
    """
    Simulated Annealing with Rotation and Translation.
    Tries to perturb trees to minimize the bounding box side length.
    """
    if not trees: return trees
    
    iterations = params.get('iterations', 1000)
    
    # SA parameters
    initial_temp = params.get('initial_temp', 0.1)
    final_temp = params.get('final_temp', 1e-6)
    alpha = (final_temp / initial_temp) ** (1 / iterations) if iterations > 0 else 0.99
    
    # Step sizes
    max_step_size = params.get('step_size', 0.5)
    max_angle_step = params.get('angle_step', 10.0)
    compression = params.get('compression', 0.02)
    
    # Pre-calculate bounds of all trees
    polys = [t.polygon for t in trees]
    minxs = [p.bounds[0] for p in polys]
    minys = [p.bounds[1] for p in polys]
    maxxs = [p.bounds[2] for p in polys]
    maxys = [p.bounds[3] for p in polys]
    
    current_minx, current_miny = min(minxs), min(minys)
    current_maxx, current_maxy = max(maxxs), max(maxys)
    current_side = max(current_maxx - current_minx, current_maxy - current_miny)
    
    current_temp = initial_temp
    
    # Keep track of best solution found
    best_side = current_side
    best_state = [(t.center_x, t.center_y, t.angle) for t in trees]

    for i in range(iterations):
        # Dynamic step size
        progress = i / iterations
        step_size = max_step_size * (1 - 0.9 * progress) # Decay to 10%
        angle_step = max_angle_step * (1 - 0.9 * progress)
        
        idx = random.randint(0, len(trees) - 1)
        tree = trees[idx]
        
        orig_x, orig_y, orig_angle = tree.center_x, tree.center_y, tree.angle
        orig_bounds = (minxs[idx], minys[idx], maxxs[idx], maxys[idx])
        
        # Center of current bounding box
        center_x = (current_minx + current_maxx) / 2
        center_y = (current_miny + current_maxy) / 2

        # Propose move
        if random.random() < 0.7: # Translation
            dx = (random.random() - 0.5) * step_size
            dy = (random.random() - 0.5) * step_size
            # Bias to center (Gravity)
            dx += (center_x - tree.center_x) * compression
            dy += (center_y - tree.center_y) * compression
            
            new_x, new_y, new_angle = tree.center_x + dx, tree.center_y + dy, tree.angle
        else: # Rotation
            new_x, new_y = tree.center_x, tree.center_y
            new_angle = tree.angle + (random.random() - 0.5) * angle_step
            
        tree.update_position(new_x, new_y, new_angle)
        
        # Check collision
        collision = False
        t_bounds = tree.polygon.bounds
        
        for j, other in enumerate(trees):
            if idx == j: continue
            # Fast bound check
            o_bounds = (minxs[j], minys[j], maxxs[j], maxys[j])
            if (t_bounds[0] > o_bounds[2] or t_bounds[2] < o_bounds[0] or 
                t_bounds[1] > o_bounds[3] or t_bounds[3] < o_bounds[1]):
                continue
                
            if tree.polygon.intersects(other.polygon):
                collision = True
                break
        
        if collision:
            tree.update_position(orig_x, orig_y, orig_angle)
            continue
            
        # Check metric
        minxs[idx], minys[idx], maxxs[idx], maxys[idx] = t_bounds
        
        new_global_minx, new_global_miny = min(minxs), min(minys)
        new_global_maxx, new_global_maxy = max(maxxs), max(maxys)
        new_side = max(new_global_maxx - new_global_minx, new_global_maxy - new_global_miny)
        
        delta = new_side - current_side
        
        accept = False
        if delta < 0:
            accept = True
        else:
            # Metropolis criterion
            if random.random() < math.exp(-delta / current_temp):
                accept = True
                
        if accept:
            current_side = new_side
            current_minx, current_miny, current_maxx, current_maxy = new_global_minx, new_global_miny, new_global_maxx, new_global_maxy
            
            if current_side < best_side:
                best_side = current_side
                best_state = [(t.center_x, t.center_y, t.angle) for t in trees]
        else:
            # Revert
            tree.update_position(orig_x, orig_y, orig_angle)
            minxs[idx], minys[idx], maxxs[idx], maxys[idx] = orig_bounds
            
        current_temp *= alpha
        
    # Restore best state
    for i, t in enumerate(trees):
        t.update_position(*best_state[i])
            
    return trees

## 8. Submission Generation

This section runs the full pipeline and generates the submission file.

In [36]:
# Load baseline from test.csv
known_solutions = {}
try:
    print("Loading baseline from test.csv...")
    baseline_df = pd.read_csv('test.csv')
    
    def parse_s(val):
        return float(str(val).replace('s', ''))
    
    baseline_df['x'] = baseline_df['x'].apply(parse_s)
    baseline_df['y'] = baseline_df['y'].apply(parse_s)
    baseline_df['deg'] = baseline_df['deg'].apply(parse_s)
    
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        rows = baseline_df[baseline_df['id'].str.startswith(prefix)]
        if len(rows) == n:
            trees = []
            for _, row in rows.iterrows():
                trees.append(ChristmasTree(row['x'], row['y'], row['deg']))
            known_solutions[n] = trees
            
    print(f"Loaded {len(known_solutions)} configurations from test.csv")
    
except Exception as e:
    print(f"Could not load baseline: {e}")

packer = GreedyPacker(n_trials=200, step_size=0.2, fine_step=0.01)
all_solutions = {}
current_trees = [] # Start with 0 trees
submission_rows = []

improvements = 0

for n in tqdm(range(1, 201), desc="Processing"):
    # 1. Generate Candidate: Try to build N from our best N-1
    base_trees = [ChristmasTree(t.center_x, t.center_y, t.angle) for t in current_trees]
    
    new_tree = packer.place_next_tree(base_trees, ChristmasTree)
    candidate_trees = base_trees + [new_tree]
    
    # Optimize Candidate (Fast pass)
    candidate_trees = optimize_packing(candidate_trees, {'iterations': 500, 'step_size': 0.2, 'angle_step': 10.0, 'initial_temp': 0.1})
        
    # Score Candidate
    side_candidate = get_bounds(candidate_trees)
    score_candidate = (side_candidate ** 2) / n
    
    # 2. Get Baseline for N
    best_trees_n = candidate_trees
    best_score_n = score_candidate
    source = "Greedy"
    
    if n in known_solutions:
        known_trees = known_solutions[n]
        side_known = get_bounds(known_trees)
        score_known = (side_known ** 2) / n
        
        # Compare (Lower is better)
        if score_known < score_candidate - 1e-9:
            best_trees_n = known_trees
            best_score_n = score_known
            source = "Baseline"
            
            # Optimize Baseline (Aggressive pass)
            # We try to improve the existing good solution
            optimized_known = [ChristmasTree(t.center_x, t.center_y, t.angle) for t in known_trees]
            optimized_known = optimize_packing(optimized_known, {
                'iterations': 15000, 
                'step_size': 0.5, 
                'angle_step': 10.0, 
                'initial_temp': 1.0,
                'compression': 0.05
            })
            
            side_opt = get_bounds(optimized_known)
            score_opt = (side_opt ** 2) / n
            
            if score_opt < score_known - 1e-9:
                best_trees_n = optimized_known
                best_score_n = score_opt
                source = "Baseline+Opt"
                improvements += 1
        else:
            # Our greedy generation beat the baseline!
            improvements += 1
            
    # 3. Update State
    current_trees = best_trees_n
    all_solutions[n] = current_trees
    
    # 4. Prepare Submission Rows
    for i, tree in enumerate(current_trees):
        submission_rows.append([
            f"{n:03d}_{i}", 
            f"s{tree.center_x:.10f}", 
            f"s{tree.center_y:.10f}", 
            f"s{tree.angle:.10f}"
        ])

print(f"Processing complete. Improvements over baseline/initial: {improvements}")

df_sub = pd.DataFrame(submission_rows, columns=['id', 'x', 'y', 'deg'])
df_sub.to_csv('submission.csv', index=False)
print("Submission generated.")

Loading baseline from test.csv...
Loaded 200 configurations from test.csv


Processing:   0%|          | 0/200 [00:00<?, ?it/s]

Processing complete. Improvements over baseline/initial: 3
Submission generated.


## 9. Evaluation Helper

Calculate the local score to estimate leaderboard performance.

In [None]:
import os
import datetime

# Calculate final score
final_score = 0
for n, trees in all_solutions.items():
    side = get_bounds(trees)
    final_score += (side ** 2) / n

print(f"Final Score: {final_score:.10f}")

# Compare with original
if 'total_test_score' in globals():
    print(f"Original Score: {total_test_score:.10f}")
    if final_score < total_test_score:
        diff = total_test_score - final_score
        print(f"SUCCESS: Score improved by {diff:.10f}!")
        
        # 1. Save copy with detailed name
        os.makedirs('Data', exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        detailed_name = f"Data/submission_score{final_score:.2f}_improved{diff:.2f}_{timestamp}.csv"
        df_sub.to_csv(detailed_name, index=False)
        print(f"Saved backup: {detailed_name}")
        
        # 2. Overwrite test.csv
        df_sub.to_csv('test.csv', index=False)
        print("Overwrote test.csv")
        
        # 3. Submit to Kaggle
        message = f"Improved score {final_score:.6f} (was {total_test_score:.6f})"
        print("Submitting to Kaggle...")
        !kaggle competitions submit -c santa-2025 -f submission.csv -m "{message}"

        # 4. Git Commit and Push
        print("Committing and pushing to Git...")
        !git add .
        !git commit -m "{message}"
        !git push
        
    else:
        print(f"No improvement (Current: {final_score:.10f} >= Original: {total_test_score:.10f}).")
else:
    print("Original score not found. Run the first cell to load test.csv baseline.")

Final Score: 74.8728986696
Original Score: 74.8729012789
SUCCESS: Score improved by 0.0000026094!
Saved backup: Data/submission_score74.87_improved0.00_20251122_141349.csv
Overwrote test.csv
Submitting to Kaggle...
100%|██████████████████████████████████████| 0.98M/0.98M [00:00<00:00, 3.11MB/s]
100%|██████████████████████████████████████| 0.98M/0.98M [00:00<00:00, 3.11MB/s]


## 10. Experiment Playground

Use this section to tune hyperparameters like `n_trials` or `step_size`.

In [None]:
PARAMS = {
    'n_trials': 100,
    'step_size': 0.1,
    'fine_step': 0.01
}
# packer = GreedyPacker(**PARAMS)