# In [2]:
import numpy as np

class SyntheticPopulationGenerator:
    """Pick a population of individuals whose feature totals match a target.

    The search is a Metropolis-style annealing loop: each step proposes
    swapping one selected individual for a randomly drawn one and accepts
    the swap depending on how it changes the L1 distance between the
    population's summed features and ``target_distribution``.

    The best (lowest-error) selection seen so far is tracked in
    ``best_idx`` / ``best_error`` and reported by :meth:`get_results`.
    """

    def __init__(self, target_distribution, population_size, individual_features, initial_temperature):
        """
        Args:
            target_distribution: 1D np.array of desired category counts
            population_size: Number of individuals to select
            individual_features: Binary matrix (individuals x features)
            initial_temperature: Starting temperature for annealing
        """
        # Coerce to arrays so plain nested lists work with fancy indexing too.
        self.target_distribution = np.asarray(target_distribution)
        self.population_size = population_size
        self.individual_features = np.asarray(individual_features)
        self.temperature = initial_temperature

        # Initialize with random individuals (allow duplicates)
        self.selected_indices = np.random.choice(
            len(self.individual_features),
            size=population_size,
            replace=True,
        )
        # Fancy indexing copies the rows, so later row assignments never
        # write back into ``individual_features``.
        self.current_population = self.individual_features[self.selected_indices]
        self.current_counts = self.current_population.sum(axis=0)  # Sum features

        # Best state seen so far; starts as the initial random selection.
        self.best_idx = self.selected_indices.copy()
        self.best_error = self.calculate_fitness()

    def _l1_error(self, counts):
        """Return the L1 distance between ``counts`` and the target."""
        return np.sum(np.abs(counts - self.target_distribution))

    def _accept(self, fitness_delta):
        """Apply the Metropolis acceptance rule to a proposed error change."""
        if fitness_delta < 0:
            return True
        if self.temperature <= 0:
            # Zero-temperature limit: never accept a worsening move, and
            # avoid dividing by zero. Neutral moves are still accepted,
            # matching exp(0) == 1 in the positive-temperature case.
            return fitness_delta == 0
        return np.random.rand() < np.exp(-fitness_delta / self.temperature)

    def calculate_fitness(self):
        """Compute L1 distance between current and target distributions"""
        return self._l1_error(self.current_counts)

    def optimization_step(self):
        """Propose one swap and accept or reject it.

        Returns:
            The L1 error of the population after the step (unchanged when
            the proposal is rejected).
        """
        if self.population_size == 0:
            # Nothing to swap in an empty population.
            return self.calculate_fitness()

        # Select random individual to potentially replace
        replace_idx = np.random.randint(0, self.population_size)
        # Select candidate individual from full dataset; keep its index so
        # ``selected_indices`` can be updated on acceptance.
        candidate_idx = np.random.randint(0, len(self.individual_features))

        outgoing = self.current_population[replace_idx]
        candidate = self.individual_features[candidate_idx]

        # Build the proposed counts out of place: the current state is only
        # modified once the move is accepted, so a rejection needs no undo.
        potential_counts = self.current_counts - outgoing + candidate

        # Compare against the error of the *current full population*, not
        # the population with the outgoing individual already removed.
        current_fitness = self.calculate_fitness()
        potential_fitness = self._l1_error(potential_counts)
        fitness_delta = potential_fitness - current_fitness

        if self._accept(fitness_delta):
            self.current_counts = potential_counts
            self.current_population[replace_idx] = candidate
            self.selected_indices[replace_idx] = candidate_idx

            if potential_fitness < self.best_error:
                self.best_error = potential_fitness
                # Copy: ``selected_indices`` keeps changing in later steps.
                self.best_idx = self.selected_indices.copy()

        return self.calculate_fitness()

    def get_results(self):
        """Return the best selection found so far and its L1 error."""
        return {
            'selected_indices': self.best_idx,
            'error': self.best_error
        }