In [None]:
def iterative_proportional_fitting(base_population, marginal_totals, max_iterations=1000, tolerance=1e-6):
    """Reweight ``base_population`` to match ``marginal_totals`` by IPF (raking).

    Every record starts with a weight of 1.0. One iteration is a full sweep
    over all characteristics: for each characteristic, the weights of the
    records in a category are scaled so that the category's weighted total
    equals its target. Sweeps repeat until every category of every
    characteristic is within ``tolerance`` of its target, or until
    ``max_iterations`` sweeps have run. The fitted weights are finally
    rescaled to sum to 1, so each weight is the record's share of the
    synthetic population (multiply by the grand total to get counts).

    Args:
        base_population: Iterable of dict-like records. Each record must
            contain every characteristic named in ``marginal_totals``.
            The records are copied; the inputs are not modified. An existing
            ``'weight'`` key is overwritten in the copy.
        marginal_totals: Mapping ``characteristic -> {category: target}``.
            Records whose category is not listed for a characteristic keep
            their weight when that characteristic is fitted.
        max_iterations: Maximum number of sweeps over all characteristics.
        tolerance: Largest allowed absolute difference between a weighted
            category total and its target for the fit to count as converged.

    Returns:
        A list of new dicts, one per input record, each with a ``'weight'``
        entry. If there is nothing to fit (no marginals, or
        ``max_iterations <= 0``) the weights are left at 1.0. If the fit
        does not converge, the weights of the last sweep are returned.

    Raises:
        KeyError: If a record lacks a characteristic in ``marginal_totals``.
    """
    # Work on shallow copies so the caller's records are never mutated.
    synthetic_population = [dict(record) for record in base_population]
    for record in synthetic_population:
        record['weight'] = 1.0

    # Nothing to fit: keep the unit weights.
    if not marginal_totals or max_iterations <= 0:
        return synthetic_population

    def weighted_totals(char):
        """Return the current sum of weights per category of ``char``."""
        totals = {}
        for record in synthetic_population:
            value = record[char]
            totals[value] = totals.get(value, 0.0) + record['weight']
        return totals

    for _ in range(max_iterations):
        # Largest |current - target| seen during this sweep, measured
        # before each characteristic is adjusted.
        largest_gap = 0.0

        for char, targets in marginal_totals.items():
            current_totals = weighted_totals(char)

            adjustment_factors = {}
            for value, target_total in targets.items():
                # A category with no records has a current total of 0.
                current_total = current_totals.get(value, 0.0)
                largest_gap = max(largest_gap, abs(current_total - target_total))
                # A category without any weight cannot be rescaled;
                # skipping it also avoids dividing by zero.
                if current_total > 0:
                    adjustment_factors[value] = target_total / current_total

            for record in synthetic_population:
                factor = adjustment_factors.get(record[char])
                if factor is not None:
                    record['weight'] *= factor

        # Converged only when *every* characteristic matched in one sweep;
        # fitting one characteristic generally disturbs the others.
        if largest_gap < tolerance:
            break

    # Normalise once, after fitting, so the weights sum to 1. Doing this
    # inside the loop would make the totals incomparable with the targets.
    total_weight = sum(record['weight'] for record in synthetic_population)
    if total_weight > 0:
        for record in synthetic_population:
            record['weight'] /= total_weight

    return synthetic_population



In [None]:
# Demo: reweight a five-person seed population described by age, gender
# and income so that it matches the marginal totals given below.
base_population = [
    dict(age=20, gender='male', income='low'),
    dict(age=30, gender='female', income='high'),
    dict(age=40, gender='male', income='medium'),
    dict(age=50, gender='female', income='low'),
    dict(age=60, gender='male', income='high'),
]

# Target totals per category, one mapping per characteristic.
marginal_totals = dict(
    age=dict.fromkeys((20, 30, 40, 50, 60), 1),  # one person per age
    gender=dict(male=3, female=2),
    income=dict(low=2, medium=1, high=2),
)

# Run the fit; each returned record carries an extra 'weight' entry.
synthetic_population = iterative_proportional_fitting(
    base_population,
    marginal_totals,
)

# Show the weighted records, one per line.
for record in synthetic_population:
    print(record)
