In [14]:
import csv
import numpy as np

In [29]:
def read_microdata(filename):
    # Open the file in read mode
    with open(filename, mode='r', newline='') as file:
        reader = csv.reader(file)
        first_line = next(reader)
        headers = first_line[1:]
        data_array = np.empty((0, len(headers)), dtype=float)
        micro_data_ids =[]
        # Iterate through each row in the CSV file
        for row in reader:
            # Process each row
            micro_data_ids.append(row[0])
            data_array = np.append(data_array, [np.array(row[1:], dtype=float)], axis=0)
    return {
        'headers':headers,
        'micro_data_ids':micro_data_ids,
        'data_array':data_array
    }

In [30]:
def read_constraints(filename):
    # Open the file in read mode
    with open(filename, mode='r', newline='') as file:
        reader = csv.reader(file)
        first_line = next(reader)
        headers = first_line[2:]
        data_array = np.empty((0, len(headers)), dtype=float)
        geo_data_ids =[]
        population_totals=[]
        # Iterate through each row in the CSV file
        for row in reader:
            # Process each row
            geo_data_ids.append(row[0])
            population_totals.append(row[1])
            data_array = np.append(data_array, [np.array(row[2:], dtype=float)], axis=0)
    return {
        'headers':headers,
        'geo_data_ids':geo_data_ids,
        'population_totals':population_totals,
        'data_array':data_array
    }

In [31]:
micro_data = read_microdata('testdata/microdata_encoded.csv')
constraint_data = read_constraints('testdata/constraint_targets.csv')

In [32]:
micro_data['headers'] == constraint_data['headers']

True

In [None]:
class SimulatedAnnealing:
    def __init__(self, attributes):
        self.pos = attributes['pos']
        self.geo_id = attributes['geo_id']
        self.population_size = attributes['population_size']
        self.constraints = attributes['constraints']
        self.micro_data = attributes['micro_data']
        rows, cols = self.micro_data.shape
        self.micro_rows = rows
        self.micro_cols = cols

        self.fraction = 1 / self.population_size
        print(self.fraction)

        # Create initial population
        self.population = [np.random.randint(self.micro_rows) for _ in range(int(self.population_size))]
        self.macro_data = np.zeros(self.micro_cols)

        for p in self.population:
            self.macro_data = self.macro_data + (self.micro_data[p] * self.fraction)

    def chi_squared_distance(self,):
        epsilon = 1e-10  # Avoid division by zero
        return np.sum((self.constraints - self.macro_data) ** 2 / (self.constraints + epsilon))

    def kl_divergence(self):
        epsilon = 1e-10  # Avoid log(0)
        return np.sum(self.constraints * np.log((self.constraints + epsilon) / (self.macro_data + epsilon)))

    pass

In [115]:

attributes = {'pos':0,
                'geo_id': constraint_data['geo_data_ids'][0],
                'population_size':float(constraint_data['population_totals'][0]),
                'constraints':constraint_data['data_array'],
                'micro_data':micro_data['data_array']
                }
test = SimulatedAnnealing(attributes)

8.814455707360071e-05


In [118]:
test.chi_squared_distance()

np.float64(7.893688397038704)