In [42]:
using DelimitedFiles

In [43]:
"""
    load_csv_with_headers_matrix(filename::String)

Read a comma-separated file whose first line is a header row and whose remaining
rows contain only integers.

# Arguments
- `filename::String`: path of the file to read.

# Returns
- `headers`: the pieces of the first line, split on `","`.
- `data_matrix`: a `Matrix{Int}` holding every row after the header.
"""
function load_csv_with_headers_matrix(filename::String)
    # The first line carries the column names.
    header_line = readline(filename)
    headers = split(header_line, ",")

    # Parse the remainder of the file as integers, skipping the header row.
    data_matrix = readdlm(filename, ',', Int; skipstart=1)

    return headers, data_matrix
end;

In [44]:
# Load the census table and the survey table; each call returns the header
# names and an integer matrix of the remaining rows.
cencus_headers, cencus_data = load_csv_with_headers_matrix("Data/toy_cencus.csv");
survay_headers, survay_data = load_csv_with_headers_matrix("Data/toy_survay.csv");
# Sanity check: prints `true` when both files have identical header rows, which
# later cells rely on when comparing survey columns against census columns.
println(cencus_headers == survay_headers)

true


In [45]:
# Census row (area) the synthetic population is built for.
area = 1
# Population size for that area: the sum of its first two census columns
# (presumably those two columns partition the population - confirm against the data).
total = sum(cencus_data[area, 1:2])
survey_data_length = size(survay_data, 1);
# Initial synthetic population: `total` survey row indices drawn uniformly at
# random, with replacement.
random_indices = rand(1:survey_data_length, total);

In [46]:
cols = length(survay_headers)
# Column-wise totals of the sampled individuals, as a 1×cols integer row matrix.
# Indexing with `random_indices` repeats a survey row once per time it was drawn.
synth_pop_totals = sum(survay_data[random_indices, 1:cols]; dims=1);

In [47]:
"""
    mutate(random_indices, survay_data, synth_pop_totals; target_totals=cencus_data[area:area, :])

Propose a one-for-one swap in a synthetic population and score the result.

The function does not modify any of its arguments. It:
1. picks a random position in `random_indices` (the individual to remove),
2. picks a random row of `survay_data` (the individual to add),
3. computes the population totals after the swap and their Manhattan distance
   to `target_totals`.

# Arguments
- `random_indices`: vector of row indices into `survay_data`, one per individual
  in the synthetic population.
- `survay_data`: survey matrix, one row per individual.
- `synth_pop_totals`: current column totals of the synthetic population, as a
  1×N row matrix.

# Keywords
- `target_totals`: 1×N row matrix the totals are compared against. Defaults to
  row `area` of the global `cencus_data`, looked up at call time.

# Returns
A tuple, in this order:
- `remove_person_pos`: position in `random_indices` of the individual removed.
- `add_person_idx`: row index in `survay_data` of the individual added.
- `temp_synth_pop_totals`: population totals after the swap (1×N row matrix).
- `temp_manhattan_dist`: Manhattan distance between the new totals and
  `target_totals`.

# Throws
- `DimensionMismatch`: if `synth_pop_totals` is not a single row, or its column
  count differs from that of `survay_data`.
"""
function mutate(
    random_indices,
    survay_data,
    synth_pop_totals;
    target_totals=cencus_data[area:area, :],
)
    # Validate input dimensions
    size(synth_pop_totals, 1) == 1 ||
        throw(DimensionMismatch("synth_pop_totals should be a row vector"))
    size(survay_data, 2) == size(synth_pop_totals, 2) ||
        throw(DimensionMismatch(
            "Dimension mismatch between survey data and population totals",
        ))

    # Pick a random member of the synthetic population to remove.
    remove_person_pos = rand(eachindex(random_indices))
    # Translate the position into the survey row that member was copied from;
    # the totals must lose that row, not the row numbered `remove_person_pos`.
    remove_person_idx = random_indices[remove_person_pos]
    remove_person = survay_data[remove_person_idx:remove_person_idx, :]

    # Pick a random individual from the survey data to add.
    add_person_idx = rand(axes(survay_data, 1))
    add_person = survay_data[add_person_idx:add_person_idx, :]

    # Range indexing keeps both individuals as 1×N matrices, so they combine
    # directly with the 1×N totals.
    temp_synth_pop_totals = synth_pop_totals - remove_person + add_person

    # Manhattan (L1) distance between the candidate totals and the target.
    temp_manhattan_dist = sum(abs, temp_synth_pop_totals - target_totals)

    return remove_person_pos, add_person_idx, temp_synth_pop_totals, temp_manhattan_dist
end;

In [48]:
# Hill-climbing search: repeatedly propose a one-person swap and keep it only
# when it strictly reduces the Manhattan distance to the census row for `area`.
# Starting fitness of the initial random population:
manhattan_dist = sum(abs.(synth_pop_totals - cencus_data[area:area,:]));
# NOTE(review): the loop body reassigns the globals `synth_pop_totals` and
# `manhattan_dist` without a `global` declaration. That relies on the
# interactive (notebook/REPL) soft-scope rule; run as a plain script file these
# assignments would create loop-local variables instead.
for _ in 1:1000
    # `mutate` returns (position removed, survey row added, new totals, new distance).
    remove_person_pos, add_person_idx, temp_synth_pop_totals , temp_manhattan_dist = mutate(random_indices, survay_data, synth_pop_totals)
    if temp_manhattan_dist < manhattan_dist
        # Accept the swap: update the population, its totals and its fitness together.
        random_indices[remove_person_pos] = add_person_idx
        synth_pop_totals = temp_synth_pop_totals
        manhattan_dist = temp_manhattan_dist
    end
    # Trace the current best distance after every iteration (accepted or not).
    print("$manhattan_dist ")
end
println()        

62 62 62 62 60 58 56 56 52 50 50 48 48 48 46 46 46 46 46 46 46 46 46 46 46 46 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 44 42 42 42 42 42 42 42 40 40 38 38 38 38 38 38 38 38 38 38 38 38 38 38 36 36 36 36 36 36 36 36 36 36 34 34 34 34 34 32 32 32 32 32 32 32 32 32 32 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 28 28 26 26 26 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 22 22 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 12 12 12 12 12 12 12 12 12 12 12 12 12 12 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 1