In [97]:
import heapq
import os
import random
import time

import networkx as nx
import numpy as np
import math
import csv
from sys import maxsize


In [98]:
# def gen_prufer(degree_constrained, n):
#     result = []
#     candidates = list(range(n))
#     while len(result) < n:
#         # Lấy một số ngẫu nhiên từ danh sách candidates
#         x = random.choice(candidates)
#         # Nếu số này chưa xuất hiện quá m lần trong danh sách kết quả, thêm số này vào danh sách kết quả
#         if result.count(x) < degree_constrained - 1:
#             result.append(x)
#         # Nếu số này đã xuất hiện quá m lần, loại bỏ số này khỏi danh sách candidates
#         if result.count(x) == degree_constrained - 1:
#             candidates.remove(x)
#     return result

In [99]:
def gen_prufer(degree_constrained, n):
    # Tạo danh sách ban đầu
    initial_list = list(range(n))
    # Trộn ngẫu nhiên danh sách ban đầu
    random.shuffle(initial_list)
    # Tạo danh sách mới bằng cách lấy (degree_constrained-1) phần tử đầu tiên
    prufer_sequence = initial_list[:degree_constrained - 1]
    # Nếu danh sách mới chưa đủ n-2 phần tử thì lặp lại
    while len(prufer_sequence) < n - 2:
        # Trộn ngẫu nhiên lại danh sách ban đầu
        random.shuffle(initial_list)
        # Lấy phần tử đầu tiên của danh sách ban đầu nếu không nằm trong danh sách mới quá degree_constrained
        for item in initial_list:
            if prufer_sequence.count(item) < degree_constrained:
                prufer_sequence.append(item)
                break
    # Trả về danh sách mới
    return prufer_sequence

In [100]:
# Đọc dữ liệu từ tệp CSV và lưu trữ nó trong danh sách
def get_distance_table(file_path):
    records = []
    with open(file_path, 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader) # Skip header row
        for row in reader:
            records.append(row)

    # Tạo từ điển để lưu trữ khoảng cách giữa các điểm
    distances_table = {}

    # Tính toán khoảng cách giữa các điểm và lưu trữ kết quả vào từ điển
    for i in range(len(records)):
        for j in range(i + 1, len(records)):
            point1 = (float(records[i][1]), float(records[i][2]))
            point2 = (float(records[j][1]), float(records[j][2]))
            distance = math.dist(point1, point2)
            key = (int(records[i][0]), int(records[j][0]))
            distances_table[key] = distance

    # In ra từ điển khoảng cách
    return distances_table


In [101]:
def get_distance(edge, distances_table):
    if edge in distances_table:
        return distances_table[edge]
    if edge[::-1] in distances_table:
        return distances_table[edge[::-1]]

In [102]:
def compute_degrees(edges):
    degrees = {}
    for edge in edges:
        for vertex in edge:
            if vertex not in degrees:
                degrees[vertex] = 0
            degrees[vertex] += 1
    return degrees


In [103]:
def calculate_fitness(prufer_sequence, degree_constrained, distances_table):
    tree = nx.from_prufer_sequence(prufer_sequence)
    edges = tree.edges
    degrees = list(compute_degrees(edges).values())
    # print(degrees)

    # Check if the degrees more than target degrees
    if any(x > degree_constrained for x in degrees):
        return 9999999

    cost = 0
    for edge in edges:
        cost = cost + get_distance(edge, distances_table)
    return cost

In [104]:
def crossover(parent1, parent2, crossover_rate=1):
    offspring1 = parent1[:]
    offspring2 = parent2[:]
    # print(parent1, parent2)
    if random.random() < crossover_rate:
        crossover_point = random.randint(1, len(parent1) - 2)
        # print("c point: ", crossover_point)
        offspring1[crossover_point:], offspring2[crossover_point:] = offspring2[crossover_point:], offspring1[
                                                                                                   crossover_point:]
    return offspring1, offspring2


In [105]:
def mutate(individual, mutation_rate=0.1):
    if random.random() < mutation_rate:
        idx1, idx2 = random.sample(range(len(individual)), 2)
        individual[idx1], individual[idx2] = individual[idx2], individual[idx1]
    return individual


def mutate_pop(population, mutation_rate):
    new_pop = []
    for individual in population:
        if random.random() < mutation_rate:
            idx1, idx2 = random.sample(range(len(individual)), 2)
            individual[idx1], individual[idx2] = individual[idx2], individual[idx1]
            new_pop.append(individual)
    return new_pop

In [106]:
# def fix_off(offspring, degree_constrained):
#     counts = {}
#     used = set()
#     new_numbers = []
#
#     for num in offspring:
#         counts[num] = counts.get(num, 0) + 1
#         if counts[num] > degree_constrained:
#             for i in range(0, len(offspring)):
#                 if i not in used:
#                     new_numbers.append(i)
#                     used.add(i)
#                     break
#         else:
#             new_numbers.append(num)
#
#     return new_numbers

In [107]:
def sort_by_value(dict):
    return sorted(dict.items(), key=lambda x: x[1])


def repair(prufer_sequence, degree_constrained):
    set_ver = set(range(len(prufer_sequence) + 2))
    count = {}
    for ver in set_ver:
        count[ver] = prufer_sequence.count(ver)
    count = dict(sort_by_value(count))

    for i in range(len(prufer_sequence)):
        if count[prufer_sequence[i]] > degree_constrained-1:
            j = next(iter(count))
            count.update([(prufer_sequence[i], count[prufer_sequence[i]] - 1), (j, count[j] + 1)])
            prufer_sequence[i] = j
            count = dict(sort_by_value(count))
    return prufer_sequence

In [108]:
def get_new_pop(population, population_size, degree_constrained, distances_table):
    fitness_values = [calculate_fitness(individual, degree_constrained, distances_table) for
                      individual in population]
    sorted_fitness = sorted(fitness_values)
    index_dict = {val: idx for idx, val in enumerate(fitness_values)}
    # Lấy danh sách index của population đã được sắp xếp theo fitness
    pop_index = [index_dict[val] for val in sorted_fitness]
    # Lấy index của [population_size] phần tử đầu (có cost nhỏ nhất)
    new_pop_index = pop_index[:population_size]
    new_pop = [population[i] for i in new_pop_index]
    return new_pop

def create_new_population(population, fitness, pop_size):
    # create a list of individuals with their fitnesses
    individuals_fitness = [(population[i], fitness[i]) for i in range(len(population))]

    # create a new population with the n/2 best individuals
    best_individuals = heapq.nsmallest(pop_size // 2, individuals_fitness, key=lambda x: x[1])
    new_pop = [individual[0] for individual in best_individuals]

    # create a list of remaining individuals
    remaining_individuals = individuals_fitness[pop_size // 2:]
    # add n/2 individuals chosen randomly with the criterion of the best fitness
    for _ in range(pop_size // 2):
        # print("remaining", remaining_individuals)
        # randomly select two individuals
        ind_indices = np.random.choice(len(remaining_individuals), size=2, replace=False)
#         print(ind_indices)
        ind1, ind2 = remaining_individuals[ind_indices[0]], remaining_individuals[ind_indices[1]]
#         print(ind1, ind2)
        # add the individual with the highest fitness to the new population
        if ind1[1] > ind2[1]:
            new_pop.append(ind1[0])
            remaining_individuals.remove(ind1)
        else:
            new_pop.append(ind2[0])
            remaining_individuals.remove(ind2)

    return new_pop

In [109]:
def run_ga(n, degree_constrained, distances_table, population_size=50, crossover_rate=0.8, mutation_rate=0.1,
           max_generations=50):
    population = [gen_prufer(degree_constrained, n) for _ in range(population_size)]

    for generation in range(max_generations):
        # Tính một mảng fitness value của population
        fitness_values = [calculate_fitness(prufer_sequence, degree_constrained, distances_table) for
                          prufer_sequence in population]
        # print(population)
        # print(fitness_values)
        new_population = []
        while len(new_population) < population_size:
            i, j = np.random.choice(range(population_size), size=2, replace=False,
                                    p=np.array(fitness_values) / sum(fitness_values))
            parent1 = population[i]
            parent2 = population[j]
            offspring1, offspring2 = crossover(parent1, parent2)
            new_population.append(offspring1)
            new_population.append(offspring2)
        for i in range(len(new_population)):
            mutate(new_population[i])
#             print(new_population[i])
            repair(new_population[i], degree_constrained)
#             print(new_population[i])

#         print(new_population)
        new_fitness = [calculate_fitness(prufer_sequence, degree_constrained, distances_table) for
                          prufer_sequence in new_population]
#         print(new_fitness)
        fitness = fitness_values + new_fitness
        
#         print(fitness)
        population = population + new_population
        population = create_new_population(population, fitness, population_size)
        # population = get_new_pop(population, population_size, degree_constrained, distances_table)

    return population

In [110]:
degree_constrained = 3
n = 15
distances_table = get_distance_table("data/15_nodes/2_15_fi10k.csv")

population = run_ga(n, degree_constrained, distances_table)
print(population)
print(population[0])
print(calculate_fitness(population[0], degree_constrained, distances_table))
fitness_values = [calculate_fitness(prufer_sequence, degree_constrained, distances_table) for prufer_sequence in
                          population]
print(fitness_values)

G = nx.from_prufer_sequence(population[0])
print(G.edges())

[[1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10, 12, 2, 2, 3, 0, 11, 1, 3, 6, 11], [1, 7, 10

In [111]:
# degree_constrained = 2
# n = 7
# distances_table = get_distance_table("data/7_wi29.csv")
#
# population = run_ga(n, degree_constrained, distances_table)
# print(population[0])
# print(calculate_fitness(population[0], degree_constrained, distances_table))
#
# G = nx.from_prufer_sequence(population[0])
# print(G.edges())

In [112]:
import csv
def write_result(results, filename):
    path_result = "result"
    with open(filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['STT', 'Best Cost', 'Best Solution', 'Best Tree', 'Time'])

        # Ghi từng dòng kết quả
        for result in results:
            writer.writerow(result)

In [113]:
degree_constrained = 3
n = 15
path = "data/15_nodes"
list_file = os.listdir(path)
print(list_file)
for file in list_file:
    print(file)
    results = []
    path_to_data = os.path.join(path, file)
    dis_tab = get_distance_table(path_to_data)
    path_to_result = path_to_data.replace("data", "result")
    path_to_result = path_to_result.replace("\\", "/")
    # print(path_to_result)
    for i in range(10):
        print("Loop: ", i+1, "...")
        start_time = time.time()
        population = run_ga(n, degree_constrained, dis_tab)
        best_solution = population[0]
        best_cost = calculate_fitness(best_solution, n, dis_tab)
        best_tree = nx.from_prufer_sequence(population[0]).edges()
        end_time = time.time()
        elapsed_time = end_time - start_time
        row = (i+1, best_solution, best_cost, best_tree, elapsed_time)
        results.append(row)
    print("Result 10 times", results)

    # print(path_to_result)
    write_result(results, path_to_result)

['10_15_fi10k.csv', '11_15_fi10k.csv', '12_15_fi10k.csv', '13_15_fi10k.csv', '14_15_fi10k.csv', '15_15_fi10k.csv', '16_15_fi10k.csv', '17_15_fi10k.csv', '18_15_fi10k.csv', '19_15_fi10k.csv', '1_15_fi10k.csv', '20_15_fi10k.csv', '21_15_fi10k.csv', '22_15_fi10k.csv', '23_15_fi10k.csv', '24_15_fi10k.csv', '25_15_fi10k.csv', '26_15_fi10k.csv', '27_15_fi10k.csv', '28_15_fi10k.csv', '29_15_fi10k.csv', '2_15_fi10k.csv', '30_15_fi10k.csv', '3_15_fi10k.csv', '4_15_fi10k.csv', '5_15_fi10k.csv', '6_15_fi10k.csv', '7_15_fi10k.csv', '8_15_fi10k.csv', '9_15_fi10k.csv']
10_15_fi10k.csv
Loop:  1 ...
Loop:  2 ...
Loop:  3 ...
Loop:  4 ...
Loop:  5 ...
Loop:  6 ...
Loop:  7 ...
Loop:  8 ...
Loop:  9 ...
Loop:  10 ...
Result 10 times [(1, [5, 6, 1, 4, 10, 2, 9, 0, 13, 3, 4, 2, 9], 21522.776425362248, EdgeView([(0, 12), (0, 13), (1, 6), (1, 4), (2, 10), (2, 4), (2, 9), (3, 13), (3, 4), (5, 7), (5, 6), (8, 10), (9, 11), (9, 14)]), 1.0354063510894775), (2, [7, 2, 7, 5, 10, 3, 8, 2, 12, 14, 3, 12, 0], 22932.

In [114]:
degree_constrained = 3
n = 30
path = "data/30_nodes"
list_file = os.listdir(path)
print(list_file)
for file in list_file:
    print(file)
    results = []
    path_to_data = os.path.join(path, file)
    dis_tab = get_distance_table(path_to_data)
    path_to_result = path_to_data.replace("data", "result")
    path_to_result = path_to_result.replace("\\", "/")
    # print(path_to_result)
    for i in range(10):
        print("Loop: ", i+1, "...")
        start_time = time.time()
        population = run_ga(n, degree_constrained, dis_tab)
        best_solution = population[0]
        best_cost = calculate_fitness(best_solution, n, dis_tab)
        best_tree = nx.from_prufer_sequence(population[0]).edges()
        end_time = time.time()
        elapsed_time = end_time - start_time
        row = (i+1, best_solution, best_cost, best_tree, elapsed_time)
        results.append(row)
    print("Result 10 times", results)

    # print(path_to_result)
    write_result(results, path_to_result)

['10_30_fi10k.csv', '11_30_fi10k.csv', '12_30_fi10k.csv', '13_30_fi10k.csv', '14_30_fi10k.csv', '15_30_fi10k.csv', '16_30_fi10k.csv', '17_30_fi10k.csv', '18_30_fi10k.csv', '19_30_fi10k.csv', '1_30_fi10k.csv', '20_30_fi10k.csv', '21_30_fi10k.csv', '22_30_fi10k.csv', '23_30_fi10k.csv', '24_30_fi10k.csv', '25_30_fi10k.csv', '26_30_fi10k.csv', '27_30_fi10k.csv', '28_30_fi10k.csv', '29_30_fi10k.csv', '2_30_fi10k.csv', '30_30_fi10k.csv', '3_30_fi10k.csv', '4_30_fi10k.csv', '5_30_fi10k.csv', '6_30_fi10k.csv', '7_30_fi10k.csv', '8_30_fi10k.csv', '9_30_fi10k.csv']
10_30_fi10k.csv
Loop:  1 ...
Loop:  2 ...
Loop:  3 ...
Loop:  4 ...
Loop:  5 ...
Loop:  6 ...
Loop:  7 ...
Loop:  8 ...
Loop:  9 ...
Loop:  10 ...
Result 10 times [(1, [9, 11, 7, 0, 5, 6, 4, 15, 10, 13, 4, 22, 24, 0, 21, 14, 18, 28, 13, 17, 2, 7, 8, 10, 3, 1, 21, 6], 66786.03883121809, EdgeView([(0, 16), (0, 24), (0, 21), (1, 3), (1, 21), (2, 17), (2, 7), (3, 10), (4, 20), (4, 26), (4, 22), (5, 19), (5, 6), (6, 21), (6, 29), (7, 11), 