In [1]:
import numpy as np
import pandas as pd
from math import sqrt
from random import sample
import copy

In [2]:
# Mutation
def mutation(population_number, n, k):
    """Build a completely fresh population of random chromosomes.

    Every chromosome consists of ``k`` distinct indices drawn without
    replacement from ``range(n - 1)`` and stored in ascending order.

    Args:
        population_number: how many chromosomes to create.
        n: upper bound parameter; indices are taken from ``range(n - 1)``.
        k: number of indices (genes) per chromosome.

    Returns:
        A list with ``population_number`` sorted lists of ``k`` indices.

    Raises:
        ValueError: propagated from ``random.sample`` when ``k`` is negative
            or larger than ``n - 1`` (only if at least one chromosome is drawn).
    """
    return [sorted(sample(range(n - 1), k)) for _ in range(population_number)]


# Check whether all entries of the list are the same
def check_equal(index_list):
    """Tell whether ``index_list`` holds exactly one distinct value.

    Args:
        index_list: iterable of items comparable with ``==`` (they do not
            need to be hashable, e.g. lists of indices).

    Returns:
        True when the iterable is non-empty and every item equals the
        others; False otherwise (including for an empty iterable).
    """
    distinct = []
    for candidate in index_list:
        already_seen = candidate in distinct
        if not already_seen:
            distinct += [candidate]
    return len(distinct) == 1

# Check a chromosome for duplicated genes
def check_duplicates(chromosome):
    """Return True when ``chromosome`` contains at least one repeated gene.

    Args:
        chromosome: sized collection of hashable genes.

    Returns:
        True if some value occurs more than once, False otherwise.
    """
    distinct_genes = set(chromosome)
    # A set can never be longer than its source, so "shorter" means "repeats".
    return len(distinct_genes) < len(chromosome)


def crossover(population_number, father_list, mother_list, index_list, temp_list):
    """Overwrite each child in ``index_list`` with genes from its two parents.

    For child ``i`` the parents are ``temp_list[father_list[i]]`` and
    ``temp_list[mother_list[i]]``. Genes at even positions are copied from
    the father, genes at odd positions from the mother.

    Args:
        population_number: number of children (leading entries of
            ``index_list``) to rewrite.
        father_list: per-child index of the father inside ``temp_list``.
        mother_list: per-child index of the mother inside ``temp_list``.
        index_list: population that is modified in place.
        temp_list: population the parent genes are read from.

    Returns:
        The same ``index_list`` object, after modification.
    """
    for child_idx in range(population_number):
        father_idx, mother_idx = father_list[child_idx], mother_list[child_idx]
        child = index_list[child_idx]
        for gene_pos in range(len(child)):
            # odd position -> mother, even position -> father
            donor_idx = mother_idx if gene_pos % 2 else father_idx
            child[gene_pos] = temp_list[donor_idx][gene_pos]
    return index_list


def get_mothers(population_number, father_list, prob_list):
    """Choose a mother for every father, never the same individual.

    For each of the first ``population_number`` fathers a tournament is run
    via ``tournament_selection`` and only the first winner it returns is
    considered; the draw is repeated until that winner differs from the
    father.

    Args:
        population_number: number of mothers to select; also forwarded to
            ``tournament_selection``.
        father_list: already selected father indices.
        prob_list: per-individual selection weights forwarded to
            ``tournament_selection``.

    Returns:
        A list of ``population_number`` mother indices where entry ``i``
        is different from ``father_list[i]``.
    """
    mothers = []
    for slot in range(population_number):
        while True:
            candidate = tournament_selection(population_number, prob_list)[0]
            if candidate != father_list[slot]:
                break
        mothers.append(candidate)
    return mothers


# Tournament selection
def tournament_selection(population_number, prob_list):
    """Select ``population_number`` individuals by random tournaments.

    Each tournament samples ``population_number // 2`` distinct indices of
    ``prob_list`` without replacement; the contender with the largest value
    wins (on ties, the one sampled first).

    The winner is always taken from the sampled contenders. Previously the
    running maximum started at ``-1``, so whenever every sampled value was
    ``<= -1`` index 0 was reported as winner even if it had not been sampled.

    Args:
        population_number: number of tournaments to run; also determines the
            tournament size (``population_number // 2``).
        prob_list: per-individual weights, indexable by position.

    Returns:
        A list of ``population_number`` winning indices. When the tournament
        size is 0 (``population_number < 2``) there are no contenders and
        index 0 is used, as before.

    Raises:
        ValueError: propagated from ``random.sample`` when the tournament
            size exceeds ``len(prob_list)``.
    """
    tournament_size = population_number // 2
    winners = []
    for _ in range(population_number):
        contenders = sample(range(len(prob_list)), tournament_size)
        # max() keeps the first of several equal maxima, matching the
        # original strict ">" comparison; default=0 covers an empty draw.
        winners.append(max(contenders, key=lambda idx: prob_list[idx], default=0))
    return winners


# QCFE - Quality criterion of features ensemble
def calculate_qcfe(chromosome, data, n, k):
    """Compute the quality criterion of a feature ensemble.

    The score is::

        k * mean(data[f][n] for f in chromosome)
        -----------------------------------------------------------
        sqrt(k + k * (k - 1) * mean(data[f][g] for pairs f before g))

    where the pairs run over every two distinct positions of ``chromosome``.

    The pairwise term is looked up by the selected feature indices
    (``chromosome[i]``, ``chromosome[j]``). Previously it used the positions
    ``i`` and ``j`` themselves, so the denominator ignored which features
    were actually selected.

    Args:
        chromosome: sequence of selected feature indices.
        data: two-level indexable table read as ``data[a][b]`` -- presumably
            a correlation matrix whose entry ``n`` is the target variable;
            confirm against the caller.
        n: index of the reference column/row used for the numerator.
        k: ensemble size used in the formula (callers pass
            ``len(chromosome)``).

    Returns:
        The score as a float (NumPy scalar).

    Raises:
        ValueError: from ``math.sqrt`` when the expression under the root is
            negative.
    """
    # Numerator: how strongly the selected features relate to entry ``n``.
    target_corr = [data[feature][n] for feature in chromosome]
    first_value = k * np.mean(target_corr)

    # Denominator: how strongly the selected features relate to each other.
    size = len(chromosome)
    pair_corr = [
        data[chromosome[a]][chromosome[b]]
        for a in range(size)
        for b in range(a + 1, size)
    ]
    # With fewer than two features there are no pairs; treat the mean as 0
    # instead of letting np.mean([]) inject NaN into the result.
    mean_pair_corr = np.mean(pair_corr) if pair_corr else 0.0
    second_value = sqrt(k + k * (k - 1) * mean_pair_corr)

    return first_value / second_value


# Generate the first population
def generate_first_population(population_number, n, k):
    """Create the initial population of random chromosomes.

    Args:
        population_number: number of chromosomes to generate.
        n: upper bound parameter; genes are drawn from ``range(n - 1)``.
        k: number of distinct genes per chromosome.

    Returns:
        A list of ``population_number`` chromosomes, each an ascending list
        of ``k`` distinct indices.

    Raises:
        ValueError: propagated from ``random.sample`` when ``k`` is negative
            or larger than ``n - 1`` (only if at least one chromosome is drawn).
    """
    population = []
    for _ in range(population_number):
        genes = sample(range(n - 1), k)
        population.append(sorted(genes))
    return population


def genetic_algorithm(data, col_names, k, population_number,
                      max_generations=1000, patience=500):
    """Search for the ensemble of ``k`` features with the highest QCFE score.

    Each generation scores every chromosome with ``calculate_qcfe``, records
    the best one seen so far, selects parents with ``tournament_selection``
    and ``get_mothers``, recombines them with ``crossover``, re-samples any
    chromosome that ended up with duplicated genes, and replaces the whole
    population via ``mutation`` once all chromosomes have become identical.

    Args:
        data: table forwarded unchanged to ``calculate_qcfe``.
        col_names: feature names; only their count is used here.
        k: number of features per chromosome.
        population_number: number of chromosomes in the population.
        max_generations: hard limit on the number of generations.
        patience: stop at the end of the ``patience``-th generation counted
            from (and including) the last one that improved the best score.
            Values below 1 behave like 1.

    Returns:
        ``(best_qcfe, ensemble)``: the best score found and a copy of the
        chromosome that achieved it. If no generation yields a comparable
        score the result is ``(float("-inf"), [])``.
    """
    n = len(col_names)  # total number of features

    # index list that forms the first population of individuals
    index_list = generate_first_population(population_number, n, k)
    best_qcfe = float("-inf")
    ensemble = []
    # Initialised up front: previously the counter was only assigned inside
    # the "improved" branch, which raised UnboundLocalError whenever the
    # first generation failed to beat the initial sentinel.
    stale_generations = 0

    # GENETIC ALGORITHM
    for _ in range(max_generations):
        qcfe_list = [calculate_qcfe(chromosome, data, n, k) for chromosome in index_list]

        # Best chromosome of this generation (first one on ties).
        max_qcfe = float("-inf")
        max_index = -1
        for z, qcfe in enumerate(qcfe_list):
            if qcfe > max_qcfe:
                max_qcfe = qcfe
                max_index = z
        if max_qcfe > best_qcfe:
            stale_generations = 0
            best_qcfe = max_qcfe
            ensemble = copy.deepcopy(index_list[max_index])

        # Normalise the scores once per generation.
        # NOTE: dividing by the total preserves the ranking only while the
        # total is positive.
        total_qcfe = sum(qcfe_list)
        prob_list = [qcfe / total_qcfe for qcfe in qcfe_list]

        # Selection of individuals (fathers and mothers)
        father_list = tournament_selection(population_number, prob_list)
        mother_list = get_mothers(population_number, father_list, prob_list)

        # Crossover: parents are read from a snapshot because the
        # population is overwritten in place.
        temp_list = copy.deepcopy(index_list)
        index_list = crossover(population_number, father_list, mother_list, index_list, temp_list)

        for chromosome in index_list:
            chromosome.sort()
        # A child that inherited the same gene twice is replaced by a fresh
        # random chromosome.
        for z in range(len(index_list)):
            if check_duplicates(index_list[z]):
                index_list[z] = sample(range(n - 1), k)
                index_list[z].sort()
        # A fully converged population is restarted from scratch.
        if check_equal(index_list):
            index_list = mutation(population_number, n, k)

        stale_generations += 1
        if stale_generations >= patience:
            break
    return best_qcfe, ensemble


# # sensor_type = 'reinforced'
# sensor_list = ['convex', 'linear', 'reinforced', 'xmixed', 'ymixed']
# # sensor_list = ['linear']
# for sensor_type in sensor_list:
#     print(sensor_type)
#     data = pd.read_excel('corr(' + sensor_type + ').xlsx')  # загрузка данных
#     df = pd.read_excel(sensor_type + '.xlsx')
#     col_names = list(df.columns[:-1])
#     population_number = 50  # количество особей в популяции
#     max_qcfe = 0
#     best_ensemble = []
#     best_k = 0
#     qcfe_list = []
#     for k in range(10, 41):
#         print(k)
#         best_qcfe, ensemble = genetic_algorithm(data, col_names, k, population_number)
#         if best_qcfe > max_qcfe:
#             max_qcfe = best_qcfe
#             best_ensemble = ensemble
#             best_k = k
#         qcfe_list.append(best_qcfe)

#     # вывод результатов
#     text = ''
#     for index in best_ensemble:
#         text += col_names[index] + ';'
#     print('k = ', best_k)
#     print(text)
#     print(max_qcfe)
#     print()
#     for qfce in qcfe_list:
#         print(qfce)
#     print()

In [None]:
# Driver cell: run the genetic algorithm for every ensemble size k in 10..40,
# remember the best-scoring ensemble and print a report.
#
# NOTE(review): `sensor_type` is not assigned by any live code visible in this
# notebook (only inside the commented-out block above), so this cell depends on
# leftover kernel state and fails with NameError after Restart & Run All.
print(sensor_type)
# NOTE(review): absolute, machine-specific path. Presumably a correlation
# matrix (the file name ends in "_corr") -- confirm.
data = pd.read_excel('/home/mhoncharuk/Education/liver_disease_recognizer/notebooks/GLCM_Diff_corr.xlsx') 
df = pd.read_excel(sensor_type + '.xlsx')
# every column of df except the last one is treated as a feature name
col_names = list(df.columns[:-1])
population_number = 50  # number of individuals in the population
max_qcfe = 0
best_ensemble = []
best_k = 0
qcfe_list = []  # best score obtained for each k, in iteration order
for k in range(10, 41):
    print(k)
    best_qcfe, ensemble = genetic_algorithm(data, col_names, k, population_number)
    # keep the overall winner across all ensemble sizes
    if best_qcfe > max_qcfe:
        max_qcfe = best_qcfe
        best_ensemble = ensemble
        best_k = k
    qcfe_list.append(best_qcfe)

# print the results
# names of the selected features, each followed by ';'
text = ''
for index in best_ensemble:
    text += col_names[index] + ';'
print('k = ', best_k)
print(text)
print(max_qcfe)
print()
for qfce in qcfe_list:
    print(qfce)
print()