### Sample notebook to search best grouping using GA with deap library  
6 members: UID 0-5  
3 groups: GID 0-2
4 skills: Management, NeedsResearch, Design, Programming  
Skill levels (scores) of each member is in CSV file group_members.csv  
Cost function:  
The sum of mean absolute deviations of total scores of members in each team  
+ absolute deviations of averaged total scores of members among teams  

In [29]:
import random
import numpy as np
import pandas as pd

from deap import algorithms
from deap import base
from deap import creator
from deap import tools

#### Parameters  

In [30]:
csv_in = 'group_prev.csv'
n_group = 4

#### Read member info from CSV file  

In [31]:
df = pd.read_csv(csv_in, delimiter=',', skiprows=0, header=0)
print(df.shape)
print(df.info())
display(df)
n_member = df.shape[0]
display(n_member)

(16, 3)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   prev1   16 non-null     int64
 1   prev2   16 non-null     int64
 2   prev3   16 non-null     int64
dtypes: int64(3)
memory usage: 512.0 bytes
None


Unnamed: 0,prev1,prev2,prev3
0,0,0,0
1,0,1,1
2,0,2,2
3,0,3,3
4,1,0,1
5,1,1,2
6,1,2,3
7,1,3,0
8,2,0,2
9,2,1,3


16

#### Setup of individual and population  

In [32]:
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("attr_grp", random.randint, 0, n_group-1)
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_grp, n=n_member)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)



#### Functions for fitness and evolution  

In [33]:
def energy(individual):
    ene = 0.0
    same_gr = 0
    for i in range(n_member-1):
        for j in range (i+1, n_member):
            if df.loc[i]['prev1'] == df.loc[j]['prev1']:
                same_gr += 1
            if df.loc[i]['prev2'] == df.loc[j]['prev2']:
                same_gr += 1
            if df.loc[i]['prev3'] == df.loc[j]['prev3']:
                same_gr += 1
    # print(same_gr)
    for i in range(n_group):
        # n_member
        ene += np.var(np.unique(individual, return_counts=True)[1])
        # n_pair
        ene += same_gr
    return ene,

def mutatOne(individual, indpb):
    for i in range(len(individual)):
        if random.random() < indpb:
            old = individual[i]
            new = old
            while old==new:
                new = random.randint(0, n_group-1)
            individual[i] = new
    return individual,
    
def cxTwoPointCopy(ind1, ind2):
    size = len(ind1)
    cxpt1 = random.randint(1, size)
    cxpt2 = random.randint(1, size - 1)
    if cxpt2 >= cxpt1:
        cxpt2 += 1
    else: # Swap the two cx points
        cxpt1, cxpt2 = cxpt2, cxpt1

    ind1[cxpt1:cxpt2], ind2[cxpt1:cxpt2] = ind2[cxpt1:cxpt2].copy(), ind1[cxpt1:cxpt2].copy()
        
    return ind1, ind2

#### Register functions  

In [34]:
toolbox.register("evaluate", energy)
toolbox.register("mate", cxTwoPointCopy)
toolbox.register("mutate", mutatOne, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

#### Execute GA
should be run several times and see the robustness of the results  

In [35]:
random.seed(19)
pop = toolbox.population(n=10)
hof = tools.HallOfFame(1, similar=np.array_equal)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)
    
results = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
                              ngen=100, stats=stats, halloffame=hof)

gen	nevals	avg    	std    	min    	max
0  	10    	300.556	7.26789	291.556	314
1  	6     	296.133	4.22479	291.556	302
2  	8     	295.089	6.31084	288.889	308
3  	9     	292.067	4.01239	288.889	302
4  	6     	291.133	3.55757	288.889	300
5  	8     	290.867	2.94216	288.889	298
6  	2     	290.378	2.48263	288.889	296
7  	4     	291.067	5.70016	288.889	308
8  	4     	291.311	4.86504	288.889	302
9  	7     	289.422	1.6    	288.889	294.222
10 	6     	288.889	5.68434e-14	288.889	288.889
11 	5     	291.778	4.77855    	288.889	302    
12 	5     	288.889	5.68434e-14	288.889	288.889
13 	5     	290.8  	5.73333    	288.889	308    
14 	9     	292.644	6.51566    	288.889	308    
15 	6     	293.378	5.40032    	288.889	302    
16 	8     	291.578	4.79758    	288.889	302    
17 	6     	288.889	5.68434e-14	288.889	288.889
18 	6     	291.4  	7.53333    	288.889	314    
19 	5     	291.2  	6.93333    	288.889	312    
20 	6     	288.889	5.68434e-14	288.889	288.889
21 	9     	294.622	9.15965    	288.889	314    
22 

#### Output results  

In [36]:
print('HOF:', hof[0])
print(energy(hof[0]))

HOF: [1 3 3 0 3 3 1 1 1 3 0 3 0 1 0 0]
(288.8888888888889,)
