### Sample notebook to search for the best grouping using a GA with the DEAP library  
6 members: UID 0-5  
3 groups: GID 0-2
4 skills: Management, NeedsResearch, Design, Programming  
The skill levels (scores) of each member are in the CSV file group_members.csv  
Cost function:  
Summed over all teams: the mean absolute deviation of the team's per-skill score totals (skill balance within the team)  
+ the absolute deviation of the team's total score from the average team total (balance among teams)  

In [11]:
import random
import numpy as np
import pandas as pd

from deap import algorithms
from deap import base
from deap import creator
from deap import tools

#### Parameters  

In [12]:
# CSV file with one row per member and one column per skill score.
csv_in = 'group_members.csv'
# Number of groups to split the members into; group IDs run from 0 to n_group-1.
n_group = 3

#### Read member info from CSV file  

In [13]:
# Load the score table: the first CSV row is the header, every following row
# is one member and every column is one skill.
df = pd.read_csv(csv_in, delimiter=',', skiprows=0, header=0)
print(df.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
display(df)
# Rows are members, columns are skills.
n_member = df.shape[0]
n_skill = df.shape[1]

(6, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Management     6 non-null      int64
 1   NeedsResearch  6 non-null      int64
 2   Design         6 non-null      int64
 3   Programming    6 non-null      int64
dtypes: int64(4)
memory usage: 320.0 bytes
None


Unnamed: 0,Management,NeedsResearch,Design,Programming
0,8,2,3,5
1,4,1,5,7
2,4,6,2,2
3,5,6,7,2
4,2,4,3,6
5,4,5,1,3


#### Setup of individual and population  

In [14]:
# Single-objective fitness; the negative weight makes DEAP minimise the value.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a numpy array that additionally carries a FitnessMin
# instance in its `fitness` attribute.
creator.create("Individual", np.ndarray, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# One gene = one group ID drawn from 0..n_group-1 (randint is inclusive on both ends).
toolbox.register("attr_grp", random.randint, 0, n_group-1)
# One individual = n_member genes, i.e. a group ID for every member.
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_grp, n=n_member)
# A population is a plain list of individuals; its size is passed at call time.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)



#### Functions for fitness and evolution  

In [15]:
def energy(individual):
    """Return the cost of a grouping as a one-element tuple (lower is better).

    For every group ID in ``range(n_group)`` two terms are added:

    * the mean absolute deviation of the group's per-skill score totals
      (how unevenly the skills are covered inside the group), and
    * the absolute difference between the group's total score and the
      average total score per group (how unevenly strength is spread
      across groups).

    Reads the module-level ``df`` (rows = members, columns = skills) and
    ``n_group``.

    Parameters
    ----------
    individual : array-like of int
        ``individual[k]`` is the group ID assigned to member ``k``; its
        length must match the number of rows of ``df``.

    Returns
    -------
    tuple
        ``(cost,)`` -- DEAP expects fitness values as a tuple.
    """
    # Plain ndarray so that `== i` is always an element-wise comparison.
    membership = np.asarray(individual)
    tot_mem = df.sum(axis=1)           # total score of every member
    ave_all = tot_mem.sum() / n_group  # average total score per group
    ene = 0.0
    for i in range(n_group):
        in_group = membership == i
        tot_skill_grp = df[in_group].sum()  # per-skill totals of this group
        # Series.mad() was removed in pandas 2.0; compute the mean absolute
        # deviation explicitly so the function works on old and new pandas.
        ene += (tot_skill_grp - tot_skill_grp.mean()).abs().mean()
        ene += np.abs(tot_mem[in_group].sum() - ave_all)
    return ene,

def mutatOne(individual, indpb):
    """Mutate an individual in place by reassigning genes to a different group.

    Each gene is, independently with probability ``indpb``, replaced by a
    group ID from ``0..n_group-1`` that differs from its current value.
    Reads the module-level ``n_group``.

    Parameters
    ----------
    individual : mutable sequence of int
        Group ID per member; modified in place.
    indpb : float
        Per-gene mutation probability.

    Returns
    -------
    tuple
        ``(individual,)`` -- the same object, wrapped in a tuple as DEAP
        mutation operators are expected to return.
    """
    if n_group < 2:
        # With fewer than two groups there is no "different" group to move
        # to; the resampling loop below would never terminate.
        return individual,
    for i in range(len(individual)):
        if random.random() < indpb:
            old = individual[i]
            new = old
            # Redraw until the gene actually changes.
            while old == new:
                new = random.randint(0, n_group-1)
            individual[i] = new
    return individual,
    
def cxTwoPointCopy(ind1, ind2):
    """Two-point crossover that swaps one contiguous segment in place.

    Two distinct cut points are drawn at random and the genes between them
    are exchanged between ``ind1`` and ``ind2``. The segments are copied
    explicitly before being written back, so the swap is also correct when
    slicing returns a view of the underlying data (as numpy arrays do).

    Returns the two (modified) individuals as a tuple ``(ind1, ind2)``.
    """
    n_genes = len(ind1)
    first = random.randint(1, n_genes)
    second = random.randint(1, n_genes - 1)
    if second < first:
        # Keep the cut points in ascending order.
        lo, hi = second, first
    else:
        # Shift the second draw by one so the two points can never coincide.
        lo, hi = first, second + 1

    segment = slice(lo, hi)
    from_ind2 = ind2[segment].copy()
    from_ind1 = ind1[segment].copy()
    ind1[segment] = from_ind2
    ind2[segment] = from_ind1

    return ind1, ind2

#### Register functions  

In [16]:
# Fitness function: returns the grouping cost as a one-element tuple.
toolbox.register("evaluate", energy)
# Crossover: in-place two-point segment swap between two individuals.
toolbox.register("mate", cxTwoPointCopy)
# Mutation: each gene is moved to a different group with probability 0.2.
toolbox.register("mutate", mutatOne, indpb=0.2)
# Selection: tournament selection with 3 participants per tournament.
toolbox.register("select", tools.selTournament, tournsize=3)

#### Execute GA
This should be run several times with different random seeds to check the robustness of the results.  

In [17]:
# Fix the seed of Python's `random` module so the run is repeatable.
random.seed(11)

# Initial population of 10 random groupings.
pop = toolbox.population(n=10)

# Keep only the single best individual seen so far; the individuals are
# numpy arrays, so they are compared with np.array_equal.
hof = tools.HallOfFame(1, similar=np.array_equal)

# Per-generation statistics over the fitness values of the population.
stats = tools.Statistics(lambda ind: ind.fitness.values)
for label, reducer in (("avg", np.mean), ("std", np.std),
                       ("min", np.min), ("max", np.max)):
    stats.register(label, reducer)

# Run the simple evolutionary algorithm for 100 generations
# (crossover probability 0.5, mutation probability 0.2).
results = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
                              ngen=100, stats=stats, halloffame=hof)

gen	nevals	avg    	std    	min    	max    
0  	10    	55.1208	15.3213	31.9167	71.3333
1  	10    	48.0917	18.5571	16.1667	69.4167
2  	5     	40.5083	23.3757	16.1667	80.3333
3  	10    	31.2833	10.9948	16.1667	46.7083
4  	7     	22.9792	8.05552	16.1667	35.2917
5  	5     	20.8667	9.48934	16.1667	45.6667
6  	8     	18.8417	8.025  	16.1667	42.9167
7  	5     	24.3667	10.6535	16.1667	45.6667
8  	4     	26.1875	18.7572	16.1667	77.2083
9  	5     	15.8917	0.825  	13.4167	16.1667
10 	8     	22.1708	10.0712	13.4167	42.2083
11 	4     	29.3333	19.1318	13.4167	69.9167
12 	6     	19.2042	10.5398	13.4167	45.0417
13 	8     	21.85  	12.9512	13.4167	43.4167
14 	8     	13.4167	1.77636e-15	13.4167	13.4167
15 	4     	16.7083	9.45229    	13.4167	45.0417
16 	8     	16.4333	9.05       	13.4167	43.5833
17 	4     	21.6708	13.1649    	13.4167	50.7083
18 	8     	17.7333	8.74276    	13.4167	38.0833
19 	3     	15.4917	6.225      	13.4167	34.1667
20 	2     	19.0167	11.2089    	13.4167	42.4167
21 	3     	21.3583	12.3328

#### Output results  

In [18]:
# Best grouping found: element k is the group ID assigned to member k.
print('HOF:', hof[0])
# Re-evaluate its cost; energy() returns a one-element tuple.
print(energy(hof[0]))

HOF: [1 0 0 2 1 2]
(7.416666666666664,)
