In [347]:
from numpy import inf
import pandas as pd
from scipy.spatial.distance import cdist
import itertools
import pandas as pd
import numpy as np
import random 

In [348]:

from source.pairwise_distance_matrix import pairwise_distance_matrix
from source.GA_util import init_pop, objective, tournament_selection, mutation
from source.xyz_parser import XYZ_reader


In [349]:
def crossover(p1, p2, r_cross):
    """Recombine two parents with a one-point crossover.

    Parameters
    ----------
    p1, p2 : sequence
        Parent candidates (list, tuple or 1-D array). They are expected to
        have the same length; the cut point is drawn from the shorter one.
    r_cross : float
        Probability that recombination takes place. Compared against a draw
        from ``np.random.rand()``, so 0 never recombines and 1 always does.

    Returns
    -------
    list
        ``[c1, c2]``: two children, each a new ``list``. Without
        recombination they are plain copies of ``p1`` and ``p2``.

    Notes
    -----
    A one-point crossover does not preserve permutations: if the parents are
    permutations, a child may contain one index twice and miss another.
    """
    # Children are list copies of the parents by default. list() also accepts
    # tuples (which have no .copy()) and arrays (where `+` would add
    # element-wise instead of concatenating).
    c1, c2 = list(p1), list(p2)

    # A cut needs at least one gene on each side of it.
    n_genes = min(len(c1), len(c2))
    if n_genes < 2:
        return [c1, c2]

    # check for recombination
    if np.random.rand() < r_cross:

        # Cut point that is not on either end of the string. randint's upper
        # bound is exclusive, so this draws from 1 .. n_genes - 1.
        pt = np.random.randint(1, n_genes)

        # perform crossover (right-hand side is evaluated before assignment)
        c1, c2 = c1[:pt] + c2[pt:], c2[:pt] + c1[pt:]

    return [c1, c2]

In [350]:
def genetic_atom_mapping(educt_path,product_path,n_generations,pop_size,cross_rate,mut_rate):
    """Search for the best-scoring atom mapping with a genetic algorithm.

    Both structures are read with ``XYZ_reader`` and converted with
    ``pairwise_distance_matrix``. Candidates are scored with ``objective``;
    a higher score is treated as better.

    Parameters
    ----------
    educt_path, product_path : str
        Paths handed to ``XYZ_reader`` for the educt and the product.
    n_generations : int
        Number of generations to evolve.
    pop_size : int
        Number of candidates per generation.
    cross_rate : float
        Currently unused: the crossover step is disabled, children are
        produced by mutation only.
    mut_rate : float
        Passed through to ``mutation``.

    Returns
    -------
    list
        ``[best, best_eval]``: the best candidate seen and its score.
    """
    # presumably a pandas DataFrame with an 'Element' column -- confirm against XYZ_reader
    product_xyz = XYZ_reader(product_path)

    educt   = pairwise_distance_matrix(XYZ_reader(educt_path)  , inverse = True, unit = True, exp = None)
    product = pairwise_distance_matrix(XYZ_reader(product_path), inverse = True, unit = True, exp = None)

    # get the reference which atoms can get which places:
    # one list of product row indices per distinct element
    reference = [product_xyz.loc[product_xyz['Element'] == element].index.to_list() for element in product_xyz['Element'].unique()]

    # one entry per atom, each entry being the index group of that atom's element
    # NOTE(review): entries follow the group order of `reference`, which equals the
    # row order only if atoms of the same element are contiguous in the file -- confirm
    # how `mutation` indexes `options`.
    options = [group for group in reference for _ in group]

    # initial population based on estimated best candidate; islice stops after
    # pop_size candidates instead of walking everything init_pop_score produces
    # NOTE(review): `init_pop_score` is not among the names imported from
    # source.GA_util in the visible import cell -- confirm it is in scope on a fresh kernel.
    pop = list(itertools.islice(init_pop_score(educt,product,reference), pop_size))

    # set initial guess: the first candidate, so a real mapping is returned
    # even if no later candidate scores strictly higher
    best = pop[0]
    best_eval = objective(best,educt,product)

    # enumerate generations
    for gen in range(n_generations):

        # evaluate all candidates in the population
        scores = [objective(candidate,educt,product) for candidate in pop]

        # check if there's a new best in this generation
        for candidate, score in zip(pop, scores):
            if score > best_eval:
                best, best_eval = candidate, score
                print(">%d, new best f(%s) = %.3f" % (gen,  candidate, score))

        # select parents in tournament selection
        parents = [tournament_selection(pop,scores) for _ in range(pop_size)]

        # create the next generation by mutating every selected parent in order;
        # iterating parents directly also works for an odd pop_size
        # NOTE(review): crossover(p1, p2, cross_rate) is deliberately left out here,
        # presumably because a one-point crossover can duplicate atom indices -- confirm.
        pop = [mutation(parent,options,mut_rate) for parent in parents]

    return [best,best_eval]


In [351]:
# Seed both global RNGs so that re-running this cell reproduces the same search.
# NOTE(review): assumes the GA helpers draw from the global `random` / `np.random`
# state -- confirm against source.GA_util.
GA_SEED = 0
random.seed(GA_SEED)
np.random.seed(GA_SEED)

genetic_atom_mapping(
    educt_path='educt.xyz',
    product_path='product.xyz',
    n_generations=10000,
    pop_size=40,
    cross_rate=0.2,
    mut_rate=0.1,
)

>0, new best f((7, 1, 2, 3, 4, 6, 5, 0, 8, 9)) = 9.016
>0, new best f((7, 1, 2, 3, 5, 6, 4, 0, 8, 9)) = 9.057
>2, new best f([7, 1, 4, 5, 2, 6, 3, 0, 8, 9]) = 9.163
>3, new best f([7, 1, 0, 3, 2, 5, 6, 4, 8, 9]) = 9.482
>177, new best f([7, 0, 1, 6, 2, 4, 3, 5, 9, 8]) = 9.533


[[7, 0, 1, 6, 2, 4, 3, 5, 9, 8], 9.532587436487301]