In [162]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from typing import List, Optional, Callable, Tuple
from numpy import random
import itertools
import math
import statistics
from tabulate import tabulate
import time

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse_score
from sklearn.metrics import mean_absolute_error as mae_score
from sklearn.metrics import mean_absolute_percentage_error as mape_score

<font size = 6> Genetic algo with Adaptive Mutation</font>


Fitness based adaptive Mutation<br>
https://neptune.ai/blog/adaptive-mutation-in-genetic-algorithm-with-python-examples<br>
https://www.atlantis-press.com/article/7393.pdf

Rank based adaptive mutation <br>
https://arxiv.org/pdf/2104.08842

A good crossover operator for real encoding was proposed by K. Deb in 1995. Here is the paper: http://www.complex-systems.com/pdf/09-2-2.pdf

Parameters 
1. Param Grid: Given
2. Genome: One combination of Param Grid
3. Population: All combinations of Param Grid considered

Functions
1. Genetic Representation of Genome, Population (solutions)
2. Generating new solutions
3. Fitness Value functions: MSE, MAE, etc
4. Elitism/Selection Function: Choose which Genomes undergo crossover; Python's `choice` function can be used for this, with weight = Fitness Value
5. Cross over Function
6. Mutation Algo: Usually mutation rate is set to 1/L, where L is the length of the bitstring.
7. Evolution Algo

Classes
1. Genome
2. Population
3. GenomSearch
    - GenomSearchCV


Limitations
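1. Numeric (int/float) parameters must be given as a `[low, high]` range
2. Cannot be used with pipelines yet (nested names such as `PCA__n_components` are not supported)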

Future:
1. Allow for more flexible formatting of param_grid
2. Take care of edge cases
3. Convert to binary (maybe faster?)
4. In population.mutation() -> Maybe can update f_max and f_avg as we are mutating, else can try parallel processing

Fitness Function:
Fine-tune the fitness function to better represent the problem's objectives and constraints. A well-designed fitness function guides the algorithm toward the desired solutions.

Constraint Handling:
Implement techniques for handling constraints effectively. Penalization or repair methods can ensure that generated solutions comply with the problem constraints.

Diversity Preservation:
Encourage diversity within the population to prevent premature convergence to local optima. Diversity-preserving techniques, such as crowding or fitness sharing, can help in maintaining a diverse set of solutions.

Hybrid Approaches:
Consider combining the genetic algorithm with other optimization techniques, such as local search or simulated annealing, to explore the search space more effectively.

Parameter Tuning:
Perform parameter tuning using techniques like grid search or genetic algorithms themselves to optimize the genetic algorithm's parameters for your specific problem.

Parallelization:
If your problem domain allows it, consider parallelizing the genetic algorithm to run multiple populations concurrently. This can significantly speed up the search process.




In [146]:
"""
Input example:

params_grid = {
    'n': {range: [low, high], dtype: int}   ## (Low, High) represents the range, dtype = int, float etc
    'objective': {range: [cat1, cat2, cat3,...], dtype: category}
    'gamma': {range: [low, high], dtype: float}
}

available dtypes:
category, int, float

population = [Genome1, Genome2, Genome3, ...]


TODO: support pipeline-style parameter names such as "PCA__n_components"
"""

'\nInput example:\n\nparams_grid = {\n    \'n\': {range: [low, high], dtype: int}   ## (Low, High) represents the range, dtype = int, float etc\n    \'objective\': (range: [cat1, cat2, cat3,...], dtype: category }\n    \'gamma\': {range: [low, high], dtype: float}\n}\n\navailable dtypes:\ncategory, int, float\n\npopulation = [Genome1, Genome2, Genome3, ...]\n\n\nneed to make one for pipelining so like "PCA__n componenes"\n'

In [147]:
"""Helpful functions:
1. Timsort, insertion, merge
2. Merge dictionaries
"""


MIN_MERGE = 32


def calcMinRun(n):
    """Pick the run length tim_sort uses for an input of size n.

    Sizes below MIN_MERGE are returned unchanged. Larger sizes are halved
    until they fall below MIN_MERGE, and 1 is added to the result if any of
    the bits shifted out along the way was set.

    e.g. with MIN_MERGE = 32: 1=>1, ..., 31=>31, 32=>16, 33=>17,
    ..., 63=>32, 64=>16, 65=>17, ...
    """
    dropped_bit = 0
    while not n < MIN_MERGE:
        # Remember whether any low bit has been discarded so far.
        dropped_bit = dropped_bit | (n & 1)
        n = n >> 1
    return n + dropped_bit


# In-place insertion sort of pop[left..right] (both ends inclusive).
# dir == 1 sorts ascending; any other value sorts descending.
def insertionSort(pop, left, right, dir):
    ascending = dir == 1
    for position in range(left + 1, right + 1):
        current = position
        # Sink the element towards `left` until its neighbour is in order.
        while current > left:
            previous = current - 1
            if ascending:
                misplaced = pop[current] < pop[previous]
            else:
                misplaced = pop[current] > pop[previous]
            if not misplaced:
                break
            pop[previous], pop[current] = pop[current], pop[previous]
            current = previous


# Merges the two adjacent sorted runs pop[l..m] and pop[m+1..r] in place.
def merge(pop, l, m, r, dir = 1):
    # Snapshot both runs so that writing into pop cannot clobber unread items.
    n_head, n_tail = m - l + 1, r - m
    head = [pop[l + offset] for offset in range(n_head)]
    tail = [pop[m + 1 + offset] for offset in range(n_tail)]

    ascending = dir == 1   ## dir == 1 -> ascending, anything else -> descending
    h = t = 0
    out = l

    # Repeatedly move the next item (in sort order) from either run into pop;
    # on ties the item from the left run goes first.
    while h < n_head and t < n_tail:
        if ascending:
            take_head = head[h] <= tail[t]
        else:
            take_head = head[h] >= tail[t]

        if take_head:
            pop[out] = head[h]
            h += 1
        else:
            pop[out] = tail[t]
            t += 1
        out += 1

    # At most one of the two runs still has items left; append them as they are.
    for leftover in head[h:]:
        pop[out] = leftover
        out += 1
    for leftover in tail[t:]:
        pop[out] = leftover
        out += 1

# Iterative Timsort function to sort the
# array[0...n-1] (similar to merge sort)
def tim_sort(pop, dir = 1):
    """
    Purpose:
    Sorts a mutable sequence in place (e.g. a population compared by fitness)
    and returns that same sequence.

    Input:
    pop: Sequence to sort; its items must support the comparison operators
    dir: 1 - Ascending, any other value - Descending

    Output:
    The sorted `pop` (same object that was passed in)
    """
    n = len(pop)

    # Nothing to do for 0 or 1 items. This also avoids range(0, 0, 0) below,
    # because calcMinRun(0) is 0 and a zero step is an error.
    if n < 2:
        return pop

    minRun = calcMinRun(n)

    # Sort individual runs of length minRun with insertion sort
    for start in range(0, n, minRun):
        end = min(start + minRun - 1, n - 1)
        insertionSort(pop, start, end, dir)

    # Merge runs pairwise, doubling the run length each pass:
    # minRun, 2*minRun, 4*minRun, ...
    size = minRun
    while size < n:

        # Merge pop[left..left+size-1] with pop[left+size..left+2*size-1],
        # then move on by 2*size
        for left in range(0, n, 2 * size):

            # mid is the last index of the left run, mid+1 starts the right run
            mid = min(n - 1, left + size - 1)
            right = min((left + 2 * size - 1), (n - 1))

            # Only merge when a right run actually exists. The direction must
            # be forwarded: merging descending runs with the ascending rule
            # produces an unsorted result.
            if mid < right:
                merge(pop, left, mid, right, dir)

        size = 2 * size

    return pop

def merge_dicts(dict1, dict2):
    """Return a new dict holding the entries of dict1 followed by those of dict2.

    Raises ValueError if the two dictionaries share any key. Neither input
    is modified.
    """
    shared_keys = set(dict1.keys()).intersection(set(dict2.keys()))
    if len(shared_keys) > 0:
        raise ValueError(f"Overlapping keys found: {shared_keys}")

    # Work on a copy so dict1 is left untouched
    combined = dict1.copy()
    combined.update(dict2)
    return combined

def check_error(param_grid):
    """
    Purpose:
    Placeholder for validating a param_grid. No checks are implemented yet:
    the argument is not inspected and None is always returned.
    """
    # dtype labels the rest of the notebook understands; not consulted yet
    avail_dtypes = ('int', 'float', 'category')

    return None



In [148]:
class Genome():
    """
    One hyper-parameter combination together with the fitness it achieves.

    Constructing a Genome immediately configures the estimator with `params`,
    fits it on the training split and scores it on the test split, so creating
    a Genome costs one model fit. Genomes compare by fitness only.

    Parameters
    -----------------
    X_train, X_test, y_train, y_test: Split used to fit and score the estimator
    estimator: Object exposing set_params(**params), fit(X, y) and predict(X).
        The same instance is reconfigured and refitted by every Genome that
        receives it (including the ones created by mutate()).
    metric: 'r2', 'rmse', 'mse' or 'mae'
    params: dict mapping parameter name -> value
    """

    # dtype labels understood by mutate()
    _DTYPES = ('category', 'int', 'float')

    def __init__(self, X_train, X_test, y_train, y_test, estimator, metric, params):
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self._estimator = estimator
        self.metric = metric
        self._params = params
        self._fitness = self.calc_fitness(params)


    @property
    def estimator(self):
        return self._estimator

    @property
    def params(self):
        return self._params

    @property
    def fitness(self):
        return self._fitness

    @params.setter
    def params(self, new_params):
        # Score first: if fitting fails, the old params/fitness pair stays intact
        self._fitness = self.calc_fitness(new_params)
        self._params = new_params


    ## Comparing operators (all of them compare fitness values only)
    def __lt__(self, other):
        # Define custom behavior for "<"
        return self.fitness < other.fitness

    def __le__(self, other):
        # Define custom behavior for "<="
        return self.fitness <= other.fitness

    def __gt__(self, other):
        # Define custom behavior for ">"
        return self.fitness > other.fitness

    def __ge__(self, other):
        # Define custom behavior for ">="
        return self.fitness >= other.fitness

    def __eq__(self, other):
        # Define custom behavior for "=="
        return self.fitness == other.fitness

    def __ne__(self, other):
        # Define custom behavior for "!="
        return self.fitness != other.fitness

    def __len__(self):
        return len(self.params.keys())

    def __repr__(self):
        string = f'Genome = Params : {self.params}, Fitness : {self.fitness}'
        return string

    def extract(self, start, stop = None, step = 1):
        """
        Slice the parameters by position (insertion order of the params dict).

        Parameters
        -----------------
        start: Index of the first parameter to return
        stop: Exclusive end index; if None, only the parameter at `start` is returned
        step: Stride between the returned parameters

        Returns
        -----------------
        dict with the selected name -> value pairs

        Raises
        -----------------
        IndexError if `stop` is given and not in (start, len(self)]
        """
        items = list(self.params.items())

        # Test against None explicitly: stop == 0 is a (bad) bound, not "missing"
        if stop is None:
            return dict([items[start]])

        if start < stop <= len(items):
            return dict(items[i] for i in range(start, stop, step))

        raise IndexError(f"Index out of range.")


    def calc_fitness(self, params):
        """
        Fit the estimator with `params` and score its test-set predictions.

        Parameters
        -------------------
        params: One genome/set of params, forwarded to estimator.set_params

        Returns
        -------------------
        Value of self.metric for the predictions on X_test
        """
        # NOTE: this reconfigures and refits the shared estimator instance
        model = self.estimator
        model.set_params(**params)
        model.fit(self.X_train, self.y_train)

        y_pred = model.predict(self.X_test)

        return self.evaluate(self.y_test, y_pred, self.metric)


    @staticmethod
    def _has_alternative(spec):
        """True if the sampler for `spec` can yield more than one distinct value."""
        dtype = spec['dtype']
        if dtype not in Genome._DTYPES:
            raise ValueError(f"Unsupported dtype: {dtype!r}")

        bounds = spec['range']
        if len(bounds) < 2:
            return False

        if dtype == 'category':
            return any(option != bounds[0] for option in bounds[1:])

        low, high = bounds
        if dtype == 'int':
            # numpy's randint excludes `high`, so [low, low + 1] offers only `low`
            return high - low >= 2

        # float draws are rounded to 5 decimals before being compared
        return round(low, 5) != round(high, 5)


    @staticmethod
    def _draw_value(spec):
        """Sample one value for a gene described by `spec` (dtype already validated)."""
        if spec['dtype'] == 'category':
            return random.choice(spec['range'])

        low, high = spec['range']
        if spec['dtype'] == 'int':
            return random.randint(low, high)     ## numpy: high is exclusive
        return round(random.uniform(low, high), 5)


    def mutate(self, param_grid, rate):
        """
        Mutate each gene independently with probability `rate`.

        Genes whose grid entry cannot produce a second distinct value are left
        alone (and consume no random number); previously such genes could make
        the re-draw loop spin forever.

        Parameters
        --------------------
        param_grid: Param_grid describing 'range' and 'dtype' of every parameter
        rate: Mutation rate/ Probability of one parameter undergoing mutation

        Returns
        -------------------
        Returns new gene (the current one is not modified)

        Raises
        -------------------
        ValueError if a grid entry has a dtype other than category/int/float
        """
        print(f'original: {self}')

        params = self.params.copy()

        # Look every gene up by its key, so the grid's key order is irrelevant
        for key in list(params.keys()):
            spec = param_grid[key]

            # Edge case: only one possible value, unable to mutate
            if not self._has_alternative(spec):
                continue

            if random.random() >= rate:     ## Draw missed the mutation window
                print(f'No mutation.')
                continue

            target_val = params[key]
            new_target_val = target_val
            while new_target_val == target_val:     ## Iterate until not the same
                new_target_val = self._draw_value(spec)

            params[key] = new_target_val

            print(f"After mutation: '{key}': '{new_target_val}'")

        new_gene = Genome(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator, self.metric, params)

        return new_gene

    def evaluate(self, actual, predicted, metric):
        '''Returns the requested metric ("r2", "rmse", "mse" or "mae").

        Only the requested metric is computed. Raises KeyError for an
        unknown metric name.
        '''
        metrics = {
                    "r2": lambda: r2_score(actual, predicted),
                    "rmse": lambda: mse_score(actual, predicted)**0.5,
                    "mse": lambda: mse_score(actual, predicted),
                    "mae": lambda: mae_score(actual, predicted),
                    }
        if metric not in metrics:
            raise KeyError(f"Unknown metric {metric!r}; expected one of {sorted(metrics)}")
        return metrics[metric]()

    

In [149]:
class Population():
    """
    A collection of Genomes plus the genetic operators that act on it.

    The `population` property always returns the genomes sorted by ascending
    fitness, so index 0 (see best_solution()) is the genome with the LOWEST
    fitness value.

    Parameters
    ------------------
    X_train, X_test, y_train, y_test: Split handed to every Genome that is created
    estimator: Model handed to every Genome that is created
    param_grid: {name: {'range': [...], 'dtype': 'category' | 'int' | 'float'}}
    pop_size: Number of genomes to generate; also used by elitism() and selection()
    metric: Metric name handed to every Genome
    population: Optional list of existing genomes; when missing or empty a
        random population of pop_size genomes is generated
    """
    def __init__(self, X_train, X_test, y_train, y_test, estimator, param_grid, pop_size, metric, population = None):
        self._X_train = X_train
        self._X_test = X_test
        self._y_test = y_test
        self._y_train = y_train
        self._estimator = estimator
        self._param_grid = param_grid
        self._pop_size = pop_size
        self._metric = metric
        if not population:  ## If no population
            self._population = self.generate_pop()
        else:
            self._population = population

    @property
    def X_train(self):
        return self._X_train

    @property
    def X_test(self):
        return self._X_test

    @property
    def y_train(self):
        return self._y_train

    @property
    def y_test(self):
        return self._y_test

    @property
    def estimator(self):
        return self._estimator

    @property
    def population(self):
        # list.sort is a stable ascending sort driven by Genome.__lt__; the
        # stored list is sorted in place and returned.
        self._population.sort()
        return self._population

    @property
    def param_grid(self):
        return self._param_grid

    @property
    def pop_size(self):
        return self._pop_size

    @property
    def metric(self):
        return self._metric

    @population.setter
    def population(self, new_pop):
        self._population = new_pop

    ## Used this only for sorting so it's easier
    def __getitem__(self, key):
        """Index or slice the fitness-sorted population."""
        if isinstance(key, (int, slice)):
            # Sort once, then let the list handle both ints and slices
            return self.population[key]
        raise TypeError(f"Invalid Argument type. Must be int or slice.")

    def __repr__(self):
        string = ""
        for genome in self.population:
            string += f'{genome}\n'
        return string

    def copy(self):
        """
        Makes a copy of the current generation

        The new Population owns its own list, but the Genome objects inside
        it are shared with the original.
        """
        pop_copy = list(self.population)
        return Population(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator,
                          self.param_grid, self.pop_size, self.metric, pop_copy)


    def get_fit(self):
        """
        Helps to get list of fitness values (in ascending order)
        """
        return [genome.fitness for genome in self.population]


    def calc_avg_fit(self):
        """
        For adaptive Mutation: mean fitness of the population
        """
        fits = self.get_fit()
        return statistics.mean(fits)


    def generate_pop(self):
        """
        To initialize population

        Draws pop_size random parameter sets from param_grid and wraps each
        one in a Genome (which fits and scores the estimator).

        Returns
        ---------------
        List of pop_size Genomes

        Notes
        ---------------
        - Re-seeds numpy's global random generator with 3 on every call, so
          the generated population is always the same.
        - 'int' values are drawn with numpy's randint, whose upper bound is
          exclusive; 'float' values are rounded to 5 decimals.
        - Any dtype other than 'category' or 'int' is sampled as a float.
        """
        random.seed(3)

        pop = []
        for _ in range(self.pop_size):
            params = {}
            for key, items in self.param_grid.items():
                if items['dtype'] == 'category':
                    params[key] = random.choice(items['range'])
                elif items['dtype'] == 'int':
                    low, high = items['range']
                    params[key] = random.randint(low, high)
                else:
                    low, high = items['range']
                    params[key] = round(random.uniform(low, high), 5)
            new_genome = Genome(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator, self.metric, params)
            pop.append(new_genome)

        return pop


    def elitism(self, n):
        """
        To choose which n Genomes to bring over to next gen

        Parameters
        ------------------
        n: Proportion of pop_size to keep, strictly between 0 and 1

        Returns
        -----------------
        The first round(n * pop_size) Genomes of the sorted population

        Raises
        -----------------
        ValueError if n is not strictly between 0 and 1
        """
        if not 0 < n < 1:
            raise ValueError("n must be between 0 and 1")
        n = round(n*self.pop_size)
        top_n = self.population[0:n]

        return top_n


    def n_cross_over(self, genes, n, nco_rate = 0.5, verbose = True):
        """
        To cross over 2 genomes

        Parameters
        ------------------------
        genes: (a,b) Cross over genomes in pop[a] and pop[b]
        n: Number of points to cross over (at most len(genome) - 1)
        verbose: Illustrate parents to genomes
        nco_rate: Cross over rate/ Probability of the pair undergoing crossover
            -> If rate = 0: The pair (practically) never undergoes crossover
            -> If rate = 1: The pair always undergoes crossover

        Returns
        ----------------------
        2 Genomes, Crossed over, not in place. When no crossover happens the
        two parent Genomes themselves are returned.
        """
        a, b = genes
        gene1 = self.population[int(a)]
        gene2 = self.population[int(b)]

        len1 = len(gene1)
        len2 = len(gene2)
        if len1 != len2:
            raise ValueError("Genomes must be of same length")

        if len1 < 2:
            raise Exception("Too short!")

        # A genome of length L has L-1 interior cut points (1 .. L-1)
        if n > len1 - 1:
            raise Exception(f"Too much cross-over points. Maximum is {len1-1}.")

        prob = random.random()

        if prob > nco_rate:
            print("p value = %.5f is more than %f. No cross over." %(prob, nco_rate))
            return [gene1, gene2]

        choices = list(range(1, len1))
        idx = list(np.random.choice(choices, size = n, replace = False))
        # Section boundaries: 0, the sorted cut points, len1
        idxs = sorted(idx + [0, len1])
        params3 = {}
        params4 = {}

        for i in range(n + 1):
            piece1 = gene1.extract(idxs[i], idxs[i+1])
            piece2 = gene2.extract(idxs[i], idxs[i+1])

            ## Odd sections are swapped between the two children
            if i % 2 == 1:
                piece1, piece2 = piece2, piece1

            params3 = merge_dicts(params3, piece1)
            params4 = merge_dicts(params4, piece2)

        gene3 = Genome(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator, self.metric, params3)
        gene4 = Genome(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator, self.metric, params4)

        if verbose:
            print(f'------------Parent Genomes------------\n{gene1}\n{gene2}\n')
            print(f'After {n}-point(s) crossover at index(es) : {idx}\n')
            print(f'------------Children Genomes-------------\n{gene3}\n{gene4}\n')

        return [gene3, gene4]


    def selection(self, n, subset = None):
        """
        Purpose:
        Helps n_cross_over by choosing what 2 genomes to put in

        Input:
        'n': Number of parent pairs
        'subset': [low,high) Genes in population to select from eg. gene index 4:7 (exclusive)

        Output:
        List of tuples of indexes: [ (parent1,parent2) ,(parent3, parent4)]

        Notes:
        - Each pair holds two different indexes, but the same pair may be
          drawn more than once.
        - Indexes are drawn with probability proportional to the fitness
          value; if the fitness values sum to 0 the draw is uniform.
        - NOTE(review): proportional weights favour LARGE fitness values,
          while elitism()/best_solution() favour SMALL ones - confirm which
          direction is intended for the chosen metric.
        """
        if not subset:
            low, high = (0, self.pop_size)
        else:
            low, high = subset
        size = high - low

        max_pairs = math.comb(size, 2)
        if n > max_pairs:
            raise ValueError(f'Too much combinations. Maximum is {max_pairs}')

        choices = list(range(low, high))
        fits = self.get_fit()[low:high]
        total = sum(fits)
        p = [fit/total for fit in fits] if total else None

        pairs = []
        for _ in range(n):
            parent1, parent2 = random.choice(choices, p = p, size = 2, replace = False)
            pairs.append((parent1, parent2))

        return pairs

    def fitness_mutation_rate(self, k, f_max, f_avg, f):
        """
        Purpose:
        Fitness-based adaptive mutation

        Input:
        k: (k1,k2) Tuple of 2 for constant, k1,k2 in (0,1)
        f_max: Maximum fitness of population
        f_avg: Average fitness of population
        f: Current fitness value of Genome

        Output:
        rate -> float
            k1 * (f_max - f) / (f_max - f_avg) if f >= f_avg, otherwise k2.
            When f_max == f_avg the ratio is undefined; 0.0 is returned,
            which is also what the formula gives for f == f_max.
        """
        k1, k2 = k

        if f >= f_avg:      ## High quality solution
            if f_max == f_avg:
                return 0.0
            rate = k1*( (f_max - f)/(f_max-f_avg) )
        else:               ## Low quality solution
            rate = k2
        return rate

    def rank_mutation_rate(self, p_max, r, n):
        """
        Purpose:
        Rank-based adaptive mutation

        Input:
        p_max: Maximum mutation probability
        r: Rank of chromosome, 1-based (r = 1 gives p_max, r = n gives 0)
        n: population size

        Output:
        rate -> float (p_max itself when n <= 1, where the formula is undefined)
        """
        if n <= 1:
            return p_max
        p = p_max*( 1- (r-1)/(n-1))
        return p

    def mutation(self, type, inplace = True, **params):
        """
        Purpose:
        Mutate every genome of the population with an adaptive or fixed rate

        Input:
        type: 'fitness', 'random', 'rank'
            Default
            - 'fitness' : k = (k1,k2) = (0.05,0.06)
            - 'random': rate = 0.01
            - 'rank': p_max = 0.08
        inplace: Replace this population's genomes by the mutated ones
        params: k / rate / p_max, passed BY KEYWORD

        Output:
        This Population (inplace) or a new Population holding the mutated genomes

        Raises:
        ValueError for an unknown mutation type
        """
        subset = self.population     ## Sorted, index 0 has rank 1

        if type == 'fitness':
            if 'k' in params:
                k = params['k']
            else:
                print("Using default k = (0.05, 0.06).")
                k = (0.05,0.06)
            fits = [gene.fitness for gene in subset]
            f_max = max(fits)
            f_avg = statistics.mean(fits)
            rates = [self.fitness_mutation_rate(k, f_max, f_avg, f = fit) for fit in fits]

        elif type == 'rank':
            if 'p_max' in params:
                p_max = params['p_max']
            else:
                print("Using default p_max = 0.08.")
                p_max = 0.08
            # Use the real length (it may differ from pop_size) and 1-based ranks
            n = len(subset)
            rates = [self.rank_mutation_rate(p_max, index + 1, n) for index in range(n)]

        elif type == 'random':
            if 'rate' in params:
                rate = params['rate']
            else:
                print("Using default rate = 0.01.")
                rate = 0.01
            rates = [rate] * len(subset)

        else:
            raise ValueError("No such mutation type.")

        new_pop = [gene.mutate(self.param_grid, rate) for gene, rate in zip(subset, rates)]

        if inplace:
            self.population = new_pop
            return self

        return Population(self.X_train, self.X_test, self.y_train, self.y_test, self.estimator,
                          self.param_grid, self.pop_size, self.metric, new_pop)

    def best_solution(self):
        """Genome with the lowest fitness value (first of the sorted population)."""
        return self.population[0]



In [150]:
class GenomeGrid():
    """
    Genetic-algorithm search over a parameter grid for one estimator.

    Every generation keeps an elite share of the population, fills the
    remaining slots with children produced by 1-point crossover, and then
    mutates the whole generation (elites included).
    """

    # mutation_type -> (keyword understood by Population.mutation, default value)
    _MUTATION_DEFAULTS = {
        'fitness': ('k', (0.05, 0.06)),
        'random': ('rate', 0.5),
        'rank': ('p_max', 0.08),
    }

    def __init__(self, estimator, param_grid, max_evol, pop_size, mutation_type, metric = 'mse', el_prop = 0.2, nco_rate = 0.5, **params):
        """
        Parameters/Attributes
        -------------------
        estimator: Model
        param_grid: Parameters
        max_evol: Max evolution (number of generations)
        pop_size: Population size
        mutation_type: 'fitness', 'random', 'rank'

        optional:
        metric: Evaluation criteria, default = 'mse'
        el_prop: Prop of population for elitism, default = 0.2
        nco_rate: Cross over rate, default = 0.5
        params: Setting for the chosen mutation_type, by keyword
            Default
            - 'fitness' : k = (k1,k2) = (0.05,0.06)
            - 'random': rate = 0.5
            - 'rank': p_max = 0.08

        Raises
        -------------------
        ValueError for an unknown mutation_type
        """
        if mutation_type not in self._MUTATION_DEFAULTS:
            raise ValueError('No such mutation type')

        self._estimator = estimator
        self._param_grid = param_grid
        self._max_evol = max_evol
        self._pop_size = pop_size
        self._mutation_type = mutation_type
        self._metric = metric
        self._el_prop = el_prop
        self._nco_rate = nco_rate

        keyword, default = self._MUTATION_DEFAULTS[mutation_type]
        self._mutation_para = params.get(keyword, default)

    @property
    def estimator(self):
        return self._estimator

    @property
    def param_grid(self):
        return self._param_grid

    @property
    def max_evol(self):
        return self._max_evol

    @property
    def pop_size(self):
        return self._pop_size

    @property
    def mutation_type(self):
        return self._mutation_type

    @property
    def metric(self):
        return self._metric

    @property
    def mutation_para(self):
        return self._mutation_para

    @property
    def nco_rate(self):
        return self._nco_rate

    @property
    def el_prop(self):
        return self._el_prop

    def _mutation_kwargs(self):
        """Keyword arguments that hand mutation_para to Population.mutation()."""
        keyword, _ = self._MUTATION_DEFAULTS[self.mutation_type]
        return {keyword: self.mutation_para}


    def train_model(self, X_train, X_test, y_train, y_test, verbose = True):
        """
        Run the genetic search for max_evol generations.

        Parameters
        -------------------
        X_train, X_test, y_train, y_test: Split used to fit and score every genome
        verbose: Print crossover details and every generation

        Returns
        -------------------
        The genome with the lowest fitness seen in any generation (the same
        ordering Population.best_solution() uses)
        """
        pop = Population(X_train, X_test, y_train, y_test, self.estimator, self.param_grid, self._pop_size, self.metric)
        best = pop.best_solution()

        for i in range(self.max_evol):

            ## Elitism: Choose top t
            top_t = pop.elitism(self.el_prop)

            ## Selection: every pair yields two children, so only half as many
            ## pairs as open slots are needed
            n_children = self.pop_size - len(top_t)
            parent_pairs = pop.selection(math.ceil(n_children / 2))

            ## Cross Over subset
            children = []
            for pair in parent_pairs:
                child = pop.n_cross_over(pair, 1, self.nco_rate, verbose)
                if verbose:
                    print(f'this is child {pair}: {child}')
                children += child

            ## Drop the surplus child when n_children is odd
            next_gen = top_t + children[:n_children]
            pop = Population(X_train, X_test, y_train, y_test, self.estimator, self.param_grid, self._pop_size, self.metric, next_gen)

            ## Mutate everything, including those carried over by elitism.
            ## The setting has to go by keyword: the second positional
            ## parameter of Population.mutation is `inplace`.
            pop.mutation(self.mutation_type, **self._mutation_kwargs())

            ## Mutation may destroy the best genome, so remember it
            candidate = pop.best_solution()
            if candidate < best:
                best = candidate

            if verbose:
                print(f'--------------Generation {i}------------\n{pop}')

        return best
    
        

            




        

        




In [151]:
## Test

# Toy grid with three int genes and one categorical gene, in the
# {'range': ..., 'dtype': ...} format used throughout this notebook.
param_grid = {
    'a': {'range': [1,10], 'dtype' : 'int'},
    'b': {'range': [1,10], 'dtype' : 'int'},
    'c': {'range': [1,10], 'dtype' : 'int'},
    'd': {'range':['A','B','C'], 'dtype' : 'category'}
}

## Wrong or edge cases
# NOTE(review): the key 'd' appears twice in this literal, so Python keeps only
# the last entry (the float one) and the int [1,2] case is silently dropped.
edge_cases = {  
    'd': {'range': [1,2], 'dtype': 'int'},      ## Convert to below
    'e': {'range': [1], 'dtype' : 'int'},
    'd': {'range': [1,2], 'dtype': 'float'}
}

# Presumably grids that input validation should reject - TODO confirm.
# Neither edge_cases nor wrong_cases is used by any other cell shown here.
wrong_cases = {
    'a': {'range': [], 'dtype':'int'},
    'b': {'range': [1,1], 'dtype': 'int'},
    'c': {'range': [1,2,3], 'dtype': 'category'}
}


In [152]:
# Search space for the svm.SVC model used below; replaces the toy grid above.
param_grid = {
            'kernel': {'range' :['linear','poly'], 'dtype': 'category'},
            # SVC requires C to be strictly positive. Float genes are rounded
            # to 5 decimals, so a lower bound of 0 could produce C = 0.0;
            # 0.00001 is the smallest value that survives the rounding.
            'C':{'range' : [0.00001, 1], 'dtype': 'float'},
            'gamma': {'range': [0,1], 'dtype': 'float'}
            }

In [160]:
# NOTE(review): these imports sit mid-notebook; load_iris is already imported
# in the first cell. Consider moving them all into the top import cell.
from sklearn import svm
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Despite its name, `df` is whatever load_iris() returns; only its .data and
# .target attributes are used. 80/20 split with a fixed random_state.
df = load_iris()
X_train, X_test, y_train, y_test = train_test_split(df.data, df.target, test_size = 0.2, random_state = 123)


model = svm.SVC()

# max_evol = 100 generations, pop_size = 10, rank-based mutation with p_max = 0.1.
# NOTE: `pop` is a GenomeGrid here, not a Population.
# NOTE(review): metric is left at its default 'mse', which is then computed on
# the labels predicted by a classifier - confirm that this is intended.
pop = GenomeGrid(model, param_grid, 100, 10, mutation_type = 'rank', p_max = 0.1)

In [None]:
# Runs the search configured above; every genome created along the way fits
# the model once. The returned genome is this cell's displayed value.
pop.train_model(X_train, X_test, y_train, y_test)

In [134]:
# NOTE(review): `pop` was last bound to a GenomeGrid, which defines no copy().
# This cell (execution count 134) appears to come from an earlier session in
# which `pop` was a Population - it will not run top-to-bottom as is.
new_pop = pop.copy()

In [None]:
# Rank-based mutation with p_max = 0.5. inplace defaults to True, so the
# genomes held by new_pop are replaced by their mutated versions.
new_pop.mutation('rank', p_max = 0.5)

In [137]:
# NOTE(review): selection() and n_cross_over() are Population methods, so this
# cell needs `pop` to be a Population (the setup cell binds it to a GenomeGrid).
x = pop.selection(2)
print(f'This is {x}')
# n_cross_over() names its crossover probability `nco_rate`; it has no `rate`
# parameter, so the previous `rate = 0` keyword raised a TypeError.
pop.n_cross_over(x[1],n = 2, nco_rate = 0)

This is [(6, 0), (5, 1)]
p value = 0.76913 is more than 0.000000. No mutation.


(Genome = Params : {'a': 2, 'b': 7, 'c': 3, 'd': 'C'}, Fitness : 79,
 Genome = Params : {'a': 9, 'b': 4, 'c': 9, 'd': 'A'}, Fitness : 87)

In [107]:
# NOTE(review): indexing needs `pop` to be a Population; pop[0] is then the
# genome with the lowest fitness. rate = 1 means every mutable gene is changed.
x = pop[0]
x.mutate(param_grid, 1)

original: Genome = Params : {'a': 6, 'b': 5, 'c': 1, 'd': 'A'}, Fitness : 77
After mutation: 'a': '1'
After mutation: 'b': '2'
After mutation: 'c': '3'
After mutation: 'd': 'C'


Genome = Params : {'a': 1, 'b': 2, 'c': 3, 'd': 'C'}, Fitness : 73

In [639]:
# Quick manual check of tim_sort on three distinct random values from 1..10
# (the draw is unseeded, so the displayed result changes between runs).
tim_sort(list(np.random.choice([1,2,3,4,5,6,7,8,9,10], size = 3, replace = False)))

[2, 8, 9]