In [1]:
from    ctypes  import *
import  ctypes
from    scipy   import sparse

import  timeit
import  numpy   as np
import  random

In [2]:
# treat this as a library call and put it here

# in Linux:

#     precompile with bash script
#     then run ldconfig

#     if your computer has issues you may need to reconfgure the PATH used to find libraries

# <my_computer/Projects/Machine_Learning$>>> $ gcc -fPIC -shared -o keith_functions.so keith_module.c
# <my_computer/Projects/Machine_Learning$>>> $ sudo ldconfig -v

# note that printf
# when called from keith_mutate over in C land
# prints out to the terminal you are using to run your IDE, not to the notebook here



In [3]:
keith_functions = CDLL("keith_functions.so")

In [4]:
keith_mutate    = keith_functions.keith_mutate

In [5]:
'''# speed comparison of four approaches    '''
'''    # base_mutate native python libraries        '''
'''    # pygad_mutate specialist libraries and two for loops'''
'''    # vector_mutate special libraries and aggressive vectorisation'''
'''    # keith_mutate modular call from bespoke C function in local library '''
'''                                                                                 '''
'''# further optimisations maybe possible with the four approaches                          '''
'''# note that mutate request internally casts the numpy population into a list of lists            '''

'# note that mutate request internally casts the numpy population into a list of lists            '

In [6]:
class MutateRequest:
    def __init__(self, population):
        
        # fairer to start with the native list object and require modules to promote to array
        # than have "naive" modules demoting ndarrays back down to lists
        number_of_mutations = get_number_of_mutations()
        self.population     = population
        self.population     = [list(algo) for algo in population] # see explanation of choice of list not ndarray
        self.rows_algos     = len(population)
        self.columns_genes  = len(population[0])
        self.number_of_mutations = number_of_mutations

class TellObject:
    def __init__(self, population, base_mutants, keith_mutants, pygad_mutants, vector_mutants):
        self.population     = population
        self.base_mutants   = base_mutants
        self.keith_mutants  = keith_mutants        
        self.pygad_mutants  = pygad_mutants
        self.vector_mutants = vector_mutants
        
def get_random_mutation_min_val():
    return -1

def get_random_mutation_max_val():
    return 1
        
def tell(tell_object):
    print('\noriginal population\n', np.around(np.asarray(tell_object.population), decimals=4))
    print('\nmutation by base libraries\n', np.around(np.asarray(tell_object.base_mutants), decimals=4))
    print('\nmutation by aggressively vectorised scipy\n', np.around(np.asarray(tell_object.vector_mutants), decimals=4))
    print('\nmutation by pygad function\n', np.around(np.asarray(tell_object.pygad_mutants), decimals=4))
    print('\nmutation by bespoke C function\n', np.around(np.asarray(tell_object.keith_mutants), decimals=4))

def get_number_of_mutations():
    number_of_mutations = 350
    return number_of_mutations

def build_population():
    low             = -1
    high            =  1
    rows_algos      =  20
    columns_genes   =  7000
    cohort_size     = (rows_algos, columns_genes) #  rows_algos, columns_genes
    population      = np.random.uniform(low, high, size=cohort_size)
    return population

In [7]:
def keith_wrapper(mutate_request):
    
    # https://stackoverflow.com/questions/26312711/how-do-i-convert-a-double-array-in-c-to-python-list
    
    rows_algos    = mutate_request.rows_algos
    columns_genes = mutate_request.columns_genes
    
    population      = np.asarray(mutate_request.population)
    flat_population = list(np.ravel(np.copy(population)))
    
    first_gene              = (c_double(flat_population[0]))
    min_mutation            = get_random_mutation_min_val()
    max_mutation            = get_random_mutation_max_val()
    length_flat_population  = len(flat_population)
    number_of_mutations     = get_number_of_mutations()
    
    # prep the C args
    CCompatiblePopulation   = c_double * length_flat_population
    C_compatible_population = CCompatiblePopulation(*flat_population)
    p                       = pointer(C_compatible_population)

    # do the C - be aware this may have unstable memory
    keith_mutate.restype    = ctypes.POINTER(ctypes.c_double * length_flat_population)
    
    # intepret return as a C list
    p_from_C                = keith_mutate(p, c_int(min_mutation), c_int(max_mutation), c_int(length_flat_population), c_int(number_of_mutations))
    C_born_population       = [x for x in p_from_C.contents]
    C_born_population_array = np.asarray(C_born_population)
    keith_mutants           = np.reshape(C_born_population_array, (rows_algos, columns_genes))
    
    return keith_mutants

In [8]:
def base_mutate(mutate_request):
    '''only uses naive base library - math, itertools, random etc are allowed'''
    base_mutants        = mutate_request.population
    number_of_mutations = mutate_request.number_of_mutations
    rows_algos          = mutate_request.rows_algos
    columns_genes       = mutate_request.columns_genes

    for algo in base_mutants:
        for mutation in range(number_of_mutations):
            random_place    = random.randrange(columns_genes)
            starting_gene   = algo[random_place]
            a_float         = random.random()
            algo.pop(random_place)
            mutant_gene     = starting_gene + a_float
            algo.insert(random_place, mutant_gene)
    return base_mutants

In [9]:
def pygad_mutate(mutate_request):
    # 1. removed dependency on gene_type
    # 2. all modules for these time trials start with a list because they refer to the MutateRequest
    #    and the mutate reqeust offers a list as the most basic thing
    #    so if this module seems handicapped by e.g. the need to retype offspring as an array then
    #    people are welcome to change the interface, but it just passes that handicap elsewhere

    offspring                = np.asarray(mutate_request.population)
    num_genes                = mutate_request.columns_genes
    mutation_num_genes       = mutate_request.number_of_mutations
    random_mutation_min_val  = get_random_mutation_min_val()
    random_mutation_max_val  = get_random_mutation_max_val()
    
    for offspring_idx in range(offspring.shape[0]):
        mutation_indices = np.array(random.sample(range(0, num_genes), mutation_num_genes))
        for gene_idx in mutation_indices:
            random_value = np.random.uniform(low=random_mutation_min_val, 
                                            high=random_mutation_max_val, 
                                            size=1)
            offspring[offspring_idx, gene_idx] = offspring[offspring_idx, gene_idx] + random_value
    return offspring

In [10]:
def vector_mutate(mutate_request):

    population          = np.asarray(mutate_request.population)
    number_of_mutations = mutate_request.number_of_mutations
    all_loci            = mutate_request.rows_algos * mutate_request.columns_genes
    mutation_rate       = number_of_mutations/mutate_request.columns_genes
    
    mutation_matrix = sparse.random(population.shape[0], population.shape[1], density=mutation_rate)
    pre_mutants     = np.copy(population)
    mutants         = np.add(pre_mutants, mutation_matrix.todense())
    return mutants

In [11]:
if __name__ == '__main__':
    
    population          = build_population()
    mutate_request      = MutateRequest(population)
    
    base_mutants        =  base_mutate(mutate_request)
    keith_mutants       = keith_wrapper(mutate_request)
    pygad_mutants       = pygad_mutate(mutate_request)
    vector_mutants      = vector_mutate(mutate_request)
    
    that_which_to_tell = TellObject(population, base_mutants, keith_mutants, pygad_mutants, vector_mutants)
    tell(that_which_to_tell)
    


original population
 [[-0.3169 -0.3134  0.0436 ...  0.5701  0.4312 -0.7247]
 [ 0.3405  0.7434 -0.8623 ...  0.6927 -0.4048 -0.935 ]
 [ 0.2301  0.1732 -0.7647 ...  0.2443 -0.032  -0.6554]
 ...
 [ 0.1442 -0.0571 -0.963  ...  0.3522 -0.8316  0.7716]
 [ 0.342  -0.4969  0.6047 ... -0.7141  0.5167  0.9836]
 [-0.3438  0.6768  0.1483 ... -0.6731 -0.1981 -0.6949]]

mutation by base libraries
 [[-0.3169  0.6239  0.0436 ...  0.5701  0.4312 -0.7247]
 [ 0.3405  0.7434 -0.8623 ...  1.1337 -0.4048 -0.935 ]
 [ 0.2301  0.1732 -0.7647 ...  0.2952 -0.032  -0.6554]
 ...
 [ 0.1442 -0.0571 -0.963  ...  0.3522 -0.8316  0.7716]
 [ 0.342  -0.4969  0.6047 ... -0.7141  0.5167  0.9836]
 [-0.3438  0.6768  0.1483 ... -0.6731 -0.1981 -0.6949]]

mutation by aggressively vectorised scipy
 [[-0.3169  0.6239  0.0436 ...  0.5701  0.4312 -0.7247]
 [ 0.3405  0.7434 -0.8623 ...  1.1337  0.1647 -0.935 ]
 [ 0.2301  0.1732 -0.7647 ...  0.2952 -0.032  -0.6554]
 ...
 [ 0.6854 -0.0571 -0.963  ...  0.3522 -0.8316  0.7716]
 [ 0.342

In [12]:
'''
# build population
'''

def test_build_population():
    population = build_population()
    assert isinstance(population,       np.ndarray),    'Error: should be array of arrays'
    assert isinstance(population[0],    np.ndarray),    'Error: should be an array'
    assert isinstance(population[0][0], np.float64),    'Error: should be a float'
    print('pass')

In [13]:
'''
# base mutate
'''    

def test_base_mutate():
    number_of_mutations   = get_number_of_mutations()
    population            = build_population()
    mutate_request        = MutateRequest(population)
    mutants               = base_mutate(mutate_request)
    max_parity_sum        = population.shape[0] * population.shape[1]
    rows_algos            = mutate_request.rows_algos
    
    #form
    assert isinstance(mutants,      list),  'Error: should be a list'
    assert isinstance(mutants[0],   list),  'Error: should be a list'
    assert isinstance(mutants[0][0],float), 'Error: should be a float'
    
    #content
    flat_population = list(np.ravel(np.asarray(population)))
    flat_mutants    = list(np.ravel(np.asarray(mutants)))
    both            = zip(flat_population, flat_mutants)
    parity_check    = sum([int(x[0]==x[1]) for x in both])
    if number_of_mutations > 0:
        assert parity_check <= max_parity_sum-rows_algos, 'Error: there should be at least one difference per algo'
    assert parity_check > 0, 'Error: they should not be completely different'
    print('pass')

In [14]:
'''
# keith wrapper and mutate
'''

def test_keith_wrapper():
    
    number_of_mutations     = get_number_of_mutations()
    population              = build_population()
    mutate_request          = MutateRequest(population)
    keith_mutants           = keith_wrapper(mutate_request)

    max_parity_sum        = population.shape[0] * population.shape[1]
    rows_algos            = mutate_request.rows_algos
    
    #form
    assert isinstance(keith_mutants,      np.ndarray),  'Error: should be an np array'
    assert isinstance(keith_mutants[0],   np.ndarray),  'Error: should be an np array'
    assert isinstance(keith_mutants[0][0],float),       'Error: should be a float'
    
    #content
    flat_population = list(np.ravel(np.asarray(population)))
    flat_mutants    = list(np.ravel(np.asarray(keith_mutants)))
    both            = zip(flat_population, flat_mutants)
    parity_check    = sum([int(x[0]==x[1]) for x in both])
    if number_of_mutations > 0:
        assert parity_check != max_parity_sum, 'Error: they should not be completely the same'
    assert parity_check > 0, 'Error: they should not be completely different'
    print('pass')
    

In [15]:
'''
# vector mutate
'''

def test_vector_mutate():
    number_of_mutations   = get_number_of_mutations()
    population            = build_population()
    mutate_request        = MutateRequest(population)
    mutants               = vector_mutate(mutate_request)
    max_parity_sum        = population.shape[0] * population.shape[1]
    rows_algos            = mutate_request.rows_algos
    
    #form
    assert isinstance(mutants,      np.ndarray),  'Error: should be an np array'
    assert isinstance(mutants[0],   np.ndarray),  'Error: should be an np array'
    assert isinstance(mutants[0][0],np.matrix), 'Error: should be a float'
    
    #content
    flat_population = list(np.ravel(np.asarray(population)))
    flat_mutants    = list(np.ravel(np.asarray(mutants)))
    both            = zip(flat_population, flat_mutants)
    parity_check    = sum([int(x[0]==x[1]) for x in both])
    if number_of_mutations > 0:
        assert parity_check <= max_parity_sum-rows_algos, 'Error: there should be at least one difference per algo'
    assert parity_check > 0, 'Error: they should not be completely different'
    print('pass')

In [16]:
'''
# pygad mutate
'''
    
def test_pygad_mutate():
    number_of_mutations   = get_number_of_mutations()
    population            = build_population()
    mutate_request        = MutateRequest(population)
    mutants               = pygad_mutate(mutate_request)
    max_parity_sum        = population.shape[0] * population.shape[1]
    rows_algos            = mutate_request.rows_algos
    
    #form
    assert isinstance(mutants,      np.ndarray),  'Error: should be an np array'
    assert isinstance(mutants[0],   np.ndarray),  'Error: should be an np array'
    assert isinstance(mutants[0][0],float), 'Error: should be a float'
    
    #content
    flat_population = list(np.ravel(np.asarray(population)))
    flat_mutants    = list(np.ravel(np.asarray(mutants)))
    both            = zip(flat_population, flat_mutants)
    parity_check    = sum([int(x[0]==x[1]) for x in both])
    if number_of_mutations > 0:
        assert parity_check <= max_parity_sum-rows_algos, 'Error: there should be at least one difference per algo'
    assert parity_check > 0, 'Error: they should not be completely different'
    print('pass')
    


In [17]:
 
'''
# tests per se
'''

def tests():
    
    test_base_mutate()
    test_build_population()
    test_keith_wrapper()
    test_pygad_mutate()
    test_vector_mutate()
    print('\nwell done passed all active tests - stick to TDD even where you are excited.\n')
    
tests()

pass
pass
pass
pass
pass

well done passed all active tests - stick to TDD even where you are excited.



In [18]:
'''
# timings
'''

def timings():
    
    population          = build_population()
    mutate_request      = MutateRequest(population)

    base_times      = []
    for i in range(10000):
        base_time   = timeit.timeit("base_mutate",   setup="from __main__ import base_mutate")
        base_times.append(base_time)
    
    keith_times     = []
    for i in range(10000):
        keith_time  = timeit.timeit("keith_mutate",  setup="from __main__ import keith_mutate")
        keith_times.append(keith_time)

    pygad_times     = []
    for i in range(10000):
        pygad_time  = timeit.timeit("pygad_mutate",  setup="from __main__ import pygad_mutate")
        pygad_times.append(pygad_time)
    
    vector_times    = []
    for i in range(10000):
        vector_time = timeit.timeit("vector_mutate", setup="from __main__ import vector_mutate")
        vector_times.append(vector_time)
        
    base_mean      = np.mean(base_times)
    keith_mean     = np.mean(keith_times)
    pygad_mean     = np.mean(pygad_times)
    vector_mean    = np.mean(vector_times)
    
    print("\nbase_mutate   average time\t", base_mean)
    print("\nkeith_mutate  average time\t", keith_mean)
    print("\npygad_mutate  average time\t", pygad_mean)
    print("\nvector_mutate average time\t", vector_mean)
    
timings()



base_mutate   average time	 0.008683392293999895

keith_mutate  average time	 0.00891941054599938

pygad_mutate  average time	 0.00927882889410248

vector_mutate average time	 0.009176885336099077
