In [8]:
import random
import math
import copy
import re
import pandas as pd
import numpy as np
import os, os.path

def calculate_boundary(parmSamples):
    # evaluate parm's length in chromosome, determine every parm's boundary
    parmBound = []
    for i in range(len(parmSamples)):
        assert(parmSamples[i] > 1e-7)
        assert('int' in str(type(parmSamples[i])))
        numSamples = parmSamples[i]
        binLength = math.log(numSamples, 2) # idx starts from 0
        binLength = int(math.floor(binLength)+1)     # binary
        parmBound.append(binLength)
    return parmBound

class Chromosome(object):
    """Chromosome: based on the parmSamples
        ----
        The chromosome parameters:
            parmChrom:  chrome binary code list
            parmNum:    number of parameters in chromosome

            genIdx:     the generation N.O. of current chrom
            chromIdx:   the chrom N.O. of current chrom
            strChrom:   string format binary code
            model:      {term: sigma, truncation ratio, coefficient}
            rms:        rms of current model
            parentID:   '{$fatherGenIdx}_{$fatherChromIdx}_{$MotherGenIdx}_{$MotherChromIdx}'
            childIDs:   a list, ['{$child1_genIdx}_{$child1_chromIdx}', ...]
    """
    def __init__(self):
        super(Chromosome, self).__init__()
        self.init_chrom_property()

    def init_chrom_property(self, parmChrom=[], parmNum=0,
                                parentID='-1_-1_-1_-1', childIDs=[],
                                genIdx=0, chromIdx=0, model={}, rms=0.,
                                ):
        '''two reasons for this init_chrom_property method:
            1. A whole picture for chm variables
            2. keep consistence for clone_chrom at anytime
        '''
        self.parmChrom = parmChrom
        self.parmNum = parmNum
        self.genIdx = genIdx
        self.chromIdx = chromIdx
        self.model = model
        self.rms = rms
        self.parentID = parentID
        self.childIDs = childIDs

    def set_chrom_result(self, genIdx=0, chromIdx=0, model={}, rms=0.,):
        self.genIdx = genIdx
        self.chromIdx = chromIdx
        self.model = model
        self.rms = rms

    def rand_init_chrom(self, parmSamples, parmBound):
        """randomly initialize chromosome by bin length and max range of the parameters"""
        parmChrom = []
        for i in range(len(parmBound)):
            curParmVal = random.randint(0, parmSamples[i]) # np.random, low (inclusive) to high (exclusive).
            subChrom = self.dec2bin(curParmVal, parmBound[i])
            parmChrom += subChrom
        self.parmChrom = parmChrom # chromosome sequence
        self.parmNum = len(parmSamples)
        return self

    def dec2bin(self, num, length):
        binChar = bin(num)
        binChar = list(binChar[2:]) # 15 -> '0b1111'
        if len(binChar) < length:
            binChar = ['0' for ii in range(length - len(binChar))] + binChar
        binVal = [int(aa) for aa in binChar]
        # print num, length, binChar
        return binVal

    def clone_chrom(self):
        newChrom = Chromosome()
        newChrom.parmChrom = copy.deepcopy(self.parmChrom)
        newChrom.parmNum = self.parmNum
        newChrom.genIdx = self.genIdx
        newChrom.chromIdx = self.chromIdx
        newChrom.model = copy.deepcopy(self.model)
        newChrom.rms = self.rms
        newChrom.parentID = self.parentID
        newChrom.childIDs = copy.deepcopy(self.childIDs)
        return newChrom

    def back_parm(self, parmMin, parmMax, parmSamples, parmBound):
        parmChrom = self.parmChrom
        parmVal = []
        acculumatedLen = 0
        for ii, curBinLen in enumerate(parmBound):
            subChrom = parmChrom[acculumatedLen:(acculumatedLen + curBinLen)]
            acculumatedLen += curBinLen
            binVal = 0 # binary number
            for jj, curBite in enumerate(subChrom):
                binVal += curBite*2**(curBinLen - jj - 1)
            curParmVal = parmMin[ii] + 1. * binVal / (parmSamples[ii] - 1) * (parmMax[ii] - parmMin[ii]) # real model parm val
            parmVal.append(curParmVal)
        return parmVal

    def crossover(self, mother, parmIdx, parmBound):
        # crossover self and mother chromosome, at the parm boundary positions
        assert(self.parmNum == mother.parmNum)
        assert(parmIdx < self.parmNum)
        pos = 0
        for ii in range(parmIdx):
            pos += parmBound[ii]
        chmTemp = self.clone_chrom()
        self.parmChrom = self.parmChrom[:pos] + mother.parmChrom[pos:]
        mother.parmChrom = mother.parmChrom[:pos] + chmTemp.parmChrom[pos:]

    def mutate(self, parmIndexes, parmSamples, parmBound):
        # mutate within itself, by the unit of parm
        # switch parm i into some other value
        parmNum = self.parmNum
        assert(isinstance(parmIndexes, list))
        assert(all([ii < parmNum for ii in parmIndexes]))
        acculumated_len = lambda x,y: int(np.sum(x[0:y]))  # python index [0->y)
        for ii in parmIndexes:
            curParmVal = random.randint(0, parmSamples[ii]) # np.random, low (inclusive) to high (exclusive).
            startPos = acculumated_len(parmBound, ii)
            self.parmChrom[startPos:startPos+parmBound[ii]] = self.dec2bin(curParmVal, parmBound[ii])

    def str_chrom(self):
        strChrom = ""
        for ii in self.parmChrom:
            strChrom += str(ii)
        self.strChrom = strChrom
        return strChrom
            
if __name__ == "__main__":
    random.seed(0)
    cwd = os.getcwd()
    infile = os.path.join(cwd, "data\\v0", "GA_init_setting_v0.xlsx")
    df = pd.read_excel(infile)

    parmMin = df[['parmName', 'parmMin']].set_index('parmName').to_dict().values()[0]
    parmNames = parmMin.keys()
    parmMin = parmMin.values()
    parmMax = df[['parmName', 'parmMax']].set_index('parmName').to_dict().values()[0].values()
    parmSamples = df[['parmName', 'parmSamples']].set_index('parmName').to_dict().values()[0].values()           
    
    print df[['parmName', 'parmMin']].set_index('parmName').to_dict().values()[0].keys()
    # print parmMin, parmMax, parmSamples
    for parmList in iter([parmMin, parmMax, parmSamples]):
        print parmList
    parmBound = calculate_boundary(parmSamples)
    print parmBound, len(parmBound)
    
    chm1 = Chromosome()
    chm1 = chm1.rand_init_chrom(parmSamples, parmBound)
    chm2 = Chromosome()
    chm2 = chm2.rand_init_chrom(parmSamples, parmBound)
    
    print chm1.str_chrom()
    print chm2.str_chrom()
    
    parmVal1 = chm1.back_parm(parmMin, parmMax, parmSamples, parmBound)
    print parmVal1

    print "\ncrossover:"
    for i in range(10):    
        chm1.crossover(chm2, i, parmBound)
        print i, parmBound[i]
        print chm1.str_chrom()
        print chm2.str_chrom()
    print "\nmutation:"    
    for i in range(10):
        chm1.mutate([i], parmSamples, parmBound)
        print i, chm1.str_chrom()
        
print  chm1.__dict__ 

[u'b0n_ratio', u'sigmaMG1', u'sigmaAp', u'sigmaBp', u'sigmaBn', u'sigmaAG2', u'sigmaAG1', u'sigmaAm', u'b0m_ratio', u'b0_ratio']
[0.10000000000000001, 30.0, 30.0, 30.0, 30.0, 50.0, 0.0, 30.0, 1.5, 0.69999999999999996]
[0.5, 300.0, 300.0, 300.0, 300.0, 300.0, 20.0, 300.0, 1.8999999999999999, 1.3]
[3, 100, 100, 100, 100, 67, 33, 100, 3, 4]
[2, 7, 7, 7, 7, 7, 6, 7, 2, 3] 10
1110011000101010001101001100110011011011010001111001010
1101100100011100100110001111100010001011110110001111100
[0.70000000000000007, 237.27272727272725, 144.54545454545456, 100.90909090909092, 169.09090909090909, 152.27272727272728, 16.25, 111.81818181818183, 1.7, 1.1000000000000001]

crossover:
0 2
1101100100011100100110001111100010001011110110001111100
1110011000101010001101001100110011011011010001111001010
1 7
1110011000101010001101001100110011011011010001111001010
1101100100011100100110001111100010001011110110001111100
2 7
1110011000011100100110001111100010001011110110001111100
110110010010101000110100110011001101

In [None]:
import copy
def sort_chroms(rmses):
    chromNum = len(rmses)
    print range(0, chromNum-1)
    print range(1, chromNum)
    print rmses
    for ii in range(0, chromNum-1):
        for jj in range(ii , chromNum):
            if rmses[jj] < rmses[ii]:
                temp = copy.copy(rmses[ii])
                rmses[ii] = rmses[jj]
                rmses[jj] = copy.copy(temp)
    print rmses
    pass
rmses = [0.18363069569565638, 0.05463982085186381, 0.9556240947909088, 0.17688278609064156, 0.8143323448307423]
sort_chroms(rmses)

In [None]:
binLen = sum([2, 7, 7, 7, 7, 7, 6, 7, 2, 2])
print binLen
print len(''.join('1000011100111001011001010111000100110001000010001000100'))

In [None]:
import random
import math
import copy
import re
import pandas as pd
import os, os.path

def gen_term_pairs(selectedTerms, parmNames):
    # return a table of search param name and (sigmaVal, truncation level), e.g., 'Ap': sigmaAp, b0_ratio
    termpairs = {}
    noneSigmaTerm = {"A", "Mav", "Slope"}

    sigmaParms = [aa for aa in parmNames if(re.match(r'sigma[\S]*', aa) and (not re.match(r'sigma2D[\S]*', aa)))]
    truncRatioParms = [aa for aa in parmNames if re.match(r'b[0-9]*[-_AaBbPp0-9]*', aa)]

    fixedTermPairs = {"Ap": "b0_ratio", "Bp": "b0_ratio", "Am": "b0m_ratio", "Bn": "b0n_ratio"}

    for curTerm in selectedTerms:
        curSigma = ""
        curTruncRatio = ""
        if curTerm not in noneSigmaTerm:
            if curTerm in fixedTermPairs.keys():
                curSigma = r'sigma{}'.format(curTerm)
                curTruncRatio = fixedTermPairs[curTerm]
            else:
                for sigmaName in sigmaParms:
                    if curTerm in sigmaName:
                        curSigma = sigmaName
                        break
                for truncRatio in truncRatioParms:
                    if curTerm in truncRatio:
                        curTruncRatio = truncRatio
                        break
        termpairs[curTerm] = (curSigma, curTruncRatio)
    return termpairs

if __name__ == "__main__":
    cwd = os.getcwd()
    infile = os.path.join(cwd, "GA_init_setting_v0.xlsx")
    df = pd.read_excel(infile)
    
    parmSamples = df[['parmName', 'parmSamples']].set_index('parmName').to_dict().values()[0]
    parmNames = parmSamples.keys()
    print parmNames
    useterms = ['AG1', 'AG2', 'Ap', 'Bp', 'Am', 'Bn', 'MG1', 'Slope']
    termpairs = gen_term_pairs(useterms, parmNames)
    print termpairs

In [12]:
z = (1.0, 0.5, 20)
a,b,c =z
print a,b,c
print z

1.0 0.5 20
(1.0, 0.5, 20)
