In [3]:
import random
import math
import copy
import re
import pandas as pd
import numpy as np
import os, os.path

def calculate_boundary(parmSamples):
    """Return the number of chromosome bits needed to encode each parameter.

    A parameter with ``n`` samples is stored as an index in ``0 .. n-1``, so it
    needs as many bits as the binary representation of ``n - 1``.

    Args:
        parmSamples: sequence of positive integer sample counts, one per
            parameter.

    Returns:
        list of int: bit length of every parameter, in the order of
        ``parmSamples``. Every parameter gets at least one bit.

    Raises:
        AssertionError: if a sample count is not positive or not an integer
            type.
    """
    parmBound = []
    for numSamples in parmSamples:
        assert(numSamples > 1e-7)
        assert('int' in str(type(numSamples)))
        # int.bit_length() is exact, unlike floor(log2(x)) + 1 on floats, and
        # it is defined for 0, so a single-sample parameter no longer raises
        # "math domain error"; such a parameter still occupies one bit.
        maxIdx = int(numSamples) - 1  # idx starts from 0
        parmBound.append(max(1, maxIdx.bit_length()))
    return parmBound

class Chromosome(object):
    """Binary-encoded chromosome holding one sample index per parameter.

    Attributes (set by ``rand_init_chrom`` or ``clone_chrom``):
        parmChrom: flat list of 0/1 ints. Parameter ``i`` occupies
            ``parmBound[i]`` consecutive bits, most significant bit first.
        parmNum: number of parameters encoded in ``parmChrom``.
    """

    def __init__(self):
        super(Chromosome, self).__init__()

    def rand_init_chrom(self, parmSamples, parmBound):
        """Fill the chromosome with a random sample index for every parameter.

        Args:
            parmSamples: number of samples of each parameter.
            parmBound: bit length of each parameter.

        Returns:
            self, so the call can be chained after construction.
        """
        parmChrom = []
        for ii, curBinLen in enumerate(parmBound):
            curParmVal = random.randint(0, parmSamples[ii] - 1)
            parmChrom += self.dec2bin(curParmVal, curBinLen)
        self.parmChrom = parmChrom  # chromosome sequence
        self.parmNum = len(parmSamples)
        return self

    def dec2bin(self, num, length):
        """Convert a non-negative int into a list of bits, MSB first.

        The result is left-padded with zeros up to ``length`` bits; a number
        that needs more than ``length`` bits is returned unpadded (longer).
        """
        binChar = bin(num)[2:]  # 15 -> '0b1111' -> '1111'
        return [int(aa) for aa in binChar.zfill(length)]

    def clone_chrom(self):
        """Return a new Chromosome with an independent copy of the bits."""
        newChrom = Chromosome()
        # The chromosome is a flat list of ints, so a shallow copy is enough.
        newChrom.parmChrom = list(self.parmChrom)
        newChrom.parmNum = self.parmNum
        return newChrom

    def back_parm(self, parmMin, parmMax, parmSamples, parmBound):
        """Decode the chromosome back into real parameter values.

        Sample index ``k`` of parameter ``i`` is mapped onto the evenly spaced
        grid ``parmMin[i] + k / (parmSamples[i] - 1) * (parmMax[i] - parmMin[i])``,
        i.e. index 0 gives ``parmMin[i]`` and the last index gives
        ``parmMax[i]``.

        Args:
            parmMin: lower bound of each parameter.
            parmMax: upper bound of each parameter.
            parmSamples: number of samples of each parameter.
            parmBound: bit length of each parameter.

        Returns:
            list of decoded parameter values, one per entry of ``parmBound``.
        """
        parmVal = []
        startPos = 0
        for ii, curBinLen in enumerate(parmBound):
            subChrom = self.parmChrom[startPos:startPos + curBinLen]
            startPos += curBinLen
            binVal = 0  # sample index stored in this parameter's bits
            for jj, curBit in enumerate(subChrom):
                binVal += curBit * 2 ** (curBinLen - jj - 1)
            # n samples span n - 1 intervals; dividing by n would make parmMax
            # unreachable. A single-sample parameter stays at parmMin.
            numSteps = parmSamples[ii] - 1
            fraction = 1. * binVal / numSteps if numSteps > 0 else 0.
            curParmRange = parmMax[ii] - parmMin[ii]
            parmVal.append(parmMin[ii] + fraction * curParmRange)  # real model parm val
        return parmVal

    def crossover(self, mother, parmIdx, parmBound):
        """Swap the tails of ``self`` and ``mother`` in place.

        The cut is made at the first bit of parameter ``parmIdx``, so whole
        parameters are exchanged and every encoded index stays valid.

        Args:
            mother: the other Chromosome; it is modified as well.
            parmIdx: index of the first parameter to exchange.
            parmBound: bit length of each parameter.
        """
        assert(self.parmNum == mother.parmNum)
        assert(parmIdx < self.parmNum)
        pos = sum(parmBound[ii] for ii in range(parmIdx))
        ownTail = self.parmChrom[pos:]
        self.parmChrom = self.parmChrom[:pos] + mother.parmChrom[pos:]
        mother.parmChrom = mother.parmChrom[:pos] + ownTail

    def mute(self, parmIndice, parmSamples, parmBound):
        """Mutate in place by re-drawing whole parameters at random.

        Args:
            parmIndice: list of parameter indices to replace with a new random
                sample index.
            parmSamples: number of samples of each parameter.
            parmBound: bit length of each parameter.
        """
        parmNum = self.parmNum
        assert(isinstance(parmIndice, list))
        assert(all([ii < parmNum for ii in parmIndice]))
        for ii in parmIndice:
            curParmVal = random.randint(0, parmSamples[ii] - 1)
            startPos = int(sum(parmBound[0:ii]))  # bits used by parms before ii
            self.parmChrom[startPos:startPos + parmBound[ii]] = self.dec2bin(curParmVal, parmBound[ii])

    def print_chrom(self):
        """Print the chromosome as one string of 0/1 characters."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("".join(str(ii) for ii in self.parmChrom))
            
if __name__ == "__main__":
    # Demo: load the GA settings sheet from the working directory and print
    # the evenly spaced sample grid of every parameter.
    cwd = os.getcwd()
    infile = os.path.join(cwd, "GA_init_setting_v0.xlsx")
    df = pd.read_excel(infile)

    # Index the sheet by parameter name once and look every column up by that
    # name, so the lists below are aligned with each other by construction
    # rather than by the iteration order of separately built dicts.
    settings = df.set_index('parmName')
    parmMinByName = settings['parmMin'].to_dict()
    parmMaxByName = settings['parmMax'].to_dict()
    parmSamplesByName = settings['parmSamples'].to_dict()

    parmNames = list(parmMinByName)
    parmMin = [parmMinByName[name] for name in parmNames]
    parmMax = [parmMaxByName[name] for name in parmNames]
    parmSamples = [parmSamplesByName[name] for name in parmNames]
    for name, lower, upper, numSamples in zip(parmNames, parmMin, parmMax, parmSamples):
        # One pre-formatted argument keeps the output identical on Python 2 and 3.
        print("%s %s" % (name, np.linspace(lower, upper, numSamples)))
        
    
    
#     print df[['parmName', 'parmMin']].set_index('parmName').to_dict().values()[0].keys()
#     # print parmMin, parmMax, parmSamples
#     for parmList in iter([parmMin, parmMax, parmSamples]):
#         print parmList
#     parmBound = calculate_boundary(parmSamples)
#     print parmBound, len(parmBound)
    
#     chm1 = Chromosome()
#     chm1 = chm1.rand_init_chrom(parmSamples, parmBound)
#     chm2 = Chromosome()
#     chm2 = chm2.rand_init_chrom(parmSamples, parmBound)
    
#     chm1.print_chrom()
#     chm2.print_chrom()
    
#     parmVal1 = chm1.back_parm(parmMin, parmMax, parmSamples, parmBound)
#     print parmVal1

#     for i in range(10):    
#         chm1.crossover(chm2, i, parmBound)
#         print i, parmBound[i]
#         chm1.print_chrom()
#         chm2.print_chrom()
    
#     for i in range(10):
#         chm1.mute([i], parmSamples, parmBound)
#         chm1.print_chrom()

b0n_ratio [ 0.1  0.3  0.5]
sigmaMG1 [  30.           32.72727273   35.45454545   38.18181818   40.90909091
   43.63636364   46.36363636   49.09090909   51.81818182   54.54545455
   57.27272727   60.           62.72727273   65.45454545   68.18181818
   70.90909091   73.63636364   76.36363636   79.09090909   81.81818182
   84.54545455   87.27272727   90.           92.72727273   95.45454545
   98.18181818  100.90909091  103.63636364  106.36363636  109.09090909
  111.81818182  114.54545455  117.27272727  120.          122.72727273
  125.45454545  128.18181818  130.90909091  133.63636364  136.36363636
  139.09090909  141.81818182  144.54545455  147.27272727  150.
  152.72727273  155.45454545  158.18181818  160.90909091  163.63636364
  166.36363636  169.09090909  171.81818182  174.54545455  177.27272727
  180.          182.72727273  185.45454545  188.18181818  190.90909091
  193.63636364  196.36363636  199.09090909  201.81818182  204.54545455
  207.27272727  210.          212.72727273  215.4

In [None]:
import copy
def sort_chroms(rmses):
    """Sort ``rmses`` in place in ascending order, printing debug output.

    Prints the index lists ``0 .. n-2`` and ``1 .. n-1``, then the values
    before and after sorting.

    Args:
        rmses: mutable list of mutually comparable values (e.g. RMSE floats);
            it is reordered in place.

    Returns:
        None.
    """
    chromNum = len(rmses)
    # list(...) + single-argument print(...) gives the same text on Python 2 and 3.
    print(list(range(0, chromNum-1)))
    print(list(range(1, chromNum)))
    print(rmses)
    # Built-in in-place sort replaces the hand-written O(n^2) exchange sort.
    rmses.sort()
    print(rmses)
# Demo: sort a sample list of RMSE values in place; sort_chroms prints the
# list before and after sorting.
rmses = [0.18363069569565638, 0.05463982085186381, 0.9556240947909088, 0.17688278609064156, 0.8143323448307423]
sort_chroms(rmses)

In [None]:
import random
import math
import copy
import re
import pandas as pd
import os, os.path

def gen_term_pairs(selectedTerms, parmNames):
    """Map every selected term to its (sigma parm name, truncation-ratio parm name).

    Example entry: ``'Ap': ('sigmaAp', 'b0_ratio')``.

    Rules, in order:
      * terms "A", "Mav" and "Slope" get ``("", "")``;
      * terms "Ap", "Bp", "Am" and "Bn" get ``'sigma' + term`` and a fixed
        truncation-ratio name;
      * any other term gets the first sigma name and the first
        truncation-ratio name in ``parmNames`` that contain the term as a
        substring, or "" when none does.

    Args:
        selectedTerms: iterable of term names to look up.
        parmNames: names of the search parameters.

    Returns:
        dict mapping each term to a ``(sigmaName, truncRatioName)`` tuple.
    """
    termsWithoutSigma = {"A", "Mav", "Slope"}
    presetTruncRatio = {"Ap": "b0_ratio", "Bp": "b0_ratio", "Am": "b0m_ratio", "Bn": "b0n_ratio"}

    def is_sigma_name(name):
        # sigma* parameters, excluding the sigma2D* family
        return bool(re.match(r'sigma[\S]*', name) and (not re.match(r'sigma2D[\S]*', name)))

    def is_trunc_ratio_name(name):
        return bool(re.match(r'b[0-9]*[-_AaBbPp0-9]*', name))

    def first_containing(term, candidates):
        # first candidate that has the term as a substring, "" if there is none
        return next((cand for cand in candidates if term in cand), "")

    sigmaCandidates = [name for name in parmNames if is_sigma_name(name)]
    truncRatioCandidates = [name for name in parmNames if is_trunc_ratio_name(name)]

    pairs = {}
    for term in selectedTerms:
        if term in termsWithoutSigma:
            pair = ("", "")
        elif term in presetTruncRatio:
            pair = (r'sigma{}'.format(term), presetTruncRatio[term])
        else:
            pair = (first_containing(term, sigmaCandidates),
                    first_containing(term, truncRatioCandidates))
        pairs[term] = pair
    return pairs

if __name__ == "__main__":
    # Demo: read the parameter names from the GA settings sheet in the working
    # directory and print the sigma / truncation-ratio pair of each used term.
    cwd = os.getcwd()
    infile = os.path.join(cwd, "GA_init_setting_v0.xlsx")
    df = pd.read_excel(infile)

    # Series.to_dict() yields {parmName: parmSamples} directly, without
    # indexing into dict.values(), which is not subscriptable on Python 3.
    parmSamples = df.set_index('parmName')['parmSamples'].to_dict()
    parmNames = list(parmSamples)
    print(parmNames)
    useterms = ['AG1', 'AG2', 'Ap', 'Bp', 'Am', 'Bn', 'MG1', 'Slope']
    termpairs = gen_term_pairs(useterms, parmNames)
    print(termpairs)