In [1]:
!which python

/home/vagrant/miniconda/envs/dtailor27/bin/python


# D-Tailor testing

## Solution class
- The sequence and all of its features stored in a single class
- Troubleshooting
    - ``Features.Structure.StructureMFE``?
        - Runs ``Functions.analyze_structure_mfe``
        - Which executes ``./3rdParty/unafold/ct-energy``
    - ``Features.Structure.StructureDoubleStranded``?
        - Runs ``Functions.analyze_structure_ds``
        - Which executes ``perl 3rdParty/unafold/ss-count.pl``
    - Need to troubleshoot and install properly all the third party programs...currently none working

In [1]:
from Solution import Solution
from Features.CAI import CAI
from Features.Structure import Structure, StructureMFE, StructureDoubleStranded
from Features.NucleotideContent import NucleotideContent
from Data import cai_table_ec

## Instantiate an object of type 'Solution'
solution = Solution(sol_id='b0001', 
                    sequence='TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGA')


## Instantiate Feature objects of interest

# Feature to calculates the codon adaptation index
cai_obj = CAI(solution=solution,label="cds",cai_table=cai_table_ec, args= {'cai_range':(49,115)})

# Feature to predicts RNA Structure
st1_obj = Structure(solution=solution,label="utrCds",args= { 'structure_range' : (19,78) } )

# Two sub-features inheriting from the class Structure
st_mfe = StructureMFE(st1_obj)
st_ss = StructureDoubleStranded(st1_obj)
st1_obj.add_subfeature(st_mfe)
st1_obj.add_subfeature(st_ss)

# Feature to calculate nucleotide content
nuc_obj = NucleotideContent(solution=solution ,label="utr",args= { 'ntcontent_range':(0,50) } )


## Add features to solution
solution.add_feature(cai_obj)
solution.add_feature(st1_obj)
solution.add_feature(nuc_obj)


## Retrieve feature score
solution.scores
# {'cdsCAI': 0.6136121593930156, 'utrCdsStructureDoubleStrandedList':[18, 19, 25, 26, 38, 39, 44, 45],
# 'utrCdsStructureDoubleStranded': 8, 'utrCdsStructureMFE': -2.5,
# 'utrNucleotideContentAT': 0.63, 'utrNucleotideContentG': 0.16,'utrNucleotideContentT' : 0.18,
# 'utrNucleotideContentC': 0.22, 'utrNucleotideContentA' : 0.45, 'utrNucleotideContentGC': 0.37}

{'cdsCAI': 0.6136121593930156,
 'utrCdsStructureDoubleStranded': 'NA',
 'utrCdsStructureDoubleStrandedList': 'NA',
 'utrCdsStructureMFE': 0,
 'utrNucleotideContentA': 0.45098039215686275,
 'utrNucleotideContentAT': 0.6274509803921569,
 'utrNucleotideContentC': 0.21568627450980393,
 'utrNucleotideContentG': 0.1568627450980392,
 'utrNucleotideContentGC': 0.37254901960784315,
 'utrNucleotideContentT': 0.17647058823529413}

## SequenceAnalyzer class
- Lets you take in a bunch of sequences and compute the same properties for all of them

In [2]:
from SequenceAnalyzer import SequenceAnalyzer
from Features import CAI,Structure,RNADuplex
from Functions import validateCDS
from Data import cai_table_ec

class TranslationFeaturesEcoliAnalyzer(SequenceAnalyzer):

    '''
    Class to analyze CAI, SD strength and structure in E. coli
    '''

    def __init__(self, input_file, input_type):
        SequenceAnalyzer.__init__(self,input_file,input_type)

    def configureSolution(self, solution):
        solution.valid = validateCDS(solution.sequence[49:])
        if solution.valid:
            #CAI
            cai_obj = CAI.CAI(solution=solution,
                              label="cds",
                              cai_table = cai_table_ec,
                              args= { 'cai_range' :(49,len(solution.sequence)) } )

            #Look for RBS
            dup_obj1 = RNADuplex.RNADuplexRibosome(solution1=solution, 
                                                   label="sd16s",
                                                   args = { 'rnaMolecule1region' : (25,48) })
            dup_mfe = RNADuplex.RNADuplexMFE(dup_obj1)
            dup_obj1.add_subfeature(dup_mfe)

            #MFE [-30,30]
            st1_obj = Structure.Structure(solution=solution,
                                          label="utr", 
                                          args= { 'structure_range' : (49-30,49+30) } )
            st_mfe = Structure.StructureMFE(st1_obj)
            st1_obj.add_subfeature(st_mfe)

            solution.add_feature(cai_obj)
            solution.add_feature(dup_obj1)
            solution.add_feature(st1_obj)
    
    def outputStart(self):
        print "gene_name,sd_hyb_energy,mfe_structure,cai"

    def output(self, solution):
        if solution.valid:
            print solution.solid,',',
            print solution.scores['sd16sRNADuplexMFE'],',',
            print solution.scores['utrStructureMFE'],',',
            print solution.scores['cdsCAI']

seqAnalyzerTest = TranslationFeaturesEcoliAnalyzer("testFiles/genomes/partial_ecoli_genome.csv","CSV")
seqAnalyzerTest.run()

gene_name,sd_hyb_energy,mfe_structure,cai
b0001 , NA , 0 , 0.613612159393
b0002 , NA , 0 , 0.34043688741
b0003 , NA , 0 , 0.341658034933
b0004 , NA , 0 , 0.385891327353
b0005 , NA , 0 , 0.377281853234
b0006 , NA , 0 , 0.342733396212
b0007 , NA , 0 , 0.319183029826
b0008 , NA , 0 , 0.604195702312
b0009 , NA , 0 , 0.396623675448
b0010 , NA , 0 , 0.574062247682
b0011 , NA , 0 , 0.286738246339
b0013 , NA , 0 , 0.362374253526
b0014 , NA , 0 , 0.723381361599
b0015 , NA , 0 , 0.525547136369
b0016 , NA , 0 , 0.224833425679
b0018 , NA , 0 , 0.325397473007
b4412 , NA , 0 , 0.388829489281
b0019 , NA , 0 , 0.315147538129
b0020 , NA , 0 , 0.326664585708
b0021 , NA , 0 , 0.259970855677
b0022 , NA , 0 , 0.35382053189
b0023 , NA , 0 , 0.673378474366
b0024 , NA , 0 , 0.223455416035
b0025 , NA , 0 , 0.341974173688
b0026 , NA , 0 , 0.544170808606
b0027 , NA , 0 , 0.375930315257
b0028 , NA , 0 , 0.446968612102
b0029 , NA , 0 , 0.43576259638
b0030 , NA , 0 , 0.33007758527
b0031 , NA , 0 , 0.360583601428
b0

## SequenceDesigner class

In [3]:
def configureSolution(self, solution):
    '''
    Solution configuration
    '''
    if solution.sequence == None:
        return 0

    ## Designer specific
    solution.mutable_region=range(0,len(solution.sequence)) # whole region
    solution.cds_region = (49,len(solution.sequence))
    solution.keep_aa = True

    ## Populate solution with desired features

    # CAI
    cai_obj = CAI.CAI(solution = solution,
                      label="cds",
                      cai_table = cai_table_ec,
                      args = {'cai_range': (49,len(solution.sequence)),
                              'mutable_region': range(49,len(solution.sequence)) } )

    # Search SD
    dup_obj1 = RNADuplex.RNADuplexRibosome(solution1=solution, label="sd16s",
                                            args = { 'rnaMolecule1region' : (25,48),
                                            'mutable_region' : range(25,48) })
    dup_mfe = RNADuplex.RNADuplexMFE(dup_obj1)
    dup_obj1.add_subfeature(dup_mfe)

    # MFE [-30,30]
    st1_obj = Structure.Structure(solution=solution,label="utr",
                                args = {'structure_range' : (49-30,49+30), 
                                        'mutable_region' : range(49-30,49+30)} )
    st_mfe = Structure.StructureMFE(st1_obj)
    st1_obj.add_subfeature(st_mfe)

    solution.add_feature(cai_obj)
    solution.add_feature(dup_obj1)
    solution.add_feature(st1_obj)

In [4]:
from DesignOfExperiments.Design import FullFactorial

#Design Methodology and thresholds
design_param = { "sd16sRNADuplexMFE": {'type' : 'REAL' ,
                                       'thresholds' : {'1': (-12.7,-7.3), '2': (-7.3,-5.8),
                                                       '3': (-5.8,-5.2), '4': (-5.2,-3.3), '5': (-3.3, 2.0) } },
                "utrStructureMFE": {'type' : 'REAL' ,
                                    'thresholds' : {'1': (-29.2,-12.2), '2': (-12.2,-9.95),
                                                    '3': (-9.95,-8.4), '4': (-8.4,-6.73), '5': (-6.73,0.65) } },
                "cdsCAI" : {'type' : 'REAL' ,
                            'thresholds' : {'1': (0.13,0.29), '2': (0.29,0.33),
                                            '3': (0.33,0.37), '4': (0.37,0.42), '5': (0.42,0.86) } } }
design = FullFactorial(["sd16sRNADuplexMFE","utrStructureMFE","cdsCAI"],design_param)
design.listDesigns

['1.1.1',
 '1.1.3',
 '1.1.2',
 '1.1.5',
 '1.1.4',
 '1.3.1',
 '1.3.3',
 '1.3.2',
 '1.3.5',
 '1.3.4',
 '1.2.1',
 '1.2.3',
 '1.2.2',
 '1.2.5',
 '1.2.4',
 '1.5.1',
 '1.5.3',
 '1.5.2',
 '1.5.5',
 '1.5.4',
 '1.4.1',
 '1.4.3',
 '1.4.2',
 '1.4.5',
 '1.4.4',
 '3.1.1',
 '3.1.3',
 '3.1.2',
 '3.1.5',
 '3.1.4',
 '3.3.1',
 '3.3.3',
 '3.3.2',
 '3.3.5',
 '3.3.4',
 '3.2.1',
 '3.2.3',
 '3.2.2',
 '3.2.5',
 '3.2.4',
 '3.5.1',
 '3.5.3',
 '3.5.2',
 '3.5.5',
 '3.5.4',
 '3.4.1',
 '3.4.3',
 '3.4.2',
 '3.4.5',
 '3.4.4',
 '2.1.1',
 '2.1.3',
 '2.1.2',
 '2.1.5',
 '2.1.4',
 '2.3.1',
 '2.3.3',
 '2.3.2',
 '2.3.5',
 '2.3.4',
 '2.2.1',
 '2.2.3',
 '2.2.2',
 '2.2.5',
 '2.2.4',
 '2.5.1',
 '2.5.3',
 '2.5.2',
 '2.5.5',
 '2.5.4',
 '2.4.1',
 '2.4.3',
 '2.4.2',
 '2.4.5',
 '2.4.4',
 '5.1.1',
 '5.1.3',
 '5.1.2',
 '5.1.5',
 '5.1.4',
 '5.3.1',
 '5.3.3',
 '5.3.2',
 '5.3.5',
 '5.3.4',
 '5.2.1',
 '5.2.3',
 '5.2.2',
 '5.2.5',
 '5.2.4',
 '5.5.1',
 '5.5.3',
 '5.5.2',
 '5.5.5',
 '5.5.4',
 '5.4.1',
 '5.4.3',
 '5.4.2',
 '5.4.5',
 '5.4.4',


In [5]:
from RunningExamples.Designer.TranslationFeaturesEcoliDesigner import TranslationFeaturesEcoliDesigner
from DesignOfExperiments.Design import FullFactorial, Optimization

#Seed sequence from which mutants will be derived
seed='ttattaccggacaataatatttcaattcattaaagaggagaaaggtaccatggccctgtggatgcgcctcctgcccctgctggcgctgctggccctctggggacctgacccagccgcagcctttgtgaaccaacacctgtgcggctcacacctggtggaagctctctacctagtgtgcggggaacgaggcttcttctacacacccaagacccgccgggaggcagaggacctgcaggtggggcaggtggagctgggcgggggccctggtgcaggcagcctgcagcccttggccctggaggggtccctgcagaagcgtggcattgtggaacaatgctgtaccagcatctgctccctctaccagctggagaactactgcaactag'

#Design Methodology and thresholds
design_param = { 
 "cdsCAI" : { 'type' : 'REAL' ,
 'thresholds' : { '1': (0.13,0.29), '2': (0.29,0.33),
 '3': (0.33,0.37), '4': (0.37,0.42),
'5': (0.42,0.86) } }
 }

design = Optimization(["cdsCAI"],design_param, '5')

# root_dir = "/Users/nmih/Projects/"
root_dir = "/vagrant/"
output_file = root_dir + 'codon_optimization/d-tailor/testFiles/outputFiles/tfec_1'

tirap_designer = TranslationFeaturesEcoliDesigner("tfec", 
                                                  seed, 
                                                  design,
                                                  output_file, 
                                                  createDB=True)
tirap_designer.run()

looking for combination:  5
Solution found... inserting into DB...

###########################
# Optimized solution:
# ID:  330648123324667243168199564780158269150
# Sequence:  ttattaccggacaataatatttcaattcattaaagaggagaaaggtaccatggcgctgtggatgcgcctcctgcctctgctggcgctgctggccctctggggacctgacccggccgcagcttttgtgaaccaacacctgtgcggcagccacctggtggaagctctctacctagtgtgcggggaacgtggcttcttctacacacctaagacccgccgggaggcagaggacctgcaggtgggtcaggtagaactgggcggtggccctggtgcaggtagcctgcagcccttggctctggaggggtccctgcagaagcgtggcattgtagaacaatgctgtaccagcatctgctccctctaccagctggagaactactgcaactag
# Scores:  ['cdsCAI: 0.42264774749']
# Levels:  ['cdsCAILevel: 5']
# Number of generated solutions:  17
# Distance to seed:  16
###########################

Program finished, all combinations were found...


(17, 16, 3.141724799297725)

# Own code

## Calculating CAI for Pk

- Just use the simple Biopython calculator - **but needed to modify code (Bio.SeqUtils.CodonUsage) to get it to skip the unknown bases (N)** REMEMBER THIS!

In [1]:
from Bio.SeqUtils.CodonUsage import CodonAdaptationIndex

In [2]:
pk_cds = '/vagrant/genomes/Pk/GCF_001983325.1_ASM198332v1_cds_from_genomic.fna'

biop_cai = CodonAdaptationIndex()
biop_cai.generate_index(fasta_file=pk_cds)

PK_CAI_TABLE = {k.lower():v for k,v in biop_cai.index.items()}
PK_CAI_TABLE

illegal codon ACN in gene: lcl|NW_018150436.1_cds_XP_020546087.1_933
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon NNN in gene: lcl|NW_018150437.1_cds_XP_020545573.1_1020
illegal codon CGN in gene: lcl|NW_018150439.1_cds_XP_020545092.1_1667
illegal codon TA in gene: lcl|NW_018150441.1_cds_XP_020544721.1_2022
illegal codon GAN in g

{'aaa': 1.0,
 'aac': 0.6835958956781579,
 'aag': 0.8501819970486965,
 'aat': 1.0,
 'aca': 0.9698236035279295,
 'acc': 0.6997060058798824,
 'acg': 0.3369172616547669,
 'act': 1.0,
 'aga': 1.0,
 'agc': 0.33871546465733765,
 'agg': 0.4327671815167005,
 'agt': 0.6517581628991748,
 'ata': 0.44500644063390016,
 'atc': 0.560287878608823,
 'atg': 1.0,
 'att': 1.0,
 'caa': 1.0,
 'cac': 0.4904431664411366,
 'cag': 0.5018068436200831,
 'cat': 1.0,
 'cca': 1.0,
 'ccc': 0.27423029492297724,
 'ccg': 0.26806429362707185,
 'cct': 0.6827540079008424,
 'cga': 0.13356785576870225,
 'cgc': 0.059216914930650975,
 'cgg': 0.11079352864654904,
 'cgt': 0.2555012000952747,
 'cta': 0.45494302516671425,
 'ctc': 0.2547365847695881,
 'ctg': 0.26136440804574285,
 'ctt': 0.4427739070203317,
 'gaa': 1.0,
 'gac': 0.4646572115247604,
 'gag': 0.5341032773007619,
 'gat': 1.0,
 'gca': 1.0,
 'gcc': 0.4917465037926556,
 'gcg': 0.2468331796194169,
 'gct': 0.8223192606674321,
 'gga': 0.5128889736499158,
 'ggc': 0.3511013635930

## Calculating CAI quartiles for Pk

In [3]:
from Bio import SeqIO

In [4]:
# Get all coding sequences

sequences = []
with open(pk_cds) as f:
    for s in SeqIO.parse(f, 'fasta'):
        sequences.append(s)

In [5]:
# Calculate CAIs

from Solution import Solution
from Features.CAI import CAI

all_pk_cais = {}

for s in sequences:
    solution = Solution(sol_id=s.id, 
                        sequence=str(s.seq))
    cai_obj = CAI(solution=solution,label="cds",cai_table=PK_CAI_TABLE, args= {'cai_range':(0,len(s))})
    solution.add_feature(cai_obj)
    all_pk_cais[solution.solid] = solution.scores['cdsCAI']

In [6]:
# Get quintiles for thresholds

import numpy as np

pk_cai_quintiles = []
for x in [0, 25, 50, 75, 100]:
    pk_cai_quintiles.append(np.percentile(list(all_pk_cais.values()), x))

pk_cai_thresholds = {}
for i, x in enumerate(pk_cai_quintiles):
    if i == 0:
        pk_cai_thresholds[str(i+1)] = (0, x)
    elif i == len(pk_cai_quintiles) - 1:
        pk_cai_thresholds[str(i+1)] = (pk_cai_quintiles[i-1], 1)
    else:
        pk_cai_thresholds[str(i+1)] = (pk_cai_quintiles[i-1], x)

pk_cai_thresholds

{'1': (0, 0.4515777307310756),
 '2': (0.4515777307310756, 0.7025300201378566),
 '3': (0.7025300201378566, 0.7323131353898734),
 '4': (0.7323131353898734, 0.7590354619135496),
 '5': (0.7590354619135496, 1)}

## Running CAI maximization for Pk

In [7]:
import Functions
import sys
from SequenceDesigner import SequenceDesigner
from Features.Structure import Structure,StructureMFE
from Features import CAI,RNADuplex
from DesignOfExperiments.Design import RandomSampling,Optimization,FullFactorial

class TranslationFeaturesPkDesigner(SequenceDesigner):
    
    def __init__(self, name, seed, design, dbfile, createDB=True):
        SequenceDesigner.__init__(self, name, seed, design, dbfile, createDB)
        
    def configureSolution(self, solution):
        """
        Solution configuration
        
        """
                
        if solution.sequence == None:
            return 0
        
        # TODO: need to replace these numbers and auto calculate them
        mutable_start = 0
        mutable_end = len(solution.sequence)
        cds_start = 49
        cds_end = len(solution.sequence)
        
        # Populate solution with desired features
        solution.mutable_region = range(mutable_start, mutable_end)
        solution.cds_region = (cds_start, cds_end)
        
        # This should always be true unless you want to make mutant proteins
        solution.keep_aa = True
        
        cai_obj = CAI.CAI(solution=solution,
                          label="cds",
                          cai_table=PK_CAI_TABLE, 
                          args={'cai_range' : solution.cds_region, 
                                'mutable_region' : solution.mutable_region } )
            
        solution.add_feature(cai_obj)                
    
    def validateSolution(self, solution):
        '''
        Solution validation tests
        
        Additionally, the user may also want to implement the method 
        validateSolution, which is called every time a new
        sequence is generated. This validation step is fundamental
        to avoid undesired properties in new sequence variants 
        (e.g., a spurious restriction site). Only validated sequences will
        be stored in the database. 

        '''
        if solution.sequence == None or ('?' in solution.levels.values()):
            sys.stderr.write("SolutionValidator: Level unknown - "+str(solution.levels)+"\n")                        
            solution.valid = False
            return 0
        
        # Check if solution is valid
        valid = True
      
        designed_region = solution.sequence
                
#         # No internal Promoters
#         (score, _, _) = Functions.look_for_promoters(designed_region)
#         if score >= 15.3990166: #0.95 percentile for Promoter PWM scores
#             valid = False
#             sys.stderr.write("SolutionValidator: High Promoter score: "+str(score)+"\n")                    
        
#         # No internal Terminator
#         score = Functions.look_for_terminators(designed_region)
#         if score >= 90: #90% confidence from transtermHP
#             valid = False
#             sys.stderr.write("SolutionValidator: High Terminator score\n")    
            
#         # No restriction enzymes
#         if 'ggtctc' in designed_region or 'gagacc' in designed_region:
#             sys.stderr.write("SolutionValidator: Restriction enzyme found\n")
#             valid = False        
        
        solution.valid = valid
        
        return valid

In [14]:
%%time

from DesignOfExperiments.Design import FullFactorial, Optimization

# Seed sequence from which mutants will be derived
seed = 'ATGAGTCCAAGCACCGCTCCGGCAGTTAAACCAGACGCTGGTGACTCATCTAGCATAATAGGAGAACTAAAGTCCAAGCTAAAAGCAGTGGCCGGCTCAGAGAGATCCGATGAGGTTGCTGAGATGGAGCAAGAACAAGAACAGGCGACTGAATACGAACACCTGACTAGAGTCCCTATAGTAAAGCAGAGACCCATCTCCGATAGATTGCAACCAGAGTTAGCTCAACACTATTCACCACACCTGTCAGGATTACAGGAATACAAGCAATTATACAAGCAAAGTATCGAATCACCTGGGAAGTTTTTTGGCGACAAAGCTCGTCAATTCTTAAACTGGTTTAAAGACTTCGACAGCGTGTTTGTACCTAACCCGGAGACCGGAAAGCCTTCATTAGAGAACAATGCATGGTTCCTAAATGGCCAGTTAAATGCGGCATACAATTGTGTTGATAGACATGCACTACAAACGCCCAACAAACCTGCCATCATTTATGAGGCTGACGAGCCGGGTCAGGGATACACACTAACATATGCTGAATTGTTAGAACAGGTTTGTAAGGTCGCTCAGGTCTTGCAGTATAGTATGGGCGTTCGTAAGGGAGATACAGTAGCGGTTTATATGCCAATGATTCCGCAAACACTGATTACCTTAATGGCAATCACCAGGATTGGAGCTATTCACTCTGTTGTGTTTGCTGGTTTTTCTTGCAACAGTCTGCGTGATAGAATTAACGACGCCGATTCCAGGGTAGTCATTACCACGGACGAGTCCAAACGTGGGGGTAAAATCGTTGAGACCAAAAGAATCGTCGATGACGCTATAAAGGAAACATCAGGGGTCAGAAATGTACTTGTATATAAGAGAACAAATAATCCTAAGGTGCAGCTGGTCCCAGGGCGTGATCTGGATTGGGACGACGAGATTAAAAAATATAAAGGATACTGCCCCTGCGAACCTGTGGATAGCGAGCACCCGCTTTTCTTACTATACACTTCAGGCTCTACGGGTACTCCCAAAGGCGTTCAACACAGCACTGCAGGTTATTTGTTAGGTGCTCTTCTGACCATGCGTTACACATTCGACACCCACAGGGAAGACGTATTTTTCACGGCCGGTGATGTGGGCTGGATTACTGGACACACCTACGTTGTTTACGGGCCCCTATTATACGGTTGTACAACACTTGTATTCGAAGGCACTCCCGCCTATCCAAACTACTCTCGTTATTGGGATATCGTAGACAAATATAAAGTCACGCAATTTTATGTGGCTCCCACCGCGCTTAGATTGTTGAAGAGGGCTGGGGACTCTTACGTAGAGAACCACTCACTAAAATCACTAAGGTGTTTAGGTACTGTAGGAGAGCCCATCGCAGCCGAAGTGTGGGACTGGTACAGCGAGCAAATCGGTAAAAATGAGATTCCTATAGTAGATACTTATTGGCAAACAGAGTCTGGGAGCCATATGTTAACTCCGCTAGCAGGGGGAGTTACACCGATGAAGCCAGGGAGCGCATGCTTTCCCTTCTTTGGTATAGAACCAGCTATACTGGATGCGAACACGGGAAAAGAAATTACAGCGTCACATGCTGAAGGTGTACTGGCGGTGAAATCCGGATGGCCCTCCTTTGCGAGAACGATATGGAAAAATCATGATAGGTTCTTAGATACGTACCTGAAGCCCTTCAAAGGGTATTACTTCGCAGGAGATGGTGCCGCCAGGGATAAAGATGGATATATGTGGATCTTGGGCAGGGTCGACGACGTCGTAAATGTGTCCGGACACAGGCTGTCCACGGCAGAAATTGAGTCTGCAATCCTAGACGACAGCATTGTAGCAGAGTGTGCGGTGGTGGGTTTCAATGATGACCTGACTGGACAGGCTGTGGCGGCCTTCGTAGTGTTGAAGAACAAGTCATCCTGGAGCACTGCCTCCGAATCCGAGTTACAGGACATTAAAAAACATTTGATTTTGGCGGTGAGAAAAGACATCGGACCCTTTGCAGCCCCCAAATTGATAGTACTGGTTGACGATCTACCCAAGACTCGTTCAGGTAAAATTATGAGGAGAATACTGCGTAAAATACTAGCTGGAGAAAGCGATCAATTAGGCGACGTCAGTACCTTATCAAATCCGGGCATTGTCCGTCACCTTATAGACTCAGTAAAGCTATAA'


# Design Methodology and thresholds
# A set of properties and their respective levels need to be inputted to instantiate a
# sub-class of Design. This is given in the form of a dictionary, where for each property it
# is necessary to define a type (REAL, INTEGER or TEXT) and a list of levels containing
# the respective lower and upper bounds
design_param = {
    "cdsCAI": {
        'type'      : 'REAL',
        'thresholds': pk_cai_thresholds
    }
}

# Specify the parameters you want to optimize to
design = Optimization(featuresList=["cdsCAI"], featuresObj=design_param, target='5')

# This is where the SQLite database will be saved
root_dir = "/vagrant/"
output_file = root_dir + 'codon_optimization/d-tailor/testFiles/outputFiles/tfpk_1'

# Get ready to run everything
tirap_designer = TranslationFeaturesPkDesigner("tfpk",
                                               seed,
                                               design,
                                               output_file,
                                               createDB=True)

# Print original CAI
solution = Solution(sol_id='tmp', 
                        sequence=seed)
cai_obj = CAI.CAI(solution=solution, label="cds", cai_table=PK_CAI_TABLE, args= {'cai_range':(0,len(seed))})
solution.add_feature(cai_obj)
print('Starting CAI: ', solution.scores['cdsCAI'])

# Run it
result = tirap_designer.run()
result

('Starting CAI: ', 0.6402064678148808)
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
looking for combination:  5
No solution could be found...
time elapsed: 60.99 (s) 	 solutio

In [15]:
from Bio.Seq import Seq
orig = Seq("ATGAGTCCAAGCACCGCTCCGGCAGTTAAACCAGACGCTGGTGACTCATCTAGCATAATAGGAGAACTAAAGTCCAAGCTAAAAGCAGTGGCCGGCTCAGAGAGATCCGATGAGGTTGCTGAGATGGAGCAAGAACAAGAACAGGCGACTGAATACGAACACCTGACTAGAGTCCCTATAGTAAAGCAGAGACCCATCTCCGATAGATTGCAACCAGAGTTAGCTCAACACTATTCACCACACCTGTCAGGATTACAGGAATACAAGCAATTATACAAGCAAAGTATCGAATCACCTGGGAAGTTTTTTGGCGACAAAGCTCGTCAATTCTTAAACTGGTTTAAAGACTTCGACAGCGTGTTTGTACCTAACCCGGAGACCGGAAAGCCTTCATTAGAGAACAATGCATGGTTCCTAAATGGCCAGTTAAATGCGGCATACAATTGTGTTGATAGACATGCACTACAAACGCCCAACAAACCTGCCATCATTTATGAGGCTGACGAGCCGGGTCAGGGATACACACTAACATATGCTGAATTGTTAGAACAGGTTTGTAAGGTCGCTCAGGTCTTGCAGTATAGTATGGGCGTTCGTAAGGGAGATACAGTAGCGGTTTATATGCCAATGATTCCGCAAACACTGATTACCTTAATGGCAATCACCAGGATTGGAGCTATTCACTCTGTTGTGTTTGCTGGTTTTTCTTGCAACAGTCTGCGTGATAGAATTAACGACGCCGATTCCAGGGTAGTCATTACCACGGACGAGTCCAAACGTGGGGGTAAAATCGTTGAGACCAAAAGAATCGTCGATGACGCTATAAAGGAAACATCAGGGGTCAGAAATGTACTTGTATATAAGAGAACAAATAATCCTAAGGTGCAGCTGGTCCCAGGGCGTGATCTGGATTGGGACGACGAGATTAAAAAATATAAAGGATACTGCCCCTGCGAACCTGTGGATAGCGAGCACCCGCTTTTCTTACTATACACTTCAGGCTCTACGGGTACTCCCAAAGGCGTTCAACACAGCACTGCAGGTTATTTGTTAGGTGCTCTTCTGACCATGCGTTACACATTCGACACCCACAGGGAAGACGTATTTTTCACGGCCGGTGATGTGGGCTGGATTACTGGACACACCTACGTTGTTTACGGGCCCCTATTATACGGTTGTACAACACTTGTATTCGAAGGCACTCCCGCCTATCCAAACTACTCTCGTTATTGGGATATCGTAGACAAATATAAAGTCACGCAATTTTATGTGGCTCCCACCGCGCTTAGATTGTTGAAGAGGGCTGGGGACTCTTACGTAGAGAACCACTCACTAAAATCACTAAGGTGTTTAGGTACTGTAGGAGAGCCCATCGCAGCCGAAGTGTGGGACTGGTACAGCGAGCAAATCGGTAAAAATGAGATTCCTATAGTAGATACTTATTGGCAAACAGAGTCTGGGAGCCATATGTTAACTCCGCTAGCAGGGGGAGTTACACCGATGAAGCCAGGGAGCGCATGCTTTCCCTTCTTTGGTATAGAACCAGCTATACTGGATGCGAACACGGGAAAAGAAATTACAGCGTCACATGCTGAAGGTGTACTGGCGGTGAAATCCGGATGGCCCTCCTTTGCGAGAACGATATGGAAAAATCATGATAGGTTCTTAGATACGTACCTGAAGCCCTTCAAAGGGTATTACTTCGCAGGAGATGGTGCCGCCAGGGATAAAGATGGATATATGTGGATCTTGGGCAGGGTCGACGACGTCGTAAATGTGTCCGGACACAGGCTGTCCACGGCAGAAATTGAGTCTGCAATCCTAGACGACAGCATTGTAGCAGAGTGTGCGGTGGTGGGTTTCAATGATGACCTGACTGGACAGGCTGTGGCGGCCTTCGTAGTGTTGAAGAACAAGTCATCCTGGAGCACTGCCTCCGAATCCGAGTTACAGGACATTAAAAAACATTTGATTTTGGCGGTGAGAAAAGACATCGGACCCTTTGCAGCCCCCAAATTGATAGTACTGGTTGACGATCTACCCAAGACTCGTTCAGGTAAAATTATGAGGAGAATACTGCGTAAAATACTAGCTGGAGAAAGCGATCAATTAGGCGACGTCAGTACCTTATCAAATCCGGGCATTGTCCGTCACCTTATAGACTCAGTAAAGCTATAA")
oopt = Seq('atgagtccaagcaccgctccggcagttaaaccagacgctggtgactcatttggcataataggaaaactaatcaccatcttaaaagcaatggcctgctcaaagagatcccatgagattgttaagatggagcaaaaacaagaacagacgattgaatacgaacacctgattggaaagtctatagtaaagcagagatccatctccaattgattgtaatcagagttagctcaacactattcatcatacctgtcaggattacagaaatactagcaattatacttccaaagtttcgaatcatttaggatcttttttggcaactaagctggtcaattcttaaactggtttgaagacttcgacagcgtgtttgtatttaacccgtagacctgaatctttgcattagagaactatgcatggttcctaaatggcatcttaaatgcggcatactattgtgttgatcgacatgcattataaacgtccaactaaccttccatcatttatgaggttaacgtcacgtgtcagagatactcattaacatatgctgaattgttagaatagatttgtcaggtcgctcagatcttgcagtatagtttgggcgtttgtgagggagattcaatagaggtttatttgccaatgatttcgtaaacattgattgccttaatggcaatctccaggattggaattgtttactctgttgtgtttgttggtttttctagcaactgtctgcgttatcgaattgaccacaccaattccaggttagtctttgccaaggacgagtccaaacgtcggtgtaaaatccttgagaccaaaagaaagttctatgacgttgtaaaggaaacatcaagggtctgaaatgtacttgtatattagagaacaaatcattttaaggtgttcttggtctcagggcgtgatctggattggaacgaccagattgaaaaatataaaggatactgcacctgcaaatctttggatagcgtccactcgcttttcttattatactctacaagcattaagagttttgccaaaagcctttaatactgcacttcaagttatttgttaggttctcttctgaccatgtgttactcatagtactcccacaggcaagacgtatttttcacgtcccgtgatgtgggcaggattacttgacactccaactttgtttacgggtccatattatacggttgttcaacacttgtatagtaaggcattgccgccaatccaaactactttagttattgggatatcttagactaatataaagtcacgaaattttatgtggcttccaccgagattagattgttgaagaggtcttggcactttgacgtagagaacaactcattaaaatcattaaggtgtttaggtcctgtaggaatccccatctcagccaaagtgtggtactggtactgcatccaaatccgttaaaatgagattcttgtagtagatcttaattggcaaacaaagtttgggtgctatatgttaactcaggtagcaaggtgaattgcaccgttgaagtcagggtgcgcatgctttcccttctttggtttagaaccaattatattggatgagaactcgggaaaaaaaattgcagagacatatgcttaaagtctattggcgatgaaatccagatggtcctcctttgagagaacgatatggaaaaattatgatcggttcttagatacgtacatgaagtccatcaaagggtattacttctcaggagatggtgccaccagggattaagatggatatatgtggttcttgggcagggtcgacgacttcttaaatgtgtccagacactggatgtccaaggcaaaaattgtctctgcaatcttagactacagcattgtagcaatctgttagatggtgggtttctatgatgacctgacttgataggctgtggcggccatcttagtgttgaagaacttctcatccaggagcattaccaccaaatccgagttataggactttgaaaaacatttgattttggagatgagaaaagacatcggatccattgcagcccccaaattgatagtactggttgacgatttatccaagactggttcaagttaaattgtgaggtgaatactgtgtgaaatactagctggaaaaagcaatcaattaggcgacttcagtaccatatcaaattcgggcattgtctgttactttgtagactcaataatcatataa')

In [16]:
print(str(orig))
print(str(oopt))

ATGAGTCCAAGCACCGCTCCGGCAGTTAAACCAGACGCTGGTGACTCATCTAGCATAATAGGAGAACTAAAGTCCAAGCTAAAAGCAGTGGCCGGCTCAGAGAGATCCGATGAGGTTGCTGAGATGGAGCAAGAACAAGAACAGGCGACTGAATACGAACACCTGACTAGAGTCCCTATAGTAAAGCAGAGACCCATCTCCGATAGATTGCAACCAGAGTTAGCTCAACACTATTCACCACACCTGTCAGGATTACAGGAATACAAGCAATTATACAAGCAAAGTATCGAATCACCTGGGAAGTTTTTTGGCGACAAAGCTCGTCAATTCTTAAACTGGTTTAAAGACTTCGACAGCGTGTTTGTACCTAACCCGGAGACCGGAAAGCCTTCATTAGAGAACAATGCATGGTTCCTAAATGGCCAGTTAAATGCGGCATACAATTGTGTTGATAGACATGCACTACAAACGCCCAACAAACCTGCCATCATTTATGAGGCTGACGAGCCGGGTCAGGGATACACACTAACATATGCTGAATTGTTAGAACAGGTTTGTAAGGTCGCTCAGGTCTTGCAGTATAGTATGGGCGTTCGTAAGGGAGATACAGTAGCGGTTTATATGCCAATGATTCCGCAAACACTGATTACCTTAATGGCAATCACCAGGATTGGAGCTATTCACTCTGTTGTGTTTGCTGGTTTTTCTTGCAACAGTCTGCGTGATAGAATTAACGACGCCGATTCCAGGGTAGTCATTACCACGGACGAGTCCAAACGTGGGGGTAAAATCGTTGAGACCAAAAGAATCGTCGATGACGCTATAAAGGAAACATCAGGGGTCAGAAATGTACTTGTATATAAGAGAACAAATAATCCTAAGGTGCAGCTGGTCCCAGGGCGTGATCTGGATTGGGACGACGAGATTAAAAAATATAAAGGATACTGCCCCTGCGAACCTGTGGATAGCGAGCACCCGCTTTTCTTACTATACACTTCAG