# Python Script to Design a Combinatorial Library of Linkers for Grafting an EF-hand Motif onto Loop 7 of mFAP2b

In [1]:
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

In [2]:
def revcomp(seq):
    """Return the reverse complement of the DNA string *seq*.

    Case is preserved. Besides A/C/G/T, the IUPAC ambiguity codes
    R/Y, M/K, V/B and H/D are swapped with their complements; any other
    character is left unchanged.
    """
    complement = str.maketrans(
        'ACGTacgtRYMKrymkVBHDvbhd',
        'TGCAtgcaYRKMyrkmBVDHbvdh',
    )
    # Complementing is per-character, so reversing first gives the same result.
    reversed_seq = seq[::-1]
    return reversed_seq.translate(complement)

In [3]:
# Lookup table: single-letter amino acid -> the one codon used to encode it.
# '*' maps to the stop codon TAA.
codons = dict(zip(
    'ACDEFGHIKLMNPQRSTVWY*',
    (
        'GCG', 'TGC', 'GAT', 'GAA', 'TTT', 'GGC', 'CAT',
        'ATT', 'AAA', 'CTG', 'ATG', 'AAC', 'CCG', 'CAG',
        'CGT', 'AGC', 'ACC', 'GTG', 'TGG', 'TAT', 'TAA',
    ),
))

In [4]:
def oligo_sequences(start, end, target, out,
                    target_dna='GACAAAGATGGTGACGGCTATATTTCCGCCGCCGAAGCAGCAGCTCAG',
                    overlap_5prime='AGTTCAAATGGGACGCA',
                    overlap_3prime='CTGACCGGTACCCTGC',
                    codon_table=None):
    """Build one DNA oligo for every designed protein sequence in *out*.

    For each protein, the region between *start* and *end* is extracted and
    split around *target*. The residues before and after *target* are
    encoded codon by codon, and the oligo is assembled as::

        overlap_5prime + N-side codons + target_dna + C-side codons + overlap_3prime

    Args:
        start: Protein sequence preceding the insert; the text after its
            last occurrence is used.
        end: Protein sequence following the insert; the text before its
            first occurrence is used.
        target: Protein sequence of the grafted motif that separates the
            N-side and C-side linker residues.
        out: Iterable of full-length protein sequences to encode.
        target_dna: DNA placed where *target* sits. It is NOT derived from
            *target*; pass a matching sequence if *target* is changed.
        overlap_5prime: DNA prepended to every oligo.
        overlap_3prime: DNA appended to every oligo.
        codon_table: Mapping of one-letter amino acid to codon. Defaults to
            the module-level ``codons`` table.

    Returns:
        A list of oligo DNA strings, in the same order as *out*.

    Raises:
        ValueError: If *target* does not occur between *start* and *end* in
            a sequence. Without this check the whole insert would be encoded
            on both sides of ``target_dna``.
        KeyError: If a linker residue is missing from the codon table.
    """
    if codon_table is None:
        # Resolved at call time so the module-level table can be defined
        # (or replaced) after this function.
        codon_table = codons

    oligos_to_order = []
    for protein in out:
        raw_protein_insert = protein.split(start)[-1].split(end)[0]
        if target not in raw_protein_insert:
            raise ValueError(
                'target {!r} not found in insert {!r}'.format(
                    target, raw_protein_insert))

        # Residues before the first and after the last occurrence of target.
        pieces = raw_protein_insert.split(target)
        n_protein_insert = pieces[0]
        c_protein_insert = pieces[-1]

        # An empty linker simply yields an empty DNA string.
        n_dna_insert = ''.join(codon_table[aa] for aa in n_protein_insert)
        c_dna_insert = ''.join(codon_table[aa] for aa in c_protein_insert)

        oligos_to_order.append(
            overlap_5prime + n_dna_insert + target_dna
            + c_dna_insert + overlap_3prime
        )

    return oligos_to_order

In [5]:
# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------
# Sequences from which linker residues are taken (prefixes and suffixes).
query = [
    'QFKQNDNPRYHGSTNI',
    'HVKSNDSPSLSGNTKI',
    'QFKKDDDPRYHGSTHI',
    'EYKKNDDPRLKGSKSI',
    'HYKTNDYPSLNGSKSI',
    'TTKGENDFHGR',
]

target = 'DKDGDGYISAAEAAAQ' # EF-hand motif

# Protein sequence placed before and after the grafted region.
start = 'MASSHHHHHHSSGLVPRGSSMSRAAQLLPGTWQVTMTNEDGQTSQGQWHFQPRSPYTMDIVAQGTISDGRPIVGYGKATVKTPDTLDIDITYPSLGNIKAQGQITMDSPTQFKWDA'
end = 'LTGTLQRQE'

# Optional single residue inserted directly before (G_) or after (G_P_) the target.
G_ = ['', 'G']
G_P_ = ['', 'G', 'P']

# ---------------------------------------------------------------------------
# Enumerate every linker combination: 0-3 leading residues of each query
# before the target and 0-3 trailing residues of the same query after it.
# ---------------------------------------------------------------------------
new_sequences = []
for q in query:
    for n_spacer in G_:
        for c_spacer in G_P_:
            for n_len in range(4):
                n_linker = q[:n_len]
                for c_len in range(4):
                    # q[-0:] would be all of q, so a zero-length suffix is
                    # spelled out explicitly.
                    c_linker = q[-c_len:] if c_len else ''
                    new_sequences.append(
                        start + n_linker + n_spacer + target
                        + c_spacer + c_linker + end
                    )

print('Number of oligos before removing redundant sequences = {}'.format(len(new_sequences)))
protein_list = list(set(new_sequences))
print('Number of oligos after removing redundant sequences = {}'.format(len(protein_list)))


oligo_list = oligo_sequences(start, end, target, protein_list)

# Sequence shared by the two half-oligos each full oligo is split into.
overlapping_region = 'GCTATATTTCCGCCG'

# insert protein sequence -> (forward half-oligo, reverse half-oligo)
dic = {}

for protein, oligo in zip(protein_list, oligo_list):
    # Sanity check: the padded oligo must translate to a stretch of the
    # designed protein. The extra 'C' and 'AG' presumably complete the
    # partial codons at the oligo ends.
    translated = str(Seq('C' + oligo + 'AG', generic_dna).translate())
    assert translated in protein

    insert_name = protein.split(start)[-1].split(end)[0]

    # Split into two shorter oligos that share overlapping_region
    # (cheaper to order); the second half is stored as its reverse complement.
    pieces = oligo.split(overlapping_region)
    fwd_half = pieces[0] + overlapping_region
    rev_half = revcomp(overlapping_region + pieces[-1])

    dic[insert_name] = (fwd_half, rev_half)


print('Maximum oligo length = {}'.format(max(len(pair[0]) for pair in dic.values())))
print('Maximum oligo length = {}'.format(max(len(pair[1]) for pair in dic.values())))

fwd = [pair[0] for pair in dic.values()]
rev = [pair[1] for pair in dic.values()]

print('Number of forward oligos with redundancy = {}'.format(len(fwd)))
print('Number of reverse oligos with redundancy = {}'.format(len(rev)))

fwd_unique = list(set(fwd))
rev_unique = list(set(rev))

print('Number of forward oligos without redundancy = {}'.format(len(fwd_unique)))
print('Number of reverse oligos without redundancy = {}'.format(len(rev_unique)))

# Each oligo is labelled with the protein it encodes: one base is added at
# the 5' end and one dropped at the 3' end before translating.
print('\nForward Oligonucleotides:')
print('Name\t\t\tSequence')
for fwd_oligo in fwd_unique:
    framed = 'C' + fwd_oligo[:-1]
    label = str(Seq(framed, generic_dna).translate())
    print(label + '\t' + fwd_oligo)

print('\nReverse Oligonucleotides:')
print('Name\t\t\tSequence')
for rev_oligo in rev_unique:
    # Reverse oligos are labelled from their forward-strand sequence.
    framed = 'G' + revcomp(rev_oligo)[:-1]
    label = str(Seq(framed, generic_dna).translate())
    print(label + '\t' + rev_oligo)

print('\nTotal number of oligo combinations = {}'.format(len(fwd_unique)*len(rev_unique)))

Number of oligos before removing redundant sequences = 576
Number of oligos after removing redundant sequences = 454
Maximum oligo length = 60
Maximum oligo length = 60
Number of forward oligos with redundancy = 454
Number of reverse oligos with redundancy = 454
Number of forward oligos without redundancy = 30
Number of reverse oligos without redundancy = 38

Forward Oligonucleotides:
Name			Sequence
QFKWDAQFDKDGDGYISA	AGTTCAAATGGGACGCACAGTTTGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAEGDKDGDGYISA	AGTTCAAATGGGACGCAGAAGGCGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAEDKDGDGYISA	AGTTCAAATGGGACGCAGAAGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDATTDKDGDGYISA	AGTTCAAATGGGACGCAACCACCGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAHVGDKDGDGYISA	AGTTCAAATGGGACGCACATGTGGGCGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAQFGDKDGDGYISA	AGTTCAAATGGGACGCACAGTTTGGCGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAHDKDGDGYISA	AGTTCAAATGGGACGCACATGACAAAGATGGTGACGGCTATATTTCCGCCG
QFKWDAEYKDKDGDGYISA	AGTTCAAATGGGACGCAGAATATAAAGACAAAGATGGTGACGGCTATATTTCCGCCG
