In [58]:
import io
from io import StringIO
from pathlib import Path

from Bio import SeqIO
from Bio.Seq import MutableSeq
from Bio.SeqRecord import SeqRecord

# Working directory for this notebook: the parent of the (resolved) directory
# the kernel was started in. Output files below are written relative to it.
wd = Path().resolve().parent
wd

PosixPath('/home/quirin/PYTHON/mapra')

In [48]:
# Tab-separated prediction table held as one string.
# The first row is the header (CaM-variant, Tm, Standard-Deviation-Tm, %Unfold,
# Standard-Deviation-%Unfold, Stabilizing-vs-Destabilizing, Comments); each of
# the 16 following rows names one variant as `p.<from><position><to>` and has
# `*` in every other column.
# `.lstrip()` removes the newline that follows the opening quotes, so the
# header is the first line of the resulting string.
cam_predictions = '''
CaM-variant	Tm	Standard-Deviation-Tm	%Unfold	Standard-Deviation-%Unfold	Stabilizing-vs-Destabilizing	Comments
p.N54I	*	*	*	*	*	*
p.F90L	*	*	*	*	*	*
p.D96H	*	*	*	*	*	*
p.D96V	*	*	*	*	*	*
p.N98I	*	*	*	*	*	*
p.N98S	*	*	*	*	*	*
p.A103V	*	*	*	*	*	*
p.E105A	*	*	*	*	*	*
p.D130G	*	*	*	*	*	*
p.D130V	*	*	*	*	*	*
p.D132E	*	*	*	*	*	*
p.D132H	*	*	*	*	*	*
p.D132V	*	*	*	*	*	*
p.D134H	*	*	*	*	*	*
p.Q136P	*	*	*	*	*	*
p.E141G	*	*	*	*	*	*'''.lstrip()

# FASTA-formatted text with four protein records: 'CaM WT', 'Rec', 'CIB2' and
# 'CALM1_HUMAN'.
# The 'CaM WT' sequence is the 'CALM1_HUMAN' sequence with two extra residues
# ('GA') in front of the initial 'M', so indices into it are offset by 2
# relative to 'CALM1_HUMAN'.
# `.strip()` removes the surrounding newlines so the text starts at the first
# '>' header line.
CaM_seq_training_set = '''
>CaM WT
GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADG
NGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDE
EVDEMIREADIDGDGQVNYEEFVQMMTAK

>Rec
GNSKSGALSKEILEELQLNTKFTEEELSSWYQSFLKECPSGRITRQEFQTIYSKFFPEA
DPKAYAQHVFRSFDANSDGTLDFKEYVIALHMTSAGKTNQKLEWAFSLYDVDGNGTISKN
EVLEIVTAIFKMISPEDTKHLPEDENTPEKRAEKIWGFFGKKDDDKLTEKEFIEGTLANK
EILRLIQFEPQKVKEKLKEKKL

>CIB2
GAMGNKQTIFTEEQLDNYQDCTFFNKKDILKLHSRFYELAPNLVPMDYRKSPIVHVPMSLII
QMPELRENPFKERIVAAFSEDGEGNLTFNDFVDMFSVLCESAPRELKANYAFKIYDFNTD
NFICKEDLELTLARLTKSELDEEEVVLVCDKVIEEADLDGDGKLGFADFEDMIAKAPDFL
STFHIRI

>CALM1_HUMAN Calmodulin-1 OS=Homo sapiens OX=9606 GN=CALM1 PE=1 SV=1
MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADG
NGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDE
EVDEMIREADIDGDGQVNYEEFVQMMTAK
'''.strip()

In [49]:
# Parse the in-memory FASTA text and index the resulting SeqRecords by their
# id (the first whitespace-delimited token of each header line). If two
# records shared an id, the later one would replace the earlier one.
records = {}
with StringIO(CaM_seq_training_set) as handle:
    for record in SeqIO.parse(handle, 'fasta'):
        records[record.id] = record
records

{'CaM': SeqRecord(seq=Seq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM', name='CaM', description='CaM WT', dbxrefs=[]),
 'Rec': SeqRecord(seq=Seq('GNSKSGALSKEILEELQLNTKFTEEELSSWYQSFLKECPSGRITRQEFQTIYSK...KKL'), id='Rec', name='Rec', description='Rec', dbxrefs=[]),
 'CIB2': SeqRecord(seq=Seq('GAMGNKQTIFTEEQLDNYQDCTFFNKKDILKLHSRFYELAPNLVPMDYRKSPIV...IRI'), id='CIB2', name='CIB2', description='CIB2', dbxrefs=[]),
 'CALM1_HUMAN': SeqRecord(seq=Seq('MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMIN...TAK'), id='CALM1_HUMAN', name='CALM1_HUMAN', description='CALM1_HUMAN Calmodulin-1 OS=Homo sapiens OX=9606 GN=CALM1 PE=1 SV=1', dbxrefs=[])}

In [50]:
# Collect the variant names from the first column of the prediction table:
# skip the header row, take the text before the first tab and drop its first
# two characters (the 'p.' prefix), e.g. 'p.N54I' -> 'N54I'.
variants = []
for row in cam_predictions.split('\n')[1:]:
    first_column = row.split('\t')[0]
    variants.append(first_column[2:])
variants

['N54I',
 'F90L',
 'D96H',
 'D96V',
 'N98I',
 'N98S',
 'A103V',
 'E105A',
 'D130G',
 'D130V',
 'D132E',
 'D132H',
 'D132V',
 'D134H',
 'Q136P',
 'E141G']

In [51]:
# Sanity check that editing a mutable copy leaves the parsed record untouched:
# build a MutableSeq from the 'CaM' sequence, overwrite its first residue and
# show the copy next to the original record.
# MutableSeq(str(...)) is used instead of Seq.tomutable(), which is no longer
# available in current Biopython releases; constructing from a plain string
# works on both old and new versions.
seq = MutableSeq(str(records['CaM'].seq))
seq[0] = 'K'
seq, records['CaM']

(MutableSeq('KAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'),
 SeqRecord(seq=Seq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM', name='CaM', description='CaM WT', dbxrefs=[]))

In [62]:
# Build one point-mutated copy of the 'CaM' sequence per entry in `variants`
# (each entry looks like 'N54I': wild-type residue, 1-based position, new
# residue) and collect them as SeqRecords with ids 'CaM_<variant>'.

# Offset from a 1-based variant position to the 0-based index in the 'CaM'
# sequence: -1 for the 1-based numbering, +2 because that sequence carries two
# extra leading residues ('GA') compared with 'CALM1_HUMAN'.
shift = 1

wild_type = str(records['CaM'].seq)
mutants = []
for v in variants:
    _from, _to, _pos = v[0], v[-1], int(v[1:-1])
    # Fresh mutable copy per variant so mutations never accumulate.
    # (MutableSeq(str) replaces Seq.tomutable(), which current Biopython
    # releases no longer provide.)
    seq = MutableSeq(wild_type)
    observed = seq[_pos + shift]
    print(f'{observed}_{_from}{_pos}{_to}')
    # Refuse to emit a mutant when the residue found at the target index is
    # not the wild-type residue named by the variant: that means the offset
    # (or the variant itself) is wrong for this sequence.
    if observed != _from:
        raise ValueError(
            f'variant {v}: expected {_from} at index {_pos + shift}, '
            f'found {observed}'
        )
    seq[_pos + shift] = _to
    mutants.append(SeqRecord(seq, id=f'CaM_{v}', description=''))
mutants

N_N54I
F_F90L
D_D96H
D_D96V
N_N98I
N_N98S
A_A103V
E_E105A
D_D130G
D_D130V
D_D132E
D_D132H
D_D132V
D_D134H
Q_Q136P
E_E141G


[SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_N54I', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_F90L', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_D96H', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_D96V', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_N98I', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM_N98S', name='<unknown name>', description='', dbxrefs=[]),
 SeqRecord(seq=MutableSeq('GAMADQLTEEQIAEFKEAFSLFDKD

In [63]:
# Write all mutant records to <wd>/CAGI6/cam_mutants.fasta in FASTA format,
# overwriting any existing file of that name.
with (wd / 'CAGI6' / 'cam_mutants.fasta').open('w') as fasta:
    SeqIO.write(mutants, fasta, 'fasta')

The cells below are old stuff.

In [None]:
# Show the eight residues at 0-based indices 50-57 of `seq`.
# NOTE(review): `seq` is whatever an earlier cell last assigned to it, so the
# result depends on execution order.
seq[50:58]

In [37]:
# Slice the CALM1_HUMAN record at 0-based indices 135-141, i.e. 1-based
# positions 136-142. Position 141 (index 140) is the second of the two
# adjacent E residues in this window, which is the residue changed by E141G.
records['CALM1_HUMAN'][135:142]  # E141G is the second E

SeqRecord(seq=Seq('QVNYEEF'), id='CALM1_HUMAN', name='CALM1_HUMAN', description='CALM1_HUMAN Calmodulin-1 OS=Homo sapiens OX=9606 GN=CALM1 PE=1 SV=1', dbxrefs=[])

In [7]:
# Per-record index offsets applied before slicing; records without an entry
# use an offset of 0.
shifts = {'CaM': 2, 'CIB2': 5}

# Print the same 7-residue window (indices 135-141, moved by the record's
# offset) for every parsed record so the windows can be compared by eye.
for record_id in records:
    shift = shifts.get(record_id, 0)
    window = records[record_id][135 + shift:142 + shift]
    print(f'{record_id}:\t{window.seq}')


CaM:	QVNYEEF
Rec:	DTKHLPE
CIB2:	ELDEEEV
CALM1_HUMAN:	QVNYEEF
