In [2]:
import io

from Bio import SeqIO
from pathlib import Path
from io import StringIO

wd = Path('.').resolve().parent
wd

PosixPath('/home/quirin/PYTHON/mapra')

In [3]:
cam_predictions = '''
CaM-variant	Tm	Standard-Deviation-Tm	%Unfold	Standard-Deviation-%Unfold	Stabilizing-vs-Destabilizing	Comments
p.N54I	*	*	*	*	*	*
p.F90L	*	*	*	*	*	*
p.D96H	*	*	*	*	*	*
p.D96V	*	*	*	*	*	*
p.N98I	*	*	*	*	*	*
p.N98S	*	*	*	*	*	*
p.A103V	*	*	*	*	*	*
p.E105A	*	*	*	*	*	*
p.D130G	*	*	*	*	*	*
p.D130V	*	*	*	*	*	*
p.D132E	*	*	*	*	*	*
p.D132H	*	*	*	*	*	*
p.D132V	*	*	*	*	*	*
p.D134H	*	*	*	*	*	*
p.Q136P	*	*	*	*	*	*
p.E141G	*	*	*	*	*	*'''.lstrip()

CaM_seq_training_set = '''
>CaM WT
GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADG
NGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDE
EVDEMIREADIDGDGQVNYEEFVQMMTAK

>Rec
GNSKSGALSKEILEELQLNTKFTEEELSSWYQSFLKECPSGRITRQEFQTIYSKFFPEA
DPKAYAQHVFRSFDANSDGTLDFKEYVIALHMTSAGKTNQKLEWAFSLYDVDGNGTISKN
EVLEIVTAIFKMISPEDTKHLPEDENTPEKRAEKIWGFFGKKDDDKLTEKEFIEGTLANK
EILRLIQFEPQKVKEKLKEKKL

>CIB2
GAMGNKQTIFTEEQLDNYQDCTFFNKKDILKLHSRFYELAPNLVPMDYRKSPIVHVPMSLII
QMPELRENPFKERIVAAFSEDGEGNLTFNDFVDMFSVLCESAPRELKANYAFKIYDFNTD
NFICKEDLELTLARLTKSELDEEEVVLVCDKVIEEADLDGDGKLGFADFEDMIAKAPDFL
STFHIRI

>CALM1_HUMAN Calmodulin-1 OS=Homo sapiens OX=9606 GN=CALM1 PE=1 SV=1
MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADG
NGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDE
EVDEMIREADIDGDGQVNYEEFVQMMTAK
'''.strip()

In [4]:
with StringIO(CaM_seq_training_set) as fasta_io:
    records = {r.id: r for r in SeqIO.parse(fasta_io, 'fasta')}
records

{'CaM': SeqRecord(seq=Seq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK'), id='CaM', name='CaM', description='CaM WT', dbxrefs=[]),
 'Rec': SeqRecord(seq=Seq('GNSKSGALSKEILEELQLNTKFTEEELSSWYQSFLKECPSGRITRQEFQTIYSK...KKL'), id='Rec', name='Rec', description='Rec', dbxrefs=[]),
 'CIB2': SeqRecord(seq=Seq('GAMGNKQTIFTEEQLDNYQDCTFFNKKDILKLHSRFYELAPNLVPMDYRKSPIV...IRI'), id='CIB2', name='CIB2', description='CIB2', dbxrefs=[]),
 'CALM1_HUMAN': SeqRecord(seq=Seq('MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMIN...TAK'), id='CALM1_HUMAN', name='CALM1_HUMAN', description='CALM1_HUMAN Calmodulin-1 OS=Homo sapiens OX=9606 GN=CALM1 PE=1 SV=1', dbxrefs=[])}

In [5]:
variants = [c.split('\t')[0][2:] for c in cam_predictions.split('\n')[1:]]
variants

['N54I',
 'F90L',
 'D96H',
 'D96V',
 'N98I',
 'N98S',
 'A103V',
 'E105A',
 'D130G',
 'D130V',
 'D132E',
 'D132H',
 'D132V',
 'D134H',
 'Q136P',
 'E141G']

In [10]:
seq = records['CaM'].seq
seq

Seq('GAMADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDM...TAK')

In [14]:
shift = 2
for v in variants:
    _from, _to, _pos = v[0], v[-1], int(v[1:-1])
    print(_from, _to, _pos)
    print(seq[_pos + shift])


N I 54
E
F L 90
R
D H 96
G
D V 96
G
N I 98
G
N S 98
G
A V 103
A
E A 105
L
D G 130
I
D V 130
I
D E 132
G
D H 132
G
D V 132
G
D H 134
G
Q P 136
V
E G 141
F


In [None]:
records['CALM1_HUMAN'][135:142]  # E141G is the second E

In [87]:
shifts = {'CaM': 2, 'CIB2': 5}

for _id, variant in records.items():
    shift = shifts.get(_id, 0)
    print(f'{_id}:\t{variant[135 + shift:142 + shift].seq}')


CaM:	AEFKEAFSLF
Rec:	EILEELQLNT
CIB2:	DNYQDCTFFN
CALM1_HUMAN:	AEFKEAFSLF
