# Propensity

In [1]:
def propensity(seq, secondary):
    """Compute the helix propensity of each standard amino acid.

    For every residue type the propensity is

        (fraction of that residue's occurrences that are helical)
        / (fraction of all positions that are helical)

    Intermediate counts and fractions are printed as a side effect.

    Parameters
    ----------
    seq : str
        Amino-acid sequence in one-letter code.
    secondary : str
        Per-position annotation read at the same indices as ``seq``.
        A position counts as helical when its character is ``"H"``.

    Returns
    -------
    dict
        Maps each of the 20 residue letters to its propensity rounded to
        three decimals.  The value is NaN when the propensity is undefined:
        the residue does not occur in ``seq``, ``seq`` is empty, or
        ``secondary`` contains no ``"H"``.

    Raises
    ------
    IndexError
        If ``secondary`` is shorter than ``seq``.
    KeyError
        If a helical position of ``seq`` holds a letter outside the 20
        residues listed below.
    """
    residues = ["G", "A", "V", "L", "I", "P", "F", "Y", "W", "S", "T", "C", "M", "N", "Q", "D", "E", "K", "R", "H"]
    nan = float("nan")

    def safe_div(numerator, denominator):
        # A zero denominator means the ratio is undefined, not an error.
        return numerator/denominator if denominator else nan

    helix_total = secondary.count("H")
    frac_helix = safe_div(helix_total, len(seq))

    print(helix_total)
    print(len(seq))
    print(frac_helix)
    print("")

    # Use the `secondary` argument, not a module-level variable, so the
    # function works for any annotation passed in.
    helix_counts = {residue:0 for residue in residues}
    for i, residue in enumerate(seq):
        if secondary[i] == "H":
            helix_counts[residue] += 1

    print("Number of specific residues in helix")
    print(helix_counts)

    residue_counts = {residue:seq.count(residue) for residue in residues}
    print("\nNumber of specific residues")
    print(residue_counts)

    helix_fractions = {residue:safe_div(helix_counts[residue], residue_counts[residue]) for residue in residues}
    print("\nFraction of specific residues in helix")
    print(helix_fractions)

    print("\nFINAL PROPENSITIES")
    # round() with an explicit ndigits passes NaN through unchanged.
    propensities = {residue:round(safe_div(helix_fractions[residue], frac_helix),3) for residue in residues}

    return propensities


# Example input: an amino-acid sequence and its aligned annotation string.
# propensity() treats "H" as helical and every other character as non-helical.
sequence = "LGASGIAAFAFGSTAILIILFNMAAEVHFDPLQFFRQFFWLGLYPPKAQYGMGIPPLHDGGWWLMAGLFMTLSLGSWWIRVYSRARALGLGTHIAWNFAAAIFFVLCIGCIHPTLVGSWSEGVPFGIWPHIDWLTAFSIRYGNFYYCPWHGFSIGFAYGCGLLFAAHGATILAVARFGGDREIEQITDRGTAVERAALFW"
structure = "XHHHHHHHHHHHHHHHHHHHHHHHHHXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHXXHHHHHHHHHHHHHHHHHXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXHHHHHHHHHHHHHHHHHHHHHHHHHHXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Report one "<residue> - <propensity>" line per residue, in table order.
helix_propensity_table = propensity(sequence, structure)
for letter, value in helix_propensity_table.items():
    print(f"{letter} - {value}")

98
200
0.49

Number of specific residues in helix
{'G': 13, 'A': 19, 'V': 2, 'L': 12, 'I': 10, 'P': 1, 'F': 10, 'Y': 2, 'W': 6, 'S': 6, 'T': 3, 'C': 2, 'M': 3, 'N': 2, 'Q': 0, 'D': 0, 'E': 1, 'K': 0, 'R': 3, 'H': 3}

Number of specific residues
{'G': 25, 'A': 25, 'V': 7, 'L': 20, 'I': 17, 'P': 9, 'F': 20, 'Y': 7, 'W': 11, 'S': 9, 'T': 8, 'C': 4, 'M': 4, 'N': 3, 'Q': 4, 'D': 5, 'E': 5, 'K': 1, 'R': 9, 'H': 7}

Fraction of specific residues in helix
{'G': 0.52, 'A': 0.76, 'V': 0.2857142857142857, 'L': 0.6, 'I': 0.5882352941176471, 'P': 0.1111111111111111, 'F': 0.5, 'Y': 0.2857142857142857, 'W': 0.5454545454545454, 'S': 0.6666666666666666, 'T': 0.375, 'C': 0.5, 'M': 0.75, 'N': 0.6666666666666666, 'Q': 0.0, 'D': 0.0, 'E': 0.2, 'K': 0.0, 'R': 0.3333333333333333, 'H': 0.42857142857142855}

FINAL PROPENSITIES
G - 1.061
A - 1.551
V - 0.583
L - 1.224
I - 1.2
P - 0.227
F - 1.02
Y - 0.583
W - 1.113
S - 1.361
T - 0.765
C - 1.02
M - 1.531
N - 1.361
Q - 0.0
D - 0.0
E - 0.408
K - 0.0
R - 0.68
H - 0.8

# Chou-Fasman

In [2]:
import numpy as np

# Protein analysed in this cell (one-letter amino-acid codes).
sequence = "KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL"

# Seed scores for helices: each residue contributes 1, 0.5, 0 or -1 to the
# score of the 6-residue window tested by the helix scan further down.
helix_params = {}
for weight, members in ((1, "EALHMQWVF"), (0.5, "KI"), (0, "DTSRC"), (-1, "NYPG")):
    helix_params.update(dict.fromkeys(members, weight))

# Per-residue helix propensities, used to extend a seeded helix and to total
# the propensity of a finished segment.
helix_propensities = {
    "E": 1.53, "A": 1.45, "L": 1.34, "H": 1.24, "M": 1.20,
    "Q": 1.17, "W": 1.14, "V": 1.14, "F": 1.12, "K": 1.07,
    "I": 1.00, "D": 0.98, "T": 0.82, "S": 0.79, "R": 0.79,
    "C": 0.77, "N": 0.73, "Y": 0.61, "P": 0.59, "G": 0.53,
}

# Seed scores for sheets: same scheme, applied to the 5-residue window of the
# sheet scan.
sheet_params = {}
for weight, members in ((1, "MVICYFQLTW"), (0.5, "A"), (0, "RGD"), (-1, "KSHNPE")):
    sheet_params.update(dict.fromkeys(members, weight))

# Per-residue sheet propensities, the sheet counterpart of helix_propensities.
sheet_propensities = {
    "M": 1.67, "V": 1.65, "I": 1.60, "C": 1.30, "Y": 1.29,
    "F": 1.28, "Q": 1.23, "L": 1.22, "T": 1.20, "W": 1.19,
    "A": 0.97, "R": 0.90, "G": 0.81, "D": 0.80, "K": 0.74,
    "S": 0.72, "H": 0.71, "N": 0.65, "P": 0.62, "E": 0.26,
}

def find_segments(seq, seed_scores, propensities, seed_len, seed_min, probe_len, probe_min):
    """Find seed windows in ``seq`` and extend each one in both directions.

    A window of ``seed_len`` residues is a seed when the sum of its
    ``seed_scores`` is at least ``seed_min``.  A seed is then grown one
    residue at a time, first to the left and then to the right, for as long
    as the ``probe_len`` residues at that edge (the candidate neighbour plus
    the ``probe_len - 1`` outermost residues of the segment) have a summed
    propensity of at least ``probe_min``.  Scanning resumes directly after
    the end of the finished segment.

    Each seed is printed as ``index window score`` (0-based index).

    Parameters
    ----------
    seq : str
        Sequence to scan.
    seed_scores : dict
        Residue -> score used to detect seeds.
    propensities : dict
        Residue -> propensity used for extension and for the segment total.
    seed_len, seed_min : int, number
        Seed window length and minimum summed score.
    probe_len, probe_min : int, number
        Extension probe length (at most ``seed_len + 1``) and minimum summed
        propensity.

    Returns
    -------
    dict
        Maps the 1-based start position of each segment to
        ``(segment, summed_propensity)``.
    """
    def total(table, residues):
        return np.sum([table[residue] for residue in residues])

    segments = {}
    i = 0
    while i + seed_len <= len(seq):
        start, end = i, i + seed_len          # segment is seq[start:end]
        score = total(seed_scores, seq[start:end])
        if score < seed_min:
            i += 1
            continue

        print(i, seq[start:end], score)

        # Bounds are tested before indexing so that a seed at index 0 never
        # wraps round to the last residue and residue 0 itself can be added.
        while start > 0 and total(propensities, seq[start-1:start-1+probe_len]) >= probe_min:
            start -= 1
        # Likewise, stop at the end of the sequence instead of indexing past it.
        while end < len(seq) and total(propensities, seq[end+1-probe_len:end+1]) >= probe_min:
            end += 1

        segment = seq[start:end]
        # Key on the tracked start index: searching for the text could hit an
        # earlier, identical stretch of the sequence.
        segments[start+1] = (segment, total(propensities, segment))
        # Resume at the segment's end; adding len(segment) to the seed index
        # would skip residues whenever the segment was extended to the left.
        i = end
    return segments


# Helices: 6-residue seed scoring >= 4, extended while a 4-residue probe sums to >= 4.
helices = find_segments(sequence, helix_params, helix_propensities,
                        seed_len=6, seed_min=4, probe_len=4, probe_min=4)

print(helices)

# Sheets: 5-residue seed scoring >= 3, extended while a 3-residue probe sums to >= 3.
sheets = find_segments(sequence, sheet_params, sheet_propensities,
                       seed_len=5, seed_min=3, probe_len=3, probe_min=3)

print(sheets)

# Resolve helix/sheet overlaps.  For every helix/sheet pair that shares at
# least one position, the summed helix and sheet propensities of the shared
# stretch are compared; the structure with the lower sum has that stretch
# removed and is stored again under the position found for the remainder.
# On a tie neither structure is changed.
#
# Both loops iterate over list(...) snapshots so the dicts can be modified
# inside the loop; the sheet snapshot is rebuilt for every helix.
#
# NOTE(review): helix_pos/helix_seq come from the snapshot, so a helix that
# was already trimmed for one sheet is compared to later sheets in its
# original form; a second `del helices[helix_pos]` can then raise KeyError.
# NOTE(review): str.replace removes every occurrence of `matching`, and when
# the overlap lies inside the segment the remainder is not a contiguous piece
# of `sequence`, so find() returns -1 and the stored position becomes 0.
for helix_pos, (helix_seq, helix_prop) in list(helices.items()):
    for sheet_pos, (sheet_seq, sheet_prop) in list(sheets.items()):
        # Positions are 1-based; `common` holds the positions in both segments.
        common = set(range(helix_pos,helix_pos+len(helix_seq))).intersection(set(range(sheet_pos,sheet_pos+len(sheet_seq))))
        if len(common) != 0:
            # Residues from the first to the last shared position (1-based -> slice).
            matching = sequence[min(common)-1:max(common)]
            if np.sum([helix_propensities[residue] for residue in matching]) < np.sum([sheet_propensities[residue] for residue in matching]):
                # Sheet wins: cut the shared residues out of the helix.
                new_helix = helix_seq.replace(matching,"")
                new_prop = np.sum([helix_propensities[residue] for residue in new_helix])
                new_pos = sequence.find(new_helix) + 1
                del helices[helix_pos]
                # A helix with nothing left is dropped.
                if len(new_helix) > 0:
                    helices[new_pos] = (new_helix,new_prop)
            elif np.sum([helix_propensities[residue] for residue in matching]) > np.sum([sheet_propensities[residue] for residue in matching]):
                # Helix wins: cut the shared residues out of the sheet.
                new_sheet = sheet_seq.replace(matching,"")
                new_prop = np.sum([sheet_propensities[residue] for residue in new_sheet])
                new_pos = sequence.find(new_sheet) + 1
                del sheets[sheet_pos]
                # A sheet with nothing left is dropped.
                if len(new_sheet) > 0:
                    sheets[new_pos] = (new_sheet,new_prop)

# Re-key both result tables in ascending order of start position.
helices = {pos: helices[pos] for pos in sorted(helices)}
sheets = {pos: sheets[pos] for pos in sorted(sheets)}

# Final report: one line per segment with its position and summed propensity.
print("Helices")
for start, (segment, total_propensity) in helices.items():
    print(f"{segment} at {start} with propensity {total_propensity:.3f}")
print("\nSheets")
for start, (segment, total_propensity) in sheets.items():
    print(f"{segment} at {start} with propensity {total_propensity:.3f}")

4 RCELAA 4
27 WVCAAK 4.5
104 MNAWVA 4
117 TDVQAW 4
{5: ('RCELAAAMKRH', 13.08), 28: ('WVCAAKFESNF', 12.310000000000002), 105: ('MNAWVAWRN', 9.77), 118: ('TDVQAWIR', 8.489999999999998)}
1 VFGRC 3
7 LAAAM 3.5
27 WVCAA 4.0
50 TDYGI 3
106 AWVAW 4.0
116 GTDVQ 3
{1: ('KVFGRC', 6.680000000000001), 8: ('LAAAMKR', 7.44), 27: ('NWVCAA', 6.7299999999999995), 51: ('TDYGILQIN', 10.4), 107: ('AWVAWR', 6.870000000000001), 117: ('GTDVQAWIRGCRL', 14.580000000000002)}
Helices
ELAAAMKRH at 7 with propensity 11.520
KFESNF at 33 with propensity 6.360
MNAWVAWRN at 105 with propensity 9.770

Sheets
KVFGRC at 1 with propensity 6.680
NWVCAA at 27 with propensity 6.730
TDYGILQIN at 51 with propensity 10.400
GTDVQAWIRGCRL at 117 with propensity 14.580


In [13]:
# Ad-hoc check: summed sheet propensity of the residues R, C and E.
np.sum([sheet_propensities[letter] for letter in ("R", "C", "E")])

2.46