In [1]:
## Importing Python Modules

from Bio import SeqIO
import os
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.Seq import Seq
import pandas as pd
import matplotlib.pyplot as plt

## Defining Paths and Variables

km_path = "/Users/kaylahmarcello/Desktop/projects/TCS/FASTAS/" 
aliphatic_index = ["A", "V", "I", "L"]
acidic = ["D", "E"]

## Calculations

import csv

with open(os.path.join(km_path,"TCS.fasta")) as handle: 
        f = open('TCS.csv', 'w')
        writer = csv.writer(f)
        header = ['name', 'proline count', 'arginine count', 'lysine count', 'r/k ratio', 'aliphatic percent sum', 'aromaticity', 'flexibility sum', 'flexibility_avg', 'gravy']
        writer.writerow(header)
        for record in SeqIO.parse(handle, "fasta"):
            print(record.id)                       # printing the gene id
            #print(vars(record))
            #print(record._seq)
            x = ProteinAnalysis(str(record._seq))  # storing the sequence in a variable
            g_count = x.count_amino_acids()["G"]   # calculating number of G residues
            print("proline count", g_count)
            r_count = x.count_amino_acids()["R"]   # calculating the number of R AA residues
            print("arginine count", r_count)   
            k_count = x.count_amino_acids()["K"]   # calculating the number of K AA residues 
            print("lysine count", k_count)
            if k_count >0:
                r_k_ratio = r_count / k_count      # ratio of R to K, avoiding zero divider 
                print("r/k ratio", r_k_ratio)
            else:
                r_k_ratio = "NA"
            aliphatic_percent_sum = 0              # calculating the percentage of total protein that are aliphatic residues
            for aa in aliphatic_index: 
                    aliphatic_count = x.get_amino_acids_percent()[aa]
                    aliphatic_percent_sum = aliphatic_percent_sum + aliphatic_count
                    #print("aliphatic count", aliphatic_count)
                    #print("aliphatic percent sum", aliphatic_percent_sum)
            print("final aliphatic percent sum", aliphatic_percent_sum)
            acidic_percentage_sum = 0             # calculating percentage of toal protein that are acidic residues
            for aa in acidic: 
                    acidic_percentage = x.get_amino_acids_percent()[aa]
                    acidic_percentage_sum = acidic_percentage_sum + acidic_percentage
                    #print("acidic percentage", acidic_percentage)
                    #print("acidic percentage sum", acidic_percentage_sum)
            print("final acidic percentage sum", acidic_percentage_sum)
            aromaticity = x.aromaticity()         # calculating aromaticity
            print("aromaticity", aromaticity)
            
            flexibility = x.flexibility()         # calculating flexibility 
            #print("flexibility", flexibility)
            flexibility_sum = sum(flexibility)
            print("flexibility sum", flexibility_sum)
            flexibility_avg = (sum(flexibility)/len(flexibility))
            print("flexibility avg", flexibility_avg)
            gravy = x.gravy()                     # calculating gravy
            print("gravy", gravy)
            data = [record.id, g_count, r_count, k_count, r_k_ratio, aliphatic_percent_sum, aromaticity, flexibility_sum, flexibility_avg, gravy]
            writer.writerow(data)
        f.close() 
            

## Visualizing df

df = pd.read_csv('TCS.csv')

print(df.head(10))

print(df.info())

df.corr()  # This was for fun bc I'm learning Pandas




GCF_000007925#WP_225866420.1|RR|OmpR
proline count 20
arginine count 22
lysine count 4
r/k ratio 5.5
final aliphatic percent sum 0.36651583710407243
final acidic percentage sum 0.11312217194570134
aromaticity 0.027149321266968326
flexibility sum 211.0583928571429
flexibility avg 0.9955584568733156
gravy 0.04117647058823531
GCF_000007925#WP_164923246.1|RR|NarL
proline count 8
arginine count 7
lysine count 16
r/k ratio 0.4375
final aliphatic percent sum 0.38341968911917096
final acidic percentage sum 0.12435233160621761
aromaticity 0.056994818652849735
flexibility sum 184.11679761904762
flexibility avg 1.000634769668737
gravy 0.0367875647668395
GCF_000007925#WP_052039745.1|RR|OmpR
proline count 20
arginine count 24
lysine count 10
r/k ratio 2.4
final aliphatic percent sum 0.366412213740458
final acidic percentage sum 0.1450381679389313
aromaticity 0.05343511450381679
flexibility sum 253.1305119047619
flexibility avg 1.0005158573310746
gravy -0.03206106870229013
GCF_000007925#WP_011125693

flexibility sum 547.735761904762
flexibility avg 0.9958832034632036
gravy -0.10787119856887309
GCF_000009705#WP_010999452.1|RR|CheY
proline count 8
arginine count 5
lysine count 6
r/k ratio 0.8333333333333334
final aliphatic percent sum 0.36585365853658536
final acidic percentage sum 0.0975609756097561
aromaticity 0.024390243902439025
flexibility sum 113.656630952381
flexibility avg 0.9969879908103596
gravy 0.025203252032520333
GCF_000009705#WP_010999512.1|RR|CheY
proline count 3
arginine count 6
lysine count 6
r/k ratio 1.0
final aliphatic percent sum 0.3680555555555555
final acidic percentage sum 0.1388888888888889
aromaticity 0.04861111111111111
flexibility sum 134.1337976190476
flexibility avg 0.9935836860670193
gravy 0.041666666666666616
GCF_000009705#WP_010999525.1|RR|NarL
proline count 9
arginine count 10
lysine count 16
r/k ratio 0.625
final aliphatic percent sum 0.3452914798206278
final acidic percentage sum 0.13004484304932734
aromaticity 0.09417040358744394
flexibility sum 2

flexibility sum 685.7846071428565
flexibility avg 0.9982308692035757
gravy -0.07514367816091956
GCF_000012505#WP_011359144.1|RR|OmpR
proline count 19
arginine count 21
lysine count 12
r/k ratio 1.75
final aliphatic percent sum 0.31660231660231664
final acidic percentage sum 0.1583011583011583
aromaticity 0.05791505791505792
flexibility sum 250.0817023809525
flexibility avg 1.00032680952381
gravy -0.328957528957529
GCF_000012505#WP_011359102.1|HK|Classic
proline count 29
arginine count 29
lysine count 3
r/k ratio 9.666666666666666
final aliphatic percent sum 0.32887700534759357
final acidic percentage sum 0.11497326203208556
aromaticity 0.05614973262032086
flexibility sum 365.15790476190466
flexibility avg 1.0004326157860401
gravy -0.3219251336898402
GCF_000012505#WP_011358994.1|RR|OmpR
proline count 18
arginine count 28
lysine count 5
r/k ratio 5.6
final aliphatic percent sum 0.37154150197628455
final acidic percentage sum 0.1422924901185771
aromaticity 0.05138339920948616
flexibility 

flexibility sum 1295.5718095238094
flexibility avg 1.0012146905129902
gravy -0.36830391404451124
GCF_000017845#WP_009544325.1|RR|CheY
proline count 7
arginine count 9
lysine count 4
r/k ratio 2.25
final aliphatic percent sum 0.32191780821917804
final acidic percentage sum 0.1506849315068493
aromaticity 0.0821917808219178
flexibility sum 136.60188095238092
flexibility avg 0.9970940215502258
gravy -0.21232876712328774
GCF_000017845#WP_009544326.1|RR|PleD_VieA
proline count 32
arginine count 40
lysine count 34
r/k ratio 1.1764705882352942
final aliphatic percent sum 0.3356353591160221
final acidic percentage sum 0.1408839779005525
aromaticity 0.08287292817679559
flexibility sum 713.75325
flexibility avg 0.9982562937062937
gravy -0.23411602209944762
GCF_000017845#WP_009544340.1|HK|Classic
proline count 13
arginine count 12
lysine count 40
r/k ratio 0.3
final aliphatic percent sum 0.2903930131004367
final acidic percentage sum 0.11135371179039302
aromaticity 0.07641921397379912
flexibility 

GCF_000018105#WP_012165434.1|RR|PleD_VieA
proline count 31
arginine count 34
lysine count 24
r/k ratio 1.4166666666666667
final aliphatic percent sum 0.33176100628930816
final acidic percentage sum 0.14150943396226415
aromaticity 0.05817610062893082
flexibility sum 626.7072261904759
flexibility avg 0.999533056125161
gravy -0.18333333333333382
GCF_000018105#WP_012165480.1|HK|Classic
proline count 27
arginine count 36
lysine count 9
r/k ratio 4.0
final aliphatic percent sum 0.33261339092872566
final acidic percentage sum 0.1123110151187905
aromaticity 0.06263498920086394
flexibility sum 452.62265476190504
flexibility avg 0.9969661999160904
gravy -0.23347732181425512
GCF_000018105#WP_012165516.1|HK|Hybrid
proline count 59
arginine count 48
lysine count 53
r/k ratio 0.9056603773584906
final aliphatic percent sum 0.3355018587360595
final acidic percentage sum 0.11710037174721188
aromaticity 0.06784386617100371
flexibility sum 1066.998999999999
flexibility avg 0.9999990627928762
gravy -0.187

flexibility sum 214.6976190476192
flexibility avg 0.9985935769656707
gravy -0.16160714285714284
GCF_000020025#WP_012412429.1|HK|Classic
proline count 30
arginine count 20
lysine count 18
r/k ratio 1.1111111111111112
final aliphatic percent sum 0.3536585365853659
final acidic percentage sum 0.09024390243902439
aromaticity 0.08292682926829269
flexibility sum 399.30598809523786
flexibility avg 0.9957755314095708
gravy 0.04780487804878041
GCF_000020025#WP_012412354.1|HK|Classic
proline count 30
arginine count 36
lysine count 27
r/k ratio 1.3333333333333333
final aliphatic percent sum 0.32399999999999995
final acidic percentage sum 0.128
aromaticity 0.06666666666666667
flexibility sum 739.5578452380943
flexibility avg 0.9980537722511393
gravy -0.215733333333333
GCF_000020025#WP_012412271.1|HK|Classic
proline count 46
arginine count 47
lysine count 49
r/k ratio 0.9591836734693877
final aliphatic percent sum 0.33069977426636565
final acidic percentage sum 0.11963882618510158
aromaticity 0.074

flexibility sum 435.27476190476193
flexibility avg 1.000631636562671
gravy -0.14549549549549573
GCF_000020025#WP_012408167.1|HK|Classic
proline count 21
arginine count 24
lysine count 28
r/k ratio 0.8571428571428571
final aliphatic percent sum 0.34800838574423476
final acidic percentage sum 0.10272536687631027
aromaticity 0.06708595387840671
flexibility sum 466.99164285714295
flexibility avg 0.9978453907203909
gravy -0.021802935010482246
GCF_000020025#WP_012408171.1|RR|CheY
proline count 6
arginine count 6
lysine count 7
r/k ratio 0.8571428571428571
final aliphatic percent sum 0.3576158940397351
final acidic percentage sum 0.152317880794702
aromaticity 0.07284768211920531
flexibility sum 141.4592857142857
flexibility avg 0.996192152917505
gravy 0.09735099337748349
GCF_000020025#WP_012408160.1|HK|Classic
proline count 25
arginine count 16
lysine count 22
r/k ratio 0.7272727272727273
final aliphatic percent sum 0.3364928909952607
final acidic percentage sum 0.12085308056872038
aromaticit

flexibility sum 553.6574523809523
flexibility avg 0.9975809952809952
gravy -0.08191489361702146
GCF_000021825#WP_012597848.1|HK|Hybrid
proline count 35
arginine count 22
lysine count 46
r/k ratio 0.4782608695652174
final aliphatic percent sum 0.3229018492176387
final acidic percentage sum 0.11948790896159317
aromaticity 0.06970128022759602
flexibility sum 695.2079285714284
flexibility avg 1.0017405310827499
gravy -0.24509246088193382
GCF_000021825#WP_012597621.1|RR|CheY
proline count 3
arginine count 4
lysine count 12
r/k ratio 0.3333333333333333
final aliphatic percent sum 0.35766423357664234
final acidic percentage sum 0.13138686131386862
aromaticity 0.043795620437956206
flexibility sum 128.33409523809524
flexibility avg 1.002610119047619
gravy -0.06496350364963505
GCF_000021825#WP_012597598.1|RR|PleD
proline count 18
arginine count 13
lysine count 26
r/k ratio 0.5
final aliphatic percent sum 0.35714285714285715
final acidic percentage sum 0.14285714285714285
aromaticity 0.0714285714

proline count 8
arginine count 8
lysine count 7
r/k ratio 1.1428571428571428
final aliphatic percent sum 0.3089430894308943
final acidic percentage sum 0.15447154471544716
aromaticity 0.07317073170731708
flexibility sum 113.90885714285716
flexibility avg 0.999200501253133
gravy -0.1804878048780488
GCF_000147335#WP_013323444.1|RR|unclassified
proline count 19
arginine count 21
lysine count 17
r/k ratio 1.2352941176470589
final aliphatic percent sum 0.31343283582089554
final acidic percentage sum 0.11691542288557213
aromaticity 0.1044776119402985
flexibility sum 390.9340119047618
flexibility avg 0.9947430328365441
gravy -0.11119402985074638
GCF_000147335#WP_013323504.1|RR|OmpR
proline count 51
arginine count 46
lysine count 35
r/k ratio 1.3142857142857143
final aliphatic percent sum 0.3395522388059702
final acidic percentage sum 0.12437810945273632
aromaticity 0.07462686567164178
flexibility sum 793.6401904761898
flexibility avg 0.9982895477687922
gravy -0.18109452736318393
GCF_000147335

flexibility sum 1350.7269404761919
flexibility avg 0.9990583879261774
gravy -0.16752387950036704
GCF_000169095#WP_009782314.1|HK|Classic
proline count 39
arginine count 62
lysine count 53
r/k ratio 1.169811320754717
final aliphatic percent sum 0.31535648994515536
final acidic percentage sum 0.12522851919561243
aromaticity 0.09597806215722121
flexibility sum 1081.744
flexibility avg 0.9969990783410138
gravy -0.24058500914076725
GCF_000169095#WP_009782334.1|RR|CheY
proline count 10
arginine count 9
lysine count 19
r/k ratio 0.47368421052631576
final aliphatic percent sum 0.3181818181818182
final acidic percentage sum 0.09917355371900827
aromaticity 0.0743801652892562
flexibility sum 232.12596428571436
flexibility avg 0.9962487737584307
gravy -0.2888429752066117
GCF_000169095#WP_009782341.1|PP|HisKa
proline count 0
arginine count 3
lysine count 5
r/k ratio 0.6
final aliphatic percent sum 0.3023255813953488
final acidic percentage sum 0.11627906976744187
aromaticity 0.06976744186046512
fle

flexibility sum 1212.083392857144
flexibility avg 1.0000688059877425
gravy -0.2647010647010638
GCF_000169095#WP_039895636.1|HK|Hybrid
proline count 19
arginine count 8
lysine count 33
r/k ratio 0.24242424242424243
final aliphatic percent sum 0.3325740318906606
final acidic percentage sum 0.10933940774487472
aromaticity 0.06150341685649203
flexibility sum 430.04489285714254
flexibility avg 1.0001044019933547
gravy -0.13644646924829154
GCF_000169095#WP_039895892.1|HK|Classic
proline count 37
arginine count 22
lysine count 37
r/k ratio 0.5945945945945946
final aliphatic percent sum 0.3257443082311734
final acidic percentage sum 0.11208406304728546
aromaticity 0.06654991243432574
flexibility sum 563.102773809524
flexibility avg 1.0019622309778007
gravy -0.14500875656742598
GCF_000169095#WP_039896932.1|RR|CheY
proline count 7
arginine count 4
lysine count 7
r/k ratio 0.5714285714285714
final aliphatic percent sum 0.3492063492063492
final acidic percentage sum 0.14285714285714285
aromaticity

gravy -0.1941411451398139
GCF_000204075#WP_011319330.1|HK|Hybrid
proline count 25
arginine count 12
lysine count 28
r/k ratio 0.42857142857142855
final aliphatic percent sum 0.3180778032036613
final acidic percentage sum 0.13501144164759726
aromaticity 0.04576659038901602
flexibility sum 428.93778571428584
flexibility avg 1.0021910881174902
gravy -0.19176201372997745
GCF_000204075#WP_011319331.1|HK|Hybrid
proline count 63
arginine count 43
lysine count 42
r/k ratio 1.0238095238095237
final aliphatic percent sum 0.346524064171123
final acidic percentage sum 0.11229946524064172
aromaticity 0.058823529411764705
flexibility sum 924.6370119047605
flexibility avg 0.9985280906098926
gravy -0.05871657754010703
GCF_000204075#WP_011319431.1|HK|Hybrid
proline count 41
arginine count 39
lysine count 29
r/k ratio 1.3448275862068966
final aliphatic percent sum 0.346045197740113
final acidic percentage sum 0.1285310734463277
aromaticity 0.09745762711864406
flexibility sum 696.0414880952386
flexibilit

flexibility sum 795.2006309523813
flexibility avg 0.9977423223994747
gravy -0.07456575682382148
GCF_000270265#WP_010873875.1|HK|Classic
proline count 21
arginine count 13
lysine count 24
r/k ratio 0.5416666666666666
final aliphatic percent sum 0.32166301969365424
final acidic percentage sum 0.10940919037199125
aromaticity 0.07658643326039388
flexibility sum 446.95390476190454
flexibility avg 0.9976649659863941
gravy -0.14617067833698058
GCF_000270265#WP_010873880.1|RR|OmpR
proline count 16
arginine count 19
lysine count 9
r/k ratio 2.111111111111111
final aliphatic percent sum 0.32365145228215764
final acidic percentage sum 0.15352697095435686
aromaticity 0.058091286307053944
flexibility sum 231.56519047619042
flexibility avg 0.9981258210180621
gravy -0.22614107883817425
GCF_000270265#WP_010873908.1|HK|Classic
proline count 48
arginine count 61
lysine count 28
r/k ratio 2.1785714285714284
final aliphatic percent sum 0.33596837944664026
final acidic percentage sum 0.13965744400527008
ar

flexibility sum 1004.4647619047605
flexibility avg 0.9994674247808563
gravy -0.13925049309664714
GCF_000284455#WP_010872553.1|RR|NarL
proline count 15
arginine count 12
lysine count 7
r/k ratio 1.7142857142857142
final aliphatic percent sum 0.4247787610619469
final acidic percentage sum 0.12389380530973451
aromaticity 0.03982300884955752
flexibility sum 216.03227380952384
flexibility avg 0.9955404323019532
gravy 0.21371681415929178
GCF_000284455#WP_010872617.1|HK|Unorthodox
proline count 88
arginine count 55
lysine count 46
r/k ratio 1.1956521739130435
final aliphatic percent sum 0.3568596352101506
final acidic percentage sum 0.12450436161776368
aromaticity 0.07137192704203013
flexibility sum 1247.8631428571439
flexibility avg 0.9966958010041085
gravy 0.03576526566217342
GCF_000284455#WP_010872618.1|HK|Hybrid
proline count 15
arginine count 16
lysine count 23
r/k ratio 0.6956521739130435
final aliphatic percent sum 0.37431693989071035
final acidic percentage sum 0.11475409836065574
aro

flexibility sum 231.44926190476193
flexibility avg 0.9976261288998359
gravy 0.04439834024896274
GCF_000300115#WP_045868917.1|HK|Classic
proline count 90
arginine count 52
lysine count 93
r/k ratio 0.5591397849462365
final aliphatic percent sum 0.3496932515337423
final acidic percentage sum 0.10429447852760737
aromaticity 0.07975460122699386
flexibility sum 1780.1513095238104
flexibility avg 0.9978426622891313
gravy -0.11935303959843878
GCF_000300115#WP_045868826.1|HK|Classic
proline count 20
arginine count 20
lysine count 41
r/k ratio 0.4878048780487805
final aliphatic percent sum 0.32368896925858953
final acidic percentage sum 0.108499095840868
aromaticity 0.08499095840867993
flexibility sum 544.1059761904758
flexibility avg 1.0001948091736688
gravy -0.2555153707052444
GCF_000300115#WP_045868733.1|HK|Hybrid
proline count 94
arginine count 71
lysine count 135
r/k ratio 0.5259259259259259
final aliphatic percent sum 0.34133709981167604
final acidic percentage sum 0.11958568738229755
aro

flexibility sum 1636.2246071428538
flexibility avg 1.0001372904296173
gravy -0.2643768996960471
GCF_000300115#WP_052335030.1|HK|Classic
proline count 25
arginine count 15
lysine count 20
r/k ratio 0.75
final aliphatic percent sum 0.3638211382113822
final acidic percentage sum 0.10365853658536586
aromaticity 0.06097560975609756
flexibility sum 481.13699999999994
flexibility avg 0.996142857142857
gravy 0.05813008130081298
GCF_000300115#WP_052335055.1|HK|Hybrid
proline count 62
arginine count 60
lysine count 62
r/k ratio 0.967741935483871
final aliphatic percent sum 0.350207468879668
final acidic percentage sum 0.12282157676348548
aromaticity 0.051452282157676346
flexibility sum 1198.127988095237
flexibility avg 1.0017792542602315
gravy -0.20946058091286213
GCF_000300115#WP_052335067.1|HK|Hybrid
proline count 64
arginine count 67
lysine count 45
r/k ratio 1.488888888888889
final aliphatic percent sum 0.34432823813354785
final acidic percentage sum 0.1263073209975865
aromaticity 0.07481898

flexibility sum 223.39346428571432
flexibility avg 1.0017644138372839
gravy -0.17068965517241377
GCF_000309385#WP_017297505.1|RR|CheB
proline count 26
arginine count 17
lysine count 10
r/k ratio 1.7
final aliphatic percent sum 0.4017094017094017
final acidic percentage sum 0.08547008547008547
aromaticity 0.045584045584045586
flexibility sum 339.4644999999999
flexibility avg 0.9925862573099411
gravy 0.3809116809116805
GCF_000309385#WP_017297506.1|RR|CheY
proline count 7
arginine count 9
lysine count 7
r/k ratio 1.2857142857142858
final aliphatic percent sum 0.3484848484848485
final acidic percentage sum 0.10606060606060605
aromaticity 0.030303030303030304
flexibility sum 122.82716666666664
flexibility avg 0.9985948509485093
gravy 0.06212121212121206
GCF_000309385#WP_017297477.1|RR|CheY
proline count 5
arginine count 11
lysine count 5
r/k ratio 2.2
final aliphatic percent sum 0.3426573426573427
final acidic percentage sum 0.14685314685314685
aromaticity 0.06293706293706294
flexibility su

flexibility sum 409.548892857143
flexibility avg 1.001342036325533
gravy -0.2303827751196177
GCF_000312225#WP_002789923.1|RR|CheY
proline count 25
arginine count 40
lysine count 21
r/k ratio 1.9047619047619047
final aliphatic percent sum 0.3613138686131387
final acidic percentage sum 0.12226277372262773
aromaticity 0.08576642335766424
flexibility sum 536.3506428571425
flexibility avg 0.9950846806254963
gravy -0.0308394160583943
GCF_000312225#WP_002789931.1|RR|CheY
proline count 3
arginine count 3
lysine count 6
r/k ratio 0.5
final aliphatic percent sum 0.3116883116883117
final acidic percentage sum 0.1168831168831169
aromaticity 0.09090909090909091
flexibility sum 68.18329761904764
flexibility avg 1.0026955532212887
gravy -0.26363636363636367
GCF_000312225#WP_002790089.1|HK|Classic
proline count 37
arginine count 34
lysine count 27
r/k ratio 1.2592592592592593
final aliphatic percent sum 0.3438914027149321
final acidic percentage sum 0.13574660633484165
aromaticity 0.0784313725490196
f

flexibility sum 657.5854761904754
flexibility avg 0.9978535298793254
gravy 0.0034431137724548954
GCF_000315565#WP_017316186.1|HK|Hybrid
proline count 51
arginine count 35
lysine count 47
r/k ratio 0.7446808510638298
final aliphatic percent sum 0.34822601839684625
final acidic percentage sum 0.1314060446780552
aromaticity 0.06438896189224705
flexibility sum 751.9715000000008
flexibility avg 0.9999621010638309
gravy -0.08107752956636076
GCF_000315565#WP_017316188.1|HK|Classic
proline count 44
arginine count 64
lysine count 33
r/k ratio 1.9393939393939394
final aliphatic percent sum 0.32667450058754405
final acidic percentage sum 0.1398354876615746
aromaticity 0.07168037602820213
flexibility sum 841.6747142857142
flexibility avg 0.999613674923651
gravy -0.30564042303172656
GCF_000315565#WP_017316192.1|RR|OmpR
proline count 16
arginine count 19
lysine count 8
r/k ratio 2.375
final aliphatic percent sum 0.32456140350877194
final acidic percentage sum 0.14473684210526316
aromaticity 0.070175

gravy 0.0815068493150685
GCF_000315585#WP_017312301.1|HK|Classic
proline count 33
arginine count 32
lysine count 46
r/k ratio 0.6956521739130435
final aliphatic percent sum 0.3190348525469169
final acidic percentage sum 0.13002680965147453
aromaticity 0.09115281501340483
flexibility sum 735.8836666666667
flexibility avg 0.998485300768883
gravy -0.2258713136729225
GCF_000315585#WP_017312302.1|RR|CheY
proline count 6
arginine count 9
lysine count 7
r/k ratio 1.2857142857142858
final aliphatic percent sum 0.3112582781456954
final acidic percentage sum 0.152317880794702
aromaticity 0.0728476821192053
flexibility sum 142.08933333333334
flexibility avg 1.0006291079812206
gravy -0.30198675496688754
GCF_000315585#WP_017312303.1|HK|Hybrid
proline count 48
arginine count 44
lysine count 32
r/k ratio 1.375
final aliphatic percent sum 0.31341107871720114
final acidic percentage sum 0.13702623906705538
aromaticity 0.07580174927113703
flexibility sum 677.1429761904753
flexibility avg 1.0002111908278

GCF_000316115#WP_006515484.1|HK|Classic
proline count 32
arginine count 35
lysine count 13
r/k ratio 2.6923076923076925
final aliphatic percent sum 0.3592233009708738
final acidic percentage sum 0.06310679611650485
aromaticity 0.06796116504854369
flexibility sum 399.11510714285714
flexibility avg 0.9903600673520029
gravy 0.18640776699029113
GCF_000316115#WP_006515483.1|RR|NarL
proline count 17
arginine count 12
lysine count 8
r/k ratio 1.5
final aliphatic percent sum 0.3700440528634361
final acidic percentage sum 0.14977973568281938
aromaticity 0.03524229074889868
flexibility sum 218.04145238095228
flexibility avg 1.0001901485364784
gravy -0.033480176211453674
GCF_000316115#WP_006515427.1|RR|unclassified
proline count 16
arginine count 21
lysine count 13
r/k ratio 1.6153846153846154
final aliphatic percent sum 0.3305785123966942
final acidic percentage sum 0.10743801652892562
aromaticity 0.0743801652892562
flexibility sum 352.7361309523809
flexibility avg 0.9964297484530534
gravy -0.04

flexibility sum 1118.5090833333325
flexibility avg 1.0013510146225
gravy -0.18374777975133208
GCF_000316605#WP_015135423.1|RR|YesN
proline count 14
arginine count 13
lysine count 25
r/k ratio 0.52
final aliphatic percent sum 0.2900763358778626
final acidic percentage sum 0.1297709923664122
aromaticity 0.09923664122137404
flexibility sum 253.78990476190478
flexibility avg 1.0031221532091097
gravy -0.26030534351145046
GCF_000316605#WP_015135424.1|HK|Classic
proline count 22
arginine count 31
lysine count 19
r/k ratio 1.631578947368421
final aliphatic percent sum 0.29850746268656714
final acidic percentage sum 0.1318407960199005
aromaticity 0.06467661691542288
flexibility sum 392.9952976190474
flexibility avg 0.9999880346540646
gravy -0.3930348258706464
GCF_000316605#WP_015135547.1|HK|Classic
proline count 37
arginine count 42
lysine count 39
r/k ratio 1.0769230769230769
final aliphatic percent sum 0.30730478589420657
final acidic percentage sum 0.09949622166246852
aromaticity 0.119647355

flexibility sum 427.34963095238123
flexibility avg 0.9984804461504234
gravy -0.30846681922196806
GCF_000316625#WP_015114471.1|HK|Classic
proline count 42
arginine count 20
lysine count 30
r/k ratio 0.6666666666666666
final aliphatic percent sum 0.3417085427135678
final acidic percentage sum 0.11055276381909547
aromaticity 0.06030150753768845
flexibility sum 587.1273333333337
flexibility avg 0.9985158730158736
gravy 0.02294807370184259
GCF_000316625#WP_015114526.1|RR|OmpR
proline count 19
arginine count 19
lysine count 11
r/k ratio 1.7272727272727273
final aliphatic percent sum 0.3333333333333333
final acidic percentage sum 0.14864864864864863
aromaticity 0.07657657657657657
flexibility sum 213.1086309523809
flexibility avg 1.0005100044712718
gravy -0.17567567567567563
GCF_000316625#WP_015114527.1|RR|CheY
proline count 6
arginine count 5
lysine count 9
r/k ratio 0.5555555555555556
final aliphatic percent sum 0.3666666666666667
final acidic percentage sum 0.125
aromaticity 0.075000000000

flexibility sum 744.7728809523811
flexibility avg 0.9970185822655704
gravy 0.022089947089947017
GCF_000316645#WP_015139192.1|RR|CheB
proline count 22
arginine count 10
lysine count 16
r/k ratio 0.625
final aliphatic percent sum 0.36085626911314983
final acidic percentage sum 0.09174311926605505
aromaticity 0.055045871559633024
flexibility sum 316.41232142857143
flexibility avg 0.9950073000898473
gravy 0.1764525993883789
GCF_000316645#WP_015139193.1|HK|Hybrid
proline count 21
arginine count 14
lysine count 27
r/k ratio 0.5185185185185185
final aliphatic percent sum 0.3347921225382932
final acidic percentage sum 0.12253829321663019
aromaticity 0.05908096280087527
flexibility sum 448.4614166666665
flexibility avg 1.0010299479166662
gravy -0.1422319474835887
GCF_000316645#WP_015139250.1|HK|Unorthodox
proline count 83
arginine count 91
lysine count 75
r/k ratio 1.2133333333333334
final aliphatic percent sum 0.3294392523364486
final acidic percentage sum 0.11507009345794392
aromaticity 0.075

flexibility sum 1784.4111190476187
flexibility avg 0.9957651333971087
gravy -0.09616879511382577
GCF_000317045#WP_015171831.1|HK|Unorthodox
proline count 123
arginine count 161
lysine count 43
r/k ratio 3.744186046511628
final aliphatic percent sum 0.3390858208955224
final acidic percentage sum 0.13712686567164178
aromaticity 0.06436567164179105
flexibility sum 2129.30551190476
flexibility avg 0.9973327924612458
gravy -0.19141791044775974
GCF_000317045#WP_015171832.1|RR|PleD
proline count 16
arginine count 28
lysine count 6
r/k ratio 4.666666666666667
final aliphatic percent sum 0.3497109826589595
final acidic percentage sum 0.1416184971098266
aromaticity 0.05491329479768786
flexibility sum 335.95008333333374
flexibility avg 0.9968845202769547
gravy -0.24248554913294837
GCF_000317045#WP_015171898.1|RR|PleD_VieA
proline count 36
arginine count 49
lysine count 18
r/k ratio 2.7222222222222223
final aliphatic percent sum 0.36
final acidic percentage sum 0.13241379310344828
aromaticity 0.07

flexibility sum 559.1800119047615
flexibility avg 0.9985357355442169
gravy -0.14885764499121298
GCF_000317085#WP_015169850.1|HK|Hybrid
proline count 39
arginine count 26
lysine count 31
r/k ratio 0.8387096774193549
final aliphatic percent sum 0.3309037900874635
final acidic percentage sum 0.12099125364431487
aromaticity 0.06705539358600583
flexibility sum 675.9590238095246
flexibility avg 0.9984623689948665
gravy -0.1266763848396504
GCF_000317085#WP_015169958.1|HK|Classic
proline count 30
arginine count 37
lysine count 40
r/k ratio 0.925
final aliphatic percent sum 0.34338138925294887
final acidic percentage sum 0.11140235910878113
aromaticity 0.06290956749672345
flexibility sum 752.0158214285716
flexibility avg 0.9973684634331188
gravy -0.15779816513761502
GCF_000317085#WP_015169959.1|HK|Hybrid
proline count 20
arginine count 10
lysine count 23
r/k ratio 0.43478260869565216
final aliphatic percent sum 0.3410852713178294
final acidic percentage sum 0.11886304909560723
aromaticity 0.064

gravy -0.1735337243401751
GCF_000317145#WP_015158539.1|HK|Hybrid
proline count 63
arginine count 82
lysine count 31
r/k ratio 2.6451612903225805
final aliphatic percent sum 0.32744924977934686
final acidic percentage sum 0.15092674315975285
aromaticity 0.08737864077669903
flexibility sum 1121.811261904762
flexibility avg 0.9980527241145569
gravy -0.24068843777581542
GCF_000317145#WP_015158393.1|HK|Classic
proline count 29
arginine count 36
lysine count 18
r/k ratio 2.0
final aliphatic percent sum 0.3333333333333333
final acidic percentage sum 0.14074074074074075
aromaticity 0.07222222222222222
flexibility sum 530.9807857142857
flexibility avg 0.9999638149044929
gravy -0.13462962962963027
GCF_000317145#WP_015158375.1|RR|OmpR
proline count 15
arginine count 23
lysine count 6
r/k ratio 3.8333333333333335
final aliphatic percent sum 0.3511111111111111
final acidic percentage sum 0.16444444444444445
aromaticity 0.06666666666666667
flexibility sum 215.41117857142862
flexibility avg 0.9972739

flexibility sum 584.1852857142858
flexibility avg 0.9969032179424672
gravy 0.01680672268907562
GCF_000317265#WP_016872648.1|RR|CheY
proline count 8
arginine count 5
lysine count 10
r/k ratio 0.5
final aliphatic percent sum 0.3560606060606061
final acidic percentage sum 0.14393939393939392
aromaticity 0.03787878787878788
flexibility sum 123.22075000000001
flexibility avg 1.0017947154471545
gravy -0.05151515151515159
GCF_000317265#WP_016872627.1|HK|Classic
proline count 30
arginine count 43
lysine count 22
r/k ratio 1.9545454545454546
final aliphatic percent sum 0.338255033557047
final acidic percentage sum 0.11812080536912753
aromaticity 0.06845637583892616
flexibility sum 732.1447142857143
flexibility avg 0.9947618400621118
gravy -0.10000000000000062
GCF_000317265#WP_016872612.1|HK|Classic
proline count 39
arginine count 35
lysine count 28
r/k ratio 1.25
final aliphatic percent sum 0.32913165266106437
final acidic percentage sum 0.12745098039215685
aromaticity 0.08683473389355742
flexi

flexibility sum 459.3577857142858
flexibility avg 0.9986038819875778
gravy -0.3083155650319833
GCF_000317265#WP_235082945.1|HK|Hybrid
proline count 49
arginine count 39
lysine count 47
r/k ratio 0.8297872340425532
final aliphatic percent sum 0.3595092024539877
final acidic percentage sum 0.10920245398773007
aromaticity 0.06625766871165645
flexibility sum 805.7041071428581
flexibility avg 0.9996328872740176
gravy -0.07042944785276073
GCF_000317265#WP_235082951.1|HK|Hybrid
proline count 39
arginine count 23
lysine count 37
r/k ratio 0.6216216216216216
final aliphatic percent sum 0.34087481146304677
final acidic percentage sum 0.11010558069381599
aromaticity 0.06636500754147813
flexibility sum 652.5522261904765
flexibility avg 0.9977862785787103
gravy -0.10648567119155411
GCF_000317265#WP_235083149.1|HK|Hybrid
proline count 39
arginine count 27
lysine count 33
r/k ratio 0.8181818181818182
final aliphatic percent sum 0.31900826446280994
final acidic percentage sum 0.12231404958677686
aroma

flexibility sum 2005.7312380952392
flexibility avg 0.998870138493645
gravy -0.09667823500247878
GCF_000317285#WP_016878254.1|HK|Hybrid
proline count 43
arginine count 25
lysine count 44
r/k ratio 0.5681818181818182
final aliphatic percent sum 0.3546423135464231
final acidic percentage sum 0.1293759512937595
aromaticity 0.0624048706240487
flexibility sum 647.9666904761905
flexibility avg 0.9999485964138742
gravy -0.06194824961948272
GCF_000317285#WP_016878350.1|HK|Classic
proline count 21
arginine count 22
lysine count 32
r/k ratio 0.6875
final aliphatic percent sum 0.3253275109170306
final acidic percentage sum 0.11353711790393013
aromaticity 0.08733624454148473
flexibility sum 448.4078214285718
flexibility avg 0.9986811167674206
gravy -0.11441048034934519
GCF_000317285#WP_016878352.1|RR|OmpR
proline count 11
arginine count 16
lysine count 11
r/k ratio 1.4545454545454546
final aliphatic percent sum 0.3652173913043478
final acidic percentage sum 0.1391304347826087
aromaticity 0.07826086

flexibility sum 604.7976428571428
flexibility avg 1.0063188733063941
gravy -0.3132786885245902
GCF_000317435#WP_015200008.1|HK|Hybrid
proline count 40
arginine count 27
lysine count 37
r/k ratio 0.7297297297297297
final aliphatic percent sum 0.34285714285714286
final acidic percentage sum 0.13174603174603175
aromaticity 0.05873015873015873
flexibility sum 621.5741309523813
flexibility avg 1.0009245264933677
gravy -0.15428571428571486
GCF_000317435#WP_015200009.1|RR|CheY
proline count 9
arginine count 7
lysine count 12
r/k ratio 0.5833333333333334
final aliphatic percent sum 0.33783783783783783
final acidic percentage sum 0.14864864864864866
aromaticity 0.07432432432432433
flexibility sum 139.03176190476182
flexibility avg 1.0002285029119555
gravy -0.15337837837837826
GCF_000317435#WP_015200086.1|HK|Classic
proline count 36
arginine count 29
lysine count 25
r/k ratio 1.16
final aliphatic percent sum 0.3182579564489112
final acidic percentage sum 0.1306532663316583
aromaticity 0.06197654

proline count 19
arginine count 24
lysine count 21
r/k ratio 1.1428571428571428
final aliphatic percent sum 0.32790697674418606
final acidic percentage sum 0.11627906976744187
aromaticity 0.06046511627906977
flexibility sum 421.12890476190495
flexibility avg 1.0003061870829095
gravy -0.28511627906976794
GCF_000317475#WP_015176631.1|HK|Classic
proline count 67
arginine count 64
lysine count 50
r/k ratio 1.28
final aliphatic percent sum 0.28313253012048195
final acidic percentage sum 0.14859437751004018
aromaticity 0.07630522088353414
flexibility sum 990.7168571428571
flexibility avg 1.003765812708062
gravy -0.42630522088353273
GCF_000317475#WP_015176638.1|HK|Hybrid
proline count 60
arginine count 64
lysine count 40
r/k ratio 1.6
final aliphatic percent sum 0.33545647558386416
final acidic percentage sum 0.1464968152866242
aromaticity 0.07749469214437368
flexibility sum 932.7432023809524
flexibility avg 0.9997247613943756
gravy -0.18014861995753667
GCF_000317475#WP_015176652.1|HK|Classic

flexibility sum 235.52516666666682
flexibility avg 1.0022347517730503
gravy -0.042622950819672156
GCF_000317475#WP_051041631.1|HK|Classic
proline count 29
arginine count 32
lysine count 14
r/k ratio 2.2857142857142856
final aliphatic percent sum 0.3479212253829322
final acidic percentage sum 0.09846827133479213
aromaticity 0.0787746170678337
flexibility sum 444.46664285714303
flexibility avg 0.9921130420918371
gravy 0.007221006564551455
GCF_000317475#WP_051041633.1|HK|Hybrid
proline count 40
arginine count 38
lysine count 56
r/k ratio 0.6785714285714286
final aliphatic percent sum 0.327433628318584
final acidic percentage sum 0.0986093552465234
aromaticity 0.06321112515802782
flexibility sum 784.0944166666658
flexibility avg 1.0026782821824372
gravy -0.18950695322376776
GCF_000317475#WP_051041637.1|RR|CheY
proline count 7
arginine count 3
lysine count 4
r/k ratio 0.75
final aliphatic percent sum 0.3571428571428571
final acidic percentage sum 0.07142857142857142
aromaticity 0.0079365079

flexibility sum 646.5375357142855
flexibility avg 1.0023837763012178
gravy -0.281498470948012
GCF_000317495#WP_015205380.1|HK|Classic
proline count 28
arginine count 18
lysine count 32
r/k ratio 0.5625
final aliphatic percent sum 0.32695984703632885
final acidic percentage sum 0.11281070745697896
aromaticity 0.0994263862332696
flexibility sum 511.3119404761907
flexibility avg 0.9947703122104878
gravy -0.02600382409177851
GCF_000317495#WP_015205422.1|HK|Classic
proline count 24
arginine count 28
lysine count 9
r/k ratio 3.111111111111111
final aliphatic percent sum 0.3517241379310345
final acidic percentage sum 0.11954022988505747
aromaticity 0.07126436781609195
flexibility sum 423.3797857142856
flexibility avg 0.9938492622401071
gravy 0.0022988505747126684
GCF_000317495#WP_015205448.1|RR|OmpR
proline count 17
arginine count 19
lysine count 9
r/k ratio 2.111111111111111
final aliphatic percent sum 0.3244444444444444
final acidic percentage sum 0.14222222222222222
aromaticity 0.08
flexib

flexibility sum 1130.6774880952385
flexibility avg 1.0005995469869366
gravy -0.19771729587357248
GCF_000317515#WP_015183999.1|RR|CheY
proline count 8
arginine count 6
lysine count 8
r/k ratio 0.75
final aliphatic percent sum 0.34400000000000003
final acidic percentage sum 0.152
aromaticity 0.024
flexibility sum 116.26370238095234
flexibility avg 1.00227329638752
gravy -0.16079999999999986
GCF_000317515#WP_015184000.1|RR|CyC-C
proline count 22
arginine count 18
lysine count 19
r/k ratio 0.9473684210526315
final aliphatic percent sum 0.34277620396600567
final acidic percentage sum 0.1274787535410765
aromaticity 0.0623229461756374
flexibility sum 343.0702619047621
flexibility avg 0.9972972729789596
gravy -0.049008498583569406
GCF_000317515#WP_015184072.1|HK|Hybrid
proline count 70
arginine count 75
lysine count 70
r/k ratio 1.0714285714285714
final aliphatic percent sum 0.32621747831887926
final acidic percentage sum 0.12274849899933289
aromaticity 0.08338892595063375
flexibility sum 1487

flexibility sum 458.8690952380954
flexibility avg 0.9975415113871638
gravy -0.09488272921108745
GCF_000317535#WP_015207660.1|HK|Hybrid
proline count 103
arginine count 82
lysine count 127
r/k ratio 0.6456692913385826
final aliphatic percent sum 0.3266432513798294
final acidic percentage sum 0.12293025589563472
aromaticity 0.08429503261414953
flexibility sum 1983.2844523809558
flexibility avg 0.9996393409178205
gravy -0.18670346211740968
GCF_000317535#WP_015207661.1|HK|Hybrid
proline count 27
arginine count 23
lysine count 25
r/k ratio 0.92
final aliphatic percent sum 0.33399209486166004
final acidic percentage sum 0.1442687747035573
aromaticity 0.06521739130434782
flexibility sum 497.1397857142859
flexibility avg 1.0002812589824666
gravy -0.19011857707509916
GCF_000317535#WP_015207721.1|HK|Classic
proline count 19
arginine count 24
lysine count 18
r/k ratio 1.3333333333333333
final aliphatic percent sum 0.36455696202531646
final acidic percentage sum 0.13924050632911392
aromaticity 0.0

flexibility sum 513.5894047619041
flexibility avg 0.9972609801202021
gravy -0.13148854961832107
GCF_000317555#WP_015188934.1|HK|Hybrid
proline count 52
arginine count 65
lysine count 39
r/k ratio 1.6666666666666667
final aliphatic percent sum 0.3540925266903915
final acidic percentage sum 0.12366548042704625
aromaticity 0.06316725978647687
flexibility sum 1111.5599761904775
flexibility avg 0.9969147768524462
gravy -0.12108540925266921
GCF_000317555#WP_015189071.1|HK|Hybrid
proline count 42
arginine count 30
lysine count 33
r/k ratio 0.9090909090909091
final aliphatic percent sum 0.35358255451713394
final acidic percentage sum 0.1277258566978193
aromaticity 0.05607476635514018
flexibility sum 632.4469642857142
flexibility avg 0.9991263258858044
gravy -0.07866043613707197
GCF_000317555#WP_015189072.1|RR|CheY
proline count 10
arginine count 14
lysine count 7
r/k ratio 2.0
final aliphatic percent sum 0.3076923076923077
final acidic percentage sum 0.17307692307692307
aromaticity 0.076923076

flexibility sum 438.7962500000005
flexibility avg 0.9972642045454557
gravy -0.08930957683741639
GCF_000317575#WP_015212136.1|RR|OmpR
proline count 12
arginine count 15
lysine count 12
r/k ratio 1.25
final aliphatic percent sum 0.3235294117647059
final acidic percentage sum 0.12184873949579832
aromaticity 0.08403361344537816
flexibility sum 228.2319404761904
flexibility avg 0.9966460282803075
gravy -0.16638655462184876
GCF_000317575#WP_041619119.1|RR|OmpR
proline count 18
arginine count 21
lysine count 11
r/k ratio 1.9090909090909092
final aliphatic percent sum 0.32644628099173556
final acidic percentage sum 0.18181818181818182
aromaticity 0.049586776859504134
flexibility sum 233.39242857142858
flexibility avg 1.0016842427958308
gravy -0.2396694214876034
GCF_000317575#WP_041619145.1|RR|OmpR
proline count 11
arginine count 18
lysine count 19
r/k ratio 0.9473684210526315
final aliphatic percent sum 0.3375
final acidic percentage sum 0.15416666666666667
aromaticity 0.0875
flexibility sum 2

flexibility sum 632.2770476190478
flexibility avg 0.998857895132777
gravy -0.17647975077881672
GCF_000317695#WP_015214952.1|HK|Hybrid
proline count 69
arginine count 59
lysine count 79
r/k ratio 0.7468354430379747
final aliphatic percent sum 0.32525951557093424
final acidic percentage sum 0.11280276816608996
aromaticity 0.08650519031141868
flexibility sum 1434.179976190478
flexibility avg 0.9987325739488009
gravy -0.20650519031141823
GCF_000317695#WP_015215144.1|HK|Classic
proline count 28
arginine count 24
lysine count 32
r/k ratio 0.75
final aliphatic percent sum 0.30405405405405406
final acidic percentage sum 0.12162162162162161
aromaticity 0.07601351351351351
flexibility sum 582.2359761904765
flexibility avg 0.9986894960385533
gravy -0.2170608108108112
GCF_000317695#WP_015215166.1|HK|Hybrid
proline count 52
arginine count 49
lysine count 43
r/k ratio 1.1395348837209303
final aliphatic percent sum 0.3478747203579418
final acidic percentage sum 0.12527964205816555
aromaticity 0.08277

flexibility sum 133.46595238095233
flexibility avg 0.9960145700071069
gravy -0.1692307692307692
GCF_000332035#WP_006529062.1|HK|Classic
proline count 34
arginine count 36
lysine count 37
r/k ratio 0.972972972972973
final aliphatic percent sum 0.33333333333333337
final acidic percentage sum 0.12327044025157233
aromaticity 0.08805031446540881
flexibility sum 783.3154285714285
flexibility avg 0.9965845147219192
gravy -0.07283018867924558
GCF_000332035#WP_006529110.1|RR|OmpR
proline count 27
arginine count 25
lysine count 22
r/k ratio 1.1363636363636365
final aliphatic percent sum 0.3540229885057471
final acidic percentage sum 0.13103448275862067
aromaticity 0.06206896551724138
flexibility sum 426.44651190476213
flexibility avg 1.0010481500111788
gravy -0.14896551724137974
GCF_000332035#WP_006529236.1|RR|CheY
proline count 7
arginine count 6
lysine count 10
r/k ratio 0.6
final aliphatic percent sum 0.3684210526315789
final acidic percentage sum 0.15789473684210525
aromaticity 0.04511278195

KeyError: 'X'