In [1]:
import pandas as pd
import numpy as np
from src import preprocess

In [2]:
# Load the donor-site variant table. Later cells read its 'ref.sequence',
# 'alt.sequence' and 'class' columns.
donor_csv_path = './PreprocessData/variant-donor.csv'
donor = pd.read_csv(donor_csv_path)

In [3]:
# Convert every reference / alternate sequence pair into its matrix form using
# the project's `transform_seq_to_Vmatrix` helper. The two result lists stay
# row-aligned with `donor`.
ref_seq_matrix, alt_seq_matrix = [], []
for ref_seq, alt_seq in zip(donor['ref.sequence'], donor['alt.sequence']):
    ref_seq_matrix.append(preprocess.transform_seq_to_Vmatrix(ref_seq))
    alt_seq_matrix.append(preprocess.transform_seq_to_Vmatrix(alt_seq))

In [4]:
def _append_unit_axis(matrices):
    """Stack `matrices` into an array and append a trailing axis of length 1.

    The first three dimensions of the stacked array are kept as they are, so
    the result has shape (d0, d1, d2, 1).
    NOTE(review): the extra axis is presumably the channel dimension the models
    expect -- confirm against the model input shape.
    """
    stacked = np.asarray(matrices)
    dim0, dim1, dim2 = stacked.shape[0], stacked.shape[1], stacked.shape[2]
    return stacked.reshape(dim0, dim1, dim2, 1)


ref_seq_matrix = _append_unit_axis(ref_seq_matrix)
alt_seq_matrix = _append_unit_axis(alt_seq_matrix)

In [5]:
# Constant label lists, one entry per sequence: 1 for every reference
# sequence, 0 for every alternate sequence.
ref_class = [1] * len(ref_seq_matrix)
alt_class = [0] * len(alt_seq_matrix)

## Loading Models

In [6]:
from keras.models import load_model
from src import calculate

# Load the three saved donor models from disk, in order.
donor_model_paths = (
    './Models/donor_Vmodel_1-1.hf5',
    './Models/donor_Vmodel_2-1.hf5',
    './Models/donor_Vmodel_3-1.hf5',
)
donor_Vmodel_1, donor_Vmodel_2, donor_Vmodel_3 = map(load_model, donor_model_paths)

Using TensorFlow backend.


In [7]:
# Score the reference and alternate sequences with model 1.
# One forward pass per input gives the per-class scores; the predicted class is
# the arg-max over the last axis. This replaces `predict_proba` /
# `predict_classes`, which each ran the network again and were removed from
# `Sequential` in later Keras/TensorFlow releases. The arg-max form matches the
# old `predict_classes` because the output has more than one column (later
# cells read column 1 of these arrays).
y1_var_ref_predict = donor_Vmodel_1.predict(ref_seq_matrix)
y1_var_ref_predict_classes = np.argmax(y1_var_ref_predict, axis=-1)

y1_var_alt_predict = donor_Vmodel_1.predict(alt_seq_matrix)
y1_var_alt_predict_classes = np.argmax(y1_var_alt_predict, axis=-1)

In [8]:
# For every variant labelled class 1, print its model-1 scores and tally how
# often the alternate sequence scores below 0.5.
# Column 1 of each prediction row is used as the score (presumably the
# probability of the positive class -- confirm against the model's output layer).
alt_less = 0           # alt < 0.5 while ref >= 0.5
alt_less_than_0_5 = 0  # alt < 0.5, whatever the ref score
for i in range(len(y1_var_ref_predict)):
    label = donor['class'][i]
    if label != 1:
        continue
    ref_score = y1_var_ref_predict[i][1]
    alt_score = y1_var_alt_predict[i][1]
    if alt_score < 0.5:
        alt_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less += 1
    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(i, label, ref_score, alt_score, mutation_score))

Sequence[110] is 1 and Pathogenic Score: log10(0.9841760993003845/0.9887286424636841) = -0.0020043090917170048
Sequence[114] is 1 and Pathogenic Score: log10(0.9229908585548401/0.03295813873410225) = 1.4472347497940063
Sequence[129] is 1 and Pathogenic Score: log10(0.914045512676239/0.6001048684120178) = 0.18274065852165222
Sequence[150] is 1 and Pathogenic Score: log10(0.9375131726264954/0.12083745747804642) = 0.889775812625885
Sequence[151] is 1 and Pathogenic Score: log10(0.9234822392463684/0.3353845477104187) = 0.4398854970932007
Sequence[157] is 1 and Pathogenic Score: log10(0.9809290766716003/0.897098183631897) = 0.03879763185977936
Sequence[163] is 1 and Pathogenic Score: log10(0.9578472971916199/0.14461368322372437) = 0.8210868835449219
Sequence[166] is 1 and Pathogenic Score: log10(0.8055871725082397/0.046815987676382065) = 1.2357183694839478
Sequence[175] is 1 and Pathogenic Score: log10(0.4644324779510498/0.45001018047332764) = 0.013700242154300213
Sequence[180] is 1 and Pat

Sequence[3043] is 1 and Pathogenic Score: log10(0.41651320457458496/0.051489751785993576) = 0.9079079627990723
Sequence[3075] is 1 and Pathogenic Score: log10(0.9476918578147888/0.711859405040741) = 0.1242729052901268
Sequence[3076] is 1 and Pathogenic Score: log10(0.9476918578147888/0.6496093273162842) = 0.16401490569114685
Sequence[3080] is 1 and Pathogenic Score: log10(0.9866111278533936/0.8838309049606323) = 0.04777683690190315
Sequence[3083] is 1 and Pathogenic Score: log10(0.8726716041564941/0.3285672068595886) = 0.4242266118526459
Sequence[3085] is 1 and Pathogenic Score: log10(0.979756772518158/0.8696993589401245) = 0.05174912512302399
Sequence[3086] is 1 and Pathogenic Score: log10(0.979756772518158/0.8870836496353149) = 0.04315369576215744
Sequence[3138] is 1 and Pathogenic Score: log10(0.9178627133369446/0.14377331733703613) = 0.8050994277000427
Sequence[3153] is 1 and Pathogenic Score: log10(0.8715282082557678/0.2976866662502289) = 0.4665220379829407
Sequence[3154] is 1 and

Sequence[6451] is 1 and Pathogenic Score: log10(0.803449273109436/0.0767560824751854) = 1.0198456048965454
Sequence[6473] is 1 and Pathogenic Score: log10(0.7923893928527832/0.0433526337146759) = 1.2619231939315796
Sequence[6491] is 1 and Pathogenic Score: log10(0.8271275758743286/0.4730672538280487) = 0.2426496148109436
Sequence[6504] is 1 and Pathogenic Score: log10(0.8735030293464661/0.40054211020469666) = 0.3386162221431732
Sequence[6506] is 1 and Pathogenic Score: log10(0.04546213895082474/0.04912582039833069) = -0.03365994989871979
Sequence[6526] is 1 and Pathogenic Score: log10(0.8915791511535645/0.13600073754787445) = 0.8166186213493347
Sequence[6535] is 1 and Pathogenic Score: log10(0.9603955149650574/0.6750349402427673) = 0.1531238555908203
Sequence[6536] is 1 and Pathogenic Score: log10(0.9491482973098755/0.15102192759513855) = 0.7982940673828125
Sequence[6601] is 1 and Pathogenic Score: log10(0.8661589026451111/0.622719943523407) = 0.14330480992794037
Sequence[6615] is 1 an

Sequence[10122] is 1 and Pathogenic Score: log10(0.9432854056358337/0.0957174003124237) = 0.9936522245407104
Sequence[10127] is 1 and Pathogenic Score: log10(0.6554417014122009/0.06809831410646439) = 0.9833977222442627
Sequence[10128] is 1 and Pathogenic Score: log10(0.6554417014122009/0.5790008306503296) = 0.05385487526655197
Sequence[10129] is 1 and Pathogenic Score: log10(0.8117431402206421/0.03131250664591789) = 1.413700819015503
Sequence[10130] is 1 and Pathogenic Score: log10(0.8117431402206421/0.622542679309845) = 0.11524948477745056
Sequence[10141] is 1 and Pathogenic Score: log10(0.9900514483451843/0.8657106161117554) = 0.058285005390644073
Sequence[10147] is 1 and Pathogenic Score: log10(0.9883421063423157/0.8807342648506165) = 0.050062425434589386
Sequence[10177] is 1 and Pathogenic Score: log10(0.7324206829071045/0.03247237578034401) = 1.3532465696334839
Sequence[10249] is 1 and Pathogenic Score: log10(0.9471356272697449/0.7454986572265625) = 0.10396531969308853
Sequence[10

In [9]:
# Score the reference and alternate sequences with model 2.
# One forward pass per input gives the per-class scores; the predicted class is
# the arg-max over the last axis. This replaces `predict_proba` /
# `predict_classes`, which each ran the network again and were removed from
# `Sequential` in later Keras/TensorFlow releases. The arg-max form matches the
# old `predict_classes` because the output has more than one column (later
# cells read column 1 of these arrays).
y2_var_ref_predict = donor_Vmodel_2.predict(ref_seq_matrix)
y2_var_ref_predict_classes = np.argmax(y2_var_ref_predict, axis=-1)

y2_var_alt_predict = donor_Vmodel_2.predict(alt_seq_matrix)
y2_var_alt_predict_classes = np.argmax(y2_var_alt_predict, axis=-1)

In [10]:
# For every variant labelled class 1, print its model-2 scores and tally how
# often the alternate sequence scores below 0.5.
# Column 1 of each prediction row is used as the score (presumably the
# probability of the positive class -- confirm against the model's output layer).
alt_less_2 = 0           # alt < 0.5 while ref >= 0.5
alt_2_less_than_0_5 = 0  # alt < 0.5, whatever the ref score
for i in range(len(y2_var_ref_predict)):
    label = donor['class'][i]
    if label != 1:
        continue
    ref_score = y2_var_ref_predict[i][1]
    alt_score = y2_var_alt_predict[i][1]
    if alt_score < 0.5:
        alt_2_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less_2 += 1
    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(i, label, ref_score, alt_score, mutation_score))

Sequence[110] is 1 and Pathogenic Score: log10(0.9894173741340637/0.9900195598602295) = -0.00026424668612889946
Sequence[114] is 1 and Pathogenic Score: log10(0.9667335748672485/0.2571851313114166) = 0.5750609636306763
Sequence[129] is 1 and Pathogenic Score: log10(0.9623429775238037/0.7487733364105225) = 0.10897950083017349
Sequence[150] is 1 and Pathogenic Score: log10(0.9356465935707092/0.3686048090457916) = 0.40455082058906555
Sequence[151] is 1 and Pathogenic Score: log10(0.9597053527832031/0.5827322602272034) = 0.21666884422302246
Sequence[157] is 1 and Pathogenic Score: log10(0.9900738000869751/0.9771636724472046) = 0.005700259003788233
Sequence[163] is 1 and Pathogenic Score: log10(0.9719240069389343/0.4314158856868744) = 0.3527362048625946
Sequence[166] is 1 and Pathogenic Score: log10(0.9182328581809998/0.06746374815702438) = 1.1338824033737183
Sequence[175] is 1 and Pathogenic Score: log10(0.7768281102180481/0.7851327657699585) = -0.004618165548890829
Sequence[180] is 1 and 

Sequence[5190] is 1 and Pathogenic Score: log10(0.9767867922782898/0.026793766766786575) = 1.5617660284042358
Sequence[5191] is 1 and Pathogenic Score: log10(0.9758603572845459/0.03729216381907463) = 1.4177701473236084
Sequence[5192] is 1 and Pathogenic Score: log10(0.9758603572845459/0.06588074564933777) = 1.1706291437149048
Sequence[5196] is 1 and Pathogenic Score: log10(0.9873565435409546/0.9853640198707581) = 0.0008773206500336528
Sequence[5199] is 1 and Pathogenic Score: log10(0.9577258825302124/0.24627436697483063) = 0.589821994304657
Sequence[5203] is 1 and Pathogenic Score: log10(0.989597499370575/0.9214205145835876) = 0.031000714749097824
Sequence[5204] is 1 and Pathogenic Score: log10(0.9862180352210999/0.4664122760295868) = 0.32520294189453125
Sequence[5205] is 1 and Pathogenic Score: log10(0.5668303370475769/0.021534373983740807) = 1.4203208684921265
Sequence[5206] is 1 and Pathogenic Score: log10(0.5668303370475769/0.5873627066612244) = -0.015453278087079525
Sequence[5213]

Sequence[11422] is 1 and Pathogenic Score: log10(0.8834363222122192/0.08844194561243057) = 0.9995169639587402
Sequence[11425] is 1 and Pathogenic Score: log10(0.9797748327255249/0.9478452801704407) = 0.014388851821422577
Sequence[11432] is 1 and Pathogenic Score: log10(0.973341703414917/0.9045864939689636) = 0.03181524574756622
Sequence[11441] is 1 and Pathogenic Score: log10(0.9792891144752502/0.9527480006217957) = 0.011932876892387867
Sequence[11485] is 1 and Pathogenic Score: log10(0.96290522813797/0.18485912680625916) = 0.7167426347732544
Sequence[11492] is 1 and Pathogenic Score: log10(0.866176962852478/0.9199693202972412) = -0.026166722178459167
Sequence[11526] is 1 and Pathogenic Score: log10(0.9825851917266846/0.7242574095726013) = 0.1324772834777832
Sequence[11535] is 1 and Pathogenic Score: log10(0.26091858744621277/0.023577626794576645) = 1.0440049171447754
Sequence[11558] is 1 and Pathogenic Score: log10(0.9555802345275879/0.4073883295059204) = 0.3702585697174072
Sequence[1

In [11]:
# Score the reference and alternate sequences with model 3.
# One forward pass per input gives the per-class scores; the predicted class is
# the arg-max over the last axis. This replaces `predict_proba` /
# `predict_classes`, which each ran the network again and were removed from
# `Sequential` in later Keras/TensorFlow releases. The arg-max form matches the
# old `predict_classes` because the output has more than one column (later
# cells read column 1 of these arrays).
y3_var_ref_predict = donor_Vmodel_3.predict(ref_seq_matrix)
y3_var_ref_predict_classes = np.argmax(y3_var_ref_predict, axis=-1)

y3_var_alt_predict = donor_Vmodel_3.predict(alt_seq_matrix)
y3_var_alt_predict_classes = np.argmax(y3_var_alt_predict, axis=-1)

In [12]:
# For every variant labelled class 1, print its model-3 scores and tally how
# often the alternate sequence scores below 0.5.
# Column 1 of each prediction row is used as the score (presumably the
# probability of the positive class -- confirm against the model's output layer).
alt_less_3 = 0           # alt < 0.5 while ref >= 0.5
alt_3_less_than_0_5 = 0  # alt < 0.5, whatever the ref score
for i in range(len(y3_var_ref_predict)):
    label = donor['class'][i]
    if label != 1:
        continue
    ref_score = y3_var_ref_predict[i][1]
    alt_score = y3_var_alt_predict[i][1]
    if alt_score < 0.5:
        alt_3_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less_3 += 1
    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(i, label, ref_score, alt_score, mutation_score))

Sequence[110] is 1 and Pathogenic Score: log10(0.9794498682022095/0.98895663022995) = -0.004195033106952906
Sequence[114] is 1 and Pathogenic Score: log10(0.8774374127388/0.23214510083198547) = 0.5774565935134888
Sequence[129] is 1 and Pathogenic Score: log10(0.9888837337493896/0.78709876537323) = 0.0991160124540329
Sequence[150] is 1 and Pathogenic Score: log10(0.8780531287193298/0.23277826607227325) = 0.5765783786773682
Sequence[151] is 1 and Pathogenic Score: log10(0.9870526194572449/0.7626056671142578) = 0.11204025894403458
Sequence[157] is 1 and Pathogenic Score: log10(0.9781640768051147/0.6541876792907715) = 0.17470933496952057
Sequence[163] is 1 and Pathogenic Score: log10(0.9767444133758545/0.6389629244804382) = 0.18430526554584503
Sequence[166] is 1 and Pathogenic Score: log10(0.7925202250480652/0.13890215754508972) = 0.7563014030456543
Sequence[175] is 1 and Pathogenic Score: log10(0.6615619659423828/0.6178200244903564) = 0.029708534479141235
Sequence[180] is 1 and Pathogenic

Sequence[5979] is 1 and Pathogenic Score: log10(0.9796528816223145/0.5935236215591431) = 0.2176342010498047
Sequence[5982] is 1 and Pathogenic Score: log10(0.8307207226753235/0.33867499232292175) = 0.3896718919277191
Sequence[5987] is 1 and Pathogenic Score: log10(0.2679172158241272/0.015219549648463726) = 1.2455987930297852
Sequence[5989] is 1 and Pathogenic Score: log10(0.569612443447113/0.12135311961174011) = 0.6715285181999207
Sequence[6014] is 1 and Pathogenic Score: log10(0.9538909792900085/0.4662807881832123) = 0.31085121631622314
Sequence[6074] is 1 and Pathogenic Score: log10(0.8928216695785522/0.25981736183166504) = 0.5360965728759766
Sequence[6082] is 1 and Pathogenic Score: log10(0.9714056849479675/0.5892615914344788) = 0.21709248423576355
Sequence[6090] is 1 and Pathogenic Score: log10(0.22297227382659912/0.011947186663746834) = 1.2709852457046509
Sequence[6092] is 1 and Pathogenic Score: log10(0.6981996893882751/0.20229195058345795) = 0.5380010604858398
Sequence[6109] is 

Sequence[11097] is 1 and Pathogenic Score: log10(0.9999969005584717/0.9999275207519531) = 3.0130222512525506e-05
Sequence[11103] is 1 and Pathogenic Score: log10(0.936945378780365/0.38504543900489807) = 0.38620230555534363
Sequence[11107] is 1 and Pathogenic Score: log10(0.7774824500083923/0.32435914874076843) = 0.37966445088386536
Sequence[11116] is 1 and Pathogenic Score: log10(0.8443652391433716/0.18607202172279358) = 0.6568492650985718
Sequence[11123] is 1 and Pathogenic Score: log10(0.9849949479103088/0.7348994016647339) = 0.12720610201358795
Sequence[11125] is 1 and Pathogenic Score: log10(0.9058905243873596/0.2857358753681183) = 0.5011109709739685
Sequence[11126] is 1 and Pathogenic Score: log10(0.8187307715415955/0.15989096462726593) = 0.7093172073364258
Sequence[11127] is 1 and Pathogenic Score: log10(0.9980815649032593/0.9563746452331543) = 0.01853797398507595
Sequence[11135] is 1 and Pathogenic Score: log10(0.4760861396789551/0.4518697261810303) = 0.022672303020954132
Sequen

In [13]:
# Per-column count of non-null entries for each value of `class`.
rows_by_class = donor.groupby('class')
rows_by_class.count()

Unnamed: 0_level_0,ref.sequence,alt.sequence
class,Unnamed: 1_level_1,Unnamed: 2_level_1
0,11200,11200
1,801,801


In [14]:
# Fraction of class-1 variants whose alternate sequence model 1 scored below 0.5.
# The denominator is counted from `donor` instead of being hard-coded, so it
# stays correct if the input table changes.
alt_less_than_0_5 / int((donor['class'] == 1).sum())

0.5955056179775281

In [15]:
# Fraction of class-1 variants whose alternate sequence model 2 scored below 0.5.
# The denominator is counted from `donor` instead of being hard-coded, so it
# stays correct if the input table changes.
alt_2_less_than_0_5 / int((donor['class'] == 1).sum())

0.5081148564294632

In [16]:
# Fraction of class-1 variants whose alternate sequence model 3 scored below 0.5.
# The denominator is counted from `donor` instead of being hard-coded, so it
# stays correct if the input table changes.
alt_3_less_than_0_5 / int((donor['class'] == 1).sum())

0.5318352059925093