In [1]:
import pandas as pd
import numpy as np
from src import preprocess

In [2]:
# Load the acceptor variant table. Later cells read its 'ref.sequence',
# 'alt.sequence' and 'class' columns and index rows by position 0..len-1.
accp = pd.read_csv('./PreprocessData/variant-acceptor.csv')

In [3]:
# Encode each reference / alternate sequence with the project's
# transform_seq_to_Vmatrix helper, preserving the row order of `accp`.
ref_column = accp['ref.sequence']
alt_column = accp['alt.sequence']
encode_sequence = preprocess.transform_seq_to_Vmatrix

ref_seq_matrix, alt_seq_matrix = [], []
for i in range(len(accp)):
    ref_seq_matrix.append(encode_sequence(ref_column[i]))
    alt_seq_matrix.append(encode_sequence(alt_column[i]))

In [4]:
# Stack the per-sequence matrices into arrays, then append a trailing
# length-1 axis so each array has shape (d0, d1, d2, 1).
# NOTE(review): presumably this is the channel axis the models expect - confirm.
ref_seq_matrix = np.asarray(ref_seq_matrix)
alt_seq_matrix = np.asarray(alt_seq_matrix)

ref_shape = ref_seq_matrix.shape
ref_seq_matrix = ref_seq_matrix.reshape((ref_shape[0], ref_shape[1], ref_shape[2], 1))

alt_shape = alt_seq_matrix.shape
alt_seq_matrix = alt_seq_matrix.reshape((alt_shape[0], alt_shape[1], alt_shape[2], 1))

In [5]:
# One label per encoded sequence: 1 for every reference, 0 for every alternate.
ref_class = [1] * len(ref_seq_matrix)
alt_class = [0] * len(alt_seq_matrix)

## Loading Models

In [6]:
from keras.models import load_model
from src import calculate

# Load the three saved acceptor models from ./Models, in order 1, 2, 3.
acceptor_Vmodel_1, acceptor_Vmodel_2, acceptor_Vmodel_3 = [
    load_model(model_path)
    for model_path in (
        './Models/accp_Vmodel_1-1.hf5',
        './Models/accp_Vmodel_2-1.hf5',
        './Models/accp_Vmodel_3-1.hf5',
    )
]

Using TensorFlow backend.


In [7]:
# Score the reference and alternate sequences with model 1.
#
# `predict` returns the same probability array that `predict_proba` did, and
# `argmax(axis=-1)` is what `predict_classes` computed for outputs with more
# than one column (column 1 is read below, so there are at least two).
# Both legacy helpers are deprecated and were removed in TensorFlow 2.6;
# deriving the classes from the probabilities also avoids running each
# forward pass twice.
y1_var_ref_predict = acceptor_Vmodel_1.predict(ref_seq_matrix)
y1_var_ref_predict_classes = y1_var_ref_predict.argmax(axis=-1)

y1_var_alt_predict = acceptor_Vmodel_1.predict(alt_seq_matrix)
y1_var_alt_predict_classes = y1_var_alt_predict.argmax(axis=-1)

In [8]:
# For every variant labelled class 1, compare model 1's column-1 score on the
# reference and alternate sequences and print the resulting mutation score.
alt_less = 0            # alternate < 0.5 while reference >= 0.5
alt_less_than_0_5 = 0   # alternate < 0.5, whatever the reference score
for i in range(len(y1_var_ref_predict)):
    if accp['class'][i] != 1:
        continue

    ref_score = y1_var_ref_predict[i][1]
    alt_score = y1_var_alt_predict[i][1]

    if alt_score < 0.5:
        alt_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less += 1

    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(
        i, accp['class'][i], ref_score, alt_score, mutation_score))

Sequence[113] is 1 and Pathogenic Score: log10(0.9726365208625793/0.4789036214351654) = 0.3077024519443512
Sequence[171] is 1 and Pathogenic Score: log10(0.8837357759475708/0.8041802644729614) = 0.040969040244817734
Sequence[187] is 1 and Pathogenic Score: log10(0.8640497326850891/0.8012401461601257) = 0.03277604654431343
Sequence[217] is 1 and Pathogenic Score: log10(0.7979162931442261/0.2358863651752472) = 0.5292544960975647
Sequence[247] is 1 and Pathogenic Score: log10(0.9927720427513123/0.9119475483894348) = 0.03687966614961624
Sequence[252] is 1 and Pathogenic Score: log10(0.9364356398582458/0.08065523952245712) = 1.064845323562622
Sequence[254] is 1 and Pathogenic Score: log10(0.9364356398582458/0.1757759153842926) = 0.7265185713768005
Sequence[501] is 1 and Pathogenic Score: log10(0.9773878455162048/0.964396059513092) = 0.005811482667922974
Sequence[525] is 1 and Pathogenic Score: log10(0.9298864603042603/0.6863031387329102) = 0.1319139301776886
Sequence[539] is 1 and Pathogeni

Sequence[7365] is 1 and Pathogenic Score: log10(0.7014657258987427/0.08011595904827118) = 0.9422874450683594
Sequence[7452] is 1 and Pathogenic Score: log10(0.8723604679107666/0.21308143436908722) = 0.6121503710746765
Sequence[7456] is 1 and Pathogenic Score: log10(0.9703676700592041/0.9314913749694824) = 0.0177574772387743
Sequence[7462] is 1 and Pathogenic Score: log10(0.927844226360321/0.6659914255142212) = 0.1440064162015915
Sequence[7518] is 1 and Pathogenic Score: log10(0.9700524210929871/0.06870178878307343) = 1.1498271226882935
Sequence[7601] is 1 and Pathogenic Score: log10(0.7862632274627686/0.13645805418491364) = 0.7605687975883484
Sequence[7905] is 1 and Pathogenic Score: log10(0.9770305156707764/0.9375842213630676) = 0.017897849902510643
Sequence[7908] is 1 and Pathogenic Score: log10(0.9093672037124634/0.3174482583999634) = 0.4570663273334503
Sequence[7928] is 1 and Pathogenic Score: log10(0.9180649518966675/0.21163031458854675) = 0.6372955441474915
Sequence[7929] is 1 an

In [9]:
# Score the reference and alternate sequences with model 2.
#
# `predict` returns the same probability array that `predict_proba` did, and
# `argmax(axis=-1)` is what `predict_classes` computed for outputs with more
# than one column (column 1 is read below, so there are at least two).
# Both legacy helpers are deprecated and were removed in TensorFlow 2.6;
# deriving the classes from the probabilities also avoids running each
# forward pass twice.
y2_var_ref_predict = acceptor_Vmodel_2.predict(ref_seq_matrix)
y2_var_ref_predict_classes = y2_var_ref_predict.argmax(axis=-1)

y2_var_alt_predict = acceptor_Vmodel_2.predict(alt_seq_matrix)
y2_var_alt_predict_classes = y2_var_alt_predict.argmax(axis=-1)

In [10]:
# For every variant labelled class 1, compare model 2's column-1 score on the
# reference and alternate sequences and print the resulting mutation score.
alt_less_2 = 0            # alternate < 0.5 while reference >= 0.5
alt_2_less_than_0_5 = 0   # alternate < 0.5, whatever the reference score
for i in range(len(y2_var_ref_predict)):
    if accp['class'][i] != 1:
        continue

    ref_score = y2_var_ref_predict[i][1]
    alt_score = y2_var_alt_predict[i][1]

    if alt_score < 0.5:
        alt_2_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less_2 += 1

    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(
        i, accp['class'][i], ref_score, alt_score, mutation_score))

Sequence[113] is 1 and Pathogenic Score: log10(0.9196118116378784/0.6413964033126831) = 0.15647803246974945
Sequence[171] is 1 and Pathogenic Score: log10(0.8265446424484253/0.8351724147796631) = -0.004509839229285717
Sequence[187] is 1 and Pathogenic Score: log10(0.6547959446907043/0.5577994585037231) = 0.06962788850069046
Sequence[217] is 1 and Pathogenic Score: log10(0.7840296626091003/0.35628417134284973) = 0.34253597259521484
Sequence[247] is 1 and Pathogenic Score: log10(0.9726210832595825/0.858012318611145) = 0.054450154304504395
Sequence[252] is 1 and Pathogenic Score: log10(0.8959714770317078/0.34010785818099976) = 0.4206775426864624
Sequence[254] is 1 and Pathogenic Score: log10(0.8959714770317078/0.2292425036430359) = 0.5919990539550781
Sequence[501] is 1 and Pathogenic Score: log10(0.9525958895683289/0.943087100982666) = 0.004356890916824341
Sequence[525] is 1 and Pathogenic Score: log10(0.8986486196517944/0.7222657799720764) = 0.09489285945892334
Sequence[539] is 1 and Pat

Sequence[7015] is 1 and Pathogenic Score: log10(0.9320966601371765/0.22741395235061646) = 0.612643837928772
Sequence[7080] is 1 and Pathogenic Score: log10(0.8394901752471924/0.27726927399635315) = 0.48111385107040405
Sequence[7148] is 1 and Pathogenic Score: log10(0.7889232039451599/0.33447837829589844) = 0.37266668677330017
Sequence[7240] is 1 and Pathogenic Score: log10(0.9075250625610352/0.7616549134254456) = 0.07610037177801132
Sequence[7248] is 1 and Pathogenic Score: log10(0.8830877542495728/0.29720407724380493) = 0.4729490876197815
Sequence[7303] is 1 and Pathogenic Score: log10(0.8891603350639343/0.8558668494224548) = 0.01657385379076004
Sequence[7325] is 1 and Pathogenic Score: log10(0.6721853613853455/0.28998374938964844) = 0.3651154041290283
Sequence[7365] is 1 and Pathogenic Score: log10(0.49675801396369934/0.2340400069952011) = 0.32685476541519165
Sequence[7452] is 1 and Pathogenic Score: log10(0.6381638050079346/0.1079292818903923) = 0.7717928886413574
Sequence[7456] is 

In [11]:
# Score the reference and alternate sequences with model 3.
#
# `predict` returns the same probability array that `predict_proba` did, and
# `argmax(axis=-1)` is what `predict_classes` computed for outputs with more
# than one column (column 1 is read below, so there are at least two).
# Both legacy helpers are deprecated and were removed in TensorFlow 2.6;
# deriving the classes from the probabilities also avoids running each
# forward pass twice.
y3_var_ref_predict = acceptor_Vmodel_3.predict(ref_seq_matrix)
y3_var_ref_predict_classes = y3_var_ref_predict.argmax(axis=-1)

y3_var_alt_predict = acceptor_Vmodel_3.predict(alt_seq_matrix)
y3_var_alt_predict_classes = y3_var_alt_predict.argmax(axis=-1)

In [12]:
# For every variant labelled class 1, compare model 3's column-1 score on the
# reference and alternate sequences and print the resulting mutation score.
alt_less_3 = 0            # alternate < 0.5 while reference >= 0.5
alt_3_less_than_0_5 = 0   # alternate < 0.5, whatever the reference score
for i in range(len(y3_var_ref_predict)):
    if accp['class'][i] != 1:
        continue

    ref_score = y3_var_ref_predict[i][1]
    alt_score = y3_var_alt_predict[i][1]

    if alt_score < 0.5:
        alt_3_less_than_0_5 += 1
        if ref_score >= 0.5:
            alt_less_3 += 1

    mutation_score = calculate.calculate_mutation_score(ref_score, alt_score)
    print("Sequence[{0}] is {1} and Pathogenic Score: log10({2}/{3}) = {4}".format(
        i, accp['class'][i], ref_score, alt_score, mutation_score))

Sequence[113] is 1 and Pathogenic Score: log10(0.9187245965003967/0.9018204212188721) = 0.008065263740718365
Sequence[171] is 1 and Pathogenic Score: log10(0.8799456357955933/0.964343249797821) = -0.0397757962346077
Sequence[187] is 1 and Pathogenic Score: log10(0.6093025803565979/0.6376147866249084) = -0.019725365564227104
Sequence[217] is 1 and Pathogenic Score: log10(0.4773847162723541/0.5292174816131592) = -0.04476567730307579
Sequence[247] is 1 and Pathogenic Score: log10(0.9873933792114258/0.9875260591506958) = -5.8350891777081415e-05
Sequence[252] is 1 and Pathogenic Score: log10(0.9371836185455322/0.8536840081214905) = 0.040527526289224625
Sequence[254] is 1 and Pathogenic Score: log10(0.9371836185455322/0.14430208504199982) = 0.8125520944595337
Sequence[501] is 1 and Pathogenic Score: log10(0.914861261844635/0.88211989402771) = 0.0158276055008173
Sequence[525] is 1 and Pathogenic Score: log10(0.8981754183769226/0.9082892537117004) = -0.004863012582063675
Sequence[539] is 1 and

Sequence[7905] is 1 and Pathogenic Score: log10(0.9114989042282104/0.9090141654014587) = 0.0011855110060423613
Sequence[7908] is 1 and Pathogenic Score: log10(0.8059986233711243/0.04485384747385979) = 1.2545346021652222
Sequence[7928] is 1 and Pathogenic Score: log10(0.7333165407180786/0.37110060453414917) = 0.2957998216152191
Sequence[7929] is 1 and Pathogenic Score: log10(0.8143590688705444/0.0664680153131485) = 1.0882031917572021
Sequence[8072] is 1 and Pathogenic Score: log10(0.9734550714492798/0.7688956260681152) = 0.10244852304458618
Sequence[8240] is 1 and Pathogenic Score: log10(0.9860572814941406/0.4442532956600189) = 0.34627148509025574
Sequence[8260] is 1 and Pathogenic Score: log10(0.8802881240844727/0.9262105226516724) = -0.022084858268499374
Sequence[8325] is 1 and Pathogenic Score: log10(0.9535447955131531/0.8877264857292175) = 0.0310619305819273
Sequence[8333] is 1 and Pathogenic Score: log10(0.9093281626701355/0.753997802734375) = 0.08135055750608444
Sequence[8453] is 

In [13]:
# Count the non-null entries of each remaining column per value of 'class'
# (shows how many variants carry each label).
accp.groupby('class').count()

Unnamed: 0_level_0,ref.sequence,alt.sequence
class,Unnamed: 1_level_1,Unnamed: 2_level_1
0,10944,10944
1,356,356


In [14]:
# Fraction of class-1 variants whose alternate sequence model 1 scores below 0.5.
# The denominator is counted from `accp` rather than hard-coding 356, so the
# ratio stays correct if the input table changes.
alt_less_than_0_5 / int((accp['class'] == 1).sum())

0.5449438202247191

In [15]:
# Fraction of class-1 variants whose alternate sequence model 2 scores below 0.5.
# The denominator is counted from `accp` rather than hard-coding 356, so the
# ratio stays correct if the input table changes.
alt_2_less_than_0_5 / int((accp['class'] == 1).sum())

0.5308988764044944

In [16]:
# Fraction of class-1 variants whose alternate sequence model 3 scores below 0.5.
# The denominator is counted from `accp` rather than hard-coding 356, so the
# ratio stays correct if the input table changes.
alt_3_less_than_0_5 / int((accp['class'] == 1).sum())

0.3707865168539326