# Quick tutorial: how to use a trained deep forest model

## Import the required packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from deepforest import CascadeForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import matthews_corrcoef, confusion_matrix
from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score

## Load the extracted features and their labels

In [2]:
# Read the training and test feature tables from their CSV files (train first, then test).
train_csv, test_csv = "data/train_ATP_feature.csv", "data/test_ATP_feature.csv"
df_train, df_test = pd.read_csv(train_csv), pd.read_csv(test_csv)

In [3]:
# Show the training table (pandas truncates the rendering of large frames) so the
# column layout can be checked before the positional slicing in the following cells.
df_train

Unnamed: 0,Id,Sequence,label,AAC|A,AAC|C,AAC|D,AAC|E,AAC|F,AAC|G,AAC|H,...,PAAC|lambda4,PHYC|IEP,PHYC|Net Charge,PHYC|Hydrophobic Moment,PHYC|Hydrophobicity,PHYC|Transmembrane Propensity,PHYC|Aromacity,PHYC|Alpha Helical Propensity,PHYC|Aliphatic Index,PHYC|Boman Index
0,ParaPep_1406,GNNRPVYIPQPRPPHPRL,0,0.000000,0.000000,0.000000,0.000000,0.000000,5.555556,5.555556,...,0.236673,11.711365,2.846247,0.059213,0.094444,-0.962222,0.055556,0.819444,59.444444,2.975556
1,AP01299,GLFTLIKGAAKLIGKTVPKKQARLGMNLWLVKLPTNVKT,0,7.692308,0.000000,0.000000,0.000000,2.564103,10.256410,0.000000,...,0.149167,11.471863,7.753099,0.123503,-0.164103,-0.248205,0.051282,1.035128,120.000000,0.246667
2,ADAM_5620,SCNCVCGFCCSCSP,0,0.000000,42.857143,0.000000,0.000000,7.142857,7.142857,0.000000,...,0.139160,5.228699,-0.599259,0.161821,-0.635714,-0.228571,0.071429,0.934286,20.714286,0.085714
3,Positive_40,RTKKWIVWI,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.314946,11.166321,2.758094,0.422402,-0.366667,-0.150000,0.222222,1.007778,118.888889,1.116667
4,Positive_183,LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLVPRTES,1,0.000000,0.000000,5.405405,8.108108,10.810811,5.405405,0.000000,...,0.145662,10.605286,5.764731,0.562441,0.621622,-0.850811,0.108108,1.055676,89.459459,3.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,nonAMP_ID_38763,DRLLNIQPPPREKMF,0,0.000000,0.000000,6.666667,6.666667,6.666667,0.000000,0.000000,...,0.225993,8.745544,0.763014,0.237360,0.413333,-0.904000,0.066667,1.031333,78.000000,2.867333
858,Positive_58,EVEPSDTIENVKAKIQ,1,6.250000,0.000000,6.250000,18.750000,0.000000,0.000000,0.000000,...,0.190854,4.407410,-2.158429,0.221016,0.700000,-1.087500,0.000000,1.075000,91.250000,2.416875
859,AVP0965,AVASVPRARGKYWWG,0,20.000000,0.000000,0.000000,0.000000,0.000000,13.333333,0.000000,...,0.245339,11.000549,2.793857,0.189462,-0.286667,-0.222667,0.200000,0.933333,58.666667,1.258667
860,nonAMP_ID_129620,KEQLGEEGYREMGHKGGETRKEQLGEEGYREMGHKGG,0,0.000000,0.000000,0.000000,24.324324,0.000000,27.027027,5.405405,...,0.181168,5.601257,-2.046280,0.085521,0.978378,-1.324595,0.054054,1.057838,21.081081,3.447568


In [4]:
# Labels are picked by position from each table and converted to NumPy arrays.
# NOTE(review): position 2 is presumably the `label` column — confirm against the
# CSV header, since the selection is positional rather than by name.
LABEL_POSITION = 2
train_label, test_label = (
    table.iloc[:, LABEL_POSITION].values for table in (df_train, df_test)
)

In [5]:
# Positional (end-exclusive) column ranges of each descriptor family in the feature
# tables. Defined once so the train and test splits cannot drift apart.
# NOTE(review): boundaries are carried over from the original hard-coded slices —
# confirm against the CSV header if the feature extraction ever changes.
FEATURE_SLICES = {
    "AAC": slice(3, 23),
    "DPC": slice(23, 423),
    "CKSAAGP": slice(423, 523),
    "PAAC": slice(523, 547),
    "PHYC": slice(547, None),
}

# Order in which the families are stacked column-wise. Note that it differs from the
# order spelled out in the combined variable names (CKSAAGP comes before PAAC here).
# Presumably this is the layout the saved model was trained on — do not reorder.
CONCAT_ORDER = ("AAC", "DPC", "CKSAAGP", "PAAC", "PHYC")


def split_feature_groups(df):
    """Slice a feature table into one 2-D NumPy array per descriptor family.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table whose columns follow the layout in ``FEATURE_SLICES``.

    Returns
    -------
    dict
        Maps each family name in ``FEATURE_SLICES`` to ``df.iloc[:, cols].values``.
    """
    return {family: df.iloc[:, cols].values for family, cols in FEATURE_SLICES.items()}


train_groups = split_feature_groups(df_train)
test_groups = split_feature_groups(df_test)

# Keep the individual per-family arrays available under their original names.
train_AAC = train_groups["AAC"]
train_DPC = train_groups["DPC"]
train_CKSAAGP = train_groups["CKSAAGP"]
train_PAAC = train_groups["PAAC"]
train_PHYC = train_groups["PHYC"]

test_AAC = test_groups["AAC"]
test_DPC = test_groups["DPC"]
test_CKSAAGP = test_groups["CKSAAGP"]
test_PAAC = test_groups["PAAC"]
test_PHYC = test_groups["PHYC"]

# Combined feature matrices: all families side by side (axis=1), one row per sample.
train_AAC_DPC_PAAC_CKSAAGP_PHYC = np.concatenate(
    [train_groups[family] for family in CONCAT_ORDER], axis=1
)
test_AAC_DPC_PAAC_CKSAAGP_PHYC = np.concatenate(
    [test_groups[family] for family in CONCAT_ORDER], axis=1
)

## Load the trained model

In [6]:
# Create an empty cascade forest and restore the saved model into it from disk.
MODEL_DIR = "model/AAC_DPC_PAAC_CKSAAGP_PHYC"
model_DF = CascadeForestClassifier()
model_DF.load(MODEL_DIR)

## Predict and output probabilities

In [8]:
# Fit the scaler on the training features only, apply it to the test features,
# then ask the loaded cascade forest for class probabilities.
standardScaler = StandardScaler().fit(train_AAC_DPC_PAAC_CKSAAGP_PHYC)
test_AAC_DPC_PAAC_CKSAAGP_PHYC_std = standardScaler.transform(
    test_AAC_DPC_PAAC_CKSAAGP_PHYC
)
y_prob_DF = model_DF.predict_proba(test_AAC_DPC_PAAC_CKSAAGP_PHYC_std)
y_prob_DF  # last expression: rendered as the cell output

[2023-03-02 07:57:02.706] Start to evalute the model:
[2023-03-02 07:57:02.711] Evaluating cascade layer = 0 
[2023-03-02 07:57:02.885] Evaluating cascade layer = 1 
[2023-03-02 07:57:03.048] Evaluating cascade layer = 2 
[2023-03-02 07:57:03.247] Evaluating cascade layer = 3 


array([[0.9045, 0.0955],
       [0.236 , 0.764 ],
       [0.9025, 0.0975],
       [0.045 , 0.955 ],
       [0.0495, 0.9505],
       [0.957 , 0.043 ],
       [0.925 , 0.075 ],
       [0.7295, 0.2705],
       [0.971 , 0.029 ],
       [0.7095, 0.2905],
       [0.2315, 0.7685],
       [0.8855, 0.1145],
       [0.8955, 0.1045],
       [0.8505, 0.1495],
       [0.9555, 0.0445],
       [0.0175, 0.9825],
       [0.9645, 0.0355],
       [0.962 , 0.038 ],
       [0.925 , 0.075 ],
       [0.174 , 0.826 ],
       [0.8625, 0.1375],
       [0.911 , 0.089 ],
       [0.92  , 0.08  ],
       [0.053 , 0.947 ],
       [0.649 , 0.351 ],
       [0.9465, 0.0535],
       [0.996 , 0.004 ],
       [0.9895, 0.0105],
       [0.939 , 0.061 ],
       [0.9505, 0.0495],
       [0.951 , 0.049 ],
       [0.8685, 0.1315],
       [0.8405, 0.1595],
       [0.8885, 0.1115],
       [0.963 , 0.037 ],
       [0.906 , 0.094 ],
       [0.4105, 0.5895],
       [0.9065, 0.0935],
       [0.9445, 0.0555],
       [0.96  , 0.04  ],
