## Quick tutorial: extracting peptide features (binary profile, BLOSUM62, and Z-scale)

### Import the required packages 

In [1]:
import numpy as np
from PeptideFeature.BINARY import *
from PeptideFeature.BLOSUM62 import *
from PeptideFeature.ZSCALE import *

In [4]:
def feature_generator(file_path,temp_file_path):
    count = 0
    f = open(file_path, 'r', encoding='utf-8')
    fasta_list = np.array(f.readlines())
    
    zsl = np.zeros((int((len(fasta_list)/2)),100,5))
    blo = np.zeros((int((len(fasta_list)/2)),100,20))
    binary = np.zeros((int((len(fasta_list)/2)),100,20))
    for flag in range(0, len(fasta_list), 2):
        fasta_str = [[fasta_list[flag].strip('\n').strip(), fasta_list[flag + 1].strip('\n').strip()]]
        
        bin_output = BINARY(fasta_str)
        blo_output = BLOSUM62(fasta_str)
        zsl_output = ZSCALE(fasta_str)
        feature_id = bin_output[1][0].split('>')[1]
        bin_output[1].remove(bin_output[1][0])
        blo_output[1].remove(blo_output[1][0])
        zsl_output[1].remove(zsl_output[1][0])

        aa_count = 0
        for i in range(0, len(bin_output[1]), 20):
            temp = bin_output[1][i:i + 20]
            binary[count][aa_count] = temp
            aa_count = aa_count+1
        
            
        aa_count = 0    
        for i in range(0, len(blo_output[1]), 20):
            temp = blo_output[1][i:i + 20]
            blo[count][aa_count] = temp
            aa_count = aa_count+1
            
        aa_count = 0
        for i in range(0, len(zsl_output[1]), 5):
            temp = zsl_output[1][i:i + 5]
            zsl[count][aa_count] = temp
            aa_count = aa_count+1            
        count = count +1
    np.save(temp_file_path+"_bin", binary)
    np.save(temp_file_path+"_blo", blo)
    np.save(temp_file_path+"_zsl", zsl)

### Generate features

In [6]:
# Encode the training set; writes the *_bin / *_blo / *_zsl .npy files.
feature_generator(
    file_path="dataset/sequence/DeepAFP-main-train.fasta",
    temp_file_path="dataset/feature/DeepAFP-main-train",
)
# Encode the test set the same way.
feature_generator(
    file_path="dataset/sequence/DeepAFP-main-test.fasta",
    temp_file_path="dataset/feature/DeepAFP-main-test",
)

### Load features

In [7]:
# The feature files are plain float arrays written by np.save, so pickle
# support is not needed; leaving allow_pickle at its default (False) avoids
# executing arbitrary code from a tampered .npy file.

# Training-set features: binary profile, BLOSUM62, Z-scale.
train_bin = np.load("dataset/feature/DeepAFP-main-train_bin.npy")
train_blo = np.load("dataset/feature/DeepAFP-main-train_blo.npy")
train_zsl = np.load("dataset/feature/DeepAFP-main-train_zsl.npy")

# Test-set features, same three encodings.
test_bin = np.load("dataset/feature/DeepAFP-main-test_bin.npy")
test_blo = np.load("dataset/feature/DeepAFP-main-test_blo.npy")
test_zsl = np.load("dataset/feature/DeepAFP-main-test_zsl.npy")

In [8]:
# Display the training binary-profile array loaded above (cell output follows).
train_bin

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.