In [7]:
# Mount Google Drive inside the Colab VM at /content/drive.
# NOTE(review): the cells below open model files via relative paths, not via
# /content/drive — confirm the working directory is set to the folder that holds
# 'Classification Dependencies/' and 'Regression Dependencies/' before running them.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Imports

In [2]:
pip install propy3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting propy3
  Downloading propy3-1.1.1-py3-none-any.whl (290 kB)
[K     |████████████████████████████████| 290 kB 4.4 MB/s 
[?25hInstalling collected packages: propy3
Successfully installed propy3-1.1.1


In [3]:
pip install Biopython

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Biopython
  Downloading biopython-1.80-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 4.3 MB/s 
Installing collected packages: Biopython
Successfully installed Biopython-1.80


In [4]:
pip install pyfaidx

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyfaidx
  Downloading pyfaidx-0.7.1.tar.gz (103 kB)
[K     |████████████████████████████████| 103 kB 5.3 MB/s 
Building wheels for collected packages: pyfaidx
  Building wheel for pyfaidx (setup.py) ... [?25l[?25hdone
  Created wheel for pyfaidx: filename=pyfaidx-0.7.1-py3-none-any.whl size=27748 sha256=dbab783b12fd68273159905adf69b2ef6ba2e77fcfed33ae6cfd0f4ff2943d92
  Stored in directory: /root/.cache/pip/wheels/1a/d6/99/7334c4d11bfb574e6d6ea706256053b268a12f2127af1cfd40
Successfully built pyfaidx
Installing collected packages: pyfaidx
Successfully installed pyfaidx-0.7.1


In [1]:
## Dependency Imports
# Return file path
import glob
import os

# Grouping iterable
from itertools import chain

# Unpack Files
import json
import pickle

# Dataframes
import pandas as pd
import numpy as np

# Compute protein descriptors
from propy import PyPro
from propy import AAComposition
from propy import CTD

# Build Sequence Object
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Read Fasta File
from pyfaidx import Fasta

#### Unpack all saved models and features

In [3]:
## Classification Model Imports:
# SECURITY: unpickling runs arbitrary code embedded in the file. Only load
# artifacts that come from a trusted source.

# Fitted scaler applied to the classification features
with open('Classification Dependencies/Scaler Classification.pkl', 'rb') as scaler_file:
    scaler_for_classification = pickle.load(scaler_file)

# First classifier (its decision function is thresholded in the pipeline)
with open('Classification Dependencies/SVM Linear Classification trained.pkl', 'rb') as linear_model_file:
    classification_model_1 = pickle.load(linear_model_file)

# Second classifier (its predicted labels are used directly)
with open('Classification Dependencies/SVM RBF Classification trained.pkl', 'rb') as rbf_model_file:
    classification_model_2 = pickle.load(rbf_model_file)

# Names of the feature columns fed to the classifiers
with open('Classification Dependencies/Features for Classification Model.json') as features_file:
    classification_features = json.load(features_file)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [4]:
## Regression Model Imports:
# SECURITY: unpickling runs arbitrary code embedded in the file. Only load
# artifacts that come from a trusted source.

# Fitted scaler applied to the regression features
with open('Regression Dependencies/Scaler Regression.pkl', 'rb') as scaler_file:
    scaler_for_regression = pickle.load(scaler_file)

# Classifier that assigns each positive peptide to a Ki bucket
with open('Regression Dependencies/SVC RBF bucket Classification trained.pkl', 'rb') as bucket_model_file:
    classification_model_for_buckets = pickle.load(bucket_model_file)

# Regressor used for peptides in the medium bucket
with open('Regression Dependencies/SVR RBF medium bucket Regression trained.pkl', 'rb') as medium_model_file:
    regression_model_medium_bucket = pickle.load(medium_model_file)

# Regressor used for peptides in the small bucket
with open('Regression Dependencies/SVR RBF small bucket Regression trained.pkl', 'rb') as small_model_file:
    regression_model_small_bucket = pickle.load(small_model_file)

# Names of the feature columns fed to the bucket classifier and the regressors
with open('Regression Dependencies/Features for Regression Model.json') as features_file:
    regression_features = json.load(features_file)


In [5]:
## Pipeline for multi-stage model:
def model_pipeline(allFeaturesData, ensemble=True):
    """
    Run the multi-stage prediction pipeline on a table of peptide features.

    Stages applied:
        1. Classification of every peptide as 'Positive' or 'Negative'.
        2. Classification of the positive peptides into buckets
           (0 = small, 1 = medium, 2 = large).
        3. Regression on peptides in buckets 0 and 1 to predict "KI (nM)".
           Peptides in bucket 2 are reported as 0.

    The function relies on the module-level scalers, models and feature lists
    loaded earlier in the notebook (`scaler_for_classification`,
    `classification_model_1`, `classification_model_2`, `classification_features`,
    `scaler_for_regression`, `classification_model_for_buckets`,
    `regression_model_small_bucket`, `regression_model_medium_bucket`,
    `regression_features`).

    Parameters
    ----------
    allFeaturesData : pandas.DataFrame
        One row per peptide. The first two columns must be 'Name' and 'Seq';
        every remaining column is passed, in order, to the regression scaler.
        The input frame is not modified.

    ensemble : bool, default True
        If truthy, a peptide is 'Positive' only when the first classifier's
        decision value exceeds the threshold AND the second classifier predicts 1.
        Otherwise only the second classifier is used.

    Returns
    -------
    pandas.DataFrame
        Columns ['Name', 'Seq', 'Predicted', 'KI (nM) Predicted'], in the same row
        order and with the same index as the input. 'KI (nM) Predicted' is NaN for
        negative peptides. When no peptide is positive the regression stage is
        skipped and only ['Name', 'Seq', 'Predicted'] is returned.
    """
    ki_column = 'KI (nM) Predicted'

    # CLASSIFICATION
    # ----------------------------------

    # Keep only the features the classifiers expect, then scale them.
    clf_data = allFeaturesData[classification_features]
    clf_data = pd.DataFrame(scaler_for_classification.transform(clf_data),
                            columns=clf_data.columns)

    # Model 2 - predicted class labels (1 = positive).
    model_2_positive = np.asarray(classification_model_2.predict(clf_data)) == 1

    if ensemble:
        # Model 1 - a fixed threshold on the decision function replaces the
        # classifier's own cut-off.
        threshold = 0.25
        decision_values = np.asarray(classification_model_1.decision_function(clf_data))
        is_positive = (decision_values > threshold) & model_2_positive
    else:
        is_positive = model_2_positive

    # Attach the labels positionally to a copy, so the result does not depend on
    # the caller's index and the caller's frame is left untouched.
    results = allFeaturesData.copy()
    results['Predicted'] = np.where(is_positive, 'Positive', 'Negative')

    # Nothing to regress on: return the classification result only.
    if not is_positive.any():
        return results[['Name', 'Seq', 'Predicted']]

    # REGRESSION
    # ----------------------------------

    # Every column after 'Name' and 'Seq' goes through the regression scaler;
    # afterwards only the selected regression features are kept.
    positive_features = allFeaturesData.loc[is_positive].iloc[:, 2:]
    reg_data_reduced = pd.DataFrame(scaler_for_regression.transform(positive_features),
                                    columns=positive_features.columns)[regression_features]

    # Predict the bucket of each positive peptide.
    buckets_pred = np.asarray(classification_model_for_buckets.predict(reg_data_reduced))

    # Regressor outputs are mapped linearly from source_interval onto ki_range
    # (np.interp clamps values outside the interval) and then exponentiated.
    # NOTE(review): ki_range is presumably the log-Ki range seen during training —
    # confirm against the training notebook.
    ki_range = (-11.330603908176274, 17.19207365866807)
    source_interval = (-5, 5)

    # Fill predictions in place by bucket mask; this keeps the original row order
    # without repeatedly appending to / deleting from arrays.
    ki_predictions = np.full(len(reg_data_reduced), np.nan)
    bucket_regressors = {
        0: regression_model_small_bucket,
        1: regression_model_medium_bucket,
    }
    for bucket, regressor in bucket_regressors.items():
        in_bucket = buckets_pred == bucket
        if in_bucket.any():
            scaled_prediction = regressor.predict(reg_data_reduced[in_bucket])
            ki_predictions[in_bucket] = np.exp(
                np.interp(scaled_prediction, source_interval, ki_range))
    # The large bucket has no regressor and is reported as 0.
    ki_predictions[buckets_pred == 2] = 0.0

    # Write predictions back by row position. Merging on 'Seq' would multiply rows
    # whenever the same sequence appears more than once.
    results[ki_column] = np.nan
    results.loc[is_positive, ki_column] = ki_predictions

    return results[['Name', 'Seq', 'Predicted', ki_column]]

### Inference Function

#### Fasta File

In [7]:
## Inference Function for Fasta File:
def inferenceFasta(fastafile, ensemble=True):
    """
    Run the prediction pipeline on every sequence found in one or more FASTA files.

    Each sequence is read as a string, its descriptors are computed (molecular
    weight, aromaticity, instability index, isoelectric point, amino-acid
    composition, CTD and dipeptide composition), and the resulting feature table
    is passed to `model_pipeline`.

    Parameters
    ----------
    fastafile : str
        Path or glob pattern of the FASTA file(s). All matching files are
        processed, in sorted path order, and reported in a single table.

    ensemble : bool, default True
        Forwarded to `model_pipeline`.

    Returns
    -------
    pandas.DataFrame
        The table returned by `model_pipeline`, one row per sequence.

    Raises
    ------
    FileNotFoundError
        If no file matches `fastafile`.
    ValueError
        If the matching files contain no sequences.
    """
    fasta_paths = sorted(glob.glob(fastafile))
    if not fasta_paths:
        raise FileNotFoundError("No FASTA file matches %r" % (fastafile,))

    # Residue codes that the descriptor calculations cannot handle are replaced by 'A'.
    residue_substitutions = str.maketrans('XxUZB', 'AAAAA')
    primary_names = ['Name', 'Seq', 'SeqLength', 'Weight', 'Aromaticity',
                     'Instability', 'IsoelectricPoint']

    feature_rows = []
    column_names = None
    for path in fasta_paths:
        for record in Fasta(path):
            s = str(record).translate(residue_substitutions)

            # Primary features
            analysed_seq = ProteinAnalysis(s)
            primary_values = [
                record.name,
                s,
                len(s),
                analysed_seq.molecular_weight(),
                analysed_seq.aromaticity(),
                analysed_seq.instability_index(),
                analysed_seq.isoelectric_point(),
            ]

            # Amino Acid Composition (AAC), Composition Transition Distribution (CTD)
            # and Dipeptide Composition (DPC)
            resultAAC = AAComposition.CalculateAAComposition(s)
            resultCTD = CTD.CalculateCTD(s)
            resultDPC = AAComposition.CalculateDipeptideComposition(s)

            # Column names are taken once, from the first sequence processed.
            if column_names is None:
                column_names = (primary_names + list(resultAAC)
                                + list(resultCTD) + list(resultDPC))

            feature_rows.append(primary_values
                                + list(resultAAC.values())
                                + list(resultCTD.values())
                                + list(resultDPC.values()))

    if not feature_rows:
        raise ValueError("No sequences found in %r" % (fastafile,))

    # Build the feature table and hand it to the multi-stage pipeline.
    allFeaturesData = pd.DataFrame(feature_rows, columns=column_names)
    return model_pipeline(allFeaturesData, ensemble)

#### Single Sequence

In [8]:
## Inference Function for Single Sequences:
def inferenceSingleSeqence(seq, ensemble=True):
    """
    Run the prediction pipeline on a single peptide sequence.

    The sequence is converted to a string, its descriptors are computed
    (molecular weight, aromaticity, instability index, isoelectric point,
    amino-acid composition, CTD and dipeptide composition), and the resulting
    one-row feature table is passed to `model_pipeline`.

    Parameters
    ----------
    seq : str
        The peptide sequence. It is also used, unmodified, as the 'Name' value.

    ensemble : bool, default True
        Forwarded to `model_pipeline`.

    Returns
    -------
    pandas.DataFrame
        The one-row table returned by `model_pipeline`.

    Raises
    ------
    ValueError
        If the sequence is empty.
    """
    # Residue codes that the descriptor calculations cannot handle are replaced by 'A'.
    s = str(seq).translate(str.maketrans('XxUZB', 'AAAAA'))
    if not s:
        raise ValueError("Peptide sequence must not be empty")

    # Primary features
    analysed_seq = ProteinAnalysis(s)
    primary_names = ['Name', 'Seq', 'SeqLength', 'Weight', 'Aromaticity',
                     'Instability', 'IsoelectricPoint']
    primary_values = [
        seq,
        s,
        len(s),
        analysed_seq.molecular_weight(),
        analysed_seq.aromaticity(),
        analysed_seq.instability_index(),
        analysed_seq.isoelectric_point(),
    ]

    # Amino Acid Composition (AAC), Composition Transition Distribution (CTD)
    # and Dipeptide Composition (DPC)
    resultAAC = AAComposition.CalculateAAComposition(s)
    resultCTD = CTD.CalculateCTD(s)
    resultDPC = AAComposition.CalculateDipeptideComposition(s)

    column_names = primary_names + list(resultAAC) + list(resultCTD) + list(resultDPC)
    feature_row = (primary_values
                   + list(resultAAC.values())
                   + list(resultCTD.values())
                   + list(resultDPC.values()))

    # Build the one-row feature table and hand it to the multi-stage pipeline.
    allFeaturesData = pd.DataFrame([feature_row], columns=column_names)
    return model_pipeline(allFeaturesData, ensemble)

#### .csv file

In [9]:
## Inference Function for .csv Files:
def inference_csv(csv, ensemble=True):
    """
    Run the prediction pipeline on every peptide listed in a CSV file.

    The file must contain a 'Seq' column. For each sequence the descriptors are
    computed (molecular weight, aromaticity, instability index, isoelectric point,
    amino-acid composition, CTD and dipeptide composition) and the resulting
    feature table is passed to `model_pipeline`.

    Parameters
    ----------
    csv : str
        Path of the CSV file to read.

    ensemble : bool, default True
        Forwarded to `model_pipeline`.

    Returns
    -------
    pandas.DataFrame
        The table returned by `model_pipeline`, one row per usable sequence.
        The raw sequence is used as the 'Name' value.

    Raises
    ------
    ValueError
        If the 'Seq' column contains no usable sequence.
    """
    # Read the file the caller asked for (not a fixed file name).
    table = pd.read_csv(csv)

    # Strip leading / trailing spaces from every string cell.
    table = table.replace(r"^ +| +$", r"", regex=True)

    # Skip missing or empty cells; str(NaN) would otherwise be scored as the
    # literal peptide "nan".
    sequences = table['Seq'].dropna()
    sequences = sequences[sequences.astype(str) != '']

    # Residue codes that the descriptor calculations cannot handle are replaced by 'A'.
    residue_substitutions = str.maketrans('XxUZB', 'AAAAA')
    primary_names = ['Name', 'Seq', 'SeqLength', 'Weight', 'Aromaticity',
                     'Instability', 'IsoelectricPoint']

    feature_rows = []
    column_names = None
    for seq in sequences:
        s = str(seq).translate(residue_substitutions)

        # Primary features
        analysed_seq = ProteinAnalysis(s)
        primary_values = [
            seq,
            s,
            len(s),
            analysed_seq.molecular_weight(),
            analysed_seq.aromaticity(),
            analysed_seq.instability_index(),
            analysed_seq.isoelectric_point(),
        ]

        # Amino Acid Composition (AAC), Composition Transition Distribution (CTD)
        # and Dipeptide Composition (DPC)
        resultAAC = AAComposition.CalculateAAComposition(s)
        resultCTD = CTD.CalculateCTD(s)
        resultDPC = AAComposition.CalculateDipeptideComposition(s)

        # Column names are taken once, from the first sequence processed.
        if column_names is None:
            column_names = (primary_names + list(resultAAC)
                            + list(resultCTD) + list(resultDPC))

        feature_rows.append(primary_values
                            + list(resultAAC.values())
                            + list(resultCTD.values())
                            + list(resultDPC.values()))

    if not feature_rows:
        raise ValueError("No sequences found in column 'Seq' of %r" % (csv,))

    # Build the feature table and hand it to the multi-stage pipeline.
    sequence_data = pd.DataFrame(feature_rows, columns=column_names)
    return model_pipeline(sequence_data, ensemble)

### Implementation

In [10]:
## Implementation Function:
def inference(file: str, ensemble: bool = True):
    """
    Dispatch an input to the matching inference function and save the result as CSV.

    Parameters
    ----------
    file : str
        - a path ending in '.fasta' is handled by `inferenceFasta`;
        - a path ending in '.csv' is handled by `inference_csv`;
        - anything else is treated as a raw peptide sequence and handled by
          `inferenceSingleSeqence`.

    ensemble : bool, default True
        Forwarded to the selected inference function.

    Returns
    -------
    pandas.DataFrame
        The prediction table. The same table is also written to
        '<input path without extension>_fasta_results.csv',
        '<input path without extension>_csv_results.csv' or 'single_results.csv'.
    """
    if file.endswith('.fasta'):
        result = inferenceFasta(file, ensemble)
        # splitext removes only the final extension, so dots in directory names
        # or in the file stem do not truncate the output path.
        name = os.path.splitext(file)[0]
        result.to_csv('%s_fasta_results.csv' % (name))
    elif file.endswith('.csv'):
        result = inference_csv(file, ensemble)
        name = os.path.splitext(file)[0]
        result.to_csv('%s_csv_results.csv' % (name))
    else:
        result = inferenceSingleSeqence(file, ensemble)
        result.to_csv('single_results.csv')
    return result

#### Test Cases

In [22]:
# CSV input with the ensemble classifier; the result is also saved to
# 'combined_hits_csv_results.csv'.
inference('combined_hits.csv', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,DSGNESDGDTEELSTLLEMGPDNIWDNDDL,DSGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.063117
1,KTRGNESDGDTEELSTLLEMGPDNIWDNDDL,KTRGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.732421
2,RQGNESDGDTEELSTLLEMGPDNIWDNDDL,RQGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,1.317487
3,RSGNESDGDTEELSTLLEMGPDNIWDNDDL,RSGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.344986
4,DSGNESDGDTEELSTLLEMGPDNILDHDDL,DSGNESDGDTEELSTLLEMGPDNILDHDDL,Positive,1.077039
5,DSGNESDGDTEELSALLEMGPDNIWDNDDL,DSGNESDGDTEELSALLEMGPDNIWDNDDL,Positive,2.863599
6,DSGNESDGDTEELSTLLEMGPDNIWGNDDL,DSGNESDGDTEELSTLLEMGPDNIWGNDDL,Positive,3.112762
7,DSGNESDGDTEELSTLPEMGPDDIWNNDDL,DSGNESDGDTEELSTLPEMGPDDIWNNDDL,Positive,0.921109
8,KTVGNESDGDTEELSTLLEMGPDNIWDNDDL,KTVGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.664924
9,DSGNESDGDTEELSTLLEMGPDDILDHDDL,DSGNESDGDTEELSTLLEMGPDDILDHDDL,Positive,0.082409


In [12]:
# An argument that ends in neither '.fasta' nor '.csv' is treated as a raw peptide
# sequence; the result is also saved to 'single_results.csv'.
inference('APEADQTTPEEKPAEPEPVA', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,APEADQTTPEEKPAEPEPVA,APEADQTTPEEKPAEPEPVA,Positive,1.663297


In [13]:
# Same peptide with the ensemble disabled: only the second classifier decides.
inference('APEADQTTPEEKPAEPEPVA', ensemble=False)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,APEADQTTPEEKPAEPEPVA,APEADQTTPEEKPAEPEPVA,Positive,1.663297


In [14]:
# Second raw-sequence example, ensemble classifier.
inference('QSPLPERQE', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,QSPLPERQE,QSPLPERQE,Positive,1.207007


In [15]:
# Same peptide with the ensemble disabled: only the second classifier decides.
inference('QSPLPERQE', ensemble=False)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,QSPLPERQE,QSPLPERQE,Positive,1.207007


In [16]:
# Example of a peptide classified as negative: the regression stage is skipped, so
# the result has no 'KI (nM) Predicted' column.
inference('HTLGYINDNEEGPR', ensemble=True)

Unnamed: 0,Name,Seq,Predicted
0,HTLGYINDNEEGPR,HTLGYINDNEEGPR,Negative


In [None]:
# Same negative example with the ensemble disabled.
inference('HTLGYINDNEEGPR', ensemble=False)

Unnamed: 0,Name,Seq,Predicted
0,HTLGYINDNEEGPR,HTLGYINDNEEGPR,Negative


### Testing the Function

In [17]:
# FASTA input with the ensemble classifier; the result is also saved to
# 'smallhits_fasta_results.csv'.
inference('smallhits.fasta', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,P80849,EDLPEK,Negative,
1,Q7M066,FEQNTAQA,Negative,
2,A0A5K0XKV1,EKFEGPGVK,Negative,
3,A0A6A3HBE0,QSPLPERQE,Positive,1.207007
4,A0A5K1D874,DHLNAEQGK,Negative,
5,Q2UVK8,EGISFPKFEN,Negative,
6,A0A5B7JC83,MERQGSGREE,Negative,
7,A0A5K1ESP7,VEKFYQQCDP,Negative,
8,A0A022PZR9,SYQCRPFQQL,Negative,
9,D9U971,GTEGCENAKP,Negative,


In [18]:
# Same FASTA file with the ensemble disabled: only the second classifier decides.
inference('smallhits.fasta', ensemble=False)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,P80849,EDLPEK,Negative,
1,Q7M066,FEQNTAQA,Negative,
2,A0A5K0XKV1,EKFEGPGVK,Negative,
3,A0A6A3HBE0,QSPLPERQE,Positive,1.207007
4,A0A5K1D874,DHLNAEQGK,Negative,
5,Q2UVK8,EGISFPKFEN,Negative,
6,A0A5B7JC83,MERQGSGREE,Negative,
7,A0A5K1ESP7,VEKFYQQCDP,Negative,
8,A0A022PZR9,SYQCRPFQQL,Negative,
9,D9U971,GTEGCENAKP,Negative,


In [19]:
# Lift pandas' row-display limit so the whole result table is shown.
# NOTE: this is a global option and stays in effect for all later cells.
pd.set_option('display.max_rows',None)
# FASTA input with the ensemble classifier; the result is also saved to
# 'longhits_fasta_results.csv'.
inference('longhits.fasta', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,A0A699VUS0,THDDVDQENVVEETVDDVAQPTSPLPPSPSVPPSPPHQSPRSSPSQ...,Negative,
1,A0A7I9YXR2,MSAQDKVKNKIEDVSGKAKEALGKATNDPGVRDEGRGDQTKASLKD...,Negative,
2,A0A0A6ZHE3,MVQIKFLFAFLAVMTIVVLAANMADADFLSGKFKGGCMMWSTEKCR...,Negative,
3,A0A7W8YT97,MFKQFLDKVDGNQGYLLSSLGIFMLFFLLVGILLLTMKKDDIKYMS...,Negative,
4,L1P496,MHGSICSSANYLPLPTLPALGRRGGSLVHGGFSVSETSFPPLRLGA...,Negative,
5,Q5Z0N8,MPVSATESSTTPTARPAIEGESPVVWPDPSPLTGWWERVMRGGPVD...,Negative,
6,A0A4V2REQ9,MGAKFEVFKDGRGEYRFRLKAPNGQIIASSEGYKSKDSALNGVASV...,Negative,
7,A0A375IAT7,MKKLIAALVVGLFATGAFAQAAAPAPAEPAAPAATKEAPKKTTKKK...,Negative,
8,A0A524LF58,MSGQGSTGNVIAALASFFIPGLGQLLQGRLLIAIVMFVLAAALWFI...,Negative,
9,A0A7Z0HL73,MTTPELTRREKRPAERYGERTARPEKLRNIEVWARSAPIRLAGYED...,Negative,


In [20]:
# Lift pandas' row-display limit so the whole result table is shown.
# NOTE: this is a global option and stays in effect for all later cells.
pd.set_option('display.max_rows',None)
# Same FASTA file with the ensemble disabled: only the second classifier decides.
inference('longhits.fasta', ensemble=False)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,A0A699VUS0,THDDVDQENVVEETVDDVAQPTSPLPPSPSVPPSPPHQSPRSSPSQ...,Negative,
1,A0A7I9YXR2,MSAQDKVKNKIEDVSGKAKEALGKATNDPGVRDEGRGDQTKASLKD...,Negative,
2,A0A0A6ZHE3,MVQIKFLFAFLAVMTIVVLAANMADADFLSGKFKGGCMMWSTEKCR...,Negative,
3,A0A7W8YT97,MFKQFLDKVDGNQGYLLSSLGIFMLFFLLVGILLLTMKKDDIKYMS...,Negative,
4,L1P496,MHGSICSSANYLPLPTLPALGRRGGSLVHGGFSVSETSFPPLRLGA...,Negative,
5,Q5Z0N8,MPVSATESSTTPTARPAIEGESPVVWPDPSPLTGWWERVMRGGPVD...,Negative,
6,A0A4V2REQ9,MGAKFEVFKDGRGEYRFRLKAPNGQIIASSEGYKSKDSALNGVASV...,Negative,
7,A0A375IAT7,MKKLIAALVVGLFATGAFAQAAAPAPAEPAAPAATKEAPKKTTKKK...,Negative,
8,A0A524LF58,MSGQGSTGNVIAALASFFIPGLGQLLQGRLLIAIVMFVLAAALWFI...,Negative,
9,A0A7Z0HL73,MTTPELTRREKRPAERYGERTARPEKLRNIEVWARSAPIRLAGYED...,Negative,


In [21]:
# CSV input with the ensemble classifier; the result is also saved to
# 'sorted_peptides_csv_results.csv'.
# NOTE(review): the recorded output below is identical to the 'combined_hits.csv'
# run — re-run this cell and confirm that 'sorted_peptides.csv' is the file read.
inference('sorted_peptides.csv', ensemble=True)

Unnamed: 0,Name,Seq,Predicted,KI (nM) Predicted
0,DSGNESDGDTEELSTLLEMGPDNIWDNDDL,DSGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.063117
1,KTRGNESDGDTEELSTLLEMGPDNIWDNDDL,KTRGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.732421
2,RQGNESDGDTEELSTLLEMGPDNIWDNDDL,RQGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,1.317487
3,RSGNESDGDTEELSTLLEMGPDNIWDNDDL,RSGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.344986
4,DSGNESDGDTEELSTLLEMGPDNILDHDDL,DSGNESDGDTEELSTLLEMGPDNILDHDDL,Positive,1.077039
5,DSGNESDGDTEELSALLEMGPDNIWDNDDL,DSGNESDGDTEELSALLEMGPDNIWDNDDL,Positive,2.863599
6,DSGNESDGDTEELSTLLEMGPDNIWGNDDL,DSGNESDGDTEELSTLLEMGPDNIWGNDDL,Positive,3.112762
7,DSGNESDGDTEELSTLPEMGPDDIWNNDDL,DSGNESDGDTEELSTLPEMGPDDIWNNDDL,Positive,0.921109
8,KTVGNESDGDTEELSTLLEMGPDNIWDNDDL,KTVGNESDGDTEELSTLLEMGPDNIWDNDDL,Positive,3.664924
9,DSGNESDGDTEELSTLLEMGPDDILDHDDL,DSGNESDGDTEELSTLLEMGPDDILDHDDL,Positive,0.082409
