# Binding Prediction via API

IEDB tools API documentation: http://tools.iedb.org/main/tools-api/

## MHC I

In [1]:
import requests
import csv
import pandas as pd
import os

ModuleNotFoundError: No module named 'pandas'

In [89]:
# HLA allele reference set for MHC class I: 27 alleles (16 HLA-A, then 11 HLA-B)
hla_ref_set_mhc1 = [
    # HLA-A
    'HLA-A*01:01', 'HLA-A*02:01', 'HLA-A*02:03', 'HLA-A*02:06',
    'HLA-A*03:01', 'HLA-A*11:01', 'HLA-A*23:01', 'HLA-A*24:02',
    'HLA-A*26:01', 'HLA-A*30:01', 'HLA-A*30:02', 'HLA-A*31:01',
    'HLA-A*32:01', 'HLA-A*33:01', 'HLA-A*68:01', 'HLA-A*68:02',
    # HLA-B
    'HLA-B*07:02', 'HLA-B*08:01', 'HLA-B*15:01', 'HLA-B*35:01',
    'HLA-B*40:01', 'HLA-B*44:02', 'HLA-B*44:03', 'HLA-B*51:01',
    'HLA-B*53:01', 'HLA-B*57:01', 'HLA-B*58:01',
]



In [90]:
def read_data(filename):
    '''
    Return the first line of `filename` with newline characters trimmed.

    Only the first line is read; anything after it in the file is ignored.
    Other whitespace (spaces, tabs) around the line is left untouched.
    '''

    with open(filename, mode='r') as sequence_file:
        first_line = sequence_file.readline()

    return first_line.strip('\n')
    


    
def build_alleles_and_length_strings(alleles,lengths):
    '''
    Build two position-aligned, comma-separated strings pairing every allele
    with every length.

    For each entry in `lengths` (in order) the full allele sequence is
    repeated once, and the length is repeated once per allele, so item i of
    the first string is paired with item i of the second.

    alleles: iterable of allele name strings (any iterable; consumed once)
    lengths: iterable of lengths; each is converted with str()

    Returns a tuple (joined_alleles, joined_lengths). Both are empty strings
    when either input is empty.
    '''

    # materialise once so generators/iterators can be repeated per length
    alleles = list(alleles)

    a_list = []
    l_list = []

    for length in lengths:
        a_list.extend(alleles)                        # one copy of the alleles per length
        l_list.extend([str(length)] * len(alleles))   # matching length for each allele

    return (",".join(a_list), ",".join(l_list))

    
def predict_mhc1(a_set,l_set,sequence,*,timeout=None):
    '''
    POST an MHC class I binding prediction request to the IEDB tools API.

    Note that the default value for species is human.

    a_set: comma-separated allele names, sent as the `allele` form field
    l_set: comma-separated peptide lengths, sent as the `length` form field
           (expected to line up position-by-position with a_set, as produced
           by build_alleles_and_length_strings)
    sequence: sequence text, sent as the `sequence_text` form field
    timeout: optional timeout forwarded to requests.post (seconds, or a
             (connect, read) tuple). The default None keeps the previous
             behaviour of waiting indefinitely for the server.

    Returns the requests.Response as received. The HTTP status is NOT
    checked here; callers should inspect it before trusting `.text`.
    '''

    # define post
    data = {'method':'recommended',
            'allele':a_set,
            'length':l_set,
            'sequence_text':sequence}

    site = 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

    # perform prediction
    return requests.post(site, data=data, timeout=timeout)


def save_result(response, filename):
    '''
    Write a tab-separated response body to `filename` as comma-separated CSV.

    response: object exposing the body as a string via `.text`
              (e.g. a requests.Response)
    filename: path of the CSV file to create or overwrite

    Each line of the body becomes one CSV row, split on tab characters.
    A trailing newline in the body does not produce an extra empty row.
    '''

    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' on write end up with doubled line endings
    with open(filename, mode='w', newline='') as write_file:
        file_writer = csv.writer(write_file, delimiter=',')

        # splitlines() drops the empty element that split('\n') leaves after a
        # final newline, and does not leave a stray '\r' on CRLF-terminated lines
        file_writer.writerows(
            each_row.split('\t') for each_row in response.text.splitlines()
        )

In [None]:
# get list of filenames to process
# (os.listdir returns the bare entry names inside data/ConservedRegion,
#  not full paths; the list is printed for inspection)
files = os.listdir('data/ConservedRegion')
print(files)





# define parameters for this prediction



#pd.read_csv(filename)

In [91]:
# define parameters for this prediction
# (input: file whose first line is the sequence; output: CSV written by save_result)
read_file = 'data/Binding_Prediction/mhc1_test1/mhc1_test1_api_seq.txt'
write_file = 'data/Binding_Prediction/mhc1_test1/mhc1_test1_api_result.csv'
allele_list = hla_ref_set_mhc1
lengths_list = [9,10]

# perform prediction for this sequence
seq = read_data(read_file)
a_str,l_str = build_alleles_and_length_strings(allele_list, lengths_list)
r1 = predict_mhc1(a_str,l_str,seq)

# stop on an HTTP error (4xx/5xx) rather than saving the error body as if it
# were a prediction result
r1.raise_for_status()
save_result(r1, write_file)

In [50]:
# TODO: combine results into a single CSV?

## MHC II

In [95]:
# HLA allele reference set for MHC class II: 27 entries
# (15 DRB, then 6 DQA1/DQB1 pairs, then 6 DPA1/DPB1 pairs)
hla_ref_set_mhc2 = [
    # DRB1 / DRB3 / DRB4 / DRB5
    'HLA-DRB1*01:01', 'HLA-DRB1*03:01', 'HLA-DRB1*04:01', 'HLA-DRB1*04:05',
    'HLA-DRB1*07:01', 'HLA-DRB1*08:02', 'HLA-DRB1*09:01', 'HLA-DRB1*11:01',
    'HLA-DRB1*12:01', 'HLA-DRB1*13:02', 'HLA-DRB1*15:01',
    'HLA-DRB3*01:01', 'HLA-DRB3*02:02', 'HLA-DRB4*01:01', 'HLA-DRB5*01:01',
    # DQA1/DQB1 pairs
    'HLA-DQA1*05:01/DQB1*02:01', 'HLA-DQA1*05:01/DQB1*03:01',
    'HLA-DQA1*03:01/DQB1*03:02', 'HLA-DQA1*04:01/DQB1*04:02',
    'HLA-DQA1*01:01/DQB1*05:01', 'HLA-DQA1*01:02/DQB1*06:02',
    # DPA1/DPB1 pairs
    'HLA-DPA1*02:01/DPB1*01:01', 'HLA-DPA1*01:03/DPB1*02:01',
    'HLA-DPA1*01:03/DPB1*04:01', 'HLA-DPA1*03:01/DPB1*04:02',
    'HLA-DPA1*02:01/DPB1*05:01', 'HLA-DPA1*02:01/DPB1*14:01',
]


In [96]:
def predict_mhc2(a_set,l_set,sequence,*,timeout=None):
    '''
    POST an MHC class II binding prediction request to the IEDB tools API.

    API doesn't appear to allow us to set species/locus.

    a_set: comma-separated allele names, sent as the `allele` form field
    l_set: comma-separated peptide lengths, sent as the `length` form field
           (expected to line up position-by-position with a_set, as produced
           by build_alleles_and_length_strings)
    sequence: sequence text, sent as the `sequence_text` form field
    timeout: optional timeout forwarded to requests.post (seconds, or a
             (connect, read) tuple). The default None keeps the previous
             behaviour of waiting indefinitely for the server.

    Returns the requests.Response as received. The HTTP status is NOT
    checked here; callers should inspect it before trusting `.text`.
    '''

    # define post
    data = {'method':'recommended',
            'allele':a_set,
            'length':l_set,
            'sequence_text':sequence}

    site = 'http://tools-cluster-interface.iedb.org/tools_api/mhcii/'

    # perform prediction
    return requests.post(site, data=data, timeout=timeout)

In [99]:
# define parameters for this prediction
# (input: file whose first line is the sequence; output: CSV written by save_result)
read_file = 'data/Binding_Prediction/mhc2_test2/mhc2_test2_api_seq.txt'
write_file = 'data/Binding_Prediction/mhc2_test2/mhc2_test2_api_result.csv'
allele_list = hla_ref_set_mhc2
lengths_list = [15,16]

# perform prediction for this sequence
seq = read_data(read_file)
a_str,l_str = build_alleles_and_length_strings(allele_list, lengths_list)
r2 = predict_mhc2(a_str,l_str,seq)

# stop on an HTTP error (4xx/5xx) rather than saving the error body as if it
# were a prediction result
r2.raise_for_status()
save_result(r2, write_file)