In [1]:
import itertools

In [4]:
def split_protein_sequence_classII(protein_sequence, min_length=13, max_length=25):
    """
    Enumerate every overlapping peptide of a protein for Class II MHC binding.

    For each window size from ``min_length`` to ``max_length`` (inclusive), a
    window is slid across the sequence one residue at a time. Peptides are
    returned grouped by window size (shortest first) and, within a size, in
    order of start position.

    Args:
        protein_sequence (str): The amino acid sequence of the protein.
        min_length (int): The minimum length of each peptide (default is 13).
        max_length (int): The maximum length of each peptide (default is 25).

    Returns:
        list: A list of peptide sequences. Window sizes longer than the
        sequence contribute nothing, so the list may be empty.
    """
    sequence_length = len(protein_sequence)
    return [
        protein_sequence[start:start + window]
        for window in range(min_length, max_length + 1)
        for start in range(sequence_length - window + 1)
    ]

In [None]:
# Example usage
protein_sequence = "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG"

# Class II peptides
classII_peptides = split_protein_sequence_classII(protein_sequence)

# A 60-residue protein yields several hundred peptides, so report the count and
# a short preview instead of dumping the whole list into the notebook output.
print(f"Generated {len(classII_peptides)} Class II peptides.")
print("First Class II peptides:", classII_peptides[:5])

In [31]:
import requests
import pandas as pd
from io import StringIO
from itertools import product

# IEDB tools API endpoint that the MHC class II prediction requests are POSTed to.
IEDB_API_URL_CLASSII = "http://tools-cluster-interface.iedb.org/tools_api/mhcii/"


def run_netmhciipan_binding_affinity_classII(peptides, hla_alleles, timeout=600):
    """
    Uses IEDB API to generate binding affinity for each peptide and HLA interaction for Class II MHC.

    Args:
        peptides (list): A list of peptide sequences. A single peptide given as
            a plain string is treated as a one-element list.
        hla_alleles (list): A list of HLA alleles for which to make predictions.
            A single allele given as a plain string is treated as a one-element list.
        timeout (float or tuple): Timeout in seconds handed to ``requests.post``
            so that a stalled server cannot block the notebook indefinitely
            (default is 600).

    Returns:
        dict: ``{"results": <raw response text>}`` on success, or
        ``{"error": <message>}`` when the input is empty or the request fails.
    """
    # A bare string would otherwise be iterated character by character.
    peptides = [peptides] if isinstance(peptides, str) else list(peptides)
    hla_alleles = [hla_alleles] if isinstance(hla_alleles, str) else list(hla_alleles)

    # Nothing to predict: report it locally instead of sending an empty request.
    if not peptides:
        return {"error": "No peptides were provided."}
    if not hla_alleles:
        return {"error": "No HLA alleles were provided."}

    # Prepare sequence text in FASTA format (one ">peptideN" record per peptide)
    sequence_text = "".join(f">peptide{i}\n{peptide}\n" for i, peptide in enumerate(peptides))
    hla_alleles_str = ",".join(hla_alleles)

    # Prepare the payload for the POST request
    payload = {
        "method": "netmhciipan-4.1",
        "sequence_text": sequence_text,
        "allele": hla_alleles_str,
        "length": "asis"
    }

    # Make the POST request to IEDB API
    try:
        response = requests.post(IEDB_API_URL_CLASSII, data=payload, timeout=timeout)
        response.raise_for_status()
        return {"results": response.text}
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they are reported here too.
        return {"error": str(e)}

In [29]:
# Example usage for Class II: score two peptides against two alleles
peptides_classII = [
    "DPTIEDSYRKQVVID",
    "IQLIQNHFVDEYDPTIEDSYRKQ",
]
hla_alleles_classII = [
    "HLA-DRB1*01:01",
    "HLA-DQA1*05:01/DQB1*02:01",
]
result_classII = run_netmhciipan_binding_affinity_classII(
    peptides=peptides_classII,
    hla_alleles=hla_alleles_classII,
)
print("Class II Results:", result_classII)

Class II Results: {'results': 'allele\tseq_num\tstart\tend\tlength\tcore_peptide\tpeptide\tic50\trank\nHLA-DQA1*05:01/DQB1*02:01\t2\t1\t23\t23\tYDPTIEDSY\tIQLIQNHFVDEYDPTIEDSYRKQ\t369.97\t3.7\nHLA-DRB1*01:01\t2\t1\t23\t23\tFVDEYDPTI\tIQLIQNHFVDEYDPTIEDSYRKQ\t1370.67\t84.0\nHLA-DQA1*05:01/DQB1*02:01\t1\t1\t15\t15\tIEDSYRKQV\tDPTIEDSYRKQVVID\t10064.76\t86.0\nHLA-DRB1*01:01\t1\t1\t15\t15\tIEDSYRKQV\tDPTIEDSYRKQVVID\t3987.26\t91.0\n'}


In [13]:
import pandas as pd
from io import StringIO

def process_classII_results(results_text):
    """
    Parse the raw Class II prediction text returned by the IEDB API.

    The text is read as tab-separated values, with the first line supplying
    the column names.

    Args:
        results_text (str): The raw results text returned by the API.

    Returns:
        DataFrame: A pandas DataFrame with one row per line of results.
    """
    # pandas reads from file-like objects, so wrap the text in an in-memory buffer
    return pd.read_csv(StringIO(results_text), sep="\t")

# Process the results for Class II
if "results" not in result_classII:
    # The request helper reports failures under the "error" key
    print("Error in Class II Results:", result_classII["error"])
else:
    df_classII_results = process_classII_results(result_classII["results"])
    print("Processed Class II Results:")
    print(df_classII_results)

Processed Class II Results:
                      allele  seq_num  start  end  length core_peptide  \
0  HLA-DQA1*05:01/DQB1*02:01        2      1   23      23    YDPTIEDSY   
1             HLA-DRB1*01:01        2      1   23      23    FVDEYDPTI   
2  HLA-DQA1*05:01/DQB1*02:01        1      1   15      15    IEDSYRKQV   
3             HLA-DRB1*01:01        1      1   15      15    IEDSYRKQV   

                   peptide      ic50  rank  
0  IQLIQNHFVDEYDPTIEDSYRKQ    369.97   3.7  
1  IQLIQNHFVDEYDPTIEDSYRKQ   1370.67  84.0  
2          DPTIEDSYRKQVVID  10064.76  86.0  
3          DPTIEDSYRKQVVID   3987.26  91.0  


In [1]:
import itertools
import requests
import pandas as pd
from io import StringIO
import csv
import os

# IEDB tools API endpoint that the MHC class II prediction requests are POSTed to.
IEDB_API_URL_CLASSII = "http://tools-cluster-interface.iedb.org/tools_api/mhcii/"
# Relative path, so the CSV is created and updated in the current working
# directory (the notebook is assumed to be running from the T-cell directory).
CSV_FILE_CLASSII = "classII_peptides.csv"

# Step 1: Split Protein Sequence into Peptides
def split_protein_sequence_classII(protein_sequence, min_length=13, max_length=25):
    """
    Slide windows of every allowed size over a protein to list Class II peptide candidates.

    Window sizes run from ``min_length`` to ``max_length`` inclusive. For each
    size, every start position that leaves room for a full window is used, so
    consecutive peptides overlap by all but one residue.

    Args:
        protein_sequence (str): The amino acid sequence of the protein.
        min_length (int): The minimum length of each peptide (default is 13).
        max_length (int): The maximum length of each peptide (default is 25).

    Returns:
        list: A list of peptide sequences, ordered by window size and then by
        start position.
    """
    collected = []
    total_residues = len(protein_sequence)
    for window in range(min_length, max_length + 1):
        # Last start index that still leaves room for a full window
        final_start = total_residues - window
        for start in range(final_start + 1):
            collected.append(protein_sequence[start:start + window])
    return collected

# Step 2: Create Initial CSV File
def create_initial_csv_classII(peptides, csv_file=CSV_FILE_CLASSII):
    """
    Write the starting Class II CSV: one row per peptide, result columns left blank.

    The file gets a ``Peptide_Sequence`` column followed by five placeholder
    columns holding empty strings, and is written with every non-numeric
    field quoted. A confirmation message is printed afterwards.

    Args:
        peptides (list): A list of peptide sequences.
        csv_file (str): The CSV file path to create.
    """
    placeholder_columns = (
        "ClassII_TCR_Recognition",
        "ClassII_MHC_Binding_Affinity",
        "ClassII_pMHC_Stability",
        "Best_Binding_Affinity",
        "Best_pMHC_Stability",
    )

    table = pd.DataFrame(peptides, columns=["Peptide_Sequence"])
    # Add the blank result columns in one step, then pin them to the 'string' dtype
    table = table.assign(**{name: "" for name in placeholder_columns})
    table = table.astype({name: "string" for name in placeholder_columns})

    table.to_csv(csv_file, index=False, quoting=csv.QUOTE_NONNUMERIC)
    print(f"Initial CSV file '{csv_file}' created with {len(peptides)} peptides.")

# Step 3: Run NetMHCII Binding Affinity Predictions
def run_netmhciipan_binding_affinity_classII(peptides, hla_alleles, timeout=600):
    """
    Uses IEDB API to generate binding affinity for each peptide and HLA interaction for Class II MHC.

    Args:
        peptides (list): A list of peptide sequences. A single peptide given as
            a plain string is treated as a one-element list.
        hla_alleles (list): A list of HLA alleles for which to make predictions.
            A single allele given as a plain string is treated as a one-element list.
        timeout (float or tuple): Timeout in seconds handed to ``requests.post``
            so that a stalled server cannot block the notebook indefinitely
            (default is 600).

    Returns:
        dict: ``{"results": <raw response text>}`` on success, or
        ``{"error": <message>}`` when the input is empty or the request fails.
    """
    # A bare string would otherwise be iterated character by character.
    peptides = [peptides] if isinstance(peptides, str) else list(peptides)
    hla_alleles = [hla_alleles] if isinstance(hla_alleles, str) else list(hla_alleles)

    # Nothing to predict: report it locally instead of sending an empty request.
    if not peptides:
        return {"error": "No peptides were provided."}
    if not hla_alleles:
        return {"error": "No HLA alleles were provided."}

    # Prepare sequence text in FASTA format (one ">peptideN" record per peptide)
    sequence_text = "".join(f">peptide{i}\n{peptide}\n" for i, peptide in enumerate(peptides))
    hla_alleles_str = ",".join(hla_alleles)

    # Prepare the payload for the POST request
    payload = {
        "method": "netmhciipan-4.1",
        "sequence_text": sequence_text,
        "allele": hla_alleles_str,
        "length": "asis"
    }

    # Make the POST request to IEDB API
    try:
        response = requests.post(IEDB_API_URL_CLASSII, data=payload, timeout=timeout)
        response.raise_for_status()
        return {"results": response.text}
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they are reported here too.
        return {"error": str(e)}

# Step 4: Process Class II Results
def process_classII_results(results_text):
    """
    Turn the tab-separated Class II results text from the IEDB API into a table.

    Args:
        results_text (str): The raw results text returned by the API.

    Returns:
        DataFrame: A pandas DataFrame whose columns come from the header line
        of the text and whose rows are the remaining lines.
    """
    # Expose the text through an in-memory buffer that pandas can read like a file
    with StringIO(results_text) as buffer:
        parsed = pd.read_csv(buffer, sep="\t")
    return parsed

# Step 5: Update CSV with Class II Results
def _append_unique_entry(existing, entry):
    """Add `entry` to a '|'-separated cell value unless that exact entry is already present."""
    if pd.isna(existing) or existing == "":
        return entry
    if entry in existing.split('|'):
        return existing
    return f"{existing}|{entry}"


def _ic50_from_entry(entry):
    """Extract the numeric IC50 from an 'allele=<value> nM' entry."""
    return float(entry.split('=', 1)[1].replace(' nM', ''))


def _best_affinity_entry(cell):
    """Return the entry with the lowest IC50 in a '|'-separated cell, or None if none is usable."""
    best_entry, best_value = None, None
    for entry in cell.split('|'):
        value = _ic50_from_entry(entry)
        if pd.isna(value):
            # "allele=nan nM" entries cannot be ranked
            continue
        # Strict '<' keeps the first entry on ties, like min() does
        if best_value is None or value < best_value:
            best_entry, best_value = entry, value
    return best_entry


def update_csv_with_classII_results(df, csv_file=CSV_FILE_CLASSII):
    """
    Updates the CSV file with Class II MHC binding results.

    For every result row whose peptide is listed in the CSV, the
    "allele=<ic50> nM" entry is added to ``ClassII_MHC_Binding_Affinity``
    (entries are '|'-separated) and ``Best_Binding_Affinity`` is set to the
    entry with the lowest IC50. If the results carry a ``pMHC_stability``
    value, "allele=<value> hrs" is added to ``ClassII_pMHC_Stability`` and
    ``Best_pMHC_Stability`` is set to the stability entry of the
    best-affinity allele. The file is rewritten in place and a confirmation
    message is printed.

    Missing (NaN) IC50 or stability values are skipped rather than written as
    "nan", and an entry that is already present in a cell is not added again,
    so repeated result rows or repeated runs do not duplicate entries.

    Args:
        df (DataFrame): A pandas DataFrame with processed results. Must have
            'peptide', 'allele' and 'ic50' columns; 'pMHC_stability' is optional.
        csv_file (str): The CSV file path to update.

    Raises:
        FileNotFoundError: If ``csv_file`` does not exist.
    """
    if not os.path.exists(csv_file):
        raise FileNotFoundError(f"The CSV file {csv_file} does not exist. Please create it first.")

    # Read existing CSV with all columns as strings
    existing_df = pd.read_csv(csv_file, dtype=str)

    # Map each peptide to the first CSV row that holds it, so that every result
    # row is resolved with one dictionary lookup instead of a column scan.
    row_for_peptide = {}
    for row_label, sequence in existing_df["Peptide_Sequence"].items():
        if not pd.isna(sequence):
            row_for_peptide.setdefault(sequence, row_label)

    # Iterate through each row in the new results and update the CSV
    for _, row in df.iterrows():
        peptide = row['peptide']
        if pd.isna(peptide) or peptide not in row_for_peptide:
            continue
        idx = row_for_peptide[peptide]

        allele = row['allele']
        ic50 = row['ic50']
        pMHC_stability = row.get('pMHC_stability', "")
        # NaN is truthy, so it has to be excluded explicitly
        has_stability = (not pd.isna(pMHC_stability)) and bool(pMHC_stability)

        # Update the ClassII_MHC_Binding_Affinity
        if not pd.isna(ic50):
            existing_df.at[idx, "ClassII_MHC_Binding_Affinity"] = _append_unique_entry(
                existing_df.at[idx, "ClassII_MHC_Binding_Affinity"],
                f"{allele}={ic50} nM",
            )

        # Update the ClassII_pMHC_Stability
        if has_stability:
            existing_df.at[idx, "ClassII_pMHC_Stability"] = _append_unique_entry(
                existing_df.at[idx, "ClassII_pMHC_Stability"],
                f"{allele}={pMHC_stability} hrs",
            )

        # Update the Best_Binding_Affinity
        affinity_cell = existing_df.at[idx, "ClassII_MHC_Binding_Affinity"]
        if pd.isna(affinity_cell) or affinity_cell == "":
            continue
        best_affinity = _best_affinity_entry(affinity_cell)
        if best_affinity is None:
            continue
        existing_df.at[idx, "Best_Binding_Affinity"] = best_affinity

        # Update the Best_pMHC_Stability (assuming the best stability is linked to the best affinity)
        if has_stability:
            best_allele = best_affinity.split('=', 1)[0]
            for stability_entry in existing_df.at[idx, "ClassII_pMHC_Stability"].split('|'):
                # Compare whole allele names; a prefix test could match a longer allele name
                if stability_entry.split('=', 1)[0] == best_allele:
                    existing_df.at[idx, "Best_pMHC_Stability"] = stability_entry
                    break

    # Save the updated DataFrame back to CSV
    existing_df.to_csv(csv_file, index=False, quoting=csv.QUOTE_NONNUMERIC)
    print(f"CSV file '{csv_file}' has been updated with Class II binding affinity results.")

# Main function for execution
if __name__ == "__main__":
    # Example usage
    protein_sequence = "MTEYKLVVVGAGGVGKSALT"

    # Step 1: Split the protein sequence into peptides for Class II
    classII_peptides = split_protein_sequence_classII(protein_sequence)
    print(f"Generated {len(classII_peptides)} Class II peptides.")

    # Step 2: Create initial CSV file for Class II
    create_initial_csv_classII(classII_peptides)

    # Step 3: Define HLA alleles for Class II
    hla_alleles_classII = ["HLA-DRB1*01:01", "HLA-DQA1*05:01/DQB1*02:01"]

    # Step 4: Run NetMHCIIpan binding affinity predictions for Class II
    result_classII = run_netmhciipan_binding_affinity_classII(classII_peptides, hla_alleles_classII)
    if "results" in result_classII:
        print("Successfully retrieved Class II binding affinity results.")
        # Step 5: Process the results
        df_classII_results = process_classII_results(result_classII["results"])
        print("Processed Class II Results:")
        print(df_classII_results.head())

        # Step 6: Update the CSV with results.
        # update_csv_with_classII_results prints its own confirmation message,
        # so it is not repeated here.
        update_csv_with_classII_results(df_classII_results)
    else:
        print("Error in Class II Results:", result_classII["error"])


Generated 36 Class II peptides.
Initial CSV file 'classII_peptides.csv' created with 36 peptides.


KeyboardInterrupt: 