## Install Conda

In [1]:
# !pip install -q condacolab
# import condacolab
# condacolab.install()

In [2]:
# !conda -V

## Import input data

In [3]:
import pandas as pd

dataset = pd.read_csv("../data/example/Sequence_Info.csv")  ## read your csv file

In [4]:
dataset

Unnamed: 0,Name,VH,VL
0,abituzumab,QVQLQQSGGELAKPGASVKVSCKASGYTFSSFWMHWVRQAPGQGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKAPKL...
1,abrilumab,QVQLVQSGAEVKKPGASVKVSCKVSGYTLSDLSIHWVRQAPGKGLE...,DIQMTQSPSSVSASVGDRVTITCRASQGISSWLAWYQQKPGKAPKL...
2,adalimumab,EVQLVESGGGLVQPGRSLRLSCAASGFTFDDYAMHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGIRNYLAWYQQKPGKAPKL...
3,alemtuzumab,QVQLQESGPGLVRPSQTLSLTCTVSGFTFTDFYMNWVRQPPGRGLE...,DIQMTQSPSSLSASVGDRVTITCKASQNIDKYLNWYQQKPGKAPKL...
4,alirocumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFNNYAMNWVRQAPGKGLD...,DIVMTQSPDSLAVSLGERATINCKSSQSVLYRSNNRNFLGWYQQKP...
...,...,...,...
132,vedolizumab,QVQLVQSGAEVKKPGASVKVSCKGSGYTFTSYWMHWVRQAPGQRLE...,DVVMTQSPLSLPVTPGEPASISCRSSQSLAKSYGNTYLSWYLQKPG...
133,veltuzumab,QVQLQQSGAEVKKPGSSVKVSCKASGYTFTSYNMHWVKQAPGQGLE...,DIQLTQSPSSLSASVGDRVTMTCRASSSVSYIHWFQQKPGKAPKPW...
134,visilizumab,QVQLVQSGAEVKKPGASVKVSCKASGYTFISYTMHWVRQAPGQGLE...,DIQMTQSPSSLSASVGDRVTITCSASSSVSYMNWYQQKPGKAPKRL...
135,zalutumumab,QVQLVESGGGVVQPGRSLRLSCAASGFTFSTYGMHWVRQAPGKGLE...,AIQLTQSPSSLSASVGDRVTITCRASQDISSALVWYQQKPGKAPKL...


## Install ANARCI for Sequence alignment

https://github.com/oxpig/ANARCI

In [5]:
# !conda install -c bioconda anarci

### Generate a FASTA file

In [6]:
# Pull the three input columns out as plain Python lists; the next cell
# indexes them position-by-position to build the FASTA records.
name = dataset["Name"].to_list()
Heavy_seq = dataset["VH"].to_list()
Light_seq = dataset["VL"].to_list()

In [7]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

def _write_fasta(path, names, sequences):
    """Write one FASTA record per (name, sequence) pair to ``path``.

    Parameters
    ----------
    path : str
        Destination file; it is created or overwritten.
    names : sequence of str
        Record identifiers, used verbatim as the FASTA header.
    sequences : sequence of str
        Residue strings, paired with ``names`` by position.
    """
    # name/description are blanked so the header line is only the id.
    records = (
        SeqRecord(Seq(sequence), id=seq_name, name="", description="")
        for seq_name, sequence in zip(names, sequences)
    )
    with open(path, "w") as output_handle:
        SeqIO.write(records, output_handle, "fasta")


# Heavy and light chains go to separate files because ANARCI is run once per
# chain type in the cells below.
_write_fasta("../data/example/seq_H.fasta", name, Heavy_seq)
_write_fasta("../data/example/seq_L.fasta", name, Light_seq)

### Sequence alignment using ANARCI
Run it for the heavy and light chains separately.

In [8]:
!ANARCI -i ../data/example/seq_H.fasta -o ../data/example/seq_aligned -s imgt -r heavy --csv

  ANARCI                                                 \\\    //


In [9]:
!ANARCI -i ../data/example/seq_L.fasta -o ../data/example/seq_aligned -s imgt -r light --csv

  ANARCI                                                 \\\    //


## Combine aligned heavy chain and light chain sequences

In [10]:
# Load the two per-chain CSV files read back from the ANARCI runs above
# (heavy -> *_H.csv, light -> *_KL.csv).
# NOTE(review): H_aligned / L_aligned are not referenced again in the visible
# cells; seq_preprocessing() below re-reads the same two files itself.
H_aligned = pd.read_csv("../data/example/seq_aligned_H.csv")
L_aligned = pd.read_csv("../data/example/seq_aligned_KL.csv")

In [11]:
# https://github.com/Lailabcode/DeepSCM/blob/main/deepscm-master/seq_preprocessing.py


def _imgt_positions():
    """Return the (heavy, light) column labels kept in the fixed-width output.

    The order of each list is the order of the characters in the output
    string, so a label's list index is its slot in the encoded sequence.
    """
    heavy = (
        [str(n) for n in range(1, 112)]  # "1" .. "111"
        + ["111" + c for c in "ABCDEFGH"]  # insertions after 111, ascending
        + ["112" + c for c in "IHGFEDCBA"]  # insertions before 112, descending
        + [str(n) for n in range(112, 129)]  # "112" .. "128"
    )
    # Light chain keeps "1" .. "127" only; a "128" column is not exported.
    light = [str(n) for n in range(1, 128)]
    return heavy, light


def _fixed_width_rows(frame, positions):
    """Return one list of residues per row of ``frame``, ordered as ``positions``.

    Columns of ``frame`` that are not in ``positions`` are ignored; positions
    that have no column in ``frame`` are filled with the gap character "-".
    """
    grid = frame.reindex(columns=positions, fill_value="-")
    return grid.to_numpy().tolist()


def seq_preprocessing(
    heavy_csv="../data/example/seq_aligned_H.csv",
    light_csv="../data/example/seq_aligned_KL.csv",
    out_path="../data/example/seq_aligned_HL.txt",
):
    """Merge aligned heavy/light chain tables into fixed-width sequences.

    Reads the two per-chain CSV tables, lays every row out on a fixed grid of
    145 heavy-chain slots followed by 127 light-chain slots (gaps are "-"),
    and writes one ``"<name> <272-character sequence>"`` line per antibody.

    Parameters
    ----------
    heavy_csv, light_csv : str
        Paths of the heavy and light chain tables. The heavy table must have
        an ``Id`` column, and its first column is used as the antibody name.
    out_path : str
        Path of the text file to write (overwritten if present).

    Raises
    ------
    ValueError
        If the two tables have a different number of rows. Rows are paired by
        position, so a count mismatch would pair chains of different antibodies.

    Notes
    -----
    If both tables have an ``Id`` column and the values differ row by row, a
    ``UserWarning`` is emitted because the pairing is probably wrong; the file
    is still written using the heavy-chain names.
    """
    import warnings

    infile_H = pd.read_csv(heavy_csv)
    infile_L = pd.read_csv(light_csv)

    heavy_ids = list(infile_H["Id"])
    if len(infile_L) != len(heavy_ids):
        raise ValueError(
            "heavy and light chain tables have different row counts "
            "(%d vs %d); rows are paired by position" % (len(heavy_ids), len(infile_L))
        )
    if "Id" in infile_L.columns and list(infile_L["Id"]) != heavy_ids:
        warnings.warn(
            "heavy and light chain tables list different Ids row by row; "
            "chains are paired by row position and may be mismatched"
        )

    heavy_positions, light_positions = _imgt_positions()
    heavy_rows = _fixed_width_rows(infile_H, heavy_positions)
    light_rows = _fixed_width_rows(infile_L, light_positions)
    names = infile_H.iloc[:, 0].tolist()

    # The context manager closes the file even if a row fails to serialize.
    with open(out_path, "w") as outfile:
        for mab_name, heavy, light in zip(names, heavy_rows, light_rows):
            outfile.write(mab_name + " " + "".join(heavy + light))
            outfile.write("\n")

    return

In [12]:
seq_preprocessing()

## DeepSP

In [13]:
from keras.models import model_from_json
import numpy as np

2025-08-21 13:33:16.478684: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 13:33:16.497164: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
def load_pred_data(filename):
    """Read names and sequences from a whitespace-separated text file.

    Each non-blank line is split on whitespace; the first field is taken as
    the name and the second as the sequence. Any further fields are ignored.
    Blank or whitespace-only lines (such as a trailing newline at the end of
    the file) are skipped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(name_list, seq_list)``, in file order.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two fields.
    """
    name_list = []
    seq_list = []
    with open(filename) as datafile:
        for line in datafile:
            fields = line.split()
            if not fields:
                continue
            name_list.append(fields[0])
            seq_list.append(fields[1])
    return name_list, seq_list


def one_hot_encoder(s):
    """One-hot encode a residue string over a 21-symbol alphabet.

    The alphabet is the 20 amino-acid letters in alphabetical order followed
    by the gap character "-". The result has one row per alphabet symbol and
    one column per character of ``s``; entry ``[r, c]`` is 1 when character
    ``c`` of ``s`` is alphabet symbol ``r`` and 0 otherwise.

    A character outside the alphabet raises ``KeyError``.
    """
    alphabet = "ACDEFGHIKLMNPQRSTVWY-"
    row_of = {symbol: row for row, symbol in enumerate(alphabet)}

    length = len(s)
    rows = [row_of[symbol] for symbol in s]

    encoded = np.zeros((len(row_of), length))
    encoded[rows, range(length)] = 1
    return encoded

In [15]:
# Import the required function and class from tensorflow.keras.
from tensorflow.keras.models import model_from_json, Sequential

name_list, seq_list = load_pred_data("../data/example/seq_aligned_HL.txt")

# The encoded input does not depend on the property being predicted, so it is
# built once. Each one_hot_encoder result is (alphabet, length); the transpose
# swaps those two axes for every sample.
X = np.transpose(np.asarray([one_hot_encoder(s=x) for x in seq_list]), (0, 2, 1))

for prop in ["SAPpos", "SCMpos", "SCMneg"]:
    # NOTE(review): the JSON file name has no underscore before the property
    # while the weights file name does. Left as is because the recorded run of
    # this cell completed for all three properties - confirm against ../models.
    with open("../models/Conv1D_regression" + prop + ".json", "r") as json_file:
        loaded_model_json = json_file.read()

    # Modified part: pass custom_objects so the 'Sequential' class is
    # specified explicitly when the architecture is deserialized.
    loaded_model = model_from_json(
        loaded_model_json, custom_objects={"Sequential": Sequential}
    )

    # load weights into model
    loaded_model.load_weights("../models/Conv1D_regression_" + prop + ".h5")

    loaded_model.compile(optimizer="adam", loss="mae", metrics=["mae"])
    y_pred = loaded_model.predict(X)

    # One space-separated line per antibody: name, then each predicted value
    # with two decimals. The next cell reads these files with delimiter=" ".
    with open("../data/example/pnas_deep" + prop + ".csv", "w") as outfile:
        for i in range(len(y_pred)):
            outfile.write(
                "%s %s" % (name_list[i], " ".join(["%.2f" % num for num in y_pred[i]]))
            )
            outfile.write("\n")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [16]:
def _read_deepsp_scores(prop, prefix):
    """Load one space-delimited prediction file and label its columns.

    Parameters
    ----------
    prop : str
        Property tag used in the file name, e.g. ``"SAPpos"``.
    prefix : str
        Column-name prefix, e.g. ``"SAP_pos"``.

    Returns
    -------
    pandas.DataFrame
        A ``Name`` column followed by ten ``<prefix>_<region>`` columns.
    """
    regions = [
        "CDRH1",
        "CDRH2",
        "CDRH3",
        "CDRL1",
        "CDRL2",
        "CDRL3",
        "CDR",
        "Hv",
        "Lv",
        "Fv",
    ]
    scores = pd.read_csv(
        "../data/example/pnas_deep" + prop + ".csv", delimiter=" ", header=None
    )
    # Assigning .columns (rather than names=) fails loudly if the file does
    # not have exactly 11 columns.
    scores.columns = ["Name"] + [prefix + "_" + region for region in regions]
    return scores


df_pnas_SAPpos = _read_deepsp_scores("SAPpos", "SAP_pos")
df_pnas_SCMpos = _read_deepsp_scores("SCMpos", "SCM_pos")
df_pnas_SCMneg = _read_deepsp_scores("SCMneg", "SCM_neg")

# Column order of the result is SAP_pos, then SCM_neg, then SCM_pos.
merged_df = df_pnas_SAPpos.merge(df_pnas_SCMneg, on="Name", how="inner")
merged_df = merged_df.merge(df_pnas_SCMpos, on="Name", how="inner")

# Keep only the antibodies whose name appears in the input dataset.
common_antibodies = dataset["Name"].tolist()
df_pnas_DL = merged_df[merged_df["Name"].isin(common_antibodies)]

df_pnas_DL.to_csv("../data/example/SAPSCM.csv", index=False)
df_pnas_DL

Unnamed: 0,Name,SAP_pos_CDRH1,SAP_pos_CDRH2,SAP_pos_CDRH3,SAP_pos_CDRL1,SAP_pos_CDRL2,SAP_pos_CDRL3,SAP_pos_CDR,SAP_pos_Hv,SAP_pos_Lv,...,SCM_pos_CDRH1,SCM_pos_CDRH2,SCM_pos_CDRH3,SCM_pos_CDRL1,SCM_pos_CDRL2,SCM_pos_CDRL3,SCM_pos_CDR,SCM_pos_Hv,SCM_pos_Lv,SCM_pos_Fv
0,abituzumab,3.86,4.35,8.72,2.15,4.70,6.79,30.68,45.53,36.43,...,33.36,50.21,58.55,37.38,99.53,19.15,301.27,877.35,1176.29,2044.64
1,abrilumab,3.42,1.08,1.83,2.09,2.40,6.01,17.93,43.01,31.23,...,3.42,-1.28,7.43,36.73,8.36,19.31,67.66,919.62,941.50,1849.14
2,adalimumab,2.13,2.52,14.45,1.90,3.59,3.17,27.50,58.42,30.52,...,3.18,19.58,29.51,116.77,41.76,55.55,263.84,907.11,1219.44,2109.09
3,alemtuzumab,2.27,3.70,5.04,2.22,2.76,3.33,20.66,52.66,32.29,...,31.13,109.97,113.78,59.37,59.35,156.55,539.40,1461.24,1302.77,2748.46
4,alirocumab,2.34,0.49,5.75,6.34,2.48,4.47,23.01,52.35,43.46,...,87.39,30.73,22.05,163.36,64.86,19.05,393.55,1270.47,966.63,2228.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,vedolizumab,2.81,0.27,11.76,5.93,2.96,2.82,27.31,51.97,51.45,...,30.37,-3.82,4.03,147.94,50.61,64.96,289.12,982.95,1155.90,2126.85
133,veltuzumab,2.51,4.15,14.46,3.16,4.07,2.54,31.23,43.67,37.64,...,34.90,0.02,39.57,32.31,35.55,30.64,166.97,1078.54,1059.89,2115.08
134,visilizumab,6.16,3.60,15.23,2.19,3.86,2.65,34.40,57.11,32.81,...,100.69,106.18,75.53,-2.44,69.55,18.26,372.93,1269.88,1078.14,2312.87
135,zalutumumab,1.99,5.14,18.61,0.97,2.46,5.58,34.69,63.19,32.75,...,29.38,5.33,43.21,1.04,2.67,29.45,108.28,1160.79,941.98,2081.37
