#### Setting up the environment

In [1]:
import torch, os, gc
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import confusion_matrix, matthews_corrcoef, accuracy_score,\
f1_score, precision_score, recall_score, roc_auc_score, average_precision_score

In [2]:
# Evaluation options: embedding variants and held-out test-set identifiers
embed_ver = [
    "clstm",
    "esm2",
    "bert",
    "t5",
]
test_ver = [
    "C018",
    "C039",
    "C035",
    "D011",
    "C048",
]

# Locations of the test data, the trained classifier weights and the results
data_path = "../data/test_exam/"
model_path = "../models/cls/"
result_path = "../results/"
# Per-test-set predictions are written below this folder, so create it up front
os.makedirs(f"{result_path}prd-indiv/", exist_ok=True)

# Identifier and label columns; every other column is treated as a feature
col_str = ["file_id", "organism", "locus_tag", "ess"]

# Number of samples per inference batch
batch_size = 256

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Define function to record performance result
def record_perform(emb_ver, file_id, organ, y_real, y_conf, y_prd):
    """Build a one-row DataFrame of binary-classification metrics.

    Args:
        emb_ver: Name of the embedding variant, stored in the "emb" column.
        file_id: Test-set identifier, stored in the "file" column.
        organ: Organism label, stored in the "organism" column.
        y_real: Array-like of true binary labels (0/1).
        y_conf: Array-like of predicted scores used for the AUC metrics.
        y_prd: Array-like of predicted binary classes (0/1).

    Returns:
        pandas.DataFrame with a single row holding the confusion-matrix
        counts (tp, fp, tn, fn), mcc, acc, f1, prc, rec, npv, tnr, auc-roc
        and auc-pr. The two AUC columns are NaN when they are not computed.
    """
    # Accept lists as well as arrays; the `1 - y` inversions below need ndarrays
    y_real = np.asarray(y_real)
    y_conf = np.asarray(y_conf)
    y_prd = np.asarray(y_prd)

    # Ranking metrics are undefined when the labels contain a single class.
    # The historical skip for test set "O046" is kept as-is.
    has_both_classes = np.unique(y_real).size > 1
    if file_id != "O046" and has_both_classes:
        auc_roc = [roc_auc_score(y_real, y_conf)]
        auc_pr = [average_precision_score(y_real, y_conf)]
    else:
        auc_roc = [np.nan]
        auc_pr = [np.nan]

    # Fix the label set so the matrix is always 2x2, even when only one class
    # occurs in both y_real and y_prd (otherwise the 4-way unpack fails).
    tn, fp, fn, tp = confusion_matrix(y_real, y_prd, labels=[0, 1]).ravel()

    # Inverted labels turn precision/recall into NPV / true-negative rate
    neg_real = 1 - y_real
    neg_prd = 1 - y_prd

    result = pd.DataFrame({
        "emb": [emb_ver],
        "file": [file_id],
        "organism": [organ],
        "tp": [tp],
        "fp": [fp],
        "tn": [tn],
        "fn": [fn],
        "mcc": [matthews_corrcoef(y_real, y_prd)],
        "acc": [accuracy_score(y_real, y_prd)],
        "f1": [f1_score(y_real, y_prd)],
        "prc": [precision_score(y_real, y_prd)],
        "rec": [recall_score(y_real, y_prd)],
        "npv": [precision_score(neg_real, neg_prd)],
        "tnr": [recall_score(neg_real, neg_prd)],
        "auc-roc": auc_roc,
        "auc-pr": auc_pr
    })

    return result


In [5]:
# Set model architecture
class Classifier(nn.Module):
    """Feed-forward binary classifier producing one logit per sample.

    Layout: BatchNorm1d -> Dropout(0.5) -> ``num_layers`` x (Linear -> GELU)
    -> Linear(., 1). The hidden width starts from 1024 and is integer-divided
    by ``unit_decrease`` before each hidden layer, never dropping below 2.

    Args:
        input_size: Number of input features per sample.
        num_layers: Number of hidden Linear+GELU pairs. With 0 the head is a
            single Linear(input_size, 1) after normalisation and dropout.
        unit_decrease: Integer divisor applied to the hidden width at every
            hidden layer.
    """

    def __init__(self, input_size, num_layers, unit_decrease):
        super(Classifier, self).__init__()
        layers = [nn.BatchNorm1d(input_size), nn.Dropout(0.5)]
        in_dim = input_size
        out_dim = 1024
        for i in range(num_layers):
            out_dim = max(2, out_dim // unit_decrease)
            layers.append(nn.Linear(in_dim, out_dim))
            self.initialize_weights(layers[-1])
            layers.append(nn.GELU())
            in_dim = out_dim
        # Use in_dim (not out_dim) so the head matches the real incoming width:
        # identical when num_layers >= 1, and correct when num_layers == 0,
        # where out_dim would still be the untouched 1024 seed value.
        layers.append(nn.Linear(in_dim, 1))
        self.cls_block = nn.Sequential(*layers)

    def initialize_weights(self, layer):
        """Kaiming-normal init (fan_in, linear gain) for weights; zero the bias."""
        nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='linear')
        if layer.bias is not None:
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the output of the final linear layer (no activation applied)."""
        return self.cls_block(x)

#### Evaluate model

In [6]:
# One-row metric frames are collected here and concatenated once at the end,
# instead of re-copying a growing DataFrame on every iteration.
eval_rows = []

for ver in embed_ver:
    print(f"\n>>>> {ver} <<<<")

    # Labels / confidences / predicted classes pooled over all test sets
    total_label = []
    total_conf = []
    total_cls = []
    model = None  # defined up front so the cleanup after the loop is always valid
    for ts_ver in test_ver:
        # load dataset
        data = pd.read_csv(data_path + f"data-emb_{ver}-{ts_ver}_ts.csv")
        display("Raw data:", data)

        #### Preprocess for test dataset ####
        # every column that is not an identifier/label column is a feature
        col_num = [col for col in data.columns if col not in col_str]

        # split info. & inputs & labels of the test dataset
        info_ts = data[col_str]
        X_ts = torch.tensor(data[col_num].astype('float32').values)
        y_ts = torch.tensor(data['ess'].astype('float32').values)
        print(f"Splited test dataset({ts_ver}):", X_ts.shape, y_ts.shape)

        # generate dataloader by the test dataset (order kept: shuffle=False,
        # so predictions line up row-by-row with info_ts)
        dataset_ts = TensorDataset(X_ts, y_ts)
        test_loader = DataLoader(dataset_ts, batch_size=batch_size, shuffle=False)

        ## Test model ##
        # generate model instance
        model = Classifier(
            input_size=X_ts.shape[-1],
            num_layers=3,
            unit_decrease=2
        ).to(device)

        # load model weight
        # NOTE(review): torch.load unpickles the checkpoint; only load trusted
        # files (consider weights_only=True if the installed torch supports it).
        model.load_state_dict(torch.load(f"{model_path}{ver}-{ts_ver}.pt", map_location=device))
        model.eval()

        ## model evaluations by test dataset ##
        results = []
        with torch.no_grad():
            # labels are not needed for inference, so only the inputs are moved
            for X_batch, _ in test_loader:
                X_batch = X_batch.to(device)
                # prediction: one logit per sample, flattened and moved to CPU
                preds = model(X_batch).view(-1).cpu()
                # gather the predictions
                results.append(preds)

        results = torch.cat(results, dim=0)

        # convert logits to confidences & classes
        prd_conf = torch.sigmoid(results)
        prd_cls = (prd_conf >= 0.5).int().tolist()

        # save the model prediction result
        df_pred = pd.concat([info_ts, pd.DataFrame({"conf": prd_conf.tolist()})], axis=1)
        df_pred.to_csv(f"{result_path}prd-indiv/{ver}-{ts_ver}.csv", index=False)

        # gather the results
        total_label.extend(df_pred['ess'].tolist())
        total_conf.extend(df_pred['conf'].tolist())
        total_cls.extend(prd_cls)

        # get evaluation row by testset
        eval_rows.append(record_perform(
            emb_ver=ver,
            file_id=ts_ver,
            organ=ts_ver,
            y_real=df_pred['ess'].to_numpy(),
            y_conf=df_pred['conf'].to_numpy(),
            y_prd=np.array(prd_cls)
        ))
        print(f"- Test in {ts_ver} was done.")

    # release the last model before moving on to the next embedding
    del model
    gc.collect()
    torch.cuda.empty_cache()

    # get the pooled row: metrics over all test sets of this embedding together
    eval_rows.append(record_perform(
        emb_ver=ver,
        file_id="total",
        organ="all",
        y_real=np.array(total_label),
        y_conf=np.array(total_conf),
        y_prd=np.array(total_cls)
    ))
    print(f"- Test in total testset was done.")

# save the model performance result
df_eval = pd.concat(eval_rows, ignore_index=True) if eval_rows else pd.DataFrame()
df_eval.to_csv(f"{result_path}eval-indiv_cls-strain.csv", index=False)
display("Model performance:", df_eval)


>>>> clstm <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,-1.141337,0.000117,0.074418,-0.000446,-0.002198,-0.003431,...,-0.000123,0.004792,0.000471,0.126904,0.006709,0.013690,-0.021874,-0.049524,0.108785,-0.003517
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,0.137700,0.000258,0.067561,-0.000478,-0.000678,-0.003556,...,-0.000045,0.001738,0.002048,0.126786,0.007048,0.008776,0.114241,-0.035615,0.105729,0.001858
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,0.128075,0.000037,0.062052,-0.004017,-0.002748,-0.000468,...,-0.000051,0.001672,0.002281,0.126932,0.007019,0.007990,0.115545,-0.033727,0.102622,0.002154
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,0.487113,0.000235,0.064557,-0.001244,-0.000863,-0.002970,...,-0.000038,0.001291,0.000991,0.127128,0.007119,0.008230,0.117134,-0.032810,0.105667,0.001789
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,-0.501022,0.000158,0.070695,-0.000914,-0.001795,-0.003404,...,-0.000019,-0.000853,0.003466,0.126817,0.007367,0.006851,0.123346,-0.026789,0.104581,0.004456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,-0.824822,0.000154,0.070713,-0.000348,-0.002115,-0.004313,...,-0.000115,0.004844,0.001291,0.126999,0.006747,0.013313,-0.021632,-0.051901,0.107055,-0.004071
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,-1.325105,0.000108,0.077958,-0.000324,-0.002058,-0.002888,...,-0.000106,0.004112,0.002155,0.127117,0.006835,0.011485,0.009256,-0.044031,0.106437,-0.002243
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,-1.137151,0.000133,0.076093,-0.000250,-0.001809,-0.003093,...,-0.000059,0.002871,0.002066,0.127147,0.006998,0.009537,0.041613,-0.044984,0.104478,-0.000265
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,-1.110526,0.000127,0.074501,-0.000332,-0.002103,-0.003567,...,-0.000113,0.004755,0.000375,0.126987,0.006722,0.013741,-0.018794,-0.051945,0.108058,-0.003450


Splited test dataset(C018): torch.Size([4313, 1024]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,-0.002527,-0.020595,-0.114287,-0.012062,-0.011953,0.023055,...,0.140998,0.004529,0.072357,-0.057453,0.000957,-0.044550,-0.051208,-0.002760,0.000008,-0.001140
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,-0.002527,-0.020554,-0.114374,-0.012075,-0.011924,0.023065,...,0.487758,0.003055,0.072045,-0.052898,0.000920,-0.044554,-0.052154,-0.001264,0.000007,-0.001183
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,-0.002527,-0.020554,-0.114465,-0.012152,-0.011984,0.023117,...,-0.031859,0.005805,0.072278,-1.221916,0.000928,-0.044602,-0.053711,-0.002189,0.000010,-0.001133
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,-0.002528,-0.020734,-0.114397,-0.012141,-0.011958,0.023186,...,-0.776504,0.003083,0.072298,0.306680,0.000932,-0.044488,-0.048716,-0.002996,0.000017,-0.001143
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,-0.002527,-0.020757,-0.114371,-0.012447,-0.012254,0.023402,...,0.280329,0.006728,0.072264,-0.543162,0.000934,-0.044501,-0.054391,-0.003492,0.000020,-0.001105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,-0.002527,-0.020618,-0.114357,-0.012243,-0.012142,0.023330,...,0.798282,0.004630,0.072520,-0.623419,0.000934,-0.044367,-0.052976,-0.002959,0.000012,-0.001141
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,-0.002527,-0.020534,-0.114529,-0.012125,-0.011920,0.023014,...,0.419322,0.006513,0.072046,-0.259117,0.000912,-0.044391,-0.053267,-0.002436,0.000023,-0.001116
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,-0.002527,-0.020357,-0.114468,-0.011936,-0.011668,0.022776,...,0.036780,0.006474,0.072424,-0.329366,0.000920,-0.044700,-0.049474,-0.001785,0.000005,-0.001140
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,-0.002527,-0.020267,-0.114498,-0.011684,-0.011469,0.022462,...,0.520540,0.001117,0.072138,-2.681033,0.000922,-0.044539,-0.053330,-0.001552,0.000003,-0.001191


Splited test dataset(C039): torch.Size([5570, 1024]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C035,Caulobacter crescentus,CCNA_00001,0,0.241180,0.005716,-0.000299,0.015325,0.032135,-0.059960,...,-0.123304,-0.005714,0.000668,0.108539,0.051970,-0.025484,-0.014294,-1.737134,-0.000604,0.015505
1,C035,Caulobacter crescentus,CCNA_00002,0,-0.045185,0.005539,-0.000327,0.015818,0.033548,-0.059887,...,-0.123312,-0.005727,0.001803,0.110109,0.049759,-0.013092,-0.013027,-1.696523,-0.000625,0.014594
2,C035,Caulobacter crescentus,CCNA_00003,1,-0.213073,0.005356,-0.000336,0.016139,0.035146,-0.059827,...,-0.123311,-0.005694,0.000513,0.108760,0.050975,-0.027863,-0.013663,-1.785601,-0.000626,0.015282
3,C035,Caulobacter crescentus,CCNA_00004,1,-0.228296,0.005360,-0.000336,0.016138,0.034612,-0.059811,...,-0.123323,-0.005671,0.002080,0.110030,0.050929,-0.025066,-0.011885,-0.174281,-0.000633,0.015716
4,C035,Caulobacter crescentus,CCNA_00005,1,-0.120089,0.005433,-0.000328,0.015988,0.034627,-0.059877,...,-0.123301,-0.005543,0.002935,0.106811,0.051716,-0.050719,-0.015525,-0.674083,-0.000587,0.016613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,0.033178,0.005891,-0.000330,0.015652,0.033942,-0.059709,...,-0.123307,-0.005970,0.007850,0.108237,0.044217,0.046716,-0.012149,-1.540633,-0.000716,0.006626
3882,C035,Caulobacter crescentus,CCNA_03996,0,0.053660,0.005893,-0.000331,0.015553,0.032510,-0.059701,...,-0.123302,-0.006032,0.008613,0.108982,0.043965,0.059067,-0.010368,-0.265818,-0.000754,0.004910
3883,C035,Caulobacter crescentus,CCNA_03997,0,-0.415582,0.005252,-0.000353,0.016508,0.038276,-0.059795,...,-0.123278,-0.005532,-0.000903,0.110412,0.052775,-0.047691,-0.014426,-0.298670,-0.000603,0.016633
3884,C035,Caulobacter crescentus,CCNA_03998,0,-0.075480,0.005484,-0.000327,0.015941,0.035006,-0.059903,...,-0.123292,-0.005619,0.000638,0.109217,0.052073,-0.040741,-0.014404,-1.296945,-0.000569,0.016881


Splited test dataset(C035): torch.Size([3886, 1024]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00001,1,0.005120,0.068713,0.663236,-0.060797,-0.032449,0.014202,...,0.012313,-0.007399,-0.005816,0.000687,0.064657,-0.085826,-0.023153,0.014340,0.002391,-0.000243
1,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00002,1,0.004470,0.026626,-0.090045,-0.056897,-0.033130,0.013454,...,0.011867,0.002407,-0.004791,0.000464,0.064517,-0.063847,-0.021983,0.012812,0.002528,-0.000259
2,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00003,0,0.004421,0.039953,0.049543,-0.060606,-0.033737,0.013306,...,0.010291,-0.000792,-0.005446,0.000485,0.064288,-0.047681,-0.022056,0.012390,0.002454,-0.000189
3,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00004,0,0.004592,0.050876,0.244351,-0.061488,-0.033570,0.013514,...,0.012146,-0.006105,-0.005143,0.000610,0.064713,-0.051572,-0.019151,0.013191,0.002433,-0.000289
4,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00005,1,0.004462,0.022271,-0.135896,-0.056081,-0.033015,0.013473,...,0.011662,0.001805,-0.005244,0.000413,0.064437,-0.085364,-0.026056,0.015322,0.002441,-0.000236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02794,0,0.004373,0.044145,0.106600,-0.062472,-0.034117,0.013229,...,0.011127,-0.011517,-0.005488,0.000771,0.064678,-0.068491,-0.024089,0.015898,0.002620,-0.000523
2888,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02795,0,0.004367,0.050556,0.224676,-0.064723,-0.034484,0.013221,...,0.010400,-0.006378,-0.004600,0.000750,0.065187,-0.054311,-0.020164,0.019472,0.002322,-0.000430
2889,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02801,0,0.004416,0.044100,0.110424,-0.061863,-0.033931,0.013287,...,0.010264,0.001506,-0.004054,0.000687,0.065040,-0.037810,-0.018348,0.016102,0.002270,-0.000309
2890,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02811,0,0.004428,0.051119,0.232471,-0.063884,-0.034238,0.013297,...,0.010364,-0.003213,-0.004639,0.000687,0.065101,-0.031225,-0.026510,0.015555,0.002267,-0.000382


Splited test dataset(D011): torch.Size([2892, 1024]) torch.Size([2892])
- Test in D011 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0001,0,0.051106,-0.000545,-0.000009,-0.086802,-0.048359,-0.000026,...,0.004965,-0.000089,-0.009585,-0.026325,-0.014814,-0.201176,0.001646,-0.009771,-0.006514,-0.332626
1,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0002,0,0.050951,-0.000192,-0.000009,-0.086759,-0.038442,0.000096,...,0.006803,-0.000074,-0.007507,-0.015006,-0.014386,-0.206005,0.001262,-0.012675,-0.019469,-0.278198
2,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0003,0,0.050955,-0.000599,-0.000009,-0.087011,-0.047483,0.000110,...,0.005248,-0.000025,-0.002804,-0.011559,-0.037931,-0.221269,0.001537,-0.011747,-0.032378,-0.359946
3,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0004,1,0.050968,-0.000280,-0.000009,-0.086197,-0.038730,0.000024,...,0.005686,-0.000186,-0.006596,-0.022814,-0.014697,-0.225431,0.001725,-0.012175,-0.021712,-0.487730
4,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0005,0,0.051015,-0.000599,-0.000009,-0.088531,-0.052831,0.000211,...,0.007076,-0.000057,-0.004822,-0.023381,-0.008838,-0.184268,0.001095,-0.011407,-0.023267,0.088170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4820,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548234,0,0.050880,-0.000122,-0.000009,-0.085990,-0.032849,0.000072,...,0.007551,-0.000211,-0.005450,-0.018650,-0.020950,-0.203492,0.001685,-0.008812,-0.026031,-0.303583
4821,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548235,0,0.050833,-0.000769,-0.000008,-0.087333,-0.049141,0.000231,...,0.005560,-0.000157,-0.001510,-0.015198,-0.015094,-0.208078,0.001249,-0.011518,-0.019989,-0.321386
4822,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548236,0,0.051039,-0.000031,-0.000009,-0.086030,-0.032726,-0.000049,...,0.004447,0.000102,-0.002329,-0.010713,0.018097,-0.202790,0.001093,-0.012116,0.003105,-0.152395
4823,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548237,0,0.051094,-0.000995,-0.000009,-0.088994,-0.061369,0.000183,...,0.006059,0.000021,-0.002644,-0.018071,-0.027150,-0.215147,0.001550,-0.010301,-0.023504,-0.287474


Splited test dataset(C048): torch.Size([4825, 1024]) torch.Size([4825])
- Test in C048 was done.
- Test in total testset was done.

>>>> esm2 <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,-0.014705,0.014871,0.032494,-0.099194,0.111763,-0.048950,...,0.006629,0.068426,-0.029634,0.129648,-0.123162,0.120380,-0.018088,-0.017210,0.025762,0.050284
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,0.010395,-0.044129,0.039409,0.026303,-0.087774,-0.043303,...,-0.033984,-0.070647,-0.088788,-0.025774,-0.035926,-0.027873,0.039240,-0.171813,-0.024189,0.067702
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,-0.026210,-0.087095,0.031446,0.012126,-0.160973,0.035810,...,0.002050,-0.033890,-0.073008,0.011329,-0.050864,-0.051695,0.081063,-0.173046,-0.011369,0.092225
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,0.008325,-0.088589,0.024109,0.075197,-0.097268,-0.003867,...,0.008444,-0.037444,-0.109918,-0.004216,-0.058337,-0.072302,0.039237,-0.203126,-0.055722,0.149343
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,-0.023592,-0.037896,0.037959,0.046044,-0.000662,-0.071368,...,0.001027,-0.095670,-0.090029,0.088599,0.103171,0.016177,-0.092981,-0.105658,0.134885,-0.042430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,0.022917,0.063451,0.058542,-0.020484,0.013089,-0.053890,...,0.040926,0.025820,-0.089439,0.168240,0.013865,0.095509,0.065338,0.058039,-0.072531,0.040470
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,0.071847,0.004864,0.007702,0.110760,0.026687,0.055307,...,0.071134,-0.063979,-0.068846,0.050000,0.007645,0.029211,0.079024,0.052301,-0.082757,-0.103814
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,0.080228,0.052630,0.027678,0.091422,-0.025509,0.084525,...,0.059094,-0.011803,-0.061478,0.089032,-0.003137,0.051407,0.157305,-0.082298,-0.085231,0.029800
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,0.005647,-0.013861,0.030985,0.005463,-0.014081,0.030428,...,0.099927,-0.005585,-0.028430,0.030939,0.017342,0.114452,0.064848,-0.131629,-0.047249,-0.039224


Splited test dataset(C018): torch.Size([4313, 1280]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,-0.000842,-0.106311,-0.012641,0.005056,-0.091756,-0.010066,...,-0.018166,-0.047671,-0.076880,-0.012669,-0.012338,-0.073999,0.082144,-0.141023,0.001802,0.051776
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,0.012139,-0.216925,-0.118640,-0.094917,-0.159245,0.022107,...,0.018957,-0.044933,-0.050711,-0.006500,-0.089565,-0.014260,0.057546,-0.239095,0.112732,0.166336
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,-0.098312,-0.134627,0.014816,0.084084,-0.107415,-0.009167,...,-0.031528,-0.030567,-0.123196,0.025700,0.007029,-0.036876,0.069367,-0.137614,0.049789,0.091841
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,0.024691,-0.011771,-0.000774,0.029667,-0.138577,-0.003756,...,0.009944,-0.050365,-0.033939,0.033187,0.017564,0.029574,-0.005819,-0.078561,-0.008933,0.073980
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,-0.032686,-0.055994,-0.039325,-0.013324,-0.044959,-0.065129,...,-0.019475,-0.031306,-0.040509,-0.035259,-0.005981,0.137311,0.081591,-0.175803,0.043193,-0.057391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,0.057246,0.045996,0.010357,-0.051157,-0.065003,-0.004023,...,0.050735,0.088418,0.033492,0.103059,0.005616,0.172280,-0.018088,0.058458,-0.121865,0.109127
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,-0.025588,-0.162937,-0.013846,-0.004103,-0.098417,-0.112788,...,-0.019855,-0.044349,-0.117587,0.075252,-0.045971,-0.020897,0.023197,-0.162777,0.019520,0.142901
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,-0.024770,-0.102767,-0.013926,0.014022,-0.084881,-0.004007,...,-0.028231,-0.099346,-0.071256,-0.037557,-0.031373,-0.044831,0.072742,-0.165685,0.035624,0.072870
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,-0.075773,-0.119225,-0.026568,-0.007313,-0.153748,0.055661,...,-0.016795,-0.089924,-0.092721,-0.065731,-0.037132,-0.032952,0.128954,-0.187519,0.085612,0.020410


Splited test dataset(C039): torch.Size([5570, 1280]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,C035,Caulobacter crescentus,CCNA_00001,0,-0.016948,-0.042165,0.020042,-0.016418,-0.057229,-0.100696,...,0.025183,-0.053899,-0.036172,-0.044552,-0.042354,0.049396,0.072384,-0.111984,0.029693,0.063318
1,C035,Caulobacter crescentus,CCNA_00002,0,-0.012678,-0.098183,0.008435,0.028309,-0.143029,-0.038325,...,-0.018794,-0.102245,-0.025406,-0.026221,-0.027595,0.100941,0.120382,-0.194269,-0.064060,0.031912
2,C035,Caulobacter crescentus,CCNA_00003,1,-0.006961,-0.112683,-0.011008,0.043348,-0.071468,-0.011033,...,0.045023,-0.075355,0.018076,-0.036523,-0.034960,0.073637,0.064274,-0.158267,-0.027465,0.041576
3,C035,Caulobacter crescentus,CCNA_00004,1,-0.043994,-0.137897,-0.048401,0.028896,-0.184497,0.020157,...,-0.038492,-0.098503,-0.042422,-0.009926,-0.063873,-0.093545,0.011850,-0.220650,-0.002578,0.138848
4,C035,Caulobacter crescentus,CCNA_00005,1,0.023760,-0.116342,-0.000281,0.049189,-0.107303,0.003333,...,-0.038665,-0.143596,-0.100812,0.003303,-0.024205,0.005555,0.125736,-0.203816,0.003331,0.053200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,-0.000979,0.037320,0.050375,0.002076,-0.018403,-0.000528,...,-0.006577,0.067403,-0.108151,0.182208,-0.024374,0.082504,-0.050359,0.093528,-0.098201,0.036008
3882,C035,Caulobacter crescentus,CCNA_03996,0,0.048236,0.097222,0.114520,0.018025,-0.087240,0.014147,...,0.055982,0.129641,-0.105318,0.165101,-0.040093,0.107647,-0.025404,0.046408,-0.039319,0.071720
3883,C035,Caulobacter crescentus,CCNA_03997,0,0.026919,0.009171,0.001484,0.056194,0.000487,-0.050328,...,0.020029,-0.084860,-0.011150,-0.014311,-0.034345,-0.023211,0.033260,-0.118393,0.057678,-0.035150
3884,C035,Caulobacter crescentus,CCNA_03998,0,-0.013349,-0.102431,-0.103976,-0.020937,-0.038556,-0.056374,...,-0.023029,-0.035832,-0.040896,-0.044159,-0.048625,0.064694,0.074242,-0.159754,0.015602,0.048919


Splited test dataset(C035): torch.Size([3886, 1280]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00001,1,0.014756,-0.086230,-0.049170,0.060085,-0.122597,-0.034507,...,0.068613,-0.051714,-0.041093,-0.043765,-0.052265,-0.096607,0.113088,-0.122332,-0.038876,0.072069
1,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00002,1,-0.053397,-0.062034,-0.092344,0.067878,-0.151104,-0.049453,...,0.139989,-0.030218,-0.051699,0.036499,-0.118369,-0.039538,0.070770,-0.191519,-0.006231,0.177409
2,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00003,0,-0.010000,-0.017724,0.035584,0.049799,-0.044580,0.032065,...,0.113078,-0.062417,-0.049596,0.019633,-0.052211,-0.124557,0.062685,-0.166726,-0.118613,0.172522
3,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00004,0,-0.056038,-0.025302,0.000482,0.068061,-0.134211,-0.093808,...,0.119703,-0.042128,-0.088721,0.024605,-0.106930,-0.157508,0.148372,-0.140121,-0.009654,0.137043
4,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00005,1,0.027876,-0.025447,-0.004881,0.013319,-0.123916,-0.020236,...,0.039268,-0.066970,-0.045324,0.003242,0.031613,0.026799,-0.018035,-0.117092,-0.022989,0.115587
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02794,0,0.069163,0.034897,0.037069,0.072696,-0.132518,0.037154,...,0.125594,-0.012621,-0.050077,0.130210,0.003918,0.171234,0.098719,-0.017059,-0.168116,0.011164
2888,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02795,0,-0.002031,0.121649,0.040248,0.079541,-0.096833,-0.060299,...,0.239081,0.008416,-0.230059,0.262065,-0.148070,0.298836,0.097517,-0.207055,-0.194300,-0.005872
2889,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02801,0,0.053643,0.080320,0.051825,0.087870,-0.090280,-0.049021,...,0.224129,0.149523,-0.114989,0.176426,-0.077409,0.237677,-0.013614,-0.034921,-0.106720,-0.051711
2890,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02811,0,0.111638,0.027846,0.063459,0.019992,-0.113877,-0.050402,...,0.149179,0.032013,-0.155836,0.104540,-0.039293,0.203264,0.018613,-0.003791,-0.142092,0.048722


Splited test dataset(D011): torch.Size([2892, 1280]) torch.Size([2892])
- Test in D011 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0001,0,0.015226,-0.003460,0.016747,0.136284,-0.039058,0.054414,...,-0.043811,-0.018134,0.055927,0.062891,0.016069,-0.026713,0.068425,-0.110461,-0.034378,-0.027314
1,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0002,0,0.054702,0.030369,0.019799,0.051106,-0.036401,0.016038,...,0.115009,0.094161,-0.032623,-0.017994,-0.070167,-0.032142,0.109447,-0.053393,-0.080839,0.015281
2,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0003,0,-0.015344,-0.022893,0.029302,0.001033,-0.078359,-0.045380,...,0.041825,-0.036654,-0.004544,0.031253,-0.029167,-0.052823,0.092908,-0.085043,-0.029107,0.091703
3,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0004,1,0.053810,-0.007252,-0.019497,0.050211,-0.085094,-0.006909,...,0.105114,0.066781,-0.011164,0.051370,-0.041280,0.130595,0.068174,-0.020823,-0.088194,0.069933
4,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0005,0,0.078180,0.037520,-0.036622,0.036612,-0.066406,-0.096874,...,0.104855,0.003515,-0.070572,0.014473,-0.003558,0.044367,0.100179,-0.031271,-0.084933,0.022136
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4820,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548234,0,0.033819,-0.037969,-0.004286,0.028327,-0.102938,-0.009739,...,0.033463,-0.043338,-0.037569,-0.034426,0.004593,-0.034143,0.076671,-0.095346,-0.023359,0.037466
4821,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548235,0,0.043630,-0.092478,-0.006166,0.050076,-0.110532,0.029470,...,-0.028063,-0.072010,-0.055100,-0.038889,-0.002183,-0.036950,-0.003866,-0.085652,-0.058563,0.141009
4822,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548236,0,0.015700,0.078557,-0.044688,0.013285,-0.046135,0.018201,...,0.094385,-0.034658,-0.025017,0.090807,0.007465,0.133233,0.090349,-0.033758,-0.040873,-0.021821
4823,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548237,0,0.111880,0.063322,-0.052394,-0.053507,-0.068967,-0.018321,...,0.135528,0.073479,-0.014023,0.094231,-0.008560,0.164049,-0.019305,0.079028,-0.099126,0.036932


Splited test dataset(C048): torch.Size([4825, 1280]) torch.Size([4825])
- Test in C048 was done.
- Test in total testset was done.

>>>> bert <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,0.053609,-0.130600,-0.060840,-0.126020,0.060003,-0.014846,...,-0.003278,-0.041892,-0.113188,-0.013996,0.037198,-0.077081,0.005863,0.000208,0.035797,0.023481
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,-0.050816,-0.034555,-0.000197,0.014464,-0.019171,0.029583,...,0.003586,-0.115776,0.003309,-0.037954,-0.127526,0.001990,-0.020385,-0.025033,0.012277,0.000106
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,-0.068930,-0.036029,0.045477,0.001350,-0.009385,-0.000364,...,0.002405,-0.166539,-0.041316,-0.075871,-0.162713,-0.020479,-0.015154,-0.049642,0.028707,-0.027086
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,-0.035733,-0.036550,0.029775,0.001912,-0.019517,0.013466,...,0.033837,-0.139610,0.001519,-0.056889,-0.161009,-0.003268,-0.041054,-0.031427,0.012820,-0.036991
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,-0.064275,-0.020207,0.054502,0.121403,0.022631,0.035946,...,0.019650,0.019639,-0.018524,-0.035032,0.029231,-0.011468,-0.008523,-0.020028,0.002238,0.001045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,0.083056,-0.059131,0.016835,0.095346,-0.079348,-0.016631,...,0.025672,0.063140,-0.053165,-0.057264,0.090547,-0.094729,-0.047396,-0.062846,-0.034102,0.005212
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,-0.033713,-0.061019,-0.025564,0.040633,-0.047529,-0.020682,...,0.007818,0.017703,-0.038945,-0.016607,0.104732,-0.012575,0.040875,-0.061118,-0.038993,-0.045058
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,0.008314,0.031278,0.008313,0.017498,-0.059603,0.002852,...,-0.023402,-0.041200,-0.023697,-0.005152,-0.002504,-0.034793,0.019937,-0.049094,-0.035071,0.008665
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,0.040345,0.000448,-0.000268,0.013070,-0.025070,-0.015841,...,-0.032209,-0.020479,-0.039789,-0.059486,-0.005387,-0.006341,-0.034478,-0.058045,0.012478,-0.010510


Splited test dataset(C018): torch.Size([4313, 1024]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,0.015109,-0.034086,-0.010556,0.011104,0.023660,0.014769,...,0.062798,-0.082776,0.009823,-0.037634,-0.138931,0.022165,-0.031272,0.001585,-0.005642,0.011913
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,-0.029399,-0.045948,-0.016596,0.010400,-0.011993,0.073625,...,0.069874,-0.077642,0.006182,-0.080088,-0.153233,0.026972,-0.057674,-0.016918,0.001771,-0.008633
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,-0.060349,-0.045480,-0.030710,0.020671,-0.047547,0.034126,...,0.049181,-0.092152,0.012021,-0.069118,-0.164757,-0.005967,-0.064719,-0.005674,0.000092,0.009116
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,-0.015169,-0.031599,0.012893,0.012379,0.031008,0.001353,...,0.063716,-0.075145,0.007762,-0.040603,-0.099574,0.026423,-0.020227,-0.011192,-0.023021,0.002509
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,-0.043656,-0.028170,-0.021591,0.005593,-0.062422,0.030017,...,0.033010,-0.067864,-0.011308,-0.038364,-0.092088,-0.002375,-0.044000,-0.039722,-0.002039,-0.017150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,0.008313,-0.026933,0.039813,0.009364,0.007391,-0.010368,...,0.037413,-0.058411,-0.021546,0.017686,0.035838,-0.013691,-0.055873,-0.022340,-0.003987,-0.002415
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,-0.012768,-0.051457,-0.007486,0.010770,0.014020,0.036376,...,0.029019,-0.101498,0.001777,-0.050338,-0.150357,0.034692,-0.029847,-0.021698,-0.011606,-0.000920
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,-0.040536,-0.035989,0.014823,0.026205,0.014888,0.007433,...,0.035341,-0.070823,-0.009953,-0.015900,-0.071004,0.033883,-0.016446,0.000516,-0.006395,0.014153
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,-0.029625,-0.022359,-0.004146,-0.001357,0.003033,0.048788,...,0.022101,-0.075721,-0.025129,-0.049617,-0.097198,-0.022387,-0.017960,-0.011806,0.008029,0.021244


Splited test dataset(C039): torch.Size([5570, 1024]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C035,Caulobacter crescentus,CCNA_00001,0,0.021170,-0.049900,0.004337,0.019110,-0.016963,0.035717,...,0.013548,-0.046869,-0.007019,-0.012055,-0.124489,-0.015909,-0.079013,-0.018182,-0.021469,-0.015703
1,C035,Caulobacter crescentus,CCNA_00002,0,0.028033,-0.018804,0.013226,0.002572,0.043106,0.015400,...,0.013951,-0.048793,-0.035960,-0.012365,-0.131028,0.005735,-0.056024,0.008753,-0.011900,0.014986
2,C035,Caulobacter crescentus,CCNA_00003,1,0.040394,-0.041739,-0.018625,0.000916,0.016761,0.028488,...,0.005340,-0.048079,-0.027978,-0.001331,-0.060586,-0.009311,0.002532,0.011142,-0.025302,0.002884
3,C035,Caulobacter crescentus,CCNA_00004,1,0.012573,-0.059315,0.030510,-0.002841,0.025790,0.013309,...,0.018788,-0.085986,-0.006031,-0.026514,-0.117179,0.000363,-0.025359,-0.002024,-0.020280,-0.011847
4,C035,Caulobacter crescentus,CCNA_00005,1,-0.021906,-0.036741,-0.026532,0.039843,0.033308,0.014080,...,0.061313,-0.016741,-0.004953,-0.017890,-0.120261,0.006552,-0.025399,-0.044861,0.004443,-0.021823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,0.029576,-0.057636,-0.015889,0.016336,0.032662,-0.010276,...,-0.034465,0.011578,0.017053,0.009560,0.072878,-0.032258,0.013869,0.025119,-0.014531,0.031390
3882,C035,Caulobacter crescentus,CCNA_03996,0,-0.043335,-0.096087,0.013684,-0.010873,-0.046608,-0.054211,...,0.074179,-0.029141,-0.090461,-0.069890,0.069070,-0.067343,-0.102559,-0.091394,-0.036222,0.033967
3883,C035,Caulobacter crescentus,CCNA_03997,0,-0.027276,-0.035590,0.014306,0.074667,-0.040061,0.022049,...,0.020631,-0.017927,-0.044656,-0.069358,0.032683,-0.037887,0.016114,0.007143,0.030844,0.013387
3884,C035,Caulobacter crescentus,CCNA_03998,0,0.012552,-0.052712,0.014957,-0.006066,0.010136,0.021673,...,0.012511,-0.083760,-0.038007,-0.025686,-0.044019,0.001175,-0.024075,-0.012716,-0.020110,-0.002516


Splited test dataset(C035): torch.Size([3886, 1024]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00001,1,-0.025708,-0.010071,-0.019290,0.010613,0.033886,-0.031251,...,0.061488,-0.040678,0.010180,-0.022908,-0.046213,0.008444,-0.006106,0.015532,-0.017827,0.026691
1,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00002,1,-0.008528,-0.027116,-0.015448,0.020863,0.017152,0.008900,...,0.043656,-0.037718,0.019718,-0.028335,-0.008715,0.010594,-0.007009,0.002968,-0.028399,0.038052
2,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00003,0,0.029162,-0.034824,0.028874,-0.007563,0.003247,0.005004,...,0.039504,-0.028720,-0.020464,-0.029115,0.007703,0.003162,-0.014883,-0.052182,-0.016031,0.002887
3,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00004,0,-0.008086,-0.025112,0.003776,0.008063,0.013912,-0.040446,...,0.045348,-0.047838,0.021642,-0.023010,-0.072862,0.004188,-0.016089,0.020402,-0.028265,0.052088
4,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00005,1,-0.040415,-0.019857,-0.011054,0.040427,0.010740,-0.017206,...,0.055364,-0.050327,0.019529,-0.018487,-0.037192,0.020295,-0.009921,0.001154,-0.033088,0.019380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02794,0,0.045210,0.017389,-0.007598,0.028779,-0.020822,-0.035316,...,0.072952,-0.057935,0.011968,0.046649,0.077966,-0.016311,-0.013623,-0.021041,-0.012346,-0.000222
2888,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02795,0,-0.021764,-0.053854,-0.010016,-0.051106,-0.051991,0.037513,...,0.079998,0.015135,-0.069813,-0.064262,0.036947,-0.022165,-0.050321,0.001196,0.039510,-0.028461
2889,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02801,0,-0.013544,-0.023808,-0.011514,0.032185,-0.004387,0.023174,...,-0.015406,-0.005212,-0.007190,0.010544,0.068418,-0.026446,-0.016501,0.000128,-0.016723,-0.008824
2890,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02811,0,0.051409,0.014658,0.002889,0.014912,-0.032502,-0.000643,...,-0.017605,-0.064100,-0.026437,-0.019519,0.058029,-0.018436,0.008726,-0.007447,-0.028397,0.003706


Splited test dataset(D011): torch.Size([2892, 1024]) torch.Size([2892])
- Test in D011 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0001,0,0.008134,-0.005340,0.004636,-0.010469,0.040056,-0.017068,...,-0.031579,-0.042298,0.019305,-0.023405,0.041500,0.010795,0.013810,0.027188,-0.000832,-0.002747
1,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0002,0,0.011880,-0.012141,-0.003620,0.015314,-0.009465,0.003099,...,-0.023034,-0.015011,-0.007227,-0.000874,0.129264,0.019527,0.018317,-0.003779,-0.027562,0.015766
2,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0003,0,-0.005235,-0.033147,0.005293,0.011360,-0.000866,-0.026953,...,-0.007730,-0.023668,-0.013512,-0.010689,0.065256,0.005146,0.044662,0.016366,-0.027283,0.026012
3,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0004,1,0.024961,-0.005997,-0.002753,0.003432,-0.004259,-0.002687,...,-0.005626,-0.066784,-0.023227,0.011041,0.065140,-0.032046,0.002233,-0.028425,-0.053003,-0.002222
4,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0005,0,0.004875,0.034588,-0.020054,0.023095,0.014085,-0.015506,...,-0.002939,-0.010085,-0.008495,-0.023024,0.033300,0.008327,0.019326,-0.024427,-0.039357,0.005611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4820,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548234,0,0.001987,-0.029397,-0.009423,0.018703,0.023833,-0.044931,...,0.053782,-0.025635,0.014230,0.000030,-0.011609,0.002531,0.002253,0.008859,-0.006382,0.017132
4821,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548235,0,0.033204,-0.017214,-0.010453,0.012629,0.042732,0.001803,...,0.019163,-0.015624,-0.012221,0.014511,-0.062105,0.004722,-0.011215,-0.008002,-0.035631,0.006069
4822,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548236,0,0.016806,-0.008323,0.004312,0.039599,0.040799,-0.013179,...,0.004748,-0.018343,-0.020388,-0.010194,0.085055,-0.018075,0.014407,-0.012923,-0.013173,-0.014594
4823,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548237,0,0.003518,-0.006550,0.006761,0.059635,0.055527,-0.008578,...,-0.018508,0.004111,-0.022371,0.020706,0.046838,-0.008493,-0.038997,0.018357,-0.054477,0.002058


Splited test dataset(C048): torch.Size([4825, 1024]) torch.Size([4825])
- Test in C048 was done.
- Test in total testset was done.

>>>> t5 <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,0.011584,-0.027352,0.087529,0.063956,0.125525,-0.200814,...,-0.000678,0.037217,0.216121,-0.166806,0.073070,-0.209313,-0.170845,0.050126,-0.023876,-0.085014
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,0.044142,0.075931,0.025655,0.050714,0.006044,0.038834,...,-0.025106,-0.000805,-0.010590,-0.010165,0.020754,-0.043121,-0.023813,0.020034,-0.029119,0.034619
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,0.088072,0.042789,0.006048,0.028320,0.002196,0.055957,...,-0.023607,-0.033983,-0.003197,-0.030053,0.010648,-0.044762,-0.070817,-0.027704,-0.031297,-0.000393
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,0.048865,0.066090,0.019594,0.047430,0.002054,0.009558,...,0.003374,-0.027352,-0.037960,-0.013359,0.032385,0.009352,-0.019783,-0.003624,-0.010106,0.038098
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,0.057724,0.069290,0.000376,-0.004856,0.010432,-0.039151,...,-0.041921,0.009875,-0.012586,-0.079206,0.067018,-0.038756,-0.056466,0.038706,0.028096,0.047220
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,-0.030485,0.009393,0.037870,-0.084949,-0.014988,0.022290,...,-0.045453,0.122582,0.036599,-0.158354,0.017079,0.071776,-0.077362,0.167553,0.031955,-0.056729
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,0.006352,0.037870,-0.058590,-0.036578,0.066644,-0.015083,...,0.026498,-0.059437,0.013125,-0.206001,0.028325,-0.005555,-0.049957,0.092794,0.075111,-0.125499
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,0.031782,-0.066062,0.030549,-0.082723,0.002601,0.004915,...,-0.013472,-0.049079,0.063062,-0.115208,0.015199,0.008139,-0.083757,0.024483,0.053137,0.007785
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,0.036987,-0.017611,-0.028287,-0.019906,0.025342,0.020008,...,-0.029651,0.032883,0.026623,-0.147937,-0.055119,-0.061302,0.017104,0.147023,0.019259,-0.080418


Splited test dataset(C018): torch.Size([4313, 1024]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,0.024235,0.022193,-0.013932,0.041372,-0.006294,0.006999,...,-0.018425,0.008666,-0.053938,-0.015829,0.021349,0.039900,-0.021771,-0.005465,-0.033384,0.010373
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,0.009141,-0.002576,-0.007195,0.012134,-0.015342,0.005985,...,0.035174,0.047997,-0.036789,-0.037174,0.017118,-0.020300,0.011556,-0.000560,-0.013514,0.059980
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,0.041465,0.006352,-0.002384,-0.002025,-0.001963,0.010232,...,-0.038741,0.029714,-0.016216,-0.048981,-0.030838,-0.022068,-0.049107,-0.026446,-0.001247,0.000940
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,0.079612,0.047627,0.012035,0.044456,0.011636,0.005698,...,0.012982,-0.012235,-0.046952,-0.031784,0.058982,-0.050691,-0.032334,-0.038304,-0.044278,0.036417
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,0.022074,0.013658,0.011562,-0.027738,-0.022710,0.038536,...,-0.012599,0.046617,0.016559,-0.078393,0.014367,0.029613,0.030858,-0.027727,0.014146,0.023371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,-0.001936,0.113273,0.039776,-0.019768,-0.067865,-0.022220,...,0.011913,-0.011985,-0.061749,-0.062286,-0.038123,0.078364,0.051907,0.105583,0.046238,-0.007029
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,0.051280,0.035925,0.003701,0.064181,-0.013409,0.011024,...,0.001760,-0.019178,-0.028146,-0.031394,-0.018709,-0.016755,-0.011252,-0.041947,-0.029829,-0.004253
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,0.036627,0.042347,0.009739,0.031589,-0.008178,0.030534,...,-0.009959,0.009829,-0.016694,-0.032859,0.030945,-0.018591,-0.025247,-0.035370,-0.033299,0.044677
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,0.086184,-0.061592,0.050985,-0.030751,-0.022678,0.055324,...,-0.027935,0.046289,0.007596,-0.053347,-0.020113,-0.003128,-0.007353,0.029771,-0.013180,0.016786


Splited test dataset(C039): torch.Size([5570, 1024]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C035,Caulobacter crescentus,CCNA_00001,0,0.020058,0.011668,0.030414,0.023882,-0.010837,-0.014980,...,-0.007870,0.025724,-0.016352,-0.036573,0.001705,0.061251,0.011432,-0.008975,0.018777,0.026789
1,C035,Caulobacter crescentus,CCNA_00002,0,-0.025983,-0.008170,0.005247,0.053943,-0.028749,0.060893,...,-0.028033,0.010778,0.041618,-0.057536,-0.034405,0.048157,-0.033823,-0.013403,-0.027308,-0.010038
2,C035,Caulobacter crescentus,CCNA_00003,1,0.024779,-0.001617,0.023675,0.018376,-0.033304,0.013851,...,0.012675,-0.053094,-0.010201,-0.060865,-0.031287,0.041388,-0.013631,-0.005432,0.006609,0.039694
3,C035,Caulobacter crescentus,CCNA_00004,1,0.043007,0.022084,0.018602,0.047121,-0.037631,0.035858,...,-0.055712,-0.056001,-0.008636,-0.041914,-0.006228,0.025181,-0.041679,-0.003226,0.001496,-0.003528
4,C035,Caulobacter crescentus,CCNA_00005,1,0.027681,0.015576,0.001451,0.040875,-0.025243,-0.005219,...,-0.041004,-0.041577,-0.020063,-0.043437,0.002943,0.061310,-0.033076,-0.050576,-0.033045,0.024842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,0.044647,0.033466,-0.050714,-0.026918,-0.028675,-0.157701,...,0.057175,0.014047,0.050637,-0.072821,-0.010288,-0.014529,0.029304,0.164122,0.091785,-0.037428
3882,C035,Caulobacter crescentus,CCNA_03996,0,0.179715,0.020425,-0.091150,-0.093731,0.001956,-0.021912,...,-0.001070,0.087600,0.108491,-0.083115,0.051162,-0.082106,-0.076152,0.086164,-0.022795,-0.045483
3883,C035,Caulobacter crescentus,CCNA_03997,0,-0.031976,0.072945,-0.015456,-0.061065,0.034636,-0.075261,...,-0.025199,-0.017073,0.023233,0.007074,0.052571,0.031189,-0.027054,0.076396,0.075286,0.068603
3884,C035,Caulobacter crescentus,CCNA_03998,0,0.024063,-0.020369,-0.008308,-0.000595,0.000381,0.035635,...,0.000596,-0.037611,-0.026730,-0.043442,-0.019756,0.029615,-0.010306,-0.033816,0.001394,0.027850


Splited test dataset(C035): torch.Size([3886, 1024]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00001,1,0.023652,-0.016182,-0.044813,-0.005553,0.009462,-0.010082,...,-0.025819,0.036281,-0.028668,-0.075849,0.104102,0.045724,-0.006181,0.008993,0.018366,0.018931
1,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00002,1,-0.007927,-0.023425,-0.055023,-0.016866,-0.044181,-0.004378,...,-0.008795,0.053866,-0.024888,-0.065082,0.097267,0.015840,-0.010138,0.074140,0.064886,0.030392
2,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00003,0,0.019744,-0.069728,0.000090,-0.022637,-0.004717,0.042393,...,-0.027173,-0.007940,-0.044627,-0.113359,0.018270,-0.014962,-0.001079,-0.016964,0.089658,0.044070
3,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00004,0,0.040470,-0.010439,-0.057918,0.002970,-0.026755,0.003429,...,-0.044887,0.018670,-0.025396,-0.070817,0.055435,0.029834,-0.055991,0.013707,0.055876,0.002192
4,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_00005,1,0.027142,0.019460,-0.028592,0.018927,-0.005202,-0.013055,...,-0.004581,0.005561,-0.020736,-0.076954,0.057063,0.003132,-0.048660,-0.045533,-0.012115,0.016874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02794,0,-0.006805,0.097746,-0.047974,0.041322,-0.057610,-0.042833,...,0.042549,-0.088404,0.005287,-0.118230,0.003415,0.044131,-0.016982,0.011587,-0.045439,-0.021421
2888,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02795,0,0.083121,0.127933,0.081483,-0.039364,-0.130656,-0.137333,...,0.017873,0.092684,-0.103311,-0.213061,0.163548,0.098239,-0.016091,0.033422,-0.101626,0.063540
2889,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02801,0,0.114380,0.058984,-0.023862,0.026664,-0.031087,-0.044770,...,-0.085187,0.004284,0.044071,-0.189268,0.142174,0.076700,-0.063392,0.027719,-0.042121,-0.020427
2890,D011,Staphylococcus aureus subsp. aureus NCTC 8325,SAOUHSC_A02811,0,0.058067,0.090945,-0.080845,0.026152,-0.051203,-0.104548,...,-0.055050,-0.032438,0.029816,-0.221483,0.070463,0.049084,-0.107428,0.039623,-0.007828,0.024856


Splited test dataset(D011): torch.Size([2892, 1024]) torch.Size([2892])
- Test in D011 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0001,0,-0.025583,-0.004581,0.078834,-0.007245,-0.017835,0.027827,...,-0.065012,-0.028501,0.053340,-0.077918,0.056029,0.041364,-0.014699,-0.001872,0.004338,0.001955
1,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0002,0,0.018134,0.013939,0.075287,0.070802,-0.001094,0.025817,...,-0.021290,-0.020542,-0.034380,-0.072337,0.068918,0.010234,0.028582,0.009465,0.093650,0.043395
2,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0003,0,0.011903,-0.050535,0.087673,0.071201,-0.009868,0.021778,...,-0.043901,-0.007869,-0.009904,-0.089165,0.022759,-0.034698,0.015764,-0.004712,0.054782,0.000353
3,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0004,1,-0.010374,0.003995,-0.002378,0.038072,-0.007250,0.027236,...,-0.049503,-0.045843,0.005756,-0.022377,-0.010699,0.019500,-0.003762,0.031871,0.039024,-0.007551
4,C048,Bacteroides thetaiotaomicron VPI-5482,BT_0005,0,-0.011068,-0.041055,0.006833,0.067446,0.008203,-0.021138,...,-0.043943,-0.005162,0.013928,-0.136281,0.067658,-0.013946,-0.021438,0.000856,0.044038,-0.004827
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4820,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548234,0,0.031741,-0.001645,0.019326,-0.011843,-0.052117,0.047029,...,-0.054037,0.004018,-0.020355,-0.100695,-0.033680,-0.015619,-0.041882,-0.075407,0.059378,-0.028652
4821,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548235,0,0.011300,-0.025902,-0.010313,-0.032185,-0.015765,0.040253,...,-0.043232,-0.013576,-0.046755,-0.065286,0.012180,0.054107,-0.003031,-0.025053,-0.015699,0.035863
4822,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548236,0,-0.016179,0.049258,-0.010011,0.008940,-0.032092,0.033933,...,-0.017677,-0.053039,-0.022061,-0.076991,0.006883,0.069524,-0.009119,0.030341,0.042864,-0.057160
4823,C048,Bacteroides thetaiotaomicron VPI-5482,BT_p548237,0,-0.013817,0.125102,0.030937,0.018625,-0.055700,-0.024432,...,0.065290,-0.014176,-0.032757,-0.104375,-0.026381,0.102121,0.083062,0.130213,0.057213,-0.048246


Splited test dataset(C048): torch.Size([4825, 1024]) torch.Size([4825])
- Test in C048 was done.
- Test in total testset was done.


'Model performance:'

Unnamed: 0,emb,file,organism,tp,fp,tn,fn,mcc,acc,f1,prc,rec,npv,tnr,auc-roc,auc-pr
0,clstm,C018,C018,233,229,3786,65,0.594411,0.931834,0.613158,0.504329,0.781879,0.983121,0.942964,0.910965,0.586572
1,clstm,C039,C039,206,356,4863,145,0.418455,0.910054,0.45126,0.366548,0.586895,0.971046,0.931788,0.866708,0.489602
2,clstm,C035,C035,241,282,3124,239,0.404242,0.865929,0.480558,0.460803,0.502083,0.928933,0.917205,0.814435,0.513969
3,clstm,D011,D011,128,375,2234,155,0.24187,0.816736,0.3257,0.254473,0.452297,0.935119,0.856267,0.725506,0.275557
4,clstm,C048,C048,110,587,3913,215,0.148307,0.833782,0.215264,0.157819,0.338462,0.947917,0.869556,0.656366,0.172057
5,clstm,total,all,918,1829,17920,819,0.35583,0.876757,0.409456,0.334183,0.528497,0.956294,0.907388,0.790771,0.362973
6,esm2,C018,C018,275,492,3523,23,0.530793,0.880594,0.516432,0.35854,0.922819,0.993514,0.87746,0.944574,0.719505
7,esm2,C039,C039,331,430,4789,20,0.608896,0.91921,0.595324,0.434954,0.94302,0.995841,0.917609,0.976373,0.813541
8,esm2,C035,C035,416,445,2961,64,0.58313,0.869017,0.620433,0.483159,0.866667,0.978843,0.869348,0.939435,0.766418
9,esm2,D011,D011,225,375,2234,58,0.477245,0.850277,0.509626,0.375,0.795053,0.974695,0.856267,0.894539,0.686368
