#### Set up the environment

In [1]:
import torch, os, gc
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import confusion_matrix, matthews_corrcoef, accuracy_score,\
f1_score, precision_score, recall_score, roc_auc_score, average_precision_score

In [2]:
# Embedding variants and test-set identifiers to evaluate
embed_ver = ["clstm", "t5"]
test_ver = ["C018", "C039", "C035", "C033", "C061"]

# Input / output locations (relative paths)
data_path = "../data/test_exam/"
model_path = "../models/cls/"
result_path = "../results/"
# Per-test-set prediction tables are written below this directory
os.makedirs(f"{result_path}prd-indiv/", exist_ok=True)

# Metadata and label columns of the test CSVs; every other column is a feature
col_str = ["file_id", "organism", "locus_tag", "ess"]

# Mini-batch size used for inference
batch_size = 256

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Define function to record performance result
def record_perform(emb_ver, file_id, organ, y_real, y_conf, y_prd):
    """Build a one-row DataFrame of binary-classification metrics.

    Parameters
    ----------
    emb_ver
        Value stored in the ``emb`` column.
    file_id
        Value stored in the ``file`` column.
    organ
        Value stored in the ``organism`` column.
    y_real : array-like
        Ground-truth labels, expected to be 0/1.
    y_conf : array-like
        Scores for the positive class, used for the AUC metrics.
    y_prd : array-like
        Predicted labels, expected to be 0/1.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``emb, file, organism, tp, fp, tn, fn,
        mcc, acc, f1, prc, rec, npv, tnr, auc-roc, auc-pr``. The two AUC
        columns hold NaN when they are skipped (see below).
    """
    # Accept plain lists as well as arrays: `1 - y` below needs array arithmetic.
    y_real = np.asarray(y_real)
    y_conf = np.asarray(y_conf)
    y_prd = np.asarray(y_prd)

    # Ranking metrics are undefined when the ground truth holds a single class.
    # The file id "O046" is additionally skipped unconditionally, as before
    # (reason not visible in this file; presumably a single-class set - TODO confirm).
    skip_auc = file_id == "O046" or np.unique(y_real).size < 2
    if skip_auc:
        auc_roc = np.nan
        auc_pr = np.nan
    else:
        auc_roc = roc_auc_score(y_real, y_conf)
        auc_pr = average_precision_score(y_real, y_conf)

    # Fixing the label set guarantees a 2x2 matrix even if only one class
    # occurs in y_real / y_prd, so the four-way unpacking cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_real, y_prd, labels=[0, 1]).ravel()

    # NPV and TNR are precision / recall computed on the flipped labels.
    neg_real = 1 - y_real
    neg_prd = 1 - y_prd

    row = {
        "emb": emb_ver,
        "file": file_id,
        "organism": organ,
        "tp": tp,
        "fp": fp,
        "tn": tn,
        "fn": fn,
        "mcc": matthews_corrcoef(y_real, y_prd),
        "acc": accuracy_score(y_real, y_prd),
        "f1": f1_score(y_real, y_prd),
        "prc": precision_score(y_real, y_prd),
        "rec": recall_score(y_real, y_prd),
        "npv": precision_score(neg_real, neg_prd),
        "tnr": recall_score(neg_real, neg_prd),
        "auc-roc": auc_roc,
        "auc-pr": auc_pr,
    }
    return pd.DataFrame([row])


In [5]:
# Set model architecture
class Classifier(nn.Module):
    """Feed-forward classifier head producing one logit per sample.

    Layout: ``BatchNorm1d -> Dropout(0.5) -> num_layers x [Linear -> GELU]
    -> Linear(., 1)``. The hidden width starts from 1024 and is
    integer-divided by ``unit_decrease`` before every hidden layer, never
    dropping below 2.

    Parameters
    ----------
    input_size : int
        Number of input features.
    num_layers : int
        Number of hidden ``Linear -> GELU`` pairs; 0 gives a single linear
        output layer after normalisation and dropout.
    unit_decrease : int
        Divisor applied to the hidden width at each hidden layer.
    """

    def __init__(self, input_size, num_layers, unit_decrease):
        super().__init__()
        layers = [nn.BatchNorm1d(input_size), nn.Dropout(0.5)]
        in_dim = input_size
        out_dim = 1024
        for _ in range(num_layers):
            out_dim = max(2, out_dim // unit_decrease)
            hidden = nn.Linear(in_dim, out_dim)
            self.initialize_weights(hidden)
            layers.append(hidden)
            layers.append(nn.GELU())
            in_dim = out_dim
        # Use the width actually feeding the output layer: with num_layers == 0
        # that is input_size, not the 1024 starting value of out_dim.
        # The output layer keeps PyTorch's default initialisation.
        layers.append(nn.Linear(in_dim, 1))
        self.cls_block = nn.Sequential(*layers)

    def initialize_weights(self, layer):
        """Kaiming-normal init (fan_in, linear gain) for weights; zero the bias."""
        nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='linear')
        if layer.bias is not None:
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the raw logits of shape ``(batch, 1)`` for input ``x``."""
        return self.cls_block(x)

#### Evaluate model

In [6]:
# One-row metric frames are collected here and concatenated once at the end,
# instead of growing a DataFrame with repeated pd.concat calls.
eval_rows = []

for ver in embed_ver:
    print(f"\n>>>> {ver} <<<<")

    # Labels / confidences / predicted classes pooled over all test sets
    total_label = []
    total_conf = []
    total_cls = []
    for ts_ver in test_ver:
        # load dataset
        data = pd.read_csv(data_path + f"data-emb_{ver}-{ts_ver}_ts.csv")
        display("Raw data:", data)

        #### Preprocess for test dataset ####
        # every column that is not metadata/label is an embedding feature
        col_num = [col for col in data.columns if col not in col_str]

        # split info. & inputs & labels of the test dataset
        info_ts = data[col_str]
        X_ts = torch.tensor(data[col_num].astype('float32').values)
        y_ts = torch.tensor(data['ess'].astype('float32').values)
        print(f"Splited test dataset({ts_ver}):", X_ts.shape, y_ts.shape)

        # generate dataloader by the test dataset (order kept: shuffle=False)
        dataset_ts = TensorDataset(X_ts, y_ts)
        test_loader = DataLoader(dataset_ts, batch_size=batch_size, shuffle=False)

        ## Test model ##
        # generate model instance
        model = Classifier(
            input_size=X_ts.shape[-1],
            num_layers=3,
            unit_decrease=2
        ).to(device)

        # load model weight
        # NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
        model.load_state_dict(torch.load(f"{model_path}{ver}-{ts_ver}.pt", map_location=device))
        model.eval()

        ## model evaluations by test dataset ##
        results = []
        with torch.no_grad():
            # labels are not needed for inference, so they stay on the CPU
            for X_batch, _ in test_loader:
                preds = model(X_batch.to(device)).view(-1).cpu()
                # gather the predictions
                results.append(preds)

        results = torch.cat(results, dim=0)

        # the model is no longer needed once its logits are on the CPU
        del model

        # convert logits to confidences & classes
        prd_conf = torch.sigmoid(results)
        prd_cls = (prd_conf >= 0.5).int().tolist()

        # save the model prediction result
        # (confidences are attached positionally, independent of the index)
        df_pred = info_ts.copy()
        df_pred["conf"] = prd_conf.tolist()
        df_pred.to_csv(f"{result_path}prd-indiv/{ver}-{ts_ver}.csv", index=False)

        # gather the results
        total_label.extend(df_pred['ess'].tolist())
        total_conf.extend(df_pred['conf'].tolist())
        total_cls.extend(prd_cls)

        # get evaluation row by testset
        eval_ts = record_perform(
            emb_ver=ver,
            file_id=ts_ver,
            organ=ts_ver,
            y_real=df_pred['ess'].to_numpy(),
            y_conf=df_pred['conf'].to_numpy(),
            y_prd=np.array(prd_cls)
        )
        eval_rows.append(eval_ts)
        print(f"- Test in {ts_ver} was done.")

    # release cached (GPU) memory before moving on to the next embedding
    gc.collect()
    torch.cuda.empty_cache()

    # get the row computed on the pooled predictions of all test sets
    eval_ts = record_perform(
        emb_ver=ver,
        file_id="total",
        organ="all",
        y_real=np.array(total_label),
        y_conf=np.array(total_conf),
        y_prd=np.array(total_cls)
    )
    eval_rows.append(eval_ts)
    print(f"- Test in total testset was done.")

# save the model performance result
df_eval = pd.concat(eval_rows, ignore_index=True) if eval_rows else pd.DataFrame()
df_eval.to_csv(f"{result_path}eval-indiv_cls-strain.csv", index=False)
display("Model performance:", df_eval)


>>>> clstm <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,-1.141337,0.000117,0.074418,-0.000446,-0.002198,-0.003431,...,-0.000123,0.004792,0.000471,0.126904,0.006709,0.013690,-0.021874,-0.049524,0.108785,-0.003517
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,0.137700,0.000258,0.067561,-0.000478,-0.000678,-0.003556,...,-0.000045,0.001738,0.002048,0.126786,0.007048,0.008776,0.114241,-0.035615,0.105729,0.001858
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,0.128075,0.000037,0.062052,-0.004017,-0.002748,-0.000468,...,-0.000051,0.001672,0.002281,0.126932,0.007019,0.007990,0.115545,-0.033727,0.102622,0.002154
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,0.487113,0.000235,0.064557,-0.001244,-0.000863,-0.002970,...,-0.000038,0.001291,0.000991,0.127128,0.007119,0.008230,0.117134,-0.032810,0.105667,0.001789
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,-0.501022,0.000158,0.070695,-0.000914,-0.001795,-0.003404,...,-0.000019,-0.000853,0.003466,0.126817,0.007367,0.006851,0.123346,-0.026789,0.104581,0.004456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,-0.824822,0.000154,0.070713,-0.000348,-0.002115,-0.004313,...,-0.000115,0.004844,0.001291,0.126999,0.006747,0.013313,-0.021632,-0.051901,0.107055,-0.004071
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,-1.325105,0.000108,0.077958,-0.000324,-0.002058,-0.002888,...,-0.000106,0.004112,0.002155,0.127117,0.006835,0.011485,0.009256,-0.044031,0.106437,-0.002243
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,-1.137151,0.000133,0.076093,-0.000250,-0.001809,-0.003093,...,-0.000059,0.002871,0.002066,0.127147,0.006998,0.009537,0.041613,-0.044984,0.104478,-0.000265
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,-1.110526,0.000127,0.074501,-0.000332,-0.002103,-0.003567,...,-0.000113,0.004755,0.000375,0.126987,0.006722,0.013741,-0.018794,-0.051945,0.108058,-0.003450


Splited test dataset(C018): torch.Size([4313, 1024]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,-0.002527,-0.020595,-0.114287,-0.012062,-0.011953,0.023055,...,0.140998,0.004529,0.072357,-0.057453,0.000957,-0.044550,-0.051208,-0.002760,0.000008,-0.001140
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,-0.002527,-0.020554,-0.114374,-0.012075,-0.011924,0.023065,...,0.487758,0.003055,0.072045,-0.052898,0.000920,-0.044554,-0.052154,-0.001264,0.000007,-0.001183
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,-0.002527,-0.020554,-0.114465,-0.012152,-0.011984,0.023117,...,-0.031859,0.005805,0.072278,-1.221916,0.000928,-0.044602,-0.053711,-0.002189,0.000010,-0.001133
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,-0.002528,-0.020734,-0.114397,-0.012141,-0.011958,0.023186,...,-0.776504,0.003083,0.072298,0.306680,0.000932,-0.044488,-0.048716,-0.002996,0.000017,-0.001143
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,-0.002527,-0.020757,-0.114371,-0.012447,-0.012254,0.023402,...,0.280329,0.006728,0.072264,-0.543162,0.000934,-0.044501,-0.054391,-0.003492,0.000020,-0.001105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,-0.002527,-0.020618,-0.114357,-0.012243,-0.012142,0.023330,...,0.798282,0.004630,0.072520,-0.623419,0.000934,-0.044367,-0.052976,-0.002959,0.000012,-0.001141
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,-0.002527,-0.020534,-0.114529,-0.012125,-0.011920,0.023014,...,0.419322,0.006513,0.072046,-0.259117,0.000912,-0.044391,-0.053267,-0.002436,0.000023,-0.001116
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,-0.002527,-0.020357,-0.114468,-0.011936,-0.011668,0.022776,...,0.036780,0.006474,0.072424,-0.329366,0.000920,-0.044700,-0.049474,-0.001785,0.000005,-0.001140
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,-0.002527,-0.020267,-0.114498,-0.011684,-0.011469,0.022462,...,0.520540,0.001117,0.072138,-2.681033,0.000922,-0.044539,-0.053330,-0.001552,0.000003,-0.001191


Splited test dataset(C039): torch.Size([5570, 1024]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C035,Caulobacter crescentus,CCNA_00001,0,0.241180,0.005716,-0.000299,0.015325,0.032135,-0.059960,...,-0.123304,-0.005714,0.000668,0.108539,0.051970,-0.025484,-0.014294,-1.737134,-0.000604,0.015505
1,C035,Caulobacter crescentus,CCNA_00002,0,-0.045185,0.005539,-0.000327,0.015818,0.033548,-0.059887,...,-0.123312,-0.005727,0.001803,0.110109,0.049759,-0.013092,-0.013027,-1.696523,-0.000625,0.014594
2,C035,Caulobacter crescentus,CCNA_00003,1,-0.213073,0.005356,-0.000336,0.016139,0.035146,-0.059827,...,-0.123311,-0.005694,0.000513,0.108760,0.050975,-0.027863,-0.013663,-1.785601,-0.000626,0.015282
3,C035,Caulobacter crescentus,CCNA_00004,1,-0.228296,0.005360,-0.000336,0.016138,0.034612,-0.059811,...,-0.123323,-0.005671,0.002080,0.110030,0.050929,-0.025066,-0.011885,-0.174281,-0.000633,0.015716
4,C035,Caulobacter crescentus,CCNA_00005,1,-0.120089,0.005433,-0.000328,0.015988,0.034627,-0.059877,...,-0.123301,-0.005543,0.002935,0.106811,0.051716,-0.050719,-0.015525,-0.674083,-0.000587,0.016613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,0.033178,0.005891,-0.000330,0.015652,0.033942,-0.059709,...,-0.123307,-0.005970,0.007850,0.108237,0.044217,0.046716,-0.012149,-1.540633,-0.000716,0.006626
3882,C035,Caulobacter crescentus,CCNA_03996,0,0.053660,0.005893,-0.000331,0.015553,0.032510,-0.059701,...,-0.123302,-0.006032,0.008613,0.108982,0.043965,0.059067,-0.010368,-0.265818,-0.000754,0.004910
3883,C035,Caulobacter crescentus,CCNA_03997,0,-0.415582,0.005252,-0.000353,0.016508,0.038276,-0.059795,...,-0.123278,-0.005532,-0.000903,0.110412,0.052775,-0.047691,-0.014426,-0.298670,-0.000603,0.016633
3884,C035,Caulobacter crescentus,CCNA_03998,0,-0.075480,0.005484,-0.000327,0.015941,0.035006,-0.059903,...,-0.123292,-0.005619,0.000638,0.109217,0.052073,-0.040741,-0.014404,-1.296945,-0.000569,0.016881


Splited test dataset(C035): torch.Size([3886, 1024]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C033,Salmonella enterica subsp. enterica serovar Ty...,-,0,0.048051,-0.003615,-0.133087,-0.020764,-0.147111,0.000007,...,0.011813,-0.000130,0.091691,0.000875,0.003376,-0.000260,0.027993,-0.004713,-0.000013,-0.456642
1,C033,Salmonella enterica subsp. enterica serovar Ty...,t0001,0,0.048778,-0.003095,-0.135399,-0.022900,-0.149605,0.000002,...,0.012639,-0.000184,-0.001965,0.000634,0.004790,-0.015272,0.026573,0.004767,-0.000018,-0.665382
2,C033,Salmonella enterica subsp. enterica serovar Ty...,t0002,0,0.048138,-0.003551,-0.133326,-0.021115,-0.147439,0.000007,...,0.011994,-0.000128,-0.082047,0.000833,0.003336,-0.001369,0.026376,0.003696,-0.000013,-0.562224
3,C033,Salmonella enterica subsp. enterica serovar Ty...,t0003,0,0.048120,-0.003590,-0.133267,-0.020836,-0.147247,0.000007,...,0.012030,-0.000123,-0.011235,0.000939,0.003062,0.000058,0.024064,-0.002085,-0.000013,-0.877493
4,C033,Salmonella enterica subsp. enterica serovar Ty...,t0004,0,0.048074,-0.003598,-0.133185,-0.020825,-0.147189,0.000007,...,0.012088,-0.000130,-0.027572,0.000920,0.003113,-0.000637,0.025022,-0.002332,-0.000013,-1.435330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4318,C033,Salmonella enterica subsp. enterica serovar Ty...,t4635,0,0.048282,-0.003519,-0.133626,-0.021094,-0.147612,0.000006,...,0.012157,-0.000128,0.009959,0.000869,0.003239,-0.000382,0.023818,-0.001995,-0.000013,-0.292774
4319,C033,Salmonella enterica subsp. enterica serovar Ty...,t4636,0,0.048441,-0.003430,-0.134022,-0.021465,-0.148061,0.000005,...,0.012127,-0.000129,-0.021016,0.000953,0.003298,-0.001529,0.025834,0.002425,-0.000013,-1.360936
4320,C033,Salmonella enterica subsp. enterica serovar Ty...,t4637,0,0.048259,-0.003443,-0.133818,-0.021508,-0.147934,0.000005,...,0.012031,-0.000127,0.053278,0.001180,0.002747,0.001260,0.022773,-0.009386,-0.000013,0.322559
4321,C033,Salmonella enterica subsp. enterica serovar Ty...,t4638,0,0.048696,-0.003140,-0.135196,-0.022706,-0.149387,0.000002,...,0.012116,-0.000161,0.051433,0.000799,0.004444,-0.010738,0.024786,-0.002683,-0.000016,-1.267374


Splited test dataset(C033): torch.Size([4323, 1024]) torch.Size([4323])
- Test in C033 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0001,1,-0.004642,-0.026831,-0.050019,-0.018936,-0.154830,0.000153,...,0.028761,0.001788,0.004815,-0.010541,-0.031238,-0.011856,0.009031,0.013978,0.000126,-0.003233
1,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0002,1,-0.004645,-0.026831,-0.050019,-0.018936,-0.154830,0.000153,...,0.028436,0.001727,0.004191,-0.014137,-0.030193,-0.013885,0.009026,0.013890,0.000117,-0.003301
2,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0003,0,-0.004643,-0.026831,-0.050022,-0.018936,-0.154830,0.000146,...,0.028394,0.001837,0.004520,-0.013613,-0.031180,-0.008772,0.009025,0.014085,0.000124,-0.003276
3,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0004,0,-0.004639,-0.026831,-0.050021,-0.018932,-0.154830,0.000139,...,0.028419,0.001799,0.004563,-0.013676,-0.031154,-0.011862,0.009035,0.013974,0.000129,-0.003243
4,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0005,0,-0.004639,-0.026831,-0.050018,-0.018938,-0.154830,0.000156,...,0.028452,0.001831,0.005031,-0.010943,-0.030681,-0.010778,0.009035,0.013964,0.000133,-0.003256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3322,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3333,0,-0.004615,-0.026831,-0.050005,-0.018854,-0.154830,-0.000034,...,0.028296,0.001921,0.005071,-0.012502,-0.031306,-0.011642,0.009035,0.013973,0.000136,-0.003353
3323,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3334,0,-0.004613,-0.026831,-0.050012,-0.018892,-0.154830,0.000046,...,0.028697,0.001816,0.004960,-0.007998,-0.029868,-0.006294,0.009035,0.014540,0.000126,-0.003230
3324,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3335,0,-0.004475,-0.026821,-0.049818,-0.018396,-0.154827,-0.000924,...,0.028490,0.002031,0.005113,-0.007207,-0.031035,-0.001707,0.009038,0.014680,0.000138,-0.003367
3325,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3336,0,-0.004645,-0.026831,-0.050022,-0.018925,-0.154830,0.000128,...,0.028413,0.001858,0.005036,-0.015296,-0.031342,-0.010399,0.009040,0.014040,0.000138,-0.003312


Splited test dataset(C061): torch.Size([3327, 1024]) torch.Size([3327])
- Test in C061 was done.
- Test in total testset was done.

>>>> t5 <<<<


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C018,Escherichia coli K-12 BW25113,BW25113_0001,0,0.011584,-0.027352,0.087529,0.063956,0.125525,-0.200814,...,-0.000678,0.037217,0.216121,-0.166806,0.073070,-0.209313,-0.170845,0.050126,-0.023876,-0.085014
1,C018,Escherichia coli K-12 BW25113,BW25113_0002,0,0.044142,0.075931,0.025655,0.050714,0.006044,0.038834,...,-0.025106,-0.000805,-0.010590,-0.010165,0.020754,-0.043121,-0.023813,0.020034,-0.029119,0.034619
2,C018,Escherichia coli K-12 BW25113,BW25113_0003,0,0.088072,0.042789,0.006048,0.028320,0.002196,0.055957,...,-0.023607,-0.033983,-0.003197,-0.030053,0.010648,-0.044762,-0.070817,-0.027704,-0.031297,-0.000393
3,C018,Escherichia coli K-12 BW25113,BW25113_0004,0,0.048865,0.066090,0.019594,0.047430,0.002054,0.009558,...,0.003374,-0.027352,-0.037960,-0.013359,0.032385,0.009352,-0.019783,-0.003624,-0.010106,0.038098
4,C018,Escherichia coli K-12 BW25113,BW25113_0005,0,0.057724,0.069290,0.000376,-0.004856,0.010432,-0.039151,...,-0.041921,0.009875,-0.012586,-0.079206,0.067018,-0.038756,-0.056466,0.038706,0.028096,0.047220
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4308,C018,Escherichia coli K-12 BW25113,BW25113_4702,0,-0.030485,0.009393,0.037870,-0.084949,-0.014988,0.022290,...,-0.045453,0.122582,0.036599,-0.158354,0.017079,0.071776,-0.077362,0.167553,0.031955,-0.056729
4309,C018,Escherichia coli K-12 BW25113,BW25113_4703,0,0.006352,0.037870,-0.058590,-0.036578,0.066644,-0.015083,...,0.026498,-0.059437,0.013125,-0.206001,0.028325,-0.005555,-0.049957,0.092794,0.075111,-0.125499
4310,C018,Escherichia coli K-12 BW25113,BW25113_4705,0,0.031782,-0.066062,0.030549,-0.082723,0.002601,0.004915,...,-0.013472,-0.049079,0.063062,-0.115208,0.015199,0.008139,-0.083757,0.024483,0.053137,0.007785
4311,C018,Escherichia coli K-12 BW25113,BW25113_4706,0,0.036987,-0.017611,-0.028287,-0.019906,0.025342,0.020008,...,-0.029651,0.032883,0.026623,-0.147937,-0.055119,-0.061302,0.017104,0.147023,0.019259,-0.080418


Splited test dataset(C018): torch.Size([4313, 1024]) torch.Size([4313])
- Test in C018 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C039,Pseudomonas aeruginosa MPAO1,PA0001,1,0.024235,0.022193,-0.013932,0.041372,-0.006294,0.006999,...,-0.018425,0.008666,-0.053938,-0.015829,0.021349,0.039900,-0.021771,-0.005465,-0.033384,0.010373
1,C039,Pseudomonas aeruginosa MPAO1,PA0002,1,0.009141,-0.002576,-0.007195,0.012134,-0.015342,0.005985,...,0.035174,0.047997,-0.036789,-0.037174,0.017118,-0.020300,0.011556,-0.000560,-0.013514,0.059980
2,C039,Pseudomonas aeruginosa MPAO1,PA0003,0,0.041465,0.006352,-0.002384,-0.002025,-0.001963,0.010232,...,-0.038741,0.029714,-0.016216,-0.048981,-0.030838,-0.022068,-0.049107,-0.026446,-0.001247,0.000940
3,C039,Pseudomonas aeruginosa MPAO1,PA0004,1,0.079612,0.047627,0.012035,0.044456,0.011636,0.005698,...,0.012982,-0.012235,-0.046952,-0.031784,0.058982,-0.050691,-0.032334,-0.038304,-0.044278,0.036417
4,C039,Pseudomonas aeruginosa MPAO1,PA0005,0,0.022074,0.013658,0.011562,-0.027738,-0.022710,0.038536,...,-0.012599,0.046617,0.016559,-0.078393,0.014367,0.029613,0.030858,-0.027727,0.014146,0.023371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,C039,Pseudomonas aeruginosa MPAO1,PA5566,0,-0.001936,0.113273,0.039776,-0.019768,-0.067865,-0.022220,...,0.011913,-0.011985,-0.061749,-0.062286,-0.038123,0.078364,0.051907,0.105583,0.046238,-0.007029
5566,C039,Pseudomonas aeruginosa MPAO1,PA5567,0,0.051280,0.035925,0.003701,0.064181,-0.013409,0.011024,...,0.001760,-0.019178,-0.028146,-0.031394,-0.018709,-0.016755,-0.011252,-0.041947,-0.029829,-0.004253
5567,C039,Pseudomonas aeruginosa MPAO1,PA5568,1,0.036627,0.042347,0.009739,0.031589,-0.008178,0.030534,...,-0.009959,0.009829,-0.016694,-0.032859,0.030945,-0.018591,-0.025247,-0.035370,-0.033299,0.044677
5568,C039,Pseudomonas aeruginosa MPAO1,PA5569,1,0.086184,-0.061592,0.050985,-0.030751,-0.022678,0.055324,...,-0.027935,0.046289,0.007596,-0.053347,-0.020113,-0.003128,-0.007353,0.029771,-0.013180,0.016786


Splited test dataset(C039): torch.Size([5570, 1024]) torch.Size([5570])
- Test in C039 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C035,Caulobacter crescentus,CCNA_00001,0,0.020058,0.011668,0.030414,0.023882,-0.010837,-0.014980,...,-0.007870,0.025724,-0.016352,-0.036573,0.001705,0.061251,0.011432,-0.008975,0.018777,0.026789
1,C035,Caulobacter crescentus,CCNA_00002,0,-0.025983,-0.008170,0.005247,0.053943,-0.028749,0.060893,...,-0.028033,0.010778,0.041618,-0.057536,-0.034405,0.048157,-0.033823,-0.013403,-0.027308,-0.010038
2,C035,Caulobacter crescentus,CCNA_00003,1,0.024779,-0.001617,0.023675,0.018376,-0.033304,0.013851,...,0.012675,-0.053094,-0.010201,-0.060865,-0.031287,0.041388,-0.013631,-0.005432,0.006609,0.039694
3,C035,Caulobacter crescentus,CCNA_00004,1,0.043007,0.022084,0.018602,0.047121,-0.037631,0.035858,...,-0.055712,-0.056001,-0.008636,-0.041914,-0.006228,0.025181,-0.041679,-0.003226,0.001496,-0.003528
4,C035,Caulobacter crescentus,CCNA_00005,1,0.027681,0.015576,0.001451,0.040875,-0.025243,-0.005219,...,-0.041004,-0.041577,-0.020063,-0.043437,0.002943,0.061310,-0.033076,-0.050576,-0.033045,0.024842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3881,C035,Caulobacter crescentus,CCNA_03995,0,0.044647,0.033466,-0.050714,-0.026918,-0.028675,-0.157701,...,0.057175,0.014047,0.050637,-0.072821,-0.010288,-0.014529,0.029304,0.164122,0.091785,-0.037428
3882,C035,Caulobacter crescentus,CCNA_03996,0,0.179715,0.020425,-0.091150,-0.093731,0.001956,-0.021912,...,-0.001070,0.087600,0.108491,-0.083115,0.051162,-0.082106,-0.076152,0.086164,-0.022795,-0.045483
3883,C035,Caulobacter crescentus,CCNA_03997,0,-0.031976,0.072945,-0.015456,-0.061065,0.034636,-0.075261,...,-0.025199,-0.017073,0.023233,0.007074,0.052571,0.031189,-0.027054,0.076396,0.075286,0.068603
3884,C035,Caulobacter crescentus,CCNA_03998,0,0.024063,-0.020369,-0.008308,-0.000595,0.000381,0.035635,...,0.000596,-0.037611,-0.026730,-0.043442,-0.019756,0.029615,-0.010306,-0.033816,0.001394,0.027850


Splited test dataset(C035): torch.Size([3886, 1024]) torch.Size([3886])
- Test in C035 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C033,Salmonella enterica subsp. enterica serovar Ty...,-,0,0.065683,0.032795,0.018285,0.036632,-0.025111,-0.007229,...,0.018181,-0.035341,-0.016494,-0.089050,0.027582,0.055476,-0.059411,-0.065098,0.057698,0.000330
1,C033,Salmonella enterica subsp. enterica serovar Ty...,t0001,0,0.018239,-0.030508,0.078645,0.082283,0.127463,-0.197618,...,0.027051,0.021510,0.231686,-0.173741,0.079085,-0.236435,-0.114395,0.054386,-0.030780,-0.075775
2,C033,Salmonella enterica subsp. enterica serovar Ty...,t0002,0,0.043713,0.078354,0.026243,0.049889,0.004885,0.038013,...,-0.022686,-0.003278,-0.010358,-0.010717,0.021367,-0.041086,-0.026043,0.020208,-0.029272,0.032302
3,C033,Salmonella enterica subsp. enterica serovar Ty...,t0003,0,0.083051,0.047820,0.007012,0.029369,-0.004415,0.054288,...,-0.021937,-0.035736,0.000474,-0.025479,0.003724,-0.044224,-0.078591,-0.024167,-0.032960,-0.003994
4,C033,Salmonella enterica subsp. enterica serovar Ty...,t0004,0,0.049332,0.071890,0.020884,0.042220,-0.002572,0.009770,...,0.005520,-0.032038,-0.037331,-0.013604,0.030826,0.010263,-0.022842,-0.002780,-0.010440,0.036847
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4318,C033,Salmonella enterica subsp. enterica serovar Ty...,t4635,0,0.012097,-0.065290,0.000432,-0.009828,-0.017065,0.019812,...,-0.025016,-0.032792,-0.046223,-0.073268,0.025904,0.040681,0.015038,-0.036431,0.047604,0.044033
4319,C033,Salmonella enterica subsp. enterica serovar Ty...,t4636,0,-0.024641,0.001389,0.042155,0.021389,0.047794,-0.015396,...,0.037738,-0.035077,-0.044523,-0.048968,0.001672,0.009992,-0.022698,-0.018696,0.022103,-0.005804
4320,C033,Salmonella enterica subsp. enterica serovar Ty...,t4637,0,0.062484,0.017408,0.031820,0.050831,0.035122,0.007927,...,-0.041288,0.033037,-0.025965,-0.057013,0.085826,-0.016592,0.005705,-0.033826,-0.054984,0.018048
4321,C033,Salmonella enterica subsp. enterica serovar Ty...,t4638,0,0.104638,0.106250,-0.003138,-0.028262,0.015430,-0.032766,...,-0.022631,-0.021204,0.079132,-0.110890,0.037328,-0.076325,-0.088044,0.042978,-0.037957,-0.048862


Splited test dataset(C033): torch.Size([4323, 1024]) torch.Size([4323])
- Test in C033 was done.


'Raw data:'

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0001,1,-0.025382,-0.054158,-0.041020,0.025128,-0.022723,-0.022706,...,-0.005814,-0.028534,-0.036309,-0.044612,-0.012766,0.044037,-0.005584,-0.023319,-0.024667,0.015848
1,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0002,1,-0.018260,-0.021806,-0.000753,0.003539,-0.033552,0.010649,...,0.050197,0.036141,-0.020034,-0.039409,0.030300,0.007444,0.038922,0.000095,0.024009,0.066186
2,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0003,0,0.006159,0.021244,0.028599,0.034836,0.013581,-0.076633,...,0.017829,-0.071718,0.027697,-0.011705,-0.032012,0.071740,-0.017354,0.017639,-0.000121,0.024740
3,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0004,0,0.007004,-0.014530,0.010058,0.001008,-0.046316,0.033357,...,-0.004594,-0.042138,-0.019864,-0.048953,-0.057098,0.028646,0.039517,-0.022908,0.023727,0.008576
4,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_0005,0,0.024031,0.042662,-0.003146,0.069239,-0.064750,-0.000631,...,-0.034455,-0.005914,-0.011770,-0.015676,-0.028455,-0.037863,-0.001682,-0.012595,-0.001026,0.016611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3322,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3333,0,0.001518,0.037248,0.022039,-0.012143,0.010487,0.013753,...,-0.005680,0.012004,0.037781,-0.027722,0.031395,0.001208,-0.048559,-0.011862,0.040373,-0.008763
3323,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3334,0,-0.008089,-0.027680,-0.016765,0.012398,0.039769,-0.032880,...,-0.019672,-0.026439,0.006928,-0.045812,-0.057364,0.025876,0.050790,0.003421,-0.016292,-0.007993
3324,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3335,0,0.057358,0.024541,0.019398,-0.049149,-0.003834,0.046475,...,-0.015658,0.012642,0.041163,-0.049470,-0.015128,-0.009704,-0.014906,-0.017245,0.044353,-0.025788
3325,C061,Brevundimonas subvibrioides ATCC 15264,Bresu_3336,0,0.042842,-0.016897,-0.009269,0.021328,0.008204,0.009895,...,-0.043496,-0.033051,-0.001590,-0.045391,0.065838,-0.040051,0.016861,-0.024124,0.035798,0.011222


Splited test dataset(C061): torch.Size([3327, 1024]) torch.Size([3327])
- Test in C061 was done.
- Test in total testset was done.


'Model performance:'

Unnamed: 0,emb,file,organism,tp,fp,tn,fn,mcc,acc,f1,prc,rec,npv,tnr,auc-roc,auc-pr
0,clstm,C018,C018,233,229,3786,65,0.594411,0.931834,0.613158,0.504329,0.781879,0.983121,0.942964,0.910965,0.586572
1,clstm,C039,C039,202,338,4881,149,0.419433,0.912567,0.453423,0.374074,0.575499,0.970378,0.935237,0.831878,0.479215
2,clstm,C035,C035,239,270,3136,241,0.408286,0.868502,0.483316,0.469548,0.497917,0.928635,0.920728,0.811394,0.515062
3,clstm,C033,C033,313,347,3622,41,0.607401,0.910248,0.617357,0.474242,0.884181,0.988807,0.912572,0.949381,0.775461
4,clstm,C061,C061,216,224,2689,198,0.433438,0.873159,0.505855,0.490909,0.521739,0.931417,0.923103,0.801258,0.503849
5,clstm,total,all,1203,1408,18114,694,0.488073,0.901863,0.533718,0.460743,0.634159,0.963101,0.927876,0.848067,0.54062
6,t5,C018,C018,269,372,3643,29,0.577528,0.907025,0.57295,0.419657,0.902685,0.992102,0.907347,0.947603,0.696499
7,t5,C039,C039,327,414,4805,24,0.609819,0.921364,0.598901,0.441296,0.931624,0.99503,0.920674,0.976866,0.818725
8,t5,C035,C035,415,341,3065,65,0.63543,0.895522,0.671521,0.548942,0.864583,0.979233,0.899883,0.946234,0.780424
9,t5,C033,C033,332,325,3644,22,0.653764,0.919732,0.656775,0.505327,0.937853,0.993999,0.918115,0.968341,0.86353
