In [None]:
!pip install -r ../requirements.txt

In [1]:
import gc, os, torch, time
import pandas as pd
from torch.utils.data import DataLoader
from data_module import load_embedding_model, embed_batch

In [None]:
# --- run options ---
# directory that holds the input CSV and receives the embedding files
data_path = "../data/exam_data"

# embedding back-ends to run, processed in this order
embed_ver = [
    "esm3",
    "esm2",
    "bert",
    "t5",
]
# tag used in the output file name (all, tr, ts)
set_ver = "ts"

# sequences are cut to at most this many residues before embedding
max_len = 1600
# number of sequences per data-loader batch
batch_size = 256

In [None]:
# read the raw protein sequence table from the data directory
seq_csv_path = os.path.join(data_path, 'data-seq_raw.csv')
df = pd.read_csv(seq_csv_path)

# quick visual check of the loaded table
display(df)

Unnamed: 0,file_id,genome_id,organism,locus_tag,protein_id,product,locus,strand,dna_seq,dna_len,aa_seq,aa_len,ess
0,C050,CP001363,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,ACY86538.1,thr operon leader peptide,190..255,+,ATGAACCGCATCAGCACCACCACCATTACCACCATCACCATTACCA...,66,MNRISTTTITTITITTGNGAG,21,0
1,C050,CP001363,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,ACY86539.1,bifunctional aspartokinase I/homeserinedehydro...,337..2799,+,ATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAAC...,2463,MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNH...,820,0
2,C050,CP001363,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,ACY86540.1,homoserine kinase,2801..3730,+,ATGGTGAAAGTGTATGCCCCGGCTTCCAGCGCGAACATGAGCGTCG...,930,MVKVYAPASSANMSVGFDVLGAAVTPVDGTLLGDVVSVEAADHFRL...,309,0
3,C050,CP001363,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,ACY86541.1,threonine synthase,3734..5020,+,ATGAAACTCTATAATCTGAAAGACCATAATGAGCAGGTCAGCTTTG...,1287,MKLYNLKDHNEQVSFAQAVTQGLGKQQGLFFPHDLPEFSLTEIDEM...,428,0
4,C050,CP001363,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,ACY86542.1,hypothetical protein,5114..5887,-,ATGCTGATTCTGATTTCACCTGCAAAAACGCTTGATTATCAAAGCC...,774,MLILISPAKTLDYQSPLATTRYTQPELLDHSQQLIQQARQLSAPQI...,257,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,CP016816,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,AVX54582.1,Tetracycline resistance ribosomal protectionpr...,20416..22335,+,ATGAAAATTATTAATATTGGAGTTTTAGCTCATGTTGATGCAGGAA...,1920,MKIINIGVLAHVDAGKTTLTESLLYNSGAITELGSVDKGTTRTDNT...,639,1
283920,O046,CP016816,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,AVX54581.1,Imidazoleglycerol-phosphate dehydratase,18716..19378,-,ATGACAGAGCAGAAAGCCCTAGTAAAGCGTATTACAAATGAAACCA...,663,MTEQKALVKRITNETKIQIAISLKGGPLAIEHSIFPEKEAEAVAEQ...,220,1
283921,O046,CP016816,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,AVX54826.1,50S ribosomal protein L33,319048..319191,-,ATGCGTGAAAAATACATTTTACGTTGTACAGTTTGTAAAAATGAAA...,144,MREKYILRCTVCKNENYIGKNDKKKPKIEVSKYCSNCNKHETHKQKK,47,1
283922,O046,CP016816,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,AVX54860.1,Adenylyl-sulfate kinase,360403..361011,+,ATGGCTACTAATATTACTTGGCATCCAAATCTTACTTACGACGAAC...,609,MATNITWHPNLTYDERKALRKQDGCTIWLTGLSASGKSTIACALEQ...,202,1


In [None]:
# metadata columns that are stored next to the embeddings
info_col = ['file_id', 'organism', 'locus_tag', 'ess']
# the 20 standard amino-acid one-letter codes
valid_aa = 'ACDEFGHIKLMNPQRSTVWY'

# Clean the sequences in two steps:
#   1. delete every character that is not one of the codes in valid_aa
#   2. keep at most the first max_len residues
# (Alternative, currently disabled: drop the rows that contain an invalid
#  residue and reset the index, instead of deleting the characters.)
invalid_residue_pattern = f"[^{valid_aa}]"
df['aa_seq'] = (
    df['aa_seq']
    .str.replace(invalid_residue_pattern, "", regex=True)
    .str[:max_len]
)

In [None]:
# run on the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# os.cpu_count() returns None when the core count cannot be determined;
# fall back to a single worker in that case instead of raising in min().
num_workers = min(os.cpu_count() or 1, 16)

# For every embedding back-end listed in embed_ver:
#   1. load the model (and tokenizer) and move the model to `device`
#   2. wrap the sequences of df['aa_seq'] in a DataLoader
#   3. embed batch by batch with embed_batch, collecting seven embedding types
#   4. write df[info_col] plus one table per embedding type to
#      '<data_path>/emb-<emb_ver>_<set_ver>.h5'
for emb_ver in embed_ver:
    print(f">>> {emb_ver.upper()} <<<")
    ver_key = emb_ver.lower()
    is_esm3 = 'esm3' in ver_key

    # load embedding model
    model, tokenizer = load_embedding_model(emb_ver)
    model.to(device)

    # set data-loader
    print("> Data-loader setting...")
    time_total = time.time()
    # get protein sequences
    sequences = df['aa_seq'].tolist()
    print("Number of sequences:", len(sequences))
    # the bert / t5 back-ends are given residues separated by single spaces
    if "bert" in ver_key or "t5" in ver_key:
        sequences = [" ".join(seq) for seq in sequences]
    print("Prepared sequence examples:")
    for s in sequences[:3]:
        print(f"- {s[:10]}... {len(s)}")
    if is_esm3:
        # esm3: the raw strings are batched as-is, no tokenizer call happens here
        dataloader = DataLoader(sequences, batch_size=batch_size, pin_memory=True, num_workers=num_workers)
    else:
        # NOTE(review): it is unclear whether the tokenizer reads `num_threads`;
        # the comprehension below tokenizes one sequence at a time either way.
        tokenizer.num_threads = num_workers
        tokenized_seqs = [tokenizer(seq, return_tensors='pt') for seq in sequences]
        # identity collate: a batch stays a plain list of per-sequence encodings
        dataloader = DataLoader(
            tokenized_seqs, batch_size=batch_size, pin_memory=True, num_workers=num_workers, collate_fn=lambda x: x
        )
        print("Tokenized sequence examples:")
        # slice (not range(3)) so fewer than three sequences cannot raise IndexError
        for tok in tokenized_seqs[:3]:
            print(f"- ID: {tok['input_ids'][0, :10]}... {tok['input_ids'].shape}")
            print(f"- Mask: {tok['attention_mask'][0, :10]}... {tok['attention_mask'].shape}")
    batch_n = len(dataloader)
    print(f"Number of batches: {batch_n}")
    print(f"=== Data-loader setting complete: {time.time() - time_total:.1f} sec ===")

    # embed the protein sequences
    print("> Protein sequence embedding...")
    # one accumulator list per embedding type; each receives one entry per batch
    embed_allmean, embed_aamean = [], []
    embed_bos, embed_eos = [], []
    embed_first, embed_center, embed_last = [], [], []
    step = 0
    time_total = time.time()
    time_step = time_total
    for step, batch in enumerate(dataloader, start=1):
        # load batch on GPU (tokenized back-ends only; esm3 batches are plain strings)
        if not is_esm3:
            batch = [
                (
                    seq_token["input_ids"].to(device, non_blocking=True),
                    seq_token["attention_mask"].to(device, non_blocking=True),
                )
                for seq_token in batch
            ]
        # embed batch
        emb_allmean, emb_aamean, emb_bos, emb_eos, emb_first, emb_center, emb_last = embed_batch(
            batch=batch, model=model, embed_ver=emb_ver
        )
        torch.cuda.empty_cache()
        # gather embeddings
        embed_allmean.append(emb_allmean)
        embed_aamean.append(emb_aamean)
        embed_bos.append(emb_bos)
        embed_eos.append(emb_eos)
        embed_first.append(emb_first)
        embed_center.append(emb_center)
        embed_last.append(emb_last)
        # display process: report the elapsed time of every block of 10 steps
        if step % 10 == 0:
            print(f"- Step [{step}/{batch_n}] {time.time() - time_step:.1f} sec")
            time_step = time.time()
    print(f"=== Protein embedding complete: {time.time() - time_total:.1f} sec ===")

    # release model memory: collect unreachable objects first, then ask the CUDA
    # caching allocator to hand its now-unused blocks back to the device
    del model, tokenizer
    gc.collect()
    torch.cuda.empty_cache()

    # postprocessing & save the embeddings
    print("> Postprocessing & saving the embeddings...")
    time_total = time.time()
    time_start = time_total
    embed_sets = [embed_allmean, embed_aamean, embed_bos, embed_eos, embed_first, embed_center, embed_last]
    embed_types = ['allmean', 'aamean', 'bos', 'eos', 'first', 'center', 'last']
    save_path = os.path.join(data_path, f'emb-{emb_ver}_{set_ver}.h5')

    # mode='w' starts a fresh file; the embedding tables below are appended to it
    df[info_col].to_hdf(save_path, key='info', mode='w', complevel=9, complib='zlib')
    print(f"- '{emb_ver}_info' save complete: {time.time() - time_start:.1f} sec")
    display(df[info_col])

    for emb_set, emb_type in zip(embed_sets, embed_types):
        # no 'bos' table is written for the t5 back-end
        if 't5' in ver_key and 'bos' == emb_type.lower():
            continue
        time_start = time.time()
        # stack the per-batch tensors into one (rows = sequences) table
        emb_df = pd.DataFrame(torch.cat(emb_set).numpy())
        emb_df.to_hdf(save_path, key=emb_type, mode='a', complevel=9, complib='zlib')
        print(f"- '{emb_ver}_{emb_type}' save complete: {time.time() - time_start:.1f} sec")
        display(emb_df)
    print(f"=== Save complete: {time.time() - time_total:.1f} sec ===\n")

>>> T5 <<<


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


> Data-loader setting...
Number of sequences: 283924
Prepared sequence examples:
- M N R I S ... 41
- M R V L K ... 1639
- M V K V Y ... 617
Tokenized sequence examples:
- ID: tensor([19, 17,  8, 12,  7, 11, 11, 11, 12, 11])... torch.Size([1, 22])
- Mask: tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])... torch.Size([1, 22])
- ID: tensor([19,  8,  6,  4, 14, 15,  5,  5, 11,  7])... torch.Size([1, 821])
- Mask: tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])... torch.Size([1, 821])
- ID: tensor([19,  6, 14,  6, 18,  3, 13,  3,  7,  7])... torch.Size([1, 310])
- Mask: tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])... torch.Size([1, 310])
Number of batches: 278
=== Data-loader setting complete: 316.0 sec ===
> Protein sequence embedding...
- Step [10/278] 603.3 sec
- Step [20/278] 607.2 sec
- Step [30/278] 586.5 sec
- Step [40/278] 614.7 sec
- Step [50/278] 651.8 sec
- Step [60/278] 595.3 sec
- Step [70/278] 633.3 sec
- Step [80/278] 646.2 sec
- Step [90/278] 602.9 sec
- Step [100/278] 627.8 sec
- Step [110/278] 6

Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,-0.049371,0.163820,-0.219679,0.019581,0.305149,-0.304640,...,-0.064120,0.158074,0.661177,-0.350553,0.396804,-0.289547,-0.405487,0.199097,-0.023773,-0.520262
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,-0.240329,-0.189138,-0.310265,-0.157506,0.350747,0.154589,...,0.072696,-0.103963,0.213086,0.055001,-0.138454,0.071324,-0.153500,-0.037551,-0.219072,-0.339926
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,-0.204783,-0.034828,0.009298,-0.023510,0.230727,0.022204,...,0.015943,-0.184957,0.070099,-0.000730,0.211045,0.040097,-0.387749,0.135722,-0.091258,0.201138
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,-0.140180,-0.076402,-0.105657,0.084481,0.375071,-0.111160,...,0.038791,-0.307700,0.036717,-0.023990,0.172541,-0.219786,0.089030,-0.047601,0.319219,-0.048020
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.080777,0.383700,0.237501,0.369460,-0.094065,0.100681,...,0.269926,-0.076266,0.012823,0.052992,-0.162184,0.128264,0.009878,0.164374,-0.093041,-0.047296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,-0.077053,-0.170203,-0.116610,-0.240810,-0.032757,0.067470,...,-0.137570,0.059220,0.015109,-0.014641,0.118092,-0.006082,-0.058487,0.085743,-0.017411,0.095818
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,-0.258937,-0.005928,-0.052825,0.155068,0.015876,0.073789,...,-0.309864,-0.106548,0.013895,0.022814,-0.229732,0.167183,0.026737,-0.151487,-0.263022,0.272067
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,-0.267710,-0.113778,0.252234,0.195408,-0.182631,-0.259615,...,-0.139429,0.164255,-0.138605,-0.127404,0.087189,-0.212414,-0.232907,-0.166602,0.339350,0.184319
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,-0.075423,0.232160,-0.348512,0.048989,-0.257464,-0.818136,...,0.098422,-0.100611,0.145713,-0.017544,0.135909,-0.240773,0.166520,0.353866,-0.519237,-0.704526


t5_first


Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,0.065344,-0.126964,-0.423504,0.098970,0.178375,-0.131448,...,-0.220750,0.219721,-0.190850,-0.071231,0.046588,-0.060389,-0.124743,0.070092,-0.018012,0.118681
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,0.229827,-0.151514,-0.020429,0.137528,0.262602,-0.187458,...,0.139755,0.416638,-0.500989,-0.061745,0.079090,0.009843,-0.014082,0.041121,-0.362340,0.314115
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,0.158652,-0.238558,-0.105196,0.101047,0.278562,-0.234516,...,0.045540,0.406883,-0.179488,-0.157577,0.206518,-0.094620,0.035470,0.064153,-0.070354,0.096344
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,0.063999,0.007141,0.223693,-0.030590,0.200305,-0.146545,...,-0.097798,0.209928,-0.324394,0.007730,0.072865,-0.048823,0.023022,0.220458,-0.256884,-0.066301
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.263810,-0.049946,0.195230,0.015759,0.203205,-0.074603,...,-0.099785,0.502817,-0.707766,-0.396425,0.340458,-0.173970,-0.011167,0.033248,-0.217184,-0.098505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,0.159041,-0.108905,0.048879,0.423213,-0.066132,-0.132598,...,-0.082205,0.271018,-0.412717,-0.181109,0.163372,0.004195,-0.168535,-0.166800,0.045728,0.189744
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,0.105252,-0.294758,-0.007108,0.550689,0.161133,-0.329330,...,-0.410333,0.039998,0.035250,0.061170,0.064293,-0.088110,-0.120426,0.088466,-0.196413,0.264433
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,-0.128271,-0.187882,0.060804,0.263644,0.063790,-0.629232,...,-0.163881,0.013167,-0.057070,-0.065802,-0.005396,0.007249,0.120940,0.183087,0.013950,0.191955
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,0.174813,-0.147095,-0.043367,0.383806,0.179668,-0.123008,...,-0.344616,0.218524,0.015178,0.042904,0.165866,-0.035368,-0.299651,0.078002,-0.002532,0.120704


t5_last


Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,0.116065,-0.055074,-0.047198,0.091096,-0.023774,0.035514,...,0.023166,-0.125994,0.095053,-0.114166,-0.172020,-0.337392,0.139445,0.029175,0.068761,0.005447
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,-0.128664,-0.118767,0.073194,0.053941,-0.031853,-0.137136,...,-0.131517,0.051485,0.014105,0.059611,-0.122367,0.004980,-0.087650,0.083387,-0.182388,0.184179
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,-0.126500,-0.058462,0.534104,0.282322,0.208357,0.042368,...,0.004231,-0.001846,0.054372,-0.049039,-0.312130,-0.063957,0.154453,-0.170236,-0.093901,-0.003865
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,-0.004718,-0.091913,0.348961,-0.001013,0.126467,-0.208610,...,-0.192375,-0.072480,0.141099,0.035346,-0.067792,-0.342933,-0.086618,-0.054763,-0.019575,0.204228
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.135892,-0.074276,0.199674,0.013650,0.152540,-0.033576,...,-0.229157,-0.026466,-0.049522,-0.060608,0.005451,-0.443518,0.024153,-0.095934,-0.168246,0.302621
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,0.049126,-0.130923,0.144488,-0.051700,0.084612,-0.028485,...,-0.106989,0.119977,-0.008777,-0.036728,0.075124,-0.190166,-0.028554,0.059950,-0.118504,0.062652
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,0.048446,0.047130,0.329764,0.182134,0.067687,0.145212,...,-0.070787,0.164477,-0.077092,-0.058705,-0.006361,-0.234844,-0.144847,-0.138890,-0.131026,-0.178367
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,-0.048940,-0.046887,0.560575,0.021994,-0.187470,-0.017453,...,-0.089239,0.143004,0.076838,0.055929,0.123544,-0.470247,0.005004,-0.152168,0.165839,-0.067238
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,-0.153266,-0.052713,0.331571,-0.111520,-0.144880,0.226726,...,-0.262898,-0.194602,0.005124,-0.029744,-0.132702,-0.326389,-0.289782,-0.112341,-0.262503,0.037811


t5_aamean


Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,0.018239,-0.030508,0.078645,0.082283,0.127463,-0.197618,...,0.027051,0.021510,0.231686,-0.173741,0.079085,-0.236435,-0.114395,0.054386,-0.030780,-0.075775
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,0.043340,0.078926,0.026738,0.050184,0.004291,0.038041,...,-0.023108,-0.003255,-0.010074,-0.010509,0.021057,-0.041092,-0.026289,0.020529,-0.029514,0.032310
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,0.083051,0.047820,0.007012,0.029369,-0.004415,0.054288,...,-0.021937,-0.035736,0.000474,-0.025479,0.003724,-0.044224,-0.078591,-0.024167,-0.032960,-0.003994
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,0.048851,0.071860,0.020825,0.042091,-0.002787,0.009612,...,0.005614,-0.032511,-0.037446,-0.013102,0.030071,0.009861,-0.022703,-0.002080,-0.011043,0.036730
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.035259,0.042339,0.024604,-0.025343,-0.006228,0.043092,...,-0.020996,0.023913,-0.012189,-0.067514,0.045659,0.010079,-0.014732,-0.013761,-0.029590,0.063286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,0.020563,0.107576,0.027025,0.052937,-0.012762,0.021742,...,0.013594,0.028046,0.010028,-0.060973,0.093713,-0.003019,-0.053488,-0.045007,-0.043997,-0.002393
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,0.057506,-0.014071,-0.033472,0.067503,-0.016533,0.091051,...,-0.048624,0.031121,0.011467,-0.048133,-0.033131,-0.060800,-0.055844,-0.098644,-0.041724,0.014361
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,0.003938,-0.011171,0.027588,0.017686,-0.010909,0.041780,...,-0.008057,0.051703,0.032014,-0.148301,0.106521,-0.029680,-0.032345,-0.021705,0.096927,0.099435
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,0.064550,0.037385,-0.023904,-0.001969,-0.006216,0.048084,...,-0.037994,-0.018542,-0.014736,-0.102566,0.023649,-0.022915,-0.034404,-0.085372,0.044055,0.009911


t5_allmean


Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,0.016716,-0.031845,0.077063,0.082092,0.122552,-0.181123,...,0.024810,0.019273,0.220462,-0.164246,0.074593,-0.222749,-0.110326,0.050530,-0.027618,-0.068943
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,0.043389,0.078761,0.026800,0.049912,0.004308,0.037941,...,-0.023020,-0.003242,-0.010078,-0.010378,0.021033,-0.041033,-0.026189,0.020466,-0.029421,0.032270
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,0.082988,0.047526,0.007235,0.028937,-0.004398,0.054022,...,-0.021697,-0.035713,0.000324,-0.024995,0.003641,-0.043902,-0.078182,-0.024029,-0.032594,-0.004008
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,0.048859,0.071602,0.020958,0.041720,-0.002771,0.009489,...,0.005706,-0.032422,-0.037412,-0.012804,0.029971,0.009890,-0.022566,-0.002055,-0.010903,0.036635
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.035233,0.041948,0.024590,-0.025400,-0.006127,0.042763,...,-0.020623,0.023801,-0.012134,-0.066863,0.045405,0.010130,-0.014574,-0.013681,-0.029350,0.062909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,0.020607,0.107341,0.027141,0.052566,-0.012742,0.021671,...,0.013670,0.027996,0.009963,-0.060716,0.093532,-0.003009,-0.053373,-0.044929,-0.043874,-0.002380
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,0.057345,-0.014208,-0.033131,0.067003,-0.016396,0.090481,...,-0.048009,0.030911,0.011347,-0.047509,-0.032974,-0.060465,-0.055494,-0.098062,-0.041369,0.014156
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,0.003970,-0.012058,0.028379,0.017963,-0.010615,0.041456,...,-0.005706,0.050292,0.032118,-0.143252,0.103687,-0.029375,-0.031912,-0.021850,0.094137,0.096484
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,0.064304,0.036855,-0.023804,-0.001995,-0.006205,0.047682,...,-0.037409,-0.018344,-0.014836,-0.101570,0.023398,-0.022747,-0.034048,-0.084793,0.043853,0.009817


t5_eos


Unnamed: 0,file_id,organism,locus_tag,ess,0,1,2,3,4,5,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0001,0,-0.015264,-0.059927,0.043838,0.078085,0.019432,0.165267,...,-0.022252,-0.027712,-0.015241,0.035147,-0.019741,0.064663,-0.024862,-0.030443,0.038772,0.074514
1,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0002,0,0.083816,-0.056523,0.078407,-0.172926,0.018743,-0.043881,...,0.049137,0.007355,-0.013334,0.097031,0.001040,0.007597,0.056037,-0.031496,0.046812,-0.000620
2,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0003,0,0.063757,-0.043234,0.076306,-0.104482,0.000811,-0.028059,...,0.052438,-0.028529,-0.046183,0.124679,-0.022167,0.055612,0.048430,0.018410,0.080696,-0.008328
3,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0004,0,0.052364,-0.038657,0.077991,-0.117237,0.004035,-0.043465,...,0.044863,0.005576,-0.022832,0.114629,-0.013051,0.021898,0.036204,0.008846,0.048777,-0.003891
4,C050,Salmonella enterica subsp. enterica serovar Ty...,STM14_0005,0,0.028592,-0.058579,0.020962,-0.040078,0.019906,-0.041731,...,0.075157,-0.004976,0.002056,0.100405,-0.019802,0.023277,0.026040,0.006802,0.032350,-0.034047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283919,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0913,1,0.048392,-0.042987,0.101156,-0.184102,0.000341,-0.024134,...,0.061865,-0.003911,-0.031592,0.103593,-0.021997,0.003498,0.020276,0.004865,0.034958,0.005906
283920,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0918,1,0.021840,-0.044302,0.041969,-0.042964,0.013657,-0.034824,...,0.087254,-0.015392,-0.015060,0.089690,0.001533,0.013287,0.021372,0.030093,0.036783,-0.030861
283921,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0930,1,0.005433,-0.053745,0.065550,0.030977,0.003228,0.026217,...,0.104778,-0.016034,0.036963,0.094047,-0.029504,-0.015065,-0.011592,-0.028665,-0.036991,-0.042235
283922,O046,synthetic bacterium JCVI-Syn3A,JCVISYN3A_0931,1,0.014663,-0.070205,-0.003692,-0.007119,-0.003837,-0.033539,...,0.080860,0.021605,-0.035078,0.099716,-0.027324,0.011232,0.037810,0.032123,0.003209,-0.009110


=== Save complete: 1450.1 sec ===

