### Import and global config

In [1]:
%env TF_CPP_MIN_LOG_LEVEL=3

env: TF_CPP_MIN_LOG_LEVEL=3


In [2]:
from datetime import datetime
from itertools import product
import re
import json
import math
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.data import Dataset
from tensorflow.keras import backend
import keras_tuner

In [4]:
def check_and_set_gpu():
    """Enable on-demand VRAM allocation on the first visible GPU, if there is one.

    Prints a one-line status in every case: a notice when no GPU is listed,
    a confirmation when memory growth was enabled, or the error message when
    TensorFlow refuses the setting.
    """
    gpu_list = tf.config.list_physical_devices('GPU')
    if not gpu_list:
        print("No available GPU!")
        return
    try:
        tf.config.experimental.set_memory_growth(gpu_list[0], True)
        print("Enable VRAM growth")
    except (RuntimeError, ValueError) as e:
        # The handler must bind the exception with `as e`; a bare `except e:`
        # looks up an undefined name and raises NameError instead of reporting.
        print(e)
check_and_set_gpu()

Enable VRAM growth


In [5]:
# for reproducibility: one shared seed for every RNG, plus deterministic TF ops
# (SEED is reused below for the tuner and for dataset shuffling)
SEED = 42
keras.utils.set_random_seed(SEED)
tf.config.experimental.enable_op_determinism()

# for network debugging
# tf.debugging.enable_check_numerics()
# tf.debugging.experimental.enable_dump_debug_info(  #  incompatible with enable_op_determinism()
#     f"./logs/tfdbg2_logdir",
#     tensor_debug_mode="FULL_HEALTH",
#     circular_buffer_size=-1
# )

### Load data

In [6]:
# Tab-separated table of sequences and their annotations.
# NOTE(review): the file name suggests AIRRSHIP-simulated data with SHM, seed 42, 100,000 rows — confirm.
DATA_PATH = "./data/airrship_shm_seed42_100_000.tsv"
df_data = pd.read_csv(DATA_PATH, sep="\t")
df_data.head()

Unnamed: 0,sequence_id,sequence,productive,stop_codon,vj_in_frame,v_call,d_call,j_call,junction,junction_aa,...,d_sequence_start,d_sequence_end,j_sequence_start,j_sequence_end,shm_events,shm_count,shm_freq,unmutated_sequence,gapped_unmutated_sequence,gapped_mutated_sequence
0,0,CAGGTGCAGCTGCGGGAGTCGGGCCCAGGGCTGGTGAAGCCTTTGG...,T,T,F,IGHV4-61*08,IGHD3-3*02,IGHJ4*01,TGCGCGAGGCCGCCAGGTGTATCAGCATTTAGGAGGACACCCGCTT...,CARPPGVSAFRRTPAWDFDPW,...,307,318,338,382,"14:A>G,30:A>G,44:C>T,68:C>A,84:C>T,89:G>C,90:C...",28,0.073298,CAGGTGCAGCTGCAGGAGTCGGGCCCAGGACTGGTGAAGCCTTCGG...,CAGGTGCAGCTGCAGGAGTCGGGCCCA...GGACTGGTGAAGCCTT...,CAGGTGCAGCTGCGGGAGTCGGGCCCA...GGGCTGGTGAAGCCTT...
1,1,CAGGTCACTTTGAGGGAGTCTGGTCCTGCGCTGGTGAAACCCACAC...,T,T,F,IGHV2-70*19,IGHD2-8*02,IGHJ4*01,TGTGCACGGGGGCATGTCCACGATAGGGTCTTTCCGAGAGTTGACT...,CARGHVHDRVFPRVDFW,...,310,313,329,370,"9:C>T,72:C>T,82:T>C,85:C>A,88:A>T,100:A>G,105:...",20,0.054054,CAGGTCACCTTGAGGGAGTCTGGTCCTGCGCTGGTGAAACCCACAC...,CAGGTCACCTTGAGGGAGTCTGGTCCT...GCGCTGGTGAAACCCA...,CAGGTCACTTTGAGGGAGTCTGGTCCT...GCGCTGGTGAAACCCA...
2,2,GAGGTGCAGCTCCCGGAGTCTGGGGGCGGCCTGGTACAGCCTGGGG...,T,T,F,IGHV3-23*03,IGHD5-24*01,IGHJ4*01,TGTGCGAGAGACGGAAAAAAGAGACCCGACTGG,CARDGKKRPDW,...,305,309,314,349,"12:G>C,13:T>C,14:T>C,27:A>C,31:T>C,71:C>T,90:C...",34,0.097421,GAGGTGCAGCTGTTGGAGTCTGGGGGAGGCTTGGTACAGCCTGGGG...,GAGGTGCAGCTGTTGGAGTCTGGGGGA...GGCTTGGTACAGCCTG...,GAGGTGCAGCTCCCGGAGTCTGGGGGC...GGCCTGGTACAGCCTG...
3,3,CAGGTGCAGCTGGTGGAGTCTGGGGGAGGCGTGGACCAGCCTGGGA...,T,T,F,IGHV3-33*05,IGHD3/OR15-3a*01,IGHJ4*01,TGTGCGAGAGACAAAAATTTGGGACTGGCCGGGAACTTCTTTGACT...,CARDKNLGLAGNFFDYW,...,304,313,326,367,"35:T>A,72:G>T,92:G>C,98:G>A,132:G>A,151:A>T,15...",17,0.046322,CAGGTGCAGCTGGTGGAGTCTGGGGGAGGCGTGGTCCAGCCTGGGA...,CAGGTGCAGCTGGTGGAGTCTGGGGGA...GGCGTGGTCCAGCCTG...,CAGGTGCAGCTGGTGGAGTCTGGGGGA...GGCGTGGACCAGCCTG...
4,4,GAGGTGCAGCTGGTGGAGTCTGGGGGAGGCTTAGTTCAGCCTGGGG...,T,T,F,IGHV3-74*02,IGHD1-26*01,IGHJ4*01,TGTGCAAGACAAGTGGGGGGCAATATCGACCACCTTTCGAAATACT...,CARQVGGNIDHLSKYYW,...,298,301,329,367,"89:G>C,93:C>T,97:T>C,119:C>T,138:G>A,147:A>C,1...",14,0.038147,GAGGTGCAGCTGGTGGAGTCTGGGGGAGGCTTAGTTCAGCCTGGGG...,GAGGTGCAGCTGGTGGAGTCTGGGGGA...GGCTTAGTTCAGCCTG...,GAGGTGCAGCTGGTGGAGTCTGGGGGA...GGCTTAGTTCAGCCTG...


In [7]:
# Label columns in fixed order: v start/end, d start/end, j start/end.
# Downstream metrics index into this order, so it must not change.
pos_names = [f"{seg}_sequence_{pos}" for seg in "vdj" for pos in ("start", "end")]
sequences = df_data["sequence"]
positions = df_data[pos_names]

# Sanity check: no missing values in the inputs or in any label column.
display(sequences.notna().all())
display(positions.notna().all())

True

v_sequence_start    True
v_sequence_end      True
d_sequence_start    True
d_sequence_end      True
j_sequence_start    True
j_sequence_end      True
dtype: bool

In [8]:
# Pair each raw sequence string with its six position labels.
ds_seq = Dataset.from_tensor_slices(sequences.to_numpy())
ds_pos = Dataset.from_tensor_slices(positions.to_numpy())
ds_all = Dataset.zip((ds_seq, ds_pos))

# Show one (sequence, positions) element and the total number of elements.
display(ds_all.take(1).get_single_element())
display(ds_all.cardinality().numpy())

(<tf.Tensor: shape=(), dtype=string, numpy=b'CAGGTGCAGCTGCGGGAGTCGGGCCCAGGGCTGGTGAAGCCTTTGGAGACCCTGTCCCTCACCTGCAATGTCTCTGGTGGCTCTGTCACTAGTGGTGGTTACTACTGGAGTTGGGTCCGGCTGACCCCAGGGAAGGGACTGGACTGGATTGGTTTTCTTTATTACAGTGGGAGTACCAATTACAACCCCTCCCTCGAGACTCGAGTCACCATATCAGTAGACACGGCCAAGAACCAGTTCTCTCTGAAGGTGAGCTCTGTGACCGCTGCGGACACGGCCGTGTATTACTGCGCGAGGCCGCCAGGTGTATCAGCATTTAGGAGGACACCCGCTTGGGACTTTGACCCCTGGGGCCATGGAACCCTGGTCACCGTCTCCTCAG'>,
 <tf.Tensor: shape=(6,), dtype=int64, numpy=array([  1, 296, 307, 318, 338, 382])>)

100000

### Functions for encoding

In [9]:
def dna_onehot_tensor(seq):
    """One-hot encode a DNA string tensor, one depth-4 row per character.

    A, C, G and T map to indices 0..3. Any other character is looked up as
    -1, which lies outside the one-hot depth.
    """
    bases = tf.constant(["A", "C", "G", "T"], dtype=tf.string)
    codes = tf.constant([0, 1, 2, 3])
    base_to_code = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(keys=bases, values=codes),
        default_value=tf.constant(-1)
    )
    # Split into single bytes, translate to indices, then expand to one-hot rows.
    return tf.one_hot(base_to_code.lookup(tf.strings.bytes_split(seq)), depth=4)

# test dna_onehot_tensor
# test_oh = ds_all.take(6).map(
#     lambda x, y: (dna_onehot_tensor(x), y)
# )
# for padded_batch in test_oh.padded_batch(3):
#     display(padded_batch)

In [10]:
def get_kmer_tensor(seq, k):
    """Turn a sequence string into a space-separated "sentence" of overlapping k-mers."""
    return tf.strings.reduce_join(
        # Sliding window of k single-byte characters, glued back together without a separator.
        tf.strings.ngrams(tf.strings.bytes_split(seq), k, separator=""),
        separator=" ",
    )

# test get_kmer_tensor()
# test_kmer = ds_all.take(3).map(
#     lambda x, y: (get_kmer_tensor(x, 3), y)
# )
# display(test_kmer.batch(3).take(1).get_single_element())

In [11]:
def get_kmer_vocab(k):
    """Return every length-k string over the alphabet ACGT, in itertools.product order."""
    vocab = []
    for letters in product("ACGT", repeat=k):
        vocab.append("".join(letters))
    return vocab

# test get_kmer_vocab()
# vocab_kmer = get_kmer_vocab(3)
# " ".join(vocab_kmer)

### Prepare datasets

In [12]:
# Split sizes in number of sequences; the split cell asserts them against the dataset.
TRAIN_SIZE = 99000
VALID_SIZE = 1000
TEST_SIZE = 0  # no held-out test split is carved out of this file

In [13]:
# Sequential (unshuffled) split: first TRAIN_SIZE rows, then VALID_SIZE rows, remainder is test.
ds_train = ds_all.take(TRAIN_SIZE)
ds_not_train = ds_all.skip(TRAIN_SIZE)
ds_valid = ds_not_train.take(VALID_SIZE)
ds_test = ds_not_train.skip(VALID_SIZE)

# Guard against the configured sizes drifting away from the actual data size.
assert ds_train.cardinality().numpy() == TRAIN_SIZE
assert ds_valid.cardinality().numpy() == VALID_SIZE
assert ds_test.cardinality().numpy() == TEST_SIZE

In [14]:
def transform_ds(ds, method, scale_factor, batch_size=None, shuffle_buffer=None, shuffle_seed=None):
    """Encode sequences, scale labels and optionally shuffle and batch a dataset.

    Args:
        ds: dataset of (sequence, positions) pairs.
        method: "onehot", or "<k>mer" (e.g. "3mer", "12mer") for k-mer sentences.
        scale_factor: divisor applied to the position labels.
        batch_size: if falsy, the encoded dataset is returned unshuffled and unbatched.
        shuffle_buffer: shuffle buffer size; only used when batching.
        shuffle_seed: seed for shuffling; must be given together with
            shuffle_buffer, or both must be omitted.

    Returns:
        The transformed dataset. One-hot batches are padded; k-mer batches are
        plain batches of strings.

    Raises:
        AssertionError: on an unknown method or an unpaired shuffle argument.
        ValueError: if a "...mer" method does not start with an integer k.
    """
    assert method == "onehot" or method.endswith("mer")
    assert (shuffle_buffer is not None) ^ (shuffle_seed is None)
    if method == "onehot":
        ds = ds.map(lambda x, y: (dna_onehot_tensor(x), y / scale_factor))
    else:
        # Read all leading digits so multi-digit k is honoured; taking only the
        # first character would turn "12mer" into k=1.
        leading_digits = re.match(r"\d+", method)
        if leading_digits is None:
            raise ValueError(f"cannot read k from method {method!r}")
        k = int(leading_digits.group())
        ds = ds.map(lambda x, y: (get_kmer_tensor(x, k), y / scale_factor))
    if not batch_size:
        return ds
    if shuffle_buffer:
        ds = ds.shuffle(shuffle_buffer, seed=shuffle_seed, reshuffle_each_iteration=True)
    # One-hot tensors vary in length and need padding; k-mer sentences are scalar strings.
    if method == "onehot":
        return ds.padded_batch(batch_size)
    return ds.batch(batch_size)
                    
# # test transform_ds()
# for batch_seq, batch_pos in transform_ds(ds_train.take(6), "onehot", 450, 3, 5, 1):
#     display(batch_seq.shape)
#     display(batch_pos.shape)   
# display(batch_seq)
# display(batch_pos)
    
# for batch_seq, batch_pos in transform_ds(ds_train.take(6), "3mer", 450, 3, 5, 1):
#     display(batch_seq.shape)
#     display(batch_pos.shape)
# display(batch_seq)
# display(batch_pos)

# batched = transform_ds(ds_train, "3mer", 450, 32, 128, SEED)
# display(batched.cardinality().numpy())
# prefetched = batched.prefetch(8)
# display(prefetched.cardinality().numpy())

### Functions and classes for building model

In [15]:
# Custom metrics
class ScaledRMSE(keras.metrics.RootMeanSquaredError):
    """Root-mean-squared error computed after multiplying both inputs by scale_factor.

    The rescaled targets are rounded to the nearest integer; predictions are
    left unrounded.
    """

    def __init__(self, scale_factor, name="s_rmse", dtype=None):
        super().__init__(name, dtype=dtype)
        self.scale_factor = scale_factor

    def update_state(self, y_true, y_pred, sample_weight=None):
        factor = self.scale_factor
        targets = tf.math.rint(tf.cast(y_true, self._dtype) * factor)
        estimates = tf.cast(y_pred, self._dtype) * factor
        super().update_state(targets, estimates, sample_weight)
        
class ScaledMAE(keras.metrics.MeanAbsoluteError):
    """Mean absolute error computed after multiplying both inputs by scale_factor.

    The rescaled targets are rounded to the nearest integer; predictions are
    left unrounded.
    """

    def __init__(self, scale_factor, name="s_mae", dtype=None):
        super().__init__(name, dtype=dtype)
        self.scale_factor = scale_factor

    def update_state(self, y_true, y_pred, sample_weight=None):
        factor = self.scale_factor
        targets = tf.math.rint(tf.cast(y_true, self._dtype) * factor)
        estimates = tf.cast(y_pred, self._dtype) * factor
        super().update_state(targets, estimates, sample_weight)
        
class PosAccuracy(keras.metrics.Accuracy):
    """Per-value accuracy: fraction of rescaled, rounded predictions equal to the rounded target."""

    def __init__(self, scale_factor, name="p_acc", dtype=None):
        super().__init__(name, dtype=dtype)
        self.scale_factor = scale_factor

    def _to_rounded(self, values):
        # Rescale by scale_factor and snap to the nearest integer.
        return tf.math.rint(tf.cast(values, self._dtype) * self.scale_factor)

    def update_state(self, y_true, y_pred, sample_weight=None):
        targets = self._to_rounded(y_true)
        estimates = self._to_rounded(y_pred)
        super().update_state(targets, estimates, sample_weight)
        
class AllAccuracy(keras.metrics.Accuracy):
    """Per-sample accuracy: a sample counts as correct only if every value matches.

    Both inputs are multiplied by scale_factor and rounded to the nearest
    integer before comparison along the last axis.
    """

    def __init__(self, scale_factor, name="a_acc", dtype=None):
        super().__init__(name, dtype=dtype)
        self.scale_factor = scale_factor

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.math.rint(tf.cast(y_true, self._dtype) * self.scale_factor)
        y_pred = tf.math.rint(tf.cast(y_pred, self._dtype) * self.scale_factor)
        all_correct_bool = tf.reduce_all(y_true == y_pred, axis=-1)
        # 1 where the whole row matched, 0 otherwise, as a (batch, 1) column.
        hits = tf.reshape(tf.cast(all_correct_bool, self._dtype), (-1, 1))
        # The reference is an all-ones column with the same shape and dtype as
        # `hits`, so the parent metric compares like with like. Both are given
        # explicitly rather than passing a bare `1` where tf.ones expects a dtype.
        expected = tf.ones(tf.shape(hits), dtype=hits.dtype)
        super().update_state(expected, hits, sample_weight)
        
class ClassAccuracy(keras.metrics.Metric):
    """Length-normalised accuracy accumulated over batches.

    Each update adds the values in column 5 of the rescaled, rounded targets
    to `len_sum`, and adds that same amount minus the summed absolute
    target/prediction difference to `correct_sum`. The result is
    correct_sum / len_sum.

    NOTE(review): column 5 is presumably the J-segment end position, used as
    the sequence length — confirm against the label layout. `sample_weight`
    is accepted but not used.
    """

    def __init__(self, scale_factor, name="c_acc", **kwargs):
        super().__init__(name=name, **kwargs)
        self.scale_factor = scale_factor
        self.len_sum = self.add_weight(name="len_sum", initializer="zeros")
        self.correct_sum = self.add_weight(name="correct_sum", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.math.rint(tf.cast(y_true, self._dtype) * self.scale_factor)
        y_pred = tf.math.rint(tf.cast(y_pred, self._dtype) * self.scale_factor)
        incorrect_count = tf.reduce_sum(tf.abs(y_true - y_pred))
        batch_len_sum = tf.reduce_sum(tf.gather(y_true, indices=(5), axis=-1))
        self.len_sum.assign_add(batch_len_sum)
        self.correct_sum.assign_add(batch_len_sum - incorrect_count)

    def result(self):
        # Yields 0 instead of NaN when nothing has been accumulated yet
        # (before the first update or right after reset_state).
        return tf.math.divide_no_nan(self.correct_sum, self.len_sum)

    def reset_state(self):
        self.len_sum.assign(0.0)
        self.correct_sum.assign(0.0)
        
class SegmentPosAccuracy(PosAccuracy):
    """PosAccuracy restricted to the start/end columns of one segment (V, D or J)."""

    def __init__(self, scale_factor, segment, name="p_acc", dtype=None):
        assert segment in ("V", "D", "J")
        super().__init__(scale_factor, name=f"{segment}_{name}", dtype=dtype)
        self.segment = segment

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Columns are laid out as V(0, 1), D(2, 3), J(4, 5).
        first_col = 2 * "VDJ".index(self.segment)
        columns = (first_col, first_col + 1)
        super().update_state(
            tf.gather(y_true, indices=columns, axis=-1),
            tf.gather(y_pred, indices=columns, axis=-1),
            sample_weight,
        )
        
class SegmentRMSE(ScaledRMSE):
    """ScaledRMSE restricted to the start/end columns of one segment (V, D or J)."""

    def __init__(self, scale_factor, segment, name="s_rmse", dtype=None):
        assert segment in ("V", "D", "J")
        super().__init__(scale_factor, name=f"{segment}_{name}", dtype=dtype)
        self.segment = segment

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Columns are laid out as V(0, 1), D(2, 3), J(4, 5).
        first_col = 2 * "VDJ".index(self.segment)
        columns = (first_col, first_col + 1)
        super().update_state(
            tf.gather(y_true, indices=columns, axis=-1),
            tf.gather(y_pred, indices=columns, axis=-1),
            sample_weight,
        )
        
class SegmentMAE(ScaledMAE):
    """ScaledMAE restricted to the start/end columns of one segment (V, D or J)."""

    def __init__(self, scale_factor, segment, name="s_mae", dtype=None):
        assert segment in ("V", "D", "J")
        super().__init__(scale_factor, name=f"{segment}_{name}", dtype=dtype)
        self.segment = segment

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Columns are laid out as V(0, 1), D(2, 3), J(4, 5).
        first_col = 2 * "VDJ".index(self.segment)
        columns = (first_col, first_col + 1)
        super().update_state(
            tf.gather(y_true, indices=columns, axis=-1),
            tf.gather(y_pred, indices=columns, axis=-1),
            sample_weight,
        )

In [16]:
class CustomEarlyStopping(keras.callbacks.EarlyStopping):
    """EarlyStopping that ignores every epoch before `start_from_epoch`."""

    def __init__(self, start_from_epoch=0, **kwargs):
        super().__init__(**kwargs)
        self.start_from_epoch = start_from_epoch

    def on_epoch_end(self, epoch, logs=None):
        # Only hand over to the parent's stopping logic once the warm-up is over.
        if epoch >= self.start_from_epoch:
            super().on_epoch_end(epoch=epoch, logs=logs)

In [17]:
class WriteLearningRate(keras.callbacks.Callback):
    """Write the optimizer's current learning rate as a summary scalar at every batch start."""

    def __init__(self, logdir):
        super().__init__()
        self.writer = tf.summary.create_file_writer(logdir + "/lr")

    def on_batch_begin(self, batch, logs=None):
        optimizer = self.model.optimizer
        lr = keras.backend.eval(optimizer.learning_rate)
        step = keras.backend.eval(optimizer.iterations)
        # A schedule object has to be evaluated at the current step; a plain value is used as is.
        is_schedule = isinstance(lr, keras.optimizers.schedules.LearningRateSchedule)
        val = lr(step) if is_schedule else lr
        with self.writer.as_default():
            tf.summary.scalar("learning_rate", val, step=step)

In [18]:
class PrintLearningRate(keras.callbacks.Callback):
    """Track the learning rate at each batch start and print the latest value at each epoch end."""

    def on_batch_begin(self, batch, logs=None):
        optimizer = self.model.optimizer
        lr = keras.backend.eval(optimizer.learning_rate)
        step = keras.backend.eval(optimizer.iterations)
        # A schedule object has to be evaluated at the current step; a plain value is used as is.
        is_schedule = isinstance(lr, keras.optimizers.schedules.LearningRateSchedule)
        self.val = lr(step) if is_schedule else lr

    def on_epoch_end(self, epoch, logs=None):
        tf.print(f"learning rate: {self.val}")

In [19]:
class CustomModel(keras.Model):
    """keras.Model whose train step can report and replace NaN/Inf gradient entries.

    Call set_model() after construction to choose the replacement values and
    whether to print warnings. If it is never called, gradients pass through
    untouched.
    """

    # Class-level defaults so train_step works even if set_model() was never called.
    replace_nan = None
    replace_inf = None
    warn = False

    @staticmethod
    def _as_replacement(value):
        """Return `value` as a float32 tensor, or None when replacement is disabled."""
        # Only None/False disable replacement. A plain truthiness test would
        # also disable an explicit replacement value of 0.
        if value is None or value is False:
            return None
        return tf.cast(value, dtype=tf.float32)

    def set_model(self, replace_nan=None, replace_inf=None, warn=False):
        """Configure gradient sanitising.

        Args:
            replace_nan: value substituted for NaN gradient entries; None or
                False disables the substitution.
            replace_inf: magnitude substituted for infinite gradient entries
                (+value for +Inf, -value for -Inf); None or False disables it.
            warn: if True, print a message whenever a gradient contains NaN or Inf.
        """
        self.replace_nan = self._as_replacement(replace_nan)
        self.replace_inf = self._as_replacement(replace_inf)
        self.warn = warn

    def _sanitize_gradient(self, grad, var_name, step):
        """Warn about and/or replace NaN and Inf entries of one gradient."""
        # Variables that do not contribute to the loss have no gradient; leave
        # them for the optimizer to handle.
        if grad is None:
            return None

        nan_bool = tf.math.is_nan(grad)
        if self.warn:
            # Passing `step` to tf.print reports its runtime value; formatting
            # it into a Python string would freeze the variable's repr at trace time.
            tf.cond(
                tf.reduce_any(nan_bool),
                lambda: tf.print("Batch ", step, ": Nan in gradients of ", var_name, sep=""),
                tf.no_op,
            )
        if self.replace_nan is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad = tf.convert_to_tensor(grad)
            grad = tf.where(nan_bool, self.replace_nan, grad)

        # Inf detection runs on the gradient after any NaN replacement.
        inf_bool = tf.math.is_inf(grad)
        if self.warn:
            tf.cond(
                tf.reduce_any(inf_bool),
                lambda: tf.print("Batch ", step, ": Inf in gradients of ", var_name, sep=""),
                tf.no_op,
            )
        if self.replace_inf is not None:
            if isinstance(grad, tf.IndexedSlices):
                grad = tf.convert_to_tensor(grad)
            # Both masks are taken before either replacement is applied.
            pos_bool = inf_bool & (grad > 0)
            neg_bool = inf_bool & (grad < 0)
            grad = tf.where(pos_bool, self.replace_inf, grad)
            grad = tf.where(neg_bool, -1 * self.replace_inf, grad)
        return grad

    def train_step(self, data):
        """Run one optimisation step and return the current metric values by name."""

        # Compute gradients
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        step = self.optimizer.iterations

        # Check and process nan and Inf
        gradients = [
            self._sanitize_gradient(grad, var.name, step)
            for grad, var in zip(gradients, trainable_vars)
        ]

        # Update
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)

        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [20]:
class CustomHyperModel_2(keras_tuner.HyperModel):
    """Tunable k-mer embedding -> stacked bidirectional GRU -> dense regressor.

    `build` defines the architecture hyperparameters and `fit` defines the
    optimisation ones. The hyperparameter names and layer names are part of
    the saved tuner state, so they must stay stable for reloading to work.
    """

    def build(self, hp):
        """Build an uncompiled CustomModel mapping a k-mer sentence string to 6 outputs."""
        model_input = keras.Input(shape=(1,), dtype=tf.string, name=f"input")
        
        # encoder part
        kmer_choice = hp.Choice("kmer_choice", values=["3mer", "4mer", "5mer"], ordered=False)
        embed_dim = hp.Int("embed_dim", min_value=16, max_value=128, step=2, sampling="log")
        # The vocabulary is fixed to all possible k-mers; k is the first character of the choice.
        text_vec_layer = keras.layers.TextVectorization(
            standardize=None,
            split="whitespace",
            vocabulary=get_kmer_vocab(int(kmer_choice[0])),
            name="text_vectorize"
        )
        
        # mask_zero=True: index 0 is treated as padding and masked for the downstream layers.
        embedding_layer = keras.layers.Embedding(
            input_dim=text_vec_layer.vocabulary_size(), 
            output_dim=embed_dim, 
            mask_zero=True,
            name="embedding"
        )        
        x = text_vec_layer(model_input)
        x = embedding_layer(x)
        
        # gru part
        gru_num = hp.Int("gru_num", min_value=2, max_value=8)
        gru_unit = hp.Int("gru_unit", min_value=32, max_value=256, step=2, sampling="log")
        gru_dropout_rate = hp.Float("gru_dropout_rate", min_value=0.0, max_value=0.5, step=0.1)
        for i in range(gru_num):
            # Only the last GRU collapses the sequence; the first GRU gets no input dropout.
            not_last_gru = (i != (gru_num - 1))
            dropout_rate = gru_dropout_rate if i != 0 else 0.0
            gru_layer = keras.layers.GRU(
                gru_unit, 
                dropout=dropout_rate,
                return_sequences=not_last_gru, 
                name=f"gru_{i+1}"
            )
            # Note: the wrapper reuses the inner GRU's name.
            gru_layer = keras.layers.Bidirectional(gru_layer, name=f"gru_{i+1}")
            x = gru_layer(x)

        # dense part
        # total_layer_num counts GRU + dense layers; its minimum (8) equals the
        # maximum gru_num, so dense_num is never negative.
        total_layer_num = hp.Int("total_layer_num", min_value=8, max_value=16)
        dense_num = total_layer_num - gru_num
        dense_unit = hp.Int(f"dense_unit", min_value=64, max_value=512, step=2, sampling="log")
        leaky_alpha = hp.Float("leaky_alpha", min_value=0.0, max_value=0.3, step=0.05)
        dense_dropout_rate = hp.Float("dense_dropout_rate", min_value=0.0, max_value=0.1, step=0.05)
        for i in range(dense_num):
            dense_layer = keras.layers.Dense(dense_unit, activation=None, name=f"dense_{i+1}")
            leaky_relu_layer = keras.layers.LeakyReLU(alpha=leaky_alpha, name=f"dense_{i+1}_leaky_relu")
            dense_dropout_layer = keras.layers.GaussianDropout(dense_dropout_rate, name=f"dense_{i+1}_dropout")
            x = dense_layer(x)
            x = leaky_relu_layer(x)
            x = dense_dropout_layer(x)

        # build the model
        # 6 linear outputs: one per scaled position label.
        output_layer = keras.layers.Dense(6, activation=None, name="output")
        model_output = output_layer(x)
        model = CustomModel(model_input, model_output)
        # NaN and Inf gradient entries are replaced by 1e-5 (sign-preserving for Inf).
        model.set_model(replace_nan=1e-5, replace_inf=1e-5)
        
        return model

    def fit(self, hp, model, ds_train, ds_valid, shuffle_seed, **fit_kwargs):
        """Prepare the input pipelines, compile `model` and train it.

        Args:
            hp: hyperparameters; "kmer_choice" is read, "lr_schedule" and
                "weight_decay" are defined here.
            model: model returned by `build`.
            ds_train: raw (sequence, positions) training dataset.
            ds_valid: raw (sequence, positions) validation dataset.
            shuffle_seed: seed for shuffling the training data.
            **fit_kwargs: forwarded unchanged to `model.fit`.

        Returns:
            The value returned by `model.fit`.
        """
        
        # prepare dataset
        scale_factor = 450
        train_batch_size = 192
        valid_batch_size = 32
        prefetch_size = 16
        shuffle_buffer = 256
        kmer_choice = hp.get("kmer_choice")
        ds_train_input = transform_ds(
            ds_train, 
            kmer_choice, 
            scale_factor=scale_factor, 
            batch_size=train_batch_size, 
            shuffle_buffer=shuffle_buffer, 
            shuffle_seed=shuffle_seed
        )
        # Validation data is batched but not shuffled.
        ds_valid_input = transform_ds(
            ds_valid, 
            kmer_choice, 
            scale_factor=scale_factor, 
            batch_size=valid_batch_size
        )
        ds_train_input = ds_train_input.prefetch(prefetch_size)
        ds_valid_input = ds_valid_input.prefetch(prefetch_size)
        batch_per_epoch = int(ds_train_input.cardinality().numpy())
        
        # compile model and fit
        decay_epoch = 350
        lr_schedule = hp.Choice("lr_schedule", values=["cos_decay", "exp_decay"], ordered=False)
        weight_decay = hp.Choice("weight_decay", values=[1e-3, 3e-3, 1e-2, 3e-2], ordered=True)
        if lr_schedule == "cos_decay":
            # Cosine decay from 1e-3 to 1e-2 of that value over decay_epoch epochs.
            lr_schedule = keras.optimizers.schedules.CosineDecay(
                1e-3, decay_steps=batch_per_epoch * decay_epoch, alpha=1e-2, name="cos_decay"
            )
        else:
            # decay_steps is sized so that repeated multiplication by decay_rate
            # brings the rate to roughly 5e-2 of its initial value after decay_epoch epochs.
            decay_rate = 0.95
            decay_steps = batch_per_epoch * math.ceil(decay_epoch / math.log(5e-2, decay_rate))
            lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                1e-3, 
                decay_steps=decay_steps, 
                decay_rate=decay_rate,
                staircase=False, 
                name="exp_decay"
            )
        optimizer = keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            clipnorm=1e-3
        )
        loss = keras.losses.MeanSquaredError()
        # The first metric ("s_rmse") provides the "val_s_rmse" tuning objective.
        metric_list = [
            ScaledRMSE(scale_factor),
            SegmentRMSE(scale_factor, segment="V"),
            SegmentRMSE(scale_factor, segment="D"),
            SegmentRMSE(scale_factor, segment="J")
        ]
        model.compile(optimizer=optimizer, loss=loss, metrics=metric_list)
        return model.fit(
            x=ds_train_input,
            validation_data=ds_valid_input,
            **fit_kwargs,
        )

# test
# hp = keras_tuner.HyperParameters()
# hypermodel = CustomHyperModel_2()
# model = hypermodel.build(hp)
# model.summary()

### Final training

In [21]:
# load tuner
# load tuner
# overwrite=False reuses the results already stored under tuner_dir instead of
# starting a new search; the settings here should match the original run.
n_top = 3  # number of best trials to summarise and retrain
tuner_dir = "./tune/final_3"
hypermodel = CustomHyperModel_2()
tuner = keras_tuner.Hyperband(
    hypermodel=hypermodel,
    objective=keras_tuner.Objective("val_s_rmse", direction="min"),
    max_epochs=320,
    factor=6,
    hyperband_iterations=1,
    seed=SEED,
    overwrite=False,
    directory=tuner_dir,
    project_name=tuner_dir.split("/")[-1],
)
tuner.results_summary(n_top)

INFO:tensorflow:Reloading Tuner from ./tune/final_3/final_3/tuner0.json
Results summary
Results in ./tune/final_3/final_3
Showing 3 best trials
Objective(name="val_s_rmse", direction="min")

Trial 0413 summary
Hyperparameters:
kmer_choice: 3mer
embed_dim: 16
gru_num: 6
gru_unit: 64
gru_dropout_rate: 0.2
total_layer_num: 11
dense_unit: 64
leaky_alpha: 0.15000000000000002
dense_dropout_rate: 0.0
lr_schedule: cos_decay
weight_decay: 0.001
tuner/epochs: 320
tuner/initial_epoch: 54
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0406
Score: 2.036548376083374

Trial 0396 summary
Hyperparameters:
kmer_choice: 4mer
embed_dim: 32
gru_num: 6
gru_unit: 32
gru_dropout_rate: 0.0
total_layer_num: 12
dense_unit: 64
leaky_alpha: 0.15000000000000002
dense_dropout_rate: 0.05
lr_schedule: cos_decay
weight_decay: 0.01
tuner/epochs: 320
tuner/initial_epoch: 54
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0392
Score: 2.088806629180908

Trial 0323 summary
Hyperparameters:
kmer_choice: 5mer
embed_dim: 16
g

In [22]:
def final_train(hp, model, ds_train, ds_valid, shuffle_seed, **fit_kwargs):
    """Compile `model` from tuned hyperparameters and train it with the full metric set.

    Args:
        hp: hyperparameters providing "kmer_choice", "lr_schedule" and "weight_decay".
        model: model to compile and train.
        ds_train: raw (sequence, positions) training dataset.
        ds_valid: raw (sequence, positions) validation dataset.
        shuffle_seed: seed for shuffling the training data.
        **fit_kwargs: forwarded unchanged to `model.fit`.

    Returns:
        The value returned by `model.fit`.
    """
    # ---- input pipelines ----
    scale_factor = 450
    prefetch_size = 16
    encoding = hp.get("kmer_choice")
    train_batches = transform_ds(
        ds_train,
        encoding,
        scale_factor=scale_factor,
        batch_size=32,
        shuffle_buffer=256,
        shuffle_seed=shuffle_seed,
    )
    # Validation data is batched but not shuffled.
    valid_batches = transform_ds(
        ds_valid,
        encoding,
        scale_factor=scale_factor,
        batch_size=32,
    )
    train_batches = train_batches.prefetch(prefetch_size)
    valid_batches = valid_batches.prefetch(prefetch_size)
    steps_per_epoch = int(train_batches.cardinality().numpy())

    # ---- learning-rate schedule ----
    initial_lr = 1e-3
    decay_epoch = 350
    schedule_name = hp.get("lr_schedule")
    weight_decay = hp.get("weight_decay")
    if schedule_name != "cos_decay":
        # Sized so the rate reaches roughly 5e-2 of its initial value after decay_epoch epochs.
        decay_rate = 0.95
        epochs_per_decay = math.ceil(decay_epoch / math.log(5e-2, decay_rate))
        schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_lr,
            decay_steps=steps_per_epoch * epochs_per_decay,
            decay_rate=decay_rate,
            staircase=False,
            name="exp_decay",
        )
    else:
        schedule = keras.optimizers.schedules.CosineDecay(
            initial_lr, decay_steps=steps_per_epoch * decay_epoch, alpha=1e-2, name="cos_decay"
        )

    # ---- compile and fit ----
    overall_metrics = [
        ScaledRMSE(scale_factor),
        ScaledMAE(scale_factor),
        PosAccuracy(scale_factor),
        AllAccuracy(scale_factor),
        ClassAccuracy(scale_factor),
    ]
    per_segment_metrics = [SegmentRMSE(scale_factor, segment=seg) for seg in "VDJ"]
    model.compile(
        optimizer=keras.optimizers.experimental.AdamW(
            learning_rate=schedule,
            weight_decay=weight_decay,
            clipnorm=1e-3,
        ),
        loss=keras.losses.MeanSquaredError(),
        metrics=overall_metrics + per_segment_metrics,
    )
    return model.fit(x=train_batches, validation_data=valid_batches, **fit_kwargs)

In [23]:
# Retrain each of the n_top tuner results, logging to its own directory and
# keeping one checkpoint per segment (best validation RMSE for V, D and J).
main_logdir = "./logs/final_training"
best_hps = tuner.get_best_hyperparameters(n_top)
# NOTE(review): get_best_models presumably returns models with the tuner's saved
# weights loaded, so this continues training rather than starting fresh — confirm.
best_models = tuner.get_best_models(n_top)
history_list = []
for i, hp, model in zip(range(n_top), best_hps, best_models):
    print(f"top {i+1} training")
    logdir = main_logdir + f"/top_{i+1}"
    tensorboard_cb = keras.callbacks.TensorBoard(
        logdir, 
        histogram_freq=10,
        write_graph=False
    )
    # Early stopping watches the training loss, not a validation metric.
    early_stop_cb = keras.callbacks.EarlyStopping(
        monitor="loss",
        patience=10,
        verbose=1,
        mode="min"
    )
    # Created but currently left out of cb_list below.
    print_lr_cb = PrintLearningRate()
    cb_list = [
        tensorboard_cb, 
        # print_lr_cb,
        early_stop_cb
    ]
    for seg in "VDJ":
        # Monitors the per-segment validation metric "val_{seg}_s_rmse".
        save_model_cb = keras.callbacks.ModelCheckpoint(
            logdir + f"/model/best_{seg}",
            monitor=f"val_{seg}_s_rmse",
            verbose=0,
            save_best_only=True,
            save_weights_only=False,
            mode="min",
            save_freq="epoch"
        )
        cb_list.append(save_model_cb)
    history = final_train(hp, model, ds_train, ds_valid, SEED, epochs=150, verbose=0, callbacks=cb_list)
    history_list.append(history)

top 1 training




INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_1/model/best_J/assets


top 2 training




INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_2/model/best_V/assets


top 3 training




INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_V/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_D/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_J/assets


INFO:tensorflow:Assets written to: ./logs/final_training/top_3/model/best_J/assets


Epoch 11: early stopping


In [27]:
# Export the `.history` data of each entry in `history_list` as a
# tab-separated file named top_<i+1>.tsv under `main_logdir`.
# Pairing with range(n_top) limits the export to the first n_top entries.
for i, history in zip(range(n_top), history_list):
    history_tsv_path = main_logdir + f"/top_{i+1}.tsv"
    df_history = pd.DataFrame(history.history)
    df_history.to_csv(history_tsv_path, sep="\t")