# Setup

libraries

In [1]:
import os
os.environ["KERAS_BACKEND"] = "torch"

import sys
sys.path.append(f"../../3_train_and_test_models")

import keras
from keras.layers import TorchModuleWrapper

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import warnings
import random
import crested

from math import floor, ceil
from torch import Tensor
from einops import rearrange
from typing import Callable, List, Optional, Union
from collections import defaultdict

# Different libraries to get attributions
#from captum.attr import IntegratedGradients
#from tangermeme.deep_lift_shap import deep_lift_shap
from crested.tl import contribution_scores

# Grab things we need from other models
from params import Params, ROOT, TFS, SPECIES, GENOMES
from Baseline.test import ConvHead, LinearBlock, FeedForwardBlock, GRUBlock
from Baseline.test import BasicModel as BaselineModel
from MORALE.test import FeatureExtractor, Classifier

  from .autonotebook import tqdm as notebook_tqdm


Disabling cuDNN (the commented-out line below) is only needed when computing attributions with Captum, because all of our models contain a GRU element.

In [2]:
#torch.backends.cudnn.enabled = False # LSTM, GRU, and RNNs are not supported by cuDNN for captum

functions

In [3]:
def _latest_file_with_suffix(directory, suffix):
    """
    Return the path of the newest file in `directory` whose name ends with `suffix`.

    "Newest" is decided by `os.path.getctime`.

    Raises:
        FileNotFoundError: if no file in `directory` ends with `suffix`
            (the same error `os.listdir` raises when the directory is missing).
    """
    candidates = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(suffix)]
    if not candidates:
        # Without this guard max() fails with an unhelpful "empty sequence" error.
        raise FileNotFoundError(f"No file ending in '{suffix}' found in {directory}")
    return max(candidates, key=os.path.getctime)


def get_model_file(model, params):
    """
    Locate the most recent saved weights for `model`.

    Files are searched in `ROOT/models/<params.tf>/<params.target_species>_tested/<model>/`.

    Args:
        model: model name. "Baseline" and "BM" are single-file models; "MORALE" and
            any name containing "EvoPS" are stored as a feature extractor plus a classifier.
        params: object providing the `tf` and `target_species` attributes.

    Returns:
        A single path for "Baseline" / "BM", or a
        (feature_extractor_path, classifier_path) tuple for MORALE / EvoPS models.

    Raises:
        ValueError: if `model` is not one of the recognised names.
        FileNotFoundError: if the directory or a matching file does not exist.
    """

    model_path = ROOT + "/".join(["/models", params.tf, params.target_species + "_tested", f"{model}/"])

    # Need to get all files that match the specific suffix (model type). If we use MORALE,
    # we have to return the feature extractor and the classifier models.
    if model == "Baseline":
        return _latest_file_with_suffix(model_path, ".baseline.pt")

    if model == "BM":
        return _latest_file_with_suffix(model_path, ".basic_model.pt")

    if "EvoPS" in model or model == "MORALE":
        latest_fe_file = _latest_file_with_suffix(model_path, ".feature_extractor.pt")
        latest_cl_file = _latest_file_with_suffix(model_path, ".classifier.pt")
        return latest_fe_file, latest_cl_file

    raise ValueError(f"Model {model} not found.")

In [4]:
def get_preds_file(tf, target_species):
    """
    Return the path of the saved predictions array for a TF / target-species pair.

    The `model_out` directory under ROOT is created if it does not exist yet.
    The file name always carries the "BM" model prefix.
    """
    out_dir = ROOT + "/model_out"
    os.makedirs(out_dir, exist_ok=True)

    preds_path = f"{out_dir}/BM_{tf}_{target_species}-tested.preds.npy"
    return preds_path

def get_labels_file(tf, target_species):
    """
    Return the path of the saved labels array for a TF / target-species pair.

    The `model_out` directory under ROOT is created if it does not exist yet.
    The file name always carries the "Baseline" model prefix.
    """
    out_dir = ROOT + "/model_out"
    os.makedirs(out_dir, exist_ok=True)

    labels_path = f"{out_dir}/Baseline_{tf}_{target_species}-tested.labels.npy"
    return labels_path

def load_performance_data(verbose=False):
    """
    Load the saved predictions and labels for every TF / target-species pair.

    For each pair in TFS x SPECIES the two arrays are read from the paths given by
    `get_preds_file` / `get_labels_file` and squeezed. If their lengths differ, the
    labels are cut down to the number of predictions. A pair that cannot be loaded
    (missing or unreadable file, fewer labels than predictions, ...) is reported and
    skipped; looking such a pair up in the returned dictionaries yields an empty dict.

    Args:
        verbose: print a progress message after each TF has been processed.

    Returns:
        (preds_dict, labels_dict, bound_indices, unbound_indices), each a nested
        defaultdict indexed as [tf][target]. The first two hold the arrays; the last
        two hold the positions where the labels equal 1 and 0 respectively.
    """

    def _nested():
        return defaultdict(lambda: defaultdict(dict))

    preds_dict      = _nested()
    labels_dict     = _nested()
    bound_indices   = _nested()
    unbound_indices = _nested()

    # Do for each tf-species pair
    for tf in TFS:
        for target in SPECIES:

            preds_file  = get_preds_file(tf=tf, target_species=target)
            labels_file = get_labels_file(tf=tf, target_species=target)

            try:
                preds = np.load(preds_file).squeeze()
                labels = np.load(labels_file).squeeze()

                # Truncate the labels when there are more of them than predictions
                if preds.shape[0] != labels.shape[0]:
                    print("\t\t Preds & labels mismatch! truncating labels\n")
                    labels = labels[:preds.shape[0]]

                # Truncation cannot help when there are fewer labels than predictions
                if preds.shape[0] != labels.shape[0]:
                    raise ValueError(
                        f"{preds.shape[0]} predictions but only {labels.shape[0]} labels"
                    )
            except Exception as err:
                # Best effort: report which pair failed and why, then move on.
                # (Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.)
                print(f"Could not load regular preds/labels files for {tf} / {target}: {err!r}")
                continue

            preds_dict[tf][target]  = preds
            labels_dict[tf][target] = labels

            # Positions of the bound (label 1) and unbound (label 0) examples
            bound_indices[tf][target]   = np.nonzero(labels == 1)[0]
            unbound_indices[tf][target] = np.nonzero(labels == 0)[0]

        if verbose:
            print("\t\t\t---> Generated dictionaries needed for cnf matrix construction!\n")

    return preds_dict, labels_dict, bound_indices, unbound_indices

def generate_confusion_matrix(verbose=False, bound_threshold=0.98, unbound_threshold=0.01):
    """
    Split the confidently predicted sites of every TF / target pair into
    TP / FP / TN / FN index sets.

    A site counts as predicted bound when its prediction is strictly greater than
    `bound_threshold`, and as predicted unbound when it is less than or equal to
    `unbound_threshold`. Sites in between are not placed in any of the four sets,
    so only the most extreme predictions are kept.

    Args:
        verbose: print progress messages (also forwarded to `load_performance_data`).
        bound_threshold: predictions above this value count as bound.
        unbound_threshold: predictions at or below this value count as unbound.

    Returns:
        (ex_tp_indices, ex_fp_indices, ex_tn_indices, ex_fn_indices): defaultdicts
        indexed as [tf][target], each holding a set of example indices.
    """

    if verbose:
        print("\t\t--> Generating TPs, FPs, TNS, and FNs by looking at bound/unbound sites (in comparison to the predictions).\n")

    # (1) Load the predictions and the bound/unbound ground-truth indices
    preds_dict, _labels_dict, bound_indices, unbound_indices = load_performance_data(verbose=verbose)

    # Indices we need to populate
    ex_tp_indices   = defaultdict(dict)
    ex_fp_indices   = defaultdict(dict)
    ex_tn_indices   = defaultdict(dict)
    ex_fn_indices   = defaultdict(dict)

    # (2) Generate the confusion matrix
    for tf in TFS:
        for target in SPECIES:

            preds = preds_dict[tf][target]

            # Build every index set exactly once; each is used for two intersections
            predicted_bound     = set(np.nonzero(preds > bound_threshold)[0])
            predicted_unbound   = set(np.nonzero(preds <= unbound_threshold)[0])
            truly_bound         = set(bound_indices[tf][target])
            truly_unbound       = set(unbound_indices[tf][target])

            # Examples predicted as bound...
            ex_tp_indices[tf][target] = predicted_bound & truly_bound
            ex_fp_indices[tf][target] = predicted_bound & truly_unbound

            # Examples predicted as unbound...
            ex_tn_indices[tf][target] = predicted_unbound & truly_unbound
            ex_fn_indices[tf][target] = predicted_unbound & truly_bound

    return ex_tp_indices, ex_fp_indices, ex_tn_indices, ex_fn_indices

def get_agreement_sites(params, verbose=False):
    """
    Collect, for every TF, the TP / FP / TN / FN index sets of the target species.

    The sets come from `generate_confusion_matrix` called with its default
    thresholds, so they only contain the most extremely predicted sites.

    Args:
        params: object providing `target_species`, which must be listed in SPECIES.
        verbose: print progress messages (also forwarded downstream).

    Returns:
        A defaultdict indexed as [tf][category], where category is one of
        "TP", "FP", "TN", "FN" and each value is a set of example indices.
    """

    if verbose:
        print("\t-> Finding the sites to use for attribution\n")

    assert params.target_species in SPECIES, f"Target species must be one of {SPECIES}"

    # generate_confusion_matrix returns its four results in TP, FP, TN, FN order
    by_category = dict(zip(("TP", "FP", "TN", "FN"), generate_confusion_matrix(verbose=verbose)))

    extreme_agreement_sites = defaultdict(dict)
    for tf in TFS:
        for category, indices in by_category.items():
            extreme_agreement_sites[tf][category] = indices[tf][params.target_species]

    return extreme_agreement_sites

def read_holdout_bed(holdout_type, params, verbose=True):
    """
    Build a shuffled list of "chrom:start-end" region strings from a holdout BED file.

    Steps: read `ROOT/data/<target_species>/<tf>/<holdout_type>_shuf.bed`, keep the
    first 1,000,000 ("val") or 2,000,000 ("test") rows, keep only the rows whose
    positions are in the "TP" set of `get_agreement_sites` for `params.tf`, randomly
    keep at most 2,000 of those, and shuffle the resulting region strings once more.

    Args:
        holdout_type: "test" or "val".
        params: object providing `target_species` and `tf`.
        verbose: only forwarded to `get_agreement_sites`; the messages printed
            here are emitted regardless of its value.

    Returns:
        A list of region strings.
    """

    assert holdout_type in ["test", "val"], f"Invalid holdout type {holdout_type}. Please choose from ['test', 'val']"

    print(f"> Converting sites from the {holdout_type} set so that we can use them to get model attributions.\n")

    bed_path    = f"{ROOT}/data/{params.target_species}/{params.tf}/{holdout_type}_shuf.bed"
    bed         = pd.read_csv(bed_path, sep="\t", names=['chrom', 'start', 'end', 'label'], usecols=['chrom','start','end'])

    # Keep only a certain number of examples
    row_cap = {"val": 1000000, "test": 2000000}.get(holdout_type)
    if row_cap is not None:
        bed = bed.iloc[:row_cap]

    # Subset to the confidently predicted true-positive rows
    tp_rows = list(get_agreement_sites(params=params, verbose=verbose)[params.tf]["TP"])
    bed     = bed.iloc[tp_rows]

    # Randomly subsample (at most) 2,000 of the remaining rows
    keep    = np.random.permutation(len(bed))[:2000]
    bed     = bed.iloc[keep]

    # NOTE(review): the message says "all sites types", but only TP rows are kept above.
    print(f"> Currently we use all sites types to randomly subsample {len(bed)} values\n")

    print(f"> Creating interval regions to use...\n")

    # Create the region strings
    regions = [
        f"{chrom}:{start}-{end}"
        for chrom, start, end in zip(bed["chrom"], bed["start"], bed["end"])
    ]

    # Second shuffle, kept so the sequence of RNG draws (and thus the output) is unchanged
    order = np.random.permutation(len(regions))
    return [regions[i] for i in order]

In [5]:
def get_crested_contributions(intervals, model, genome, target_idx=None):
    """
    Compute contribution scores with crested's `contribution_scores`,
    using its 'integrated_grad' method.

    Args:
        intervals: regions handed to crested as `input`.
        model: the model to attribute; passed through unchanged.
        genome: key into GENOMES selecting the genome handed to crested.
        target_idx: forwarded unchanged as crested's `target_idx`.

    Returns:
        A (scores, one_hot_encoded_sequences) tuple, as produced by crested.
    """

    crested_kwargs = dict(
        input=intervals,
        target_idx=target_idx,
        model=model,
        method='integrated_grad',
        genome=GENOMES[genome],
        transpose=True,
        output_dir=None,    # nothing is written to disk by crested itself
        verbose=True,
    )

    scores, one_hot_encoded_sequences = contribution_scores(**crested_kwargs)
    return scores, one_hot_encoded_sequences

In [6]:
# def read_holdout_bed(holdout_type, params, verbose=True):

#     from seqdataloader.batchproducers.coordbased.core import Coordinates
#     from seqdataloader.batchproducers.coordbased.coordstovals.fasta import PyfaidxCoordsToVals

#     assert holdout_type in ["test", "val"], f"Invalid holdout type {holdout_type}. Please choose from ['test', 'val']"

#     print(f"> Converting sites from the {holdout_type} set so that we can use them to get model attributions.\n")

#     holdout_bed_path    = f"{ROOT}/data/{params.target_species}/{params.tf}/{holdout_type}_shuf.bed"
#     converter           = PyfaidxCoordsToVals(params.genome_files[params.target_species])

#     # Keep only a certain num of exmaples
#     if holdout_type == "val":
#         coords_tmp	= [line.split() for line in open(holdout_bed_path)][:1000000]
#     elif holdout_type == "test":
#         coords_tmp	= [line.split() for line in open(holdout_bed_path)][:2000000]

#     # Subset based on the extreme agreement sites
#     agreement_sites = get_agreement_sites(params=params, verbose=verbose)
    
#     print(f"> Currently we keep all sites no matter the sigmoid value...\n")

#     coords = [Coordinates(coord[0], int(coord[1]), int(coord[2])) for coord in coords_tmp]

#     print(f"> Converting {len(coords)} coordinates to ohes...\n")

#     return converter(coords).transpose((0,2,1))


In [7]:
# def predict_on_seqs(
#     model,
#     genome,
#     x: Union[str, List[str]],
#     device: Union[str, int] = "cpu",
# ) -> np.ndarray:
#     """
#     A simple function to return model predictions directly
#     on a batch of a single batch of sequences in string
#     format.

#     Args:
#         x: DNA sequences as a string or list of strings.
#         device: Index of the device to use

#     Returns:
#         A numpy array of predictions.
#     """

#     # Handle (assumed) interval input
#     import grelu.sequence.format

#     input_seqs = grelu.sequence.format.convert_input_type(
#         x,
#         output_type="one_hot",
#         genome=genome,
#         add_batch_axis=True
#     ).to(device)

#     model = model.eval().to(device)
#     preds = model.forward(input_seqs).detach().cpu().numpy()
#     model = model.cpu()
#     return preds

# def ISM_predict(
#     seqs: Union[pd.DataFrame, np.ndarray, str, List[str]],
#     model: Callable,
#     genome: Optional[str] = None,
#     prediction_transform: Optional[Callable] = None,
#     start_pos: int = 0,
#     end_pos: Optional[int] = None,
#     compare_func: Optional[Union[str, Callable]] = None,
#     devices: Union[str, List[int]] = "cpu",
#     num_workers: int = 1,
#     batch_size: int = 64,
#     return_df: bool = True,
# ) -> Union[np.array, pd.DataFrame]:
#     """
#     Predicts the importance scores of each nucleotide position in a given DNA sequence
#     using the In Silico Mutagenesis (ISM) method.

#     Args:
#         seqs: Input DNA sequences as genomic intervals, strings, or integer-encoded form.
#         genome: Name of the genome to use if a genomic interval is supplied.
#         model: A pre-trained deep learning model
#         prediction_transform: A module to transform the model output
#         start_pos: Index of the position to start applying ISM
#         end_pos: Index of the position to stop applying ISM
#         compare_func: A function or name of a function to compare the predictions for mutated
#             and reference sequences. Allowed names are "divide", "subtract" and "log2FC".
#             If not provided, the raw predictions for both mutant and reference sequences will
#             be returned.
#         devices: Indices of the devices on which to run inference
#         num_workers: number of workers for inference
#         batch_size: batch size for model inference
#         return_df: If True, the ISM results will be returned as a dataframe. Otherwise, they
#             will be returned as a Numpy array.

#     Returns:
#         A numpy array of the predicted scores for each nucleotide position (if return_df = False)
#         or a pandas dataframe with A, C, G, and T as row labels and the bases at each position
#         of the sequence as column labels  (if return_df = True).
#     """
#     from grelu.data.dataset import ISMDataset
#     from grelu.sequence.format import BASE_TO_INDEX_HASH, STANDARD_BASES
#     from grelu.sequence.utils import get_unique_length
#     from grelu.utils import get_compare_func, make_list

#     # Get sequence as string
#     seqs = convert_input_type(seqs, "strings", genome=genome)
#     seqs = make_list(seqs)

#     # Get the last position to mutate
#     if end_pos is None:
#         end_pos = get_unique_length(seqs)

#     # Make dataset
#     ism = ISMDataset(
#         seqs=seqs,
#         positions=range(start_pos, end_pos),
#         drop_ref=False,
#     )

#     # Add transform to model
#     model.add_transform(prediction_transform)

#     # Get predictions for all mutated sequences
#     preds = model.predict_on_dataset(
#         ism,
#         devices=devices,
#         num_workers=num_workers,
#         batch_size=batch_size,
#     )
#     # B, L, 4, T, L

#     if compare_func is not None:

#         # Slice the prediction corresponding to each reference sequence
#         ref_bases = [BASE_TO_INDEX_HASH[seq[start_pos]] for seq in seqs]
#         ref_preds = np.concatenate(
#             [preds[None, None, None, i, 0, x] for i, x in enumerate(ref_bases)]
#         )  # B, L, 1, T, L

#         # Compare all predictions to the prediction for the corresponding reference sequence
#         preds = get_compare_func(compare_func, tensor=False)(preds, ref_preds)

#     # Convert into a dataframe
#     if return_df:
#         if (preds.shape[0] == 1) and (preds.shape[3:] == (1, 1)):
#             preds = preds.squeeze(axis=(0, 3, 4))  # L, 4
#             preds = pd.DataFrame(
#                 preds.T,  # 4, L
#                 index=STANDARD_BASES,
#                 columns=[b for b in seqs[0][start_pos:end_pos]],
#             )
#         else:
#             warnings.warn(
#                 "Cannot return a dataframe as either multiple sequences are \
#                 supplied or the model predictions are multi-dimensional. Returning Numpy array."
#             )

#     # Remove transform
#     model.reset_transform()
#     return preds

# def get_attributions(
#     model,
#     seqs: Union[pd.DataFrame, np.array, List[str]],
#     genome: Optional[str] = None,
#     prediction_transform: Optional[Callable] = None,
#     device: Union[str, int] = "cpu",
#     method: str = "deepshap",
#     hypothetical: bool = False,
#     n_shuffles: int = 20,
#     seed=None,
#     **kwargs,
# ) -> np.array:
#     """
#     Get per-nucleotide importance scores for sequences using Captum.

#     Args:
#         model: A trained deep learning model
#         seqs: input DNA sequences as genomic intervals, strings, or integer-encoded form.
#         genome: Name of the genome to use if a genomic interval is supplied.
#         prediction_transform: A module to transform the model output
#         devices: Indices of the devices to use for inference
#         method: One of "deepshap", "saliency", "inputxgradient" or "integratedgradients"
#         hypothetical: whether to calculate hypothetical importance scores.
#             Set this to True to obtain input for tf-modisco, False otherwise
#         n_shuffles: Number of times to dinucleotide shuffle sequence
#         seed: Random seed
#         **kwargs: Additional arguments to pass to tangermeme.deep_lift_shap.deep_lift_shap

#     Returns:
#         Per-nucleotide importance scores as numpy array of shape (B, 4, L).
#     """
#     # One-hot encode the input
#     seqs = convert_input_type(seqs, "one_hot", genome=genome, add_batch_axis=True)

#     # Add transform to model
#     model.add_transform(prediction_transform)
#     model = model.eval()

#     # Empty list for the output
#     attributions = []

#     # Check hypothetical
#     if hypothetical:
#         if method != "deepshap":
#             warnings.warn(
#                 "hypothetical = True will be ignored as method is not deepshap."
#             )

#     # Initialize the attributer
#     if method == "deepshap":
#         if isinstance(model.model, EnformerModel) or isinstance(
#             model.model, EnformerPretrainedModel
#         ):
#             raise NotImplementedError(
#                 "DeepShap currently cannot be applied to Enformer models."
#             )
#         else:
#             attributions = deep_lift_shap(
#                 model,
#                 X=seqs,
#                 n_shuffles=n_shuffles,
#                 hypothetical=hypothetical,
#                 device=device,
#                 random_state=seed,
#                 **kwargs,
#             ).numpy(force=True)

#     else:
#         if method == "integratedgradients":
#             attributer = IntegratedGradients(model.to(device))
#         elif method == "inputxgradient":
#             attributer = InputXGradient(model.to(device))
#         elif method == "saliency":
#             attributer = Saliency(model.to(device))
#         else:
#             raise NotImplementedError

#         # Calculate attributions for each sequence
#         with torch.no_grad():
#             for i in range(len(seqs)):
#                 X_ = seqs[i : i + 1].to(device)  # 1, 4, L
#                 attr = attributer.attribute(X_)
#                 attributions.append(attr.cpu().numpy())

#         attributions = np.vstack(attributions)

#     # Remove transform
#     model.reset_transform()
#     return attributions  # N, 4, L

* classes

In [8]:
class KerasBaseline(keras.Model):
    """Keras model around a single torch module, wrapped with TorchModuleWrapper."""

    def __init__(self, model, **kwargs):
        """Wrap `model`; remaining keyword arguments go to `keras.Model`."""
        super().__init__(**kwargs)
        wrapped = TorchModuleWrapper(model)
        self.model = wrapped

    def call(self, x):
        """Run `x` through the wrapped module and return its output."""
        output = self.model(x)
        return output
    
class KerasEvo(keras.Model):
    """Keras model chaining a wrapped feature extractor and a wrapped classifier."""

    def __init__(self, fe_model, cls_model, **kwargs):
        """Wrap both torch modules; remaining keyword arguments go to `keras.Model`."""
        super().__init__(**kwargs)
        self.fe_model = TorchModuleWrapper(fe_model)
        self.cls_model = TorchModuleWrapper(cls_model)

    def call(self, x):
        """Feed `x` to the feature extractor, then its output to the classifier."""
        features = self.fe_model(x)
        return self.cls_model(features)

# `Main`

In [9]:
# When True, the attribution loop writes its score / sequence arrays to disk.
SAVE = True

# One fixed seed for every RNG used in this notebook, so that the random
# subsampling of sites is reproducible.
seed = 1182024

# Seed Python's `random`, torch and NumPy, in that order.
for _seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    _seed_rng(seed)

In [10]:
# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"\nUsing device: {device}")


Using device: cuda


In [11]:
#-------------------------------------------------------
# Make compatible, the model, with attribution and set to eval mode
# https://discuss.pytorch.org/t/when-should-we-set-torch-backends-cudnn-enabled-to-false-especially-for-lstm/106571/5
#basic_model.eval().to(device)
#basic_model.gru_tower.train()

#-------------------------------------------------------
# from collections import OrderedDict

# # Instantiate the model
# basic_model = BasicModel(params)
# #basic_model.to(device) # Move to device
# #basic_model.eval()     # Set eval mode

# print(f"Loading model from {model_file}\n")

# # Load the state dict from the file
# state_dict = torch.load(model_file, map_location=device)

# # Create a new state dict inserting '.module' after the first component
# new_state_dict = OrderedDict()

# for k, v in state_dict.items():
#     # Find the first dot
#     dot_index = k.find('.')
#     if dot_index != -1:
#         # Split the key into the first part and the rest (including the dot)
#         part1 = k[:dot_index]
#         part2 = k[dot_index:] # Starts with '.'
#         # Construct the new key by inserting '.module'
#         new_key = part1 + '.module' + part2
#         new_state_dict[new_key] = v
#     else:
#         # If there's no dot (e.g., a parameter directly on BasicModel),
#         # it's unclear how to transform it based on the error pattern.
#         # Keep it as is for now, or decide on a rule if such keys exist.
#         # Based on your error, all relevant keys seem to have dots.
#         print(f"Warning: Key '{k}' does not contain '.' and was not modified.")
#         new_state_dict[k] = v # Keep original key if no dot is found


# # Load the corrected state dict
# try:
#     basic_model.load_state_dict(new_state_dict)
#     print("Model loaded successfully!")
# except RuntimeError as e:
#     print(f"Error loading modified state_dict: {e}")
#     print("Double-check if the transformation rule is correct for all keys.")

* create each model, load its saved weights from training, and compute the attributions

In [None]:
# Attribution driver: for every target species / model / TF combination, rebuild the
# trained model, select the sites to attribute, compute the scores and save them.
for target_species in SPECIES:
    for model_name in ["MORALE"]:
        for tf in TFS:

            print(f"\n--- Attributing {model_name} on {tf}, tested on {target_species} ---\n")

            # (1) Define new params set
            # NOTE(review): "Attribtuion" looks like a typo, but it is passed to Params
            # as-is; left untouched until it is confirmed that Params does not rely on it.
            params = Params(args = ["Attribtuion", tf, target_species], verbose=False)

            # (2) Load in the model we are calling and their saved weights

            print(f"> Loading from saved models files...\n")

            if model_name == "Baseline":
                model_file = get_model_file(model=model_name, params=params)

                baseline_model = BaselineModel(params)
                # The model is run on the CPU below, so load the weights there directly;
                # this also works for checkpoints saved from a GPU on a CPU-only machine.
                baseline_model.load_state_dict(torch.load(model_file, map_location="cpu"))

                keras_model = KerasBaseline(model=baseline_model).eval().to('cpu')

                print(f"\t>> Model summary:\n")
                # summary() prints by itself and returns None, so it is not wrapped in print()
                keras_model.summary()

            else:
                fe_file, cls_file   = get_model_file(model=model_name, params=params)

                feature_extractor = FeatureExtractor(params)
                feature_extractor.load_state_dict(torch.load(fe_file, map_location="cpu"))

                classifier = Classifier(params)
                classifier.load_state_dict(torch.load(cls_file, map_location="cpu"))

                keras_model = KerasEvo(fe_model=feature_extractor, cls_model=classifier).eval().to('cpu')

                print(f"\t>> Model summary:")
                # summary() prints by itself and returns None, so it is not wrapped in print()
                keras_model.summary()
                print("\n")

            # (3) Create data
            regions = read_holdout_bed(holdout_type="test", params=params, verbose=False)

            # (4) Compute scores via IG and save
            scores, seqs = get_crested_contributions(
                intervals=regions,
                target_idx=0,
                model=keras_model,
                genome=params.target_species
            )

            # TF-Modisco assumes a length-last format
            if SAVE:
                out_dir = f"{ROOT}/plots/crested"
                # np.save does not create missing directories
                os.makedirs(out_dir, exist_ok=True)
                np.save(f"{out_dir}/{model_name}_{tf}_{target_species}_scores.npy", scores.squeeze())
                np.save(f"{out_dir}/{model_name}_{tf}_{target_species}_seqs.npy", seqs.squeeze())


--- Attributing MORALE on CEBPA, tested on mm10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m21:39:50[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
  return F.conv1d(
Model: 100%|██████████| 1/1 [10:34<00:00, 634.97s/it]



--- Attributing MORALE on FOXA1, tested on mm10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m21:51:06[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [11:39<00:00, 699.81s/it]



--- Attributing MORALE on HNF4A, tested on mm10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m22:03:30[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:24<00:00, 624.79s/it]



--- Attributing MORALE on HNF6, tested on mm10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m22:14:36[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [11:29<00:00, 689.01s/it]



--- Attributing MORALE on CEBPA, tested on rheMac10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m22:26:53[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:24<00:00, 624.73s/it]



--- Attributing MORALE on FOXA1, tested on rheMac10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 1897 values

> Creating interval regions to use...



[32m22:38:04[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 1897 region(s).
Model: 100%|██████████| 1/1 [09:53<00:00, 593.84s/it]



--- Attributing MORALE on HNF4A, tested on rheMac10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m22:48:45[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:25<00:00, 626.00s/it]



--- Attributing MORALE on HNF6, tested on rheMac10 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m22:59:58[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:27<00:00, 627.21s/it]



--- Attributing MORALE on CEBPA, tested on canFam6 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m23:11:09[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:25<00:00, 625.83s/it]



--- Attributing MORALE on FOXA1, tested on canFam6 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 1894 values

> Creating interval regions to use...



[32m23:22:16[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 1894 region(s).
Model: 100%|██████████| 1/1 [09:52<00:00, 592.75s/it]



--- Attributing MORALE on HNF4A, tested on canFam6 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m23:32:52[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:25<00:00, 625.13s/it]



--- Attributing MORALE on HNF6, tested on canFam6 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m23:44:00[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:25<00:00, 625.97s/it]



--- Attributing MORALE on CEBPA, tested on rn7 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m23:55:08[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:26<00:00, 626.87s/it]



--- Attributing MORALE on FOXA1, tested on rn7 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m00:06:18[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:29<00:00, 629.85s/it]



--- Attributing MORALE on HNF4A, tested on rn7 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m00:17:31[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:28<00:00, 628.02s/it]



--- Attributing MORALE on HNF6, tested on rn7 ---

> Loading from saved models files...

	>> Model summary:


None


> Converting sites from the test set so that we can use them to get model attributions.

> Currently we use all sites types to randomly subsample 2000 values

> Creating interval regions to use...



[32m00:28:41[0m [1m|[0m [34mINFO[0m [1m|[0m Calculating contribution scores for 1 class(es) and 2000 region(s).
Model: 100%|██████████| 1/1 [10:26<00:00, 626.29s/it]


-----