# Stubbs

## Index
1. [Instantiate model class](#Instantiate-model-class)
2. [Define clock metadata](#Define-clock-metadata)
3. [Download clock dependencies](#Download-clock-dependencies)
4. [Load features](#Load-features)
5. [Load weights into base model](#Load-weights-into-base-model)
6. [Load reference values](#Load-reference-values)
7. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)
8. [Check all clock parameters](#Check-all-clock-parameters)
9. [Basic test](#Basic-test)
10. [Save torch model](#Save-torch-model)
11. [Clear directory](#Clear-directory)

Let's first import some packages:

In [1]:
import inspect
import json
import os
import shutil
import subprocess

import pandas as pd
import torch

import pyaging as pya

## Instantiate model class

In [2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls``.

    The text is obtained with ``inspect.getsource`` and written to standard
    output; nothing is returned.
    """
    print(inspect.getsource(cls))

# Show the source of the Stubbs model class that is instantiated below.
print_entire_class(pya.models.Stubbs)

class Stubbs(pyagingModel):
    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        """
        Apply quantile normalization on x using gold standard means
        and then scale with the means and standard deviation.
        """

        gold_standard_means = torch.tensor(
            self.reference_values, device=x.device, dtype=x.dtype
        )

        # Ensure gold_standard_means is a 1D tensor and sorted
        sorted_gold_standard = torch.sort(gold_standard_means)[0]

        # Pre-compute the quantile indices
        quantile_indices = torch.linspace(
            0, len(sorted_gold_standard) - 1, steps=x.size(1)
        ).long()

        # Prepare a tensor to hold normalized data
        normalized_data = torch.empty_like(x, device=x.device, dtype=x.dtype)

        for i in range(x.size(0)):
            sorted_indices = torch.argsort(x[i, :])
            normalized_data[i, sorted_indices] = sorted_gold_standard[quantile_indices]

        gold_

In [3]:
# Create the Stubbs clock object; its metadata, features, weights, reference
# values and pre/postprocessing settings are filled in by the cells below.
model = pya.models.Stubbs()

## Define clock metadata

In [4]:
# Descriptive metadata stored with the clock (name, data type, species,
# publication details). Fields without a value are set to None explicitly.
model.metadata.update(
    {
        "clock_name": 'stubbs',
        "data_type": 'methylation',
        "species": 'Mus musculus',
        "year": 2017,
        "approved_by_author": '⌛',
        "citation": "Stubbs, Thomas M., et al. \"Multi-tissue DNA methylation age predictor in mouse.\" Genome biology 18 (2017): 1-14.",
        "doi": "https://doi.org/10.1186/s13059-017-1203-5",
        "research_only": None,
        "notes": None,
    }
)

## Download clock dependencies

#### Download directly with curl

In [5]:
# Spreadsheet with the clock coefficients; it is saved locally as `coefficients.xlsx`.
supplementary_url = "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvNDA2NzUvZWxpZmUtNDA2NzUtc3VwcDMtdjIueGxzeA--/elife-40675-supp3-v2.xlsx?_hash=qzOMc4yUFACfDFG%2FlgxkFTHWt%2BSXSmP9zz1BM3oOTRM%3D"
supplementary_file_name = "coefficients.xlsx"

# The URL contains shell metacharacters (`?`, `%`), so curl is invoked with an
# argument list instead of an interpolated shell string.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the spreadsheet; --location follows redirects.
# check=True raises CalledProcessError here rather than letting a failed
# download surface later as a confusing read_excel error.
download = subprocess.run(
    ["curl", "--fail", "--location", "--output", supplementary_file_name, supplementary_url],
    check=True,
)
download.returncode

0

#### Download GitHub repository

In [6]:
# Repository that holds the training matrix read in the "Load features" cell.
github_url = "https://github.com/EpigenomeClock/MouseEpigeneticClock.git"
# Folder name derived from the URL: last path component without the ".git" suffix.
github_folder_name = github_url.split('/')[-1].split('.')[0]

# Clone only when the checkout is missing, so re-running the cell does not fail
# on an already existing folder. The destination is passed explicitly so it
# always matches `github_folder_name`, and check=True raises on a failed clone
# instead of silently continuing.
clone_status = 0
if not os.path.isdir(github_folder_name):
    clone_status = subprocess.run(
        ["git", "clone", github_url, github_folder_name],
        check=True,
    ).returncode
clone_status

0

## Load features

In [7]:
# Clock coefficients: the first 329 rows of the 'Young age multi-tissue' sheet.
# The file name comes from the download cell so the two cannot drift apart.
df = pd.read_excel(supplementary_file_name, sheet_name='Young age multi-tissue', nrows=329)
# Feature identifiers have the form "<Chromosome>:<Position>".
df['feature'] = df['Chromosome'].astype(str) + ':' + df['Position'].astype(int).astype(str)
df['coefficient'] = df['Weight']

# Training matrix from the cloned repository. Its row labels get a 'chr' prefix
# and become the column (feature) names after the transpose.
training_matrix_path = os.path.join(
    github_folder_name, 'TrainingMatrix', 'TrainingData_Babraham_Reizel_Cannon.txt'
)
reference_feature_values_df = pd.read_table(training_matrix_path, index_col=0)
reference_feature_values_df.index = ['chr' + index for index in reference_feature_values_df.index]
reference_feature_values_df = reference_feature_values_df.T

# `features` is every site of the reference matrix; `base_model_features` is
# the subset that carries a clock coefficient.
model.features = reference_feature_values_df.columns.tolist()
model.base_model_features = df['feature'].tolist()

# Fail early with a clear message: every clock site is looked up in
# `model.features` when the preprocessing indices are built.
missing_features = sorted(set(model.base_model_features) - set(model.features))
assert not missing_features, (
    f"{len(missing_features)} clock features are absent from the reference matrix, "
    f"e.g. {missing_features[:5]}"
)

## Load weights into base model

In [8]:
# Coefficients as a single row, i.e. shape (1, number of clock features); this
# tensor is later assigned to the linear layer's weight.
weights = torch.tensor([df['coefficient'].tolist()])
# The bias loaded into the linear layer is a single zero.
intercept = torch.zeros(1)

#### Linear model

In [9]:
# `weights` holds one coefficient per entry of `model.base_model_features`, so
# the layer is declared with that many inputs. Declaring it with
# `len(model.features)` (the full reference matrix) built a layer whose
# declared size disagreed with the weights assigned below.
n_clock_features = len(model.base_model_features)
assert weights.shape == (1, n_clock_features), (
    f"expected weights of shape (1, {n_clock_features}), got {tuple(weights.shape)}"
)
base_model = pya.models.LinearModel(input_dim=n_clock_features)

# Overwrite the randomly initialised parameters with the published coefficients.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()

model.base_model = base_model

## Load reference values

In [10]:
# Per-feature mean over the rows of the reference matrix (one value per
# column), stored as a plain Python list.
model.reference_values = reference_feature_values_df.mean(axis=0).tolist()

## Load preprocess and postprocess objects

In [11]:
model.preprocess_name = 'quantile_normalization_and_scale_with_gold_standard'
# Per-feature standard deviation of the reference matrix, in `model.features` order.
gold_standard_stds = reference_feature_values_df.std().tolist()

# Position of every clock feature inside `model.features`. A dictionary lookup
# replaces the repeated `list.index` scan (one full pass per clock feature).
# Iterating in reverse keeps the first occurrence of a name, which is what
# `list.index` returns. A clock feature missing from `model.features` raises
# KeyError here.
feature_positions = {
    name: position
    for position, name in reversed(list(enumerate(model.features)))
}
indices = [feature_positions[item] for item in model.base_model_features]
model.preprocess_dependencies = [gold_standard_stds, indices]

In [12]:
# Name of the postprocessing step for this clock — presumably resolved by name
# inside pyaging; verify against the library.
model.postprocess_name = 'stubbs'
# No additional objects are supplied for postprocessing.
model.postprocess_dependencies = None

## Check all clock parameters

In [13]:
# Print the model's attributes for a visual sanity check before saving.
pya.utils.print_model_details(model)


Model Attributes:

training: True
metadata: {'approved_by_author': '⌛',
 'citation': 'Stubbs, Thomas M., et al. "Multi-tissue DNA methylation age '
             'predictor in mouse." Genome biology 18 (2017): 1-14.',
 'clock_name': 'stubbs',
 'data_type': 'methylation',
 'doi': 'https://doi.org/10.1186/s13059-017-1203-5',
 'notes': None,
 'research_only': None,
 'species': 'Mus musculus',
 'version': None,
 'year': 2017}
reference_values: [0.009419308625383253, 0.9454021653384829, 0.9262220310728511, 0.01115736080094597, 0.005828992981096152, 0.015486742789977479, 0.10632124338173232, 0.008327731508385066, 0.9470653205543639, 0.7441407980821007, 0.9443701038672639, 0.7338555884793152, 0.7296056309787987, 0.9211735929020528, 0.012150415029205755, 0.01607368220739321, 0.011272227879684186, 0.012648110923722247, 0.17279958253325498, 0.606354333653314, 0.612341969307046, 0.0016412203744236714, 0.0016069770681773007, 0.0009744614615922172, 0.0031840144988144236, 0.0010236321557703402, 0.00

## Basic test

In [14]:
# Smoke test: run the assembled clock on seeded random data to confirm the
# whole pipeline executes end to end.
torch.manual_seed(42)
# Named `random_input` so the builtin `input()` is not shadowed for the rest of
# the session. One row per sample, one column per entry of `model.features`.
random_input = torch.randn(10, len(model.features), dtype=torch.float64)
model.eval()
# Match the model's parameter dtype to the float64 input.
model.to(torch.float64)
pred = model(random_input)
pred

tensor([[ 1.1474],
        [-0.2128],
        [-0.1608],
        [ 2.1481],
        [-0.2222],
        [ 0.8489],
        [ 3.7938],
        [ 0.6316],
        [-0.1613],
        [ 2.0362]], dtype=torch.float64, grad_fn=<MulBackward0>)

## Save torch model

In [15]:
# Serialize the whole model object to ../weights/<clock_name>.pt. The path is
# relative to the current working directory.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")

## Clear directory
<a id="10"></a>

In [16]:
# Helper that deletes a directory tree and reports the outcome
def remove_folder(path):
    """Recursively delete ``path`` and print what happened.

    A failure is not propagated: the error is printed and the function
    returns normally, so clean-up can continue with the next item.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")

# Remove only the artifacts this notebook created: the downloaded spreadsheet
# and the cloned repository. Sweeping every entry of the working directory
# would also delete unrelated files and folders if the notebook were run from
# another location.
for item in (supplementary_file_name, github_folder_name):
    # Plain file: delete it directly
    if os.path.isfile(item):
        os.remove(item)
        print(f"Deleted file: {item}")
    # Folder: delete it together with its contents
    elif os.path.isdir(item):
        remove_folder(item)

Deleted file: coefficients.xlsx
Deleted folder: MouseEpigeneticClock
