# Han

## Index
1. [Instantiate model class](#Instantiate-model-class)
2. [Define clock metadata](#Define-clock-metadata)
3. [Download clock dependencies](#Download-clock-dependencies)
4. [Load features](#Load-features)
5. [Load weights into base model](#Load-weights-into-base-model)
6. [Load reference values](#Load-reference-values)
7. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)
8. [Check all clock parameters](#Check-all-clock-parameters)
9. [Basic test](#Basic-test)
10. [Save torch model](#Save-torch-model)
11. [Clear directory](#Clear-directory)

Let's first import some packages:

In [1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya

## Instantiate model class

In [2]:
def print_entire_class(cls):
    """Print the full source code of ``cls`` to standard output.

    The text is obtained with ``inspect.getsource``, so ``cls`` must be an
    object whose source is retrievable. Nothing is returned.
    """
    print(inspect.getsource(cls))

# Display the source of the Han clock class provided by pyaging.
print_entire_class(pya.models.Han)

class Han(pyagingModel):
    """Clock class with an identity preprocess and an anti-log-linear postprocess.

    NOTE(review): ``pyagingModel`` is defined outside this printout; how and
    when it invokes ``preprocess``/``postprocess`` should be confirmed there.
    """

    def __init__(self):
        super().__init__()

    def preprocess(self, x):
        """Return ``x`` unchanged; no preprocessing is applied."""
        return x

    def postprocess(self, x):
        """
        Applies an anti-logarithmic linear transformation to a PyTorch tensor.

        With ``adult_age = 20`` each element of ``x`` is mapped as follows:

        * ``x < 0``: ``(1 + adult_age) * exp(x) - 1``
        * otherwise: ``(1 + adult_age) * x + adult_age``

        Both branches evaluate to ``adult_age`` at ``x == 0``. The result is
        written into a new tensor created with ``torch.empty_like(x)``.
        """
        adult_age = 20

        # Create a mask for negative and non-negative values
        mask_negative = x < 0
        mask_non_negative = ~mask_negative

        # Initialize the result tensor
        age_tensor = torch.empty_like(x)

        # Exponential transformation for negative values
        age_tensor[mask_negative] = (1 + adult_age) * torch.exp(x[mask_negative]) - 1

        # Linear transformation for non-negative values
        age_tensor[mask_non_negative] = (1 + adult_age) * x[mask_non_negative] + adult_age

        return age_tensor



In [3]:
# Create the clock object; its metadata, features and base model are filled in by the cells below.
model = pya.models.Han()

## Define clock metadata

In [4]:
# Descriptive metadata for the clock, written key by key into model.metadata
# in the order listed here.
_clock_metadata = {
    "clock_name": 'han',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2020,
    "approved_by_author": '✅',
    "citation": "Han, Yang, et al. \"New targeted approaches for epigenetic age predictions.\" BMC biology 18 (2020): 1-15.",
    "doi": "https://doi.org/10.1186/s12915-020-00807-2",
    "research_only": None,
    "notes": None,
}
for _field, _value in _clock_metadata.items():
    model.metadata[_field] = _value

## Download clock dependencies

In [5]:
# from authors
# Model terms in coefficient order. The first entry, "(Intercept)", is the
# bias term rather than a CpG probe ID; the cells below split it off with
# `[1:]` (features/weights) and `[0]` (intercept).
cpg_sites = [
    "(Intercept)",
    'cg19283806',
    'cg11807280',
    'cg00329615',
    'cg22454769',
    'cg16867657',
    'cg22796704',
    'cg09809672',
    'cg18618815',
    'cg25533247',
    'cg02286081',
    'cg20222376',
    'cg19344626',
    'cg07082267',
    'cg15845821',
    'cg11741201',
    'cg16054275',
    'cg18933331',
    'cg20249566',
    'cg16604658',
    'cg07583137',
    'cg16008966',
    'cg14556683',
    'cg03746976',
    'cg14314729',
    'cg03431918',
    'cg22156456',
    'cg23078123',
    'cg09748749',
    'cg17457912',
    'cg06492796',
    'cg17593342',
    'cg05308819',
    'cg22512670',
    'cg01820962',
    'cg06639320',
    'cg03224418',
    'cg17436656',
    'cg19500607',
    'cg03735592',
    'cg20669012',
    'cg19761273',
    'cg07080372',
    'cg03638795',
    'cg19722847',
    'cg24711336',
    'cg26935102',
    'cg10221746',
    'cg02085953',
    'cg04604946',
    'cg08558886',
    'cg22361181',
    'cg04208403',
    'cg12623930',
    'cg21572722',
    'cg17885226',
    'cg00748589',
    'cg13033938',
    'cg19784428',
    'cg22016779',
    'cg01974375',
    'cg25256723',
    'cg24724428',
    'cg07547549',
    'cg25410668',
    'cg21296230'
]

# Coefficients paired positionally with `cpg_sites`: the first value belongs
# to "(Intercept)" and each following value to the CpG ID at the same index.
coefficients = [
    0.711184864,
    -0.588354066,
    -0.212038592,
    0.014351188,
    0.051285529,
    2.152191741,
    -0.689940565,
    -0.643729974,
    -0.772516118,
    0.116662569,
    -0.233409678,
    0.002802259,
    -0.062172432,
    -0.224027294,
    1.535209377,
    0.344367661,
    0.188826525,
    -0.409150014,
    -0.776065004,
    0.500336643,
    0.06125005,
    -0.391624093,
    0.100449175,
    0.02000403,
    0.266044453,
    -0.259829677,
    0.254063071,
    -0.726178338,
    -1.141947121,
    -0.06322441,
    -0.196926134,
    0.85613244,
    -0.887977059,
    -0.334654336,
    -0.854110638,
    1.916122401,
    0.92208575,
    -0.070665617,
    0.524707402,
    0.319375235,
    0.376055859,
    0.033361038,
    -1.458360975,
    -0.267930475,
    -0.590085273,
    0.642506165,
    0.470352872,
    0.273581649,
    -0.637989789,
    -1.109388991,
    -0.16886654,
    0.662451226,
    -0.091891613,
    0.086290028,
    -0.426089316,
    0.32615363,
    2.535639458,
    -3.626802894,
    0.097619541,
    -0.427604263,
    -0.41418774,
    -0.27412342,
    0.703772384,
    -0.110027226,
    0.283649813,
    0.928585964
]

## Load features

In [6]:
# One row per model term: the term name and its coefficient.
df = pd.DataFrame(dict(feature=cpg_sites, coefficient=coefficients))

# Row 0 is the "(Intercept)" term, so the clock's features start at row 1.
model.features = df['feature'].iloc[1:].tolist()

## Load weights into base model

In [7]:
# Every coefficient after the intercept row, shaped (1, n_features).
weights = torch.tensor(df['coefficient'].iloc[1:].tolist())[None, :]
# The intercept is the coefficient stored in row 0.
intercept = torch.tensor([df['coefficient'].iloc[0]])

#### Linear model

In [8]:
# Linear model with one input per feature.
# NOTE(review): assumes LinearModel exposes a torch layer named `linear` — confirm in pyaging.
base_model = pya.models.LinearModel(input_dim=len(model.features))

# Overwrite the layer's parameters with the author-provided coefficients, cast to float32.
base_model.linear.weight.data = weights.float()
base_model.linear.bias.data = intercept.float()

# Attach the populated linear model to the clock.
model.base_model = base_model

## Load reference values

In [9]:
# No reference values are set for this clock.
model.reference_values = None

## Load preprocess and postprocess objects

In [10]:
# No named preprocessing step and no preprocessing dependencies are configured.
model.preprocess_name = None
model.preprocess_dependencies = None

In [11]:
# Record the postprocessing step by name; no dependency objects are attached to it.
model.postprocess_name = 'anti_log_linear'
model.postprocess_dependencies = None

## Check all clock parameters

In [12]:
# Print the model's attributes for a visual check before testing and saving.
pya.utils.print_model_details(model)


Model Attributes:

training: True
metadata: {'approved_by_author': '✅',
 'citation': 'Han, Yang, et al. "New targeted approaches for epigenetic age '
             'predictions." BMC biology 18 (2020): 1-15.',
 'clock_name': 'han',
 'data_type': 'methylation',
 'doi': 'https://doi.org/10.1186/s12915-020-00807-2',
 'notes': None,
 'research_only': None,
 'species': 'Homo sapiens',
 'version': None,
 'year': 2020}
reference_values: None
preprocess_name: None
preprocess_dependencies: None
postprocess_name: 'anti_log_linear'
postprocess_dependencies: None
features: ['cg19283806', 'cg11807280', 'cg00329615', 'cg22454769', 'cg16867657', 'cg22796704', 'cg09809672', 'cg18618815', 'cg25533247', 'cg02286081', 'cg20222376', 'cg19344626', 'cg07082267', 'cg15845821', 'cg11741201', 'cg16054275', 'cg18933331', 'cg20249566', 'cg16604658', 'cg07583137', 'cg16008966', 'cg14556683', 'cg03746976', 'cg14314729', 'cg03431918', 'cg22156456', 'cg23078123', 'cg09748749', 'cg17457912', 'cg06492796']... [Total e

## Basic test

In [13]:
# Smoke test: push seeded random data through the full model to confirm that
# it runs end to end. The inputs are random standard-normal values, so the
# printed predictions are not meaningful ages.
torch.manual_seed(42)
# Named `x` rather than `input` so the builtin input() is not shadowed for the
# rest of the notebook session.
x = torch.randn(10, len(model.features), dtype=float)
model.eval()
# Cast the model to float64 to match the dtype of `x`.
model.to(float)
pred = model(x)
pred

tensor([[ -1.0000],
        [  2.2557],
        [310.9586],
        [  4.2231],
        [ -0.7209],
        [161.4707],
        [ -0.7927],
        [557.5405],
        [  0.8421],
        [ -0.9992]], dtype=torch.float64, grad_fn=<IndexPutBackward0>)

## Save torch model

In [14]:
# Serialize the whole model object (not just a state_dict) to ../weights/<clock_name>.pt.
# NOTE(review): presumably ../weights must already exist — confirm before running.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")

## Clear directory
<a id="10"></a>

In [15]:
def remove_folder(path):
    """Recursively delete the folder at ``path`` and report the outcome.

    A success or failure message is printed; any exception raised by the
    deletion is reported rather than propagated. Nothing is returned.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")

# Take the listing of the current directory once, before anything is deleted
all_items = os.listdir('.')

# Remove every folder and every file that is not a notebook
for entry in all_items:
    if os.path.isdir(entry):
        # Folders are removed together with all of their contents
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        # Plain files are deleted unless they are .ipynb notebooks
        os.remove(entry)
        print(f"Deleted file: {entry}")