# GrimAge

## Index
1. [Instantiate model class](#Instantiate-model-class)
2. [Define clock metadata](#Define-clock-metadata)
3. [Download clock dependencies](#Download-clock-dependencies)
4. [Load features](#Load-features)
5. [Load weights into base model](#Load-weights-into-base-model)
6. [Load reference values](#Load-reference-values)
7. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)
8. [Check all clock parameters](#Check-all-clock-parameters)
9. [Basic test](#Basic-test)
10. [Save torch model](#Save-torch-model)
11. [Clear directory](#Clear-directory)

Let's first import some packages:

In [1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np

## Instantiate model class

In [2]:
def print_entire_class(cls):
    """Print the complete source code of ``cls``.

    The text is obtained with ``inspect.getsource`` and written to standard
    output; nothing is returned.
    """
    print(inspect.getsource(cls))

# Display the source of the GrimAge model class provided by pyaging.
print_entire_class(pya.models.GrimAge)

class GrimAge(pyagingModel):
    def __init__(self):
        super().__init__()

        self.PACKYRS = None
        self.ADM = None
        self.B2M = None
        self.CystatinC = None
        self.GDF15 = None
        self.Leptin = None
        self.PAI1 = None
        self.TIMP1 = None

        self.features_PACKYRS = None
        self.features_ADM = None
        self.features_B2M = None
        self.features_CystatinC = None
        self.features_GDF15 = None
        self.features_Leptin = None
        self.features_PAI1 = None
        self.features_TIMP1 = None

    def forward(self, x):
        Female = x[:, -2].unsqueeze(1)
        Age = x[:, -1].unsqueeze(1)

        PACKYRS = self.PACKYRS(x[:, self.features_PACKYRS])
        ADM = self.ADM(x[:, self.features_ADM])
        B2M = self.B2M(x[:, self.features_B2M])
        CystatinC = self.CystatinC(x[:, self.features_CystatinC])
        GDF15 = self.GDF15(x[:, self.features_GDF15])
        Leptin = self.Leptin(x[:, self.features

In [3]:
# Create the (still empty) GrimAge model; later cells fill in its attributes.
model = pya.models.GrimAge()

## Define clock metadata

In [4]:
# Descriptive metadata for the clock, written into the model's metadata dict in one go.
model.metadata.update(
    {
        "clock_name": 'grimage',
        "data_type": 'methylation',
        "species": 'Homo sapiens',
        "year": 2019,
        "approved_by_author": '⌛',
        "citation": "Lu, Ake T., et al. \"DNA methylation GrimAge strongly predicts lifespan and healthspan.\" Aging (albany NY) 11.2 (2019): 303.",
        "doi": "https://doi.org/10.18632/aging.101684",
        "research_only": True,
        "notes": None,
    }
)

## Download clock dependencies

In [5]:
# Download the two supporting CSV files (surrogate-model coefficients and the
# "gold" reference annotation) from the pyaging S3 bucket into the working directory.
logger = pya.logger.Logger()
urls = [
    "https://pyaging.s3.amazonaws.com/supporting_files/ElasticNet_DNAmProtein_Vars_model4.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/datMiniAnnotation3_Gold.csv",
]
# Named `download_dir` rather than `dir` so the builtin `dir()` stays usable in the kernel.
download_dir = "."
for url in urls:
    pya.utils.download(url, download_dir, logger, indent_level=1)

|-----------> Downloading data to ./ElasticNet_DNAmProtein_Vars_model4.csv


|-----------> in progress: 20.8889%

|-----------> in progress: 41.7778%

|-----------> in progress: 62.6667%

|-----------> in progress: 83.5556%

|-----------> in progress: 104.4445%

|-----------> in progress: 100.0000%


|-----------> Downloading data to ./datMiniAnnotation3_Gold.csv


|-----------> in progress: 1.7208%

|-----------> in progress: 3.4415%

|-----------> in progress: 5.1623%

|-----------> in progress: 6.8831%

|-----------> in progress: 8.6038%

|-----------> in progress: 10.3246%

|-----------> in progress: 12.0454%

|-----------> in progress: 13.7662%

|-----------> in progress: 15.4869%

|-----------> in progress: 17.2077%

|-----------> in progress: 18.9285%

|-----------> in progress: 20.6492%

|-----------> in progress: 22.3700%

|-----------> in progress: 24.0908%

|-----------> in progress: 25.8115%

|-----------> in progress: 27.5323%

|-----------> in progress: 29.2531%

|-----------> in progress: 30.9739%

|-----------> in progress: 32.6946%

|-----------> in progress: 34.4154%

|-----------> in progress: 36.1362%

|-----------> in progress: 37.8569%

|-----------> in progress: 39.5777%

|-----------> in progress: 41.2985%

|-----------> in progress: 43.0192%

|-----------> in progress: 44.7400%

|-----------> in progress: 46.4608%

|-----------> in progress: 48.1816%

|-----------> in progress: 49.9023%

|-----------> in progress: 51.6231%

|-----------> in progress: 53.3439%

|-----------> in progress: 55.0646%

|-----------> in progress: 56.7854%

|-----------> in progress: 58.5062%

|-----------> in progress: 60.2269%

|-----------> in progress: 61.9477%

|-----------> in progress: 63.6685%

|-----------> in progress: 65.3893%

|-----------> in progress: 67.1100%

|-----------> in progress: 68.8308%

|-----------> in progress: 70.5516%

|-----------> in progress: 72.2723%

|-----------> in progress: 73.9931%

|-----------> in progress: 75.7139%

|-----------> in progress: 77.4346%

|-----------> in progress: 79.1554%

|-----------> in progress: 80.8762%

|-----------> in progress: 82.5970%

|-----------> in progress: 84.3177%

|-----------> in progress: 86.0385%

|-----------> in progress: 87.7593%

|-----------> in progress: 89.4800%

|-----------> in progress: 91.2008%

|-----------> in progress: 92.9216%

|-----------> in progress: 94.6423%

|-----------> in progress: 96.3631%

|-----------> in progress: 98.0839%

|-----------> in progress: 99.8046%

|-----------> in progress: 100.0000%


## Load features

#### From CSV

In [6]:
# Coefficient table; this notebook reads its 'Y.pred', 'var' and 'beta' columns.
df = pd.read_csv('ElasticNet_DNAmProtein_Vars_model4.csv')

# Sorted unique variable names, minus the first two entries, followed by the
# two covariate columns that the model expects at the end of its input.
# NOTE(review): the first two sorted names are presumably non-CpG terms — confirm.
unique_vars = np.unique(df['var']).tolist()
model.features = unique_vars[2:] + ['female', 'age']

## Load weights into base model

#### Linear model

In [7]:
# Column order of the model input; every surrogate model selects its own
# columns from this list by position.
all_features = np.unique(df['var']).tolist()[2:] + ['Female'] + ['Age']

# First-occurrence position of each feature name. Gives the same answer as
# ``all_features.index(name)`` without rescanning the list for every coefficient.
feature_position = {}
for position, name in enumerate(all_features):
    feature_position.setdefault(name, position)

# Model attribute name -> value of the 'Y.pred' column holding its coefficients.
SURROGATE_GROUPS = {
    'PACKYRS': 'DNAmPACKYRS',
    'ADM': 'DNAmadm',
    'B2M': 'DNAmB2M',
    'CystatinC': 'DNAmCystatin_C',
    'GDF15': 'DNAmGDF_15',
    'Leptin': 'DNAmleptin',
    'PAI1': 'DNAmpai_1',
    'TIMP1': 'DNAmTIMP_1',
}


def _build_linear_submodel(y_pred):
    """Build the linear surrogate model for one ``Y.pred`` group of ``df``.

    Parameters
    ----------
    y_pred : str
        Value of the ``Y.pred`` column that selects the rows of this surrogate.

    Returns
    -------
    tuple
        ``(submodel, feature_indices)`` where ``submodel`` is a
        ``pya.models.LinearModel`` carrying the group's weights and bias, and
        ``feature_indices`` is a long tensor with the positions (within
        ``all_features``) of the columns the submodel consumes.

    Raises
    ------
    ValueError
        If the number of weights differs from the number of resolved feature
        columns, which would make the submodel fail at prediction time.
    """
    rows = df.loc[df['Y.pred'] == y_pred]
    betas = np.array(rows['beta'])

    # The first row of each group supplies the bias; the remaining rows are the weights.
    weight = torch.tensor(betas[1:]).unsqueeze(0).float()
    bias = torch.tensor(betas[0]).float()

    # Rows whose variable is not an input column are skipped.
    feature_indices = torch.tensor(
        [feature_position[var] for var in np.array(rows['var']) if var in feature_position]
    ).long()

    if weight.shape[1] != feature_indices.numel():
        raise ValueError(
            f"{y_pred}: {weight.shape[1]} weights but "
            f"{feature_indices.numel()} matching feature columns"
        )

    # Size the layer from the weights themselves (the bias row is not an input),
    # so the layer's declared input size agrees with the weights loaded into it.
    submodel = pya.models.LinearModel(input_dim=weight.shape[1])
    submodel.linear.weight.data = weight
    submodel.linear.bias.data = bias
    return submodel, feature_indices


for attribute, y_pred in SURROGATE_GROUPS.items():
    submodel, feature_indices = _build_linear_submodel(y_pred)
    setattr(model, attribute, submodel)
    setattr(model, f'features_{attribute}', feature_indices)

#### Base linear model

In [8]:
# Hard-coded coefficients of the final linear layer; the next cell loads them,
# in this order, as the weight row of `base_model`.
# NOTE(review): presumably one coefficient per surrogate output plus the
# sex/age covariates — confirm the ordering against GrimAge.forward.
grimage_weights = [
    0.000348777412272004,
    4.59105969389204e-07,
    3.49816671441537e-06,
    0.000143661105491888,
    0.00790270975255529,
    2.55560382039825e-05,
    -7.32066983502079e-06,
    0.0303981613409142,
    0.0300823182194075,
    -0.228468475622039
]

In [9]:
# Final linear layer: one input per entry of `grimage_weights`, zero bias.
n_inputs = len(grimage_weights)
base_model = pya.models.LinearModel(input_dim=n_inputs)

weight_row = torch.tensor(grimage_weights).float().unsqueeze(0)  # shape (1, n_inputs)
zero_bias = torch.zeros(1, dtype=torch.float32)
base_model.linear.weight.data = weight_row
base_model.linear.bias.data = zero_bias

model.base_model = base_model

## Load reference values

In [10]:
# Reference ("gold") value for every feature except the last two entries of
# `model.features`; those two get the fixed defaults appended below.
reference_df = pd.read_csv('datMiniAnnotation3_Gold.csv', index_col=0)
cpg_names = model.features[:-2]
gold_values = reference_df.loc[cpg_names]['gold'].tolist()
model.reference_values = gold_values + [1, 65]  # 65 yo F

## Load preprocess and postprocess objects

In [11]:
# No preprocessing step is configured for this clock.
model.preprocess_name = None
model.preprocess_dependencies = None

In [12]:
# Select the 'cox_to_years' postprocessing step by name; it needs no extra dependencies.
# NOTE(review): the step itself is implemented inside pyaging, not in this notebook.
model.postprocess_name = 'cox_to_years'
model.postprocess_dependencies = None

## Check all clock parameters

In [13]:
# Print the model's attributes and parameters for a visual sanity check before saving.
pya.utils.print_model_details(model)


Model Attributes:

training: True
metadata: {'approved_by_author': '⌛',
 'citation': 'Lu, Ake T., et al. "DNA methylation GrimAge strongly predicts '
             'lifespan and healthspan." Aging (albany NY) 11.2 (2019): 303.',
 'clock_name': 'grimage',
 'data_type': 'methylation',
 'doi': 'https://doi.org/10.18632/aging.101684',
 'notes': None,
 'research_only': True,
 'species': 'Homo sapiens',
 'version': None,
 'year': 2019}
reference_values: [0.422480272528644, 0.935109546405548, 0.0162959729801047, 0.502691053893618, 0.910839576323153, 0.710155040209873, 0.479121329208521, 0.905888314944049, 0.279992670790348, 0.117900358329507, 0.940987438881091, 0.761621096809391, 0.0721244934513398, 0.0851830172952001, 0.222068390557704, 0.103705423432714, 0.91516014793103, 0.748331163695382, 0.903928589429489, 0.524090323888757, 0.894685558616447, 0.647988638853782, 0.0581747999131966, 0.830024180811995, 0.209808614636345, 0.324296328128978, 0.118979846374564, 0.545425926051344, 0.9232432449

B2M.linear.bias: tensor(1412953.3750)
CystatinC.linear.weight: [2589.667724609375, -15088.66015625, 36553.1171875, -14194.40234375, 177517.65625, 2264.057861328125, -3639.993896484375, 146477.484375, 11819.2109375, 438.5729064941406, -35147.8515625, -146515.59375, 3482.841796875, 46536.5078125, -19400.65625, 5430.861328125, -2332.132080078125, 28774.947265625, 37130.91796875, 4642.5302734375, 2118.04541015625, 8312.26953125, 1070.0323486328125, 53286.4609375, 6551.3515625, 1233.9503173828125, -2420.194580078125, 3439.35546875, 13203.8330078125, 35212.96875]... [Tensor of shape torch.Size([1, 88])]
CystatinC.linear.bias: tensor(1091528.5000)


GDF15.linear.weight: [9.351357460021973, 84.36457824707031, 143.69606018066406, 81.37864685058594, 29.040103912353516, -112.49447631835938, 11.24372673034668, -118.38355255126953, -1980.7607421875, 12.090482711791992, 381.68292236328125, 20.09428596496582, 79.332275390625, 93.16657257080078, -42.04895782470703, -56.21128463745117, 50.33696746826172, -4.690526962280273, 23.865774154663086, 110.17974090576172, -504.0067138671875, 454.8924255371094, 3.939922571182251, 10.757635116577148, -21.75938606262207, 191.6622314453125, 342.773193359375, -183.74392700195312, 297.9110107421875, 50.03202438354492]... [Tensor of shape torch.Size([1, 138])]
GDF15.linear.bias: tensor(1975.7983)
Leptin.linear.weight: [399.8157043457031, 3861.580810546875, 4281.87353515625, 1714.1119384765625, -25588.5390625, 25643.771484375, -3710.89697265625, 1028.21484375, 4559.81591796875, -9729.9609375, 28351.38671875, 60980.60546875, 894.3289184570312, 2256.300537109375, -997.83447265625, 16759.64453125, 637.7293090

Leptin.linear.bias: tensor(7210.0625)
PAI1.linear.weight: [62.57840347290039, 321.195556640625, -476.72576904296875, 6221.58544921875, 7843.1796875, -313.1407775878906, -3855.91650390625, 3294.65234375, 752.2315673828125, -47.931236267089844, 301.0967712402344, -321.5203552246094, 24.402000427246094, -568.0665893554688, -3272.8876953125, 1760.7930908203125, -6259.56103515625, -10119.8154296875, 2037.0191650390625, 2472.403564453125, -3049.89794921875, 4225.28271484375, 982.7288208007812, -152.87660217285156, -356.9750061035156, -4542.0302734375, 433.330810546875, -169.24246215820312, -2095.550537109375, 311.5205078125]... [Tensor of shape torch.Size([1, 211])]


PAI1.linear.bias: tensor(-1129.6017)
TIMP1.linear.weight: [127.23798370361328, 576.6142578125, -161.49070739746094, -186.5166778564453, 571.6375732421875, 174.81607055664062, 23.66378402709961, -228.55433654785156, 58.980308532714844, 469.25677490234375, 723.093994140625, 1335.6502685546875, 542.5457153320312, 2160.827880859375, 922.79736328125, 7743.75146484375, -1151.7979736328125, -43.27967834472656, 407.7511901855469, -5735.69287109375, -11.83304500579834, -665.969970703125, 340.971923828125, 207.72994995117188, -32.84348678588867, -1965.6759033203125, 253.16822814941406, 23.78565788269043, 3192.898681640625, 67.02117156982422]... [Tensor of shape torch.Size([1, 43])]


TIMP1.linear.bias: tensor(15844.5957)


base_model.linear.weight: tensor([[ 3.4878e-04,  4.5911e-07,  3.4982e-06,  1.4366e-04,  7.9027e-03,
          2.5556e-05, -7.3207e-06,  3.0398e-02,  3.0082e-02, -2.2847e-01]])
base_model.linear.bias: tensor([0.])




## Basic test

In [14]:
# Smoke test: run the assembled model on seeded random data and display the output.
torch.manual_seed(42)

# Named `test_input` so the builtin `input()` is not shadowed in the kernel.
# The tensor is created directly as float64, so no extra `.double()` cast is needed.
test_input = torch.randn(10, len(model.features), dtype=torch.float64)

model.eval()
# Casts the model's parameters to float64 in place; the cells below operate on
# this float64 model.
model.to(torch.float64)

pred = model(test_input)
pred

tensor([[ -8.6124],
        [ 66.5366],
        [ 49.7812],
        [-51.8447],
        [ 89.6747],
        [-65.7946],
        [ 89.6295],
        [-60.4899],
        [ 69.0053],
        [-58.3423]], dtype=torch.float64, grad_fn=<AddBackward0>)

## Save torch model

In [15]:
# Serialize the whole model object (not just a state_dict) to ../weights/<clock_name>.pt.
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")

## Clear directory
<a id="10"></a>

In [16]:
def remove_folder(path):
    """Delete the directory at ``path`` together with everything inside it.

    Best-effort: any exception raised while deleting is reported on standard
    output instead of being propagated. Returns ``None`` in both cases.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")

# Clean the working directory: every sub-folder is removed, and so is every
# file except the notebooks themselves (*.ipynb).
for entry in os.listdir('.'):
    if os.path.isdir(entry):
        remove_folder(entry)
    elif os.path.isfile(entry) and not entry.endswith('.ipynb'):
        os.remove(entry)
        print(f"Deleted file: {entry}")

Deleted file: coefficients.xlsx
Deleted file: datMiniAnnotation3_Gold.csv
Deleted file: ElasticNet_DNAmProtein_Vars_model4.csv
