# PCGrimAge

## Index
1. [Instantiate model class](#Instantiate-model-class)
2. [Define clock metadata](#Define-clock-metadata)
3. [Download clock dependencies](#Download-clock-dependencies)
5. [Load features](#Load-features)
6. [Load weights into base model](#Load-weights-into-base-model)
7. [Load reference values](#Load-reference-values)
8. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)
10. [Check all clock parameters](#Check-all-clock-parameters)
10. [Basic test](#Basic-test)
11. [Save torch model](#Save-torch-model)
12. [Clear directory](#Clear-directory)

Let's first import some packages:

In [1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya

## Instantiate model class

In [2]:
def print_entire_class(cls):
    source = inspect.getsource(cls)
    print(source)

print_entire_class(pya.models.PCGrimAge)

class PCGrimAge(pyagingModel):
    def __init__(self):
        super().__init__()

        self.center = nn.Parameter(torch.empty(78464), requires_grad=False)
        self.rotation = nn.Parameter(torch.empty((78464, 1933)), requires_grad=False)

        self.PCPACKYRS = None
        self.PCADM = None
        self.PCB2M = None
        self.PCCystatinC = None
        self.PCGDF15 = None
        self.PCLeptin = None
        self.PCPAI1 = None
        self.PCTIMP1 = None

        self.features_PCPACKYRS = None
        self.features_PCADM = None
        self.features_PCB2M = None
        self.features_PCCystatinC = None
        self.features_PCGDF15 = None
        self.features_PCLeptin = None
        self.features_PCPAI1 = None
        self.features_PCTIMP1 = None

    def forward(self, x):
        CpGs = x[:, :-2]
        Female = x[:, -2].unsqueeze(1)
        Age = x[:, -1].unsqueeze(1)

        CpGs = CpGs - self.center  # Apply centering
        PCs = torch.mm(CpGs, self.rotation)  # A

In [3]:
model = pya.models.PCGrimAge()

## Define clock metadata

In [4]:
model.metadata["clock_name"] = 'pcgrimage'
model.metadata["data_type"] = 'methylation'
model.metadata["species"] = 'Homo sapiens'
model.metadata["year"] = 2022
model.metadata["approved_by_author"] = '⌛'
model.metadata["citation"] = "Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661."
model.metadata["doi"] = "https://doi.org/10.1038/s43587-022-00248-2"
model.metadata["research_only"] = None
model.metadata["notes"] = None

## Download clock dependencies

In [5]:
#download PCClock Rdata file from https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry
logger = pya.logger.Logger()
url = "https://pyaging.s3.amazonaws.com/supporting_files/CalcAllPCClocks.RData"
dir = "."
pya.utils.download(url, dir, logger, indent_level=1)

|-----------> Downloading data to ./CalcAllPCClocks.RData


#### Download from R package

In [6]:
%%writefile download.r

library(dplyr)
library(tibble)
library(tidyr)
library(jsonlite)

load(file = "CalcAllPCClocks.RData")

print(ls(all.names = TRUE))

CalcPCGrimAge$rotation.names = colnames(CalcPCGrimAge$rotation)

CalcPCGrimAge$PCPACKYRS.model.names = names(CalcPCGrimAge$PCPACKYRS.model)
CalcPCGrimAge$PCADM.model.names = names(CalcPCGrimAge$PCADM.model)
CalcPCGrimAge$PCB2M.model.names = names(CalcPCGrimAge$PCB2M.model)
CalcPCGrimAge$PCCystatinC.model.names = names(CalcPCGrimAge$PCCystatinC.model)
CalcPCGrimAge$PCGDF15.model.names = names(CalcPCGrimAge$PCGDF15.model)
CalcPCGrimAge$PCLeptin.model.names = names(CalcPCGrimAge$PCLeptin.model)
CalcPCGrimAge$PCPAI1.model.names = names(CalcPCGrimAge$PCPAI1.model)
CalcPCGrimAge$PCTIMP1.model.names = names(CalcPCGrimAge$PCTIMP1.model)

write_json(CalcPCGrimAge, "CalcPCGrimAge.json", digits = 9)
write_json(CpGs, "PCGrimAgeCpGs.json")
write_json(imputeMissingCpGs, "PCGrimAgeReferenceCpGBetas.json", digits = 10)

Writing download.r


In [None]:
os.system("Rscript download.r")

256

## Load features

#### From JSON file

In [8]:
with open('PCGrimAgeCpGs.json', 'r') as f:
    features = json.load(f)
model.features = features + ['female'] + ['age']

FileNotFoundError: [Errno 2] No such file or directory: 'PCGrimAgeCpGs.json'

## Load weights into base model

#### From JSON file

In [9]:
with open('CalcPCGrimAge.json', 'r') as f:
    weights_dict = json.load(f)

#### PC component

In [10]:
model.center.data = torch.tensor(weights_dict['center']).float()
model.rotation.data = torch.tensor(weights_dict['rotation']).float()

#### Linear model

In [11]:
all_features = weights_dict['rotation.names'] + ['Female'] + ['Age']

model.PCPACKYRS = pya.models.LinearModel(input_dim=len(weights_dict['PCPACKYRS.model.names']))
model.PCPACKYRS.linear.weight.data = torch.tensor(weights_dict['PCPACKYRS.model']).unsqueeze(0).float()
model.PCPACKYRS.linear.bias.data = torch.tensor(weights_dict['PCPACKYRS.intercept']).float()
model.features_PCPACKYRS = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCPACKYRS.model.names'] if item in all_features]).long()

model.PCADM = pya.models.LinearModel(input_dim=len(weights_dict['PCADM.model.names']))
model.PCADM.linear.weight.data = torch.tensor(weights_dict['PCADM.model']).unsqueeze(0).float()
model.PCADM.linear.bias.data = torch.tensor(weights_dict['PCADM.intercept']).float()
model.features_PCADM = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCADM.model.names'] if item in all_features]).long()

model.PCB2M = pya.models.LinearModel(input_dim=len(weights_dict['PCB2M.model.names']))
model.PCB2M.linear.weight.data = torch.tensor(weights_dict['PCB2M.model']).unsqueeze(0).float()
model.PCB2M.linear.bias.data = torch.tensor(weights_dict['PCB2M.intercept']).float()
model.features_PCB2M = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCB2M.model.names'] if item in all_features]).long()

model.PCCystatinC = pya.models.LinearModel(input_dim=len(weights_dict['PCCystatinC.model.names']))
model.PCCystatinC.linear.weight.data = torch.tensor(weights_dict['PCCystatinC.model']).unsqueeze(0).float()
model.PCCystatinC.linear.bias.data = torch.tensor(weights_dict['PCCystatinC.intercept']).float()
model.features_PCCystatinC = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCCystatinC.model.names'] if item in all_features]).long()

model.PCGDF15 = pya.models.LinearModel(input_dim=len(weights_dict['PCGDF15.model.names']))
model.PCGDF15.linear.weight.data = torch.tensor(weights_dict['PCGDF15.model']).unsqueeze(0).float()
model.PCGDF15.linear.bias.data = torch.tensor(weights_dict['PCGDF15.intercept']).float()
model.features_PCGDF15 = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCGDF15.model.names'] if item in all_features]).long()

model.PCLeptin = pya.models.LinearModel(input_dim=len(weights_dict['PCLeptin.model.names']))
model.PCLeptin.linear.weight.data = torch.tensor(weights_dict['PCLeptin.model']).unsqueeze(0).float()
model.PCLeptin.linear.bias.data = torch.tensor(weights_dict['PCLeptin.intercept']).float()
model.features_PCLeptin = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCLeptin.model.names'] if item in all_features]).long()

model.PCPAI1 = pya.models.LinearModel(input_dim=len(weights_dict['PCPAI1.model.names']))
model.PCPAI1.linear.weight.data = torch.tensor(weights_dict['PCPAI1.model']).unsqueeze(0).float()
model.PCPAI1.linear.bias.data = torch.tensor(weights_dict['PCPAI1.intercept']).float()
model.features_PCPAI1 = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCPAI1.model.names'] if item in all_features]).long()

model.PCTIMP1 = pya.models.LinearModel(input_dim=len(weights_dict['PCTIMP1.model.names']))
model.PCTIMP1.linear.weight.data = torch.tensor(weights_dict['PCTIMP1.model']).unsqueeze(0).float()
model.PCTIMP1.linear.bias.data = torch.tensor(weights_dict['PCTIMP1.intercept']).float()
model.features_PCTIMP1 = indices = torch.tensor([all_features.index(item) for item in weights_dict['PCTIMP1.model.names'] if item in all_features]).long()

#### Linear model

In [12]:
base_model = pya.models.LinearModel(input_dim=len(weights_dict['components']))

base_model.linear.weight.data = torch.tensor(weights_dict['PCGrimAge.model']).unsqueeze(0).float()
base_model.linear.bias.data = torch.tensor(weights_dict['PCGrimAge.intercept']).float()

model.base_model = base_model

In [13]:
weights_dict['components']

['PCPACKYRS',
 'PCADM',
 'PCB2M',
 'PCCystatinC',
 'PCGDF15',
 'PCLeptin',
 'PCPAI1',
 'PCTIMP1',
 'Age',
 'Female']

## Load reference values

#### From JSON file

In [14]:
with open('PCGrimAgeReferenceCpGBetas.json', 'r') as f:
    reference_feature_values = json.load(f)
model.reference_values = reference_feature_values + [1, 65] # 65yo F

## Load preprocess and postprocess objects

In [15]:
model.preprocess_name = None
model.preprocess_dependencies = None

In [16]:
model.postprocess_name = None
model.postprocess_dependencies = None

## Check all clock parameters

In [17]:
pya.utils.print_model_details(model)


Model Attributes:

training: True
metadata: {'approved_by_author': '⌛',
 'citation': 'Higgins-Chen, Albert T., et al. "A computational solution for '
             'bolstering reliability of epigenetic clocks: Implications for '
             'clinical trials and longitudinal tracking." Nature aging 2.7 '
             '(2022): 644-661.',
 'clock_name': 'pcgrimage',
 'data_type': 'methylation',
 'doi': 'https://doi.org/10.1038/s43587-022-00248-2',
 'notes': None,
 'research_only': None,
 'species': 'Homo sapiens',
 'version': None,
 'year': 2022}
reference_values: [0.82635363384, 0.18898814441, 0.72938889209, 0.8680421375, 0.090353927561, 0.0066895021761, 0.48924643338, 0.87262052546, 0.87955373232, 0.04847264273, 0.0093070979947, 0.16393676218, 0.058440936082, 0.18857484916, 0.58239394253, 0.86564960457, 0.58457176982, 0.82903550669, 0.065646928047, 0.8500055061, 0.79155429878, 0.83499889314, 0.7754384128, 0.0039641831799, 0.50570339787, 0.60547040884, 0.29093154314, 0.88154845595, 0.46

## Basic test

In [18]:
torch.manual_seed(42)
input = torch.randn(10, len(model.features), dtype=float).float()
model.eval()
model.to(float)
pred = model(input)
pred

tensor([[34.0448],
        [37.0862],
        [45.8467],
        [39.5590],
        [21.7459],
        [23.2970],
        [35.3788],
        [39.7534],
        [45.1109],
        [31.1977]], dtype=torch.float64, grad_fn=<AddmmBackward0>)

## Save torch model

In [19]:
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")

## Clear directory
<a id="10"></a>

In [None]:
# Function to remove a folder and all its contents
def remove_folder(path):
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except Exception as e:
        print(f"Error deleting folder {path}: {e}")

# Get a list of all files and folders in the current directory
all_items = os.listdir('.')

# Loop through the items
for item in all_items:
    # Check if it's a file and does not end with .ipynb
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)
        print(f"Deleted file: {item}")
    # Check if it's a folder
    elif os.path.isdir(item):
        remove_folder(item)

Deleted file: CalcAllPCClocks.RData
Deleted file: download.r
