In [1]:
import json
import os
import subprocess

import pandas as pd
import pyaging as pya
import torch

In [2]:
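# Download the PC-Clocks RData file (CalcAllPCClocks.RData) from https://yale.app.box.com/s/kq0b0a7lxckxjvaz7x5n4keaug7tewry and place it in this notebook's working directory before running the cells below.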

In [3]:
%%writefile pcclocks.r

# Export the PCGrimAge object and the CpG list stored in the RData bundle as JSON.
library(dplyr)
library(tibble)
library(tidyr)
library(jsonlite)

load(file = "CalcAllPCClocks.RData")

# Show every object the RData file brought into the session.
print(ls(all.names = TRUE))

# JSON export does not carry the names of matrices/vectors, so store them
# as explicit list entries next to the values they label.
CalcPCGrimAge$rotation.names = colnames(CalcPCGrimAge$rotation)

component_models = c("PCPACKYRS", "PCADM", "PCB2M", "PCCystatinC",
                     "PCGDF15", "PCLeptin", "PCPAI1", "PCTIMP1")
for (component in component_models) {
    model_key = paste0(component, ".model")
    CalcPCGrimAge[[paste0(model_key, ".names")]] = names(CalcPCGrimAge[[model_key]])
}

write_json(CalcPCGrimAge, "CalcPCGrimAge.json")
write_json(CpGs, "CpGs.json")

Writing pcclocks.r


In [4]:
# Run the generated R script. check=True raises CalledProcessError when
# Rscript exits non-zero (and FileNotFoundError when Rscript is not installed),
# so a failed export stops here instead of surfacing later as a missing JSON file.
try:
    subprocess.run(["Rscript", "pcclocks.r"], check=True)
finally:
    # Always clean up the temporary script, even if the R run failed.
    if os.path.exists("pcclocks.r"):
        os.remove("pcclocks.r")


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



[1] "anti.trafo"        "CalcPCDNAmTL"      "CalcPCGrimAge"    
[4] "CalcPCHannum"      "CalcPCHorvath1"    "CalcPCHorvath2"   
[7] "CalcPCPhenoAge"    "CpGs"              "imputeMissingCpGs"


0

In [5]:
# Read the CpG names exported by the R script and append the two extra
# inputs ('female', 'age') to form the full feature list.
with open('CpGs.json', 'r') as cpg_file:
    cpg_names = json.load(cpg_file)
features = cpg_names + ['female', 'age']

# Read the exported PCGrimAge object (a plain dict after JSON decoding).
with open('CalcPCGrimAge.json', 'r') as clock_file:
    weights_dict = json.load(clock_file)

# Convert the `center` and `rotation` entries to float32 tensors.
center, rotation = (
    torch.tensor(weights_dict[key]).float() for key in ('center', 'rotation')
)

In [6]:
# Sizes taken from the rotation tensor: its first dimension is passed as the
# model's input_dim and its second as pc_dim.
num_features = rotation.shape[0]
num_components = rotation.shape[1]
# One entry per component model; the last two entries of `components` are skipped.
comp_dims = [len(weights_dict[f'{comp}.model']) for comp in weights_dict['components'][:-2]]

# Names that the component models' coefficients refer to.
all_features = weights_dict['rotation.names'] + ['Female'] + ['Age']

# Name -> first position, built once so each lookup below is O(1) while
# keeping the same "first occurrence" result as list.index().
feature_position = {}
for position, name in enumerate(all_features):
    feature_position.setdefault(name, position)

# Initialize the model
model = pya.models.PCGrimAge(input_dim=num_features, pc_dim=num_components, comp_dims=comp_dims)

model.center.data = center
model.rotation.data = rotation

# zip() stops at the number of step-1 layers, so any extra trailing entries in
# `components` are ignored here.
for i, component in zip(range(len(model.step1_layers)), weights_dict['components']):

    model_weights = torch.tensor(weights_dict[f'{component}.model']).float()
    intercept = torch.tensor(weights_dict[f'{component}.intercept']).float()
    model.step1_layers[i].weight.data = model_weights.unsqueeze(0)
    model.step1_layers[i].bias.data = intercept

    coefficient_names = weights_dict[f'{component}.model.names']
    # Fail loudly instead of silently dropping unknown names: a dropped name
    # would leave fewer indices than weights and misalign the two.
    unknown_names = [name for name in coefficient_names if name not in feature_position]
    if unknown_names:
        raise ValueError(
            f"{component}: coefficient names not found among model inputs: {unknown_names[:5]}"
        )
    if len(coefficient_names) != model_weights.shape[0]:
        raise ValueError(
            f"{component}: {len(coefficient_names)} coefficient names for "
            f"{model_weights.shape[0]} weights"
        )

    indices = torch.tensor([feature_position[name] for name in coefficient_names]).long()
    model.step1_features[i].data = indices

# Second-step layer that produces the final PCGrimAge output.
model_weights = torch.tensor(weights_dict['PCGrimAge.model']).float()
intercept = torch.tensor(weights_dict['PCGrimAge.intercept']).float()
model.step2.weight.data = model_weights.unsqueeze(0)
model.step2.bias.data = intercept

In [7]:
# Bundle saved as the clock's weights file: no pre/post-processing steps,
# the ordered input feature names, and the populated model parameters.
# NOTE: this rebinds `weights_dict`, which until now held the decoded JSON export.
weights_dict = dict(
    preprocessing=None,
    preprocessing_helper=None,
    postprocessing=None,
    postprocessing_helper=None,
    features=features,
    weight_dict=model.state_dict(),
)

# Descriptive metadata saved alongside the weights.
metadata_dict = dict(
    species='Homo sapiens',
    data_type='methylation',
    year=2022,
    citation="Higgins-Chen, Albert T., et al. \"A computational solution for bolstering reliability of epigenetic clocks: Implications for clinical trials and longitudinal tracking.\" Nature aging 2.7 (2022): 644-661.",
    doi="https://doi.org/10.1038/s43587-022-00248-2",
    notes=None,
)

In [8]:
# Write the weights bundle and the metadata to their respective directories.
weights_path = '../weights/pcgrimage.pt'
metadata_path = '../metadata/pcgrimage.pt'
torch.save(weights_dict, weights_path)
torch.save(metadata_dict, metadata_path)

In [9]:
# Delete the intermediate JSON exports. os.remove is used instead of shelling
# out to `rm` so the cleanup works on any platform; files that are already
# gone are skipped, so re-running this cell is harmless.
for leftover in ("CalcPCGrimAge.json", "CpGs.json"):
    if os.path.exists(leftover):
        os.remove(leftover)

0