# DNAmFitAge

## Index
1. [Instantiate model class](#Instantiate-model-class)
2. [Define clock metadata](#Define-clock-metadata)
3. [Download clock dependencies](#Download-clock-dependencies)
5. [Load features](#Load-features)
6. [Load weights into base model](#Load-weights-into-base-model)
7. [Load reference values](#Load-reference-values)
8. [Load preprocess and postprocess objects](#Load-preprocess-and-postprocess-objects)
10. [Check all clock parameters](#Check-all-clock-parameters)
10. [Basic test](#Basic-test)
11. [Save torch model](#Save-torch-model)
12. [Clear directory](#Clear-directory)

Let's first import some packages:

In [1]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import numpy as np
import pyaging as pya

## Instantiate model class

In [2]:
def print_entire_class(cls):
    source = inspect.getsource(cls)
    print(source)

print_entire_class(pya.models.DNAmFitAge)

class DNAmFitAge(pyagingModel):
    def __init__(self):
        super().__init__()

        self.GaitF = None
        self.GripF = None
        self.GaitM = None
        self.GripM = None
        self.VO2Max = None

        self.features_GaitF = None
        self.features_GripF = None
        self.features_GaitM = None
        self.features_GripM = None
        self.features_VO2Max = None

    def forward(self, x):
        Female = x[:, -3]  # .unsqueeze(1)
        Age = x[:, -2]  # .unsqueeze(1)
        GrimAge = x[:, -1].unsqueeze(1)

        is_female = Female == 1
        is_male = Female == 0

        x_f = x[is_female]
        x_m = x[is_male]

        GaitF = self.GaitF(x_f[:, self.features_GaitF])
        GripF = self.GripF(x_f[:, self.features_GripF])
        VO2MaxF = self.VO2Max(x_f[:, self.features_VO2Max])
        GrimAgeF = GrimAge[is_female, :]

        GaitM = self.GaitM(x_m[:, self.features_GaitM])
        GripM = self.GripM(x_m[:, self.features_GripM])
        VO2MaxM

In [3]:
model = pya.models.DNAmFitAge()

## Define clock metadata

In [4]:
model.metadata["clock_name"] = 'dnamfitage'
model.metadata["data_type"] = 'methylation'
model.metadata["species"] = 'Homo sapiens'
model.metadata["year"] = 2023
model.metadata["approved_by_author"] = '⌛'
model.metadata["citation"] = "McGreevy, Kristen M., et al. \"DNAmFitAge: biological age indicator incorporating physical fitness.\" Aging (Albany NY) 15.10 (2023): 3904."
model.metadata["doi"] = 'https://doi.org/10.18632/aging.204538'
model.metadata["research_only"] = None
model.metadata["notes"] = 'Reference values is mean between male and female training medians'

## Download clock dependencies

#### Download GitHub repository

In [5]:
github_url = "https://github.com/kristenmcgreevy/DNAmFitAge.git"
github_folder_name = github_url.split('/')[-1].split('.')[0]
os.system(f"git clone {github_url}")

0

#### Download from R package

In [6]:
%%writefile download.r

options(repos = c(CRAN = "https://cloud.r-project.org/"))
library(jsonlite)

DNAmFitnessModels <- readRDS("DNAmFitAge/DNAmFitnessModelsandFitAge_Oct2022.rds")

AllCpGs <- DNAmFitnessModels$AllCpGs
write_json(AllCpGs, "AllCpGs.json")

MaleMedians <- DNAmFitnessModels$Male_Medians_All
write.csv(MaleMedians, "MaleMedians.csv")
FemaleMedians <- DNAmFitnessModels$Female_Medians_All
write.csv(FemaleMedians, "FemaleMedians.csv")

Gait_noAge_Females <- DNAmFitnessModels$Gait_noAge_Females
Gait_noAge_Males <- DNAmFitnessModels$Gait_noAge_Males
Grip_noAge_Females <- DNAmFitnessModels$Grip_noAge_Females
Grip_noAge_Males <- DNAmFitnessModels$Grip_noAge_Males
VO2maxModel <- DNAmFitnessModels$VO2maxModel
write.csv(Gait_noAge_Females, "Gait_noAge_Females.csv")
write.csv(Gait_noAge_Males, "Gait_noAge_Males.csv")
write.csv(Grip_noAge_Females, "Grip_noAge_Females.csv")
write.csv(Grip_noAge_Males, "Grip_noAge_Males.csv")
write.csv(VO2maxModel, "VO2maxModel.csv")

Writing download.r


In [7]:
os.system("Rscript download.r")

0

## Load features

#### From JSON file

In [8]:
with open('AllCpGs.json', 'r') as f:
    features_list = json.load(f)
model.features = features_list + ['female'] + ['age'] + ['grimage']

## Load weights into base model

#### From CSV file

In [9]:
gaitf_df = pd.read_csv('Gait_noAge_Females.csv', index_col=0)
gaitm_df = pd.read_csv('Gait_noAge_Males.csv', index_col=0)
gripf_df = pd.read_csv('Grip_noAge_Females.csv', index_col=0)
gripm_df = pd.read_csv('Grip_noAge_Males.csv', index_col=0)
vo2max_df = pd.read_csv('VO2maxModel.csv', index_col=0)

#### Linear model

In [10]:
all_features = features_list + ['Female'] + ['Age'] + ['GrimAge']

model.GaitF = pya.models.LinearModel(input_dim=len(gaitf_df))
model.GaitF.linear.weight.data = torch.tensor(np.array(gaitf_df['estimate'][1:])).unsqueeze(0).float()
model.GaitF.linear.bias.data = torch.tensor(np.array(gaitf_df['estimate'].iloc[0])).float()
model.features_GaitF = torch.tensor([all_features.index(item) for item in np.array(gaitf_df['term'][1:]) if item in all_features]).long()

model.GaitM = pya.models.LinearModel(input_dim=len(gaitm_df))
model.GaitM.linear.weight.data = torch.tensor(np.array(gaitm_df['estimate'][1:])).unsqueeze(0).float()
model.GaitM.linear.bias.data = torch.tensor(np.array(gaitm_df['estimate'].iloc[0])).float()
model.features_GaitM = torch.tensor([all_features.index(item) for item in np.array(gaitm_df['term'][1:]) if item in all_features]).long()

model.GripF = pya.models.LinearModel(input_dim=len(gripf_df))
model.GripF.linear.weight.data = torch.tensor(np.array(gripf_df['estimate'][1:])).unsqueeze(0).float()
model.GripF.linear.bias.data = torch.tensor(np.array(gripf_df['estimate'].iloc[0])).float()
model.features_GripF = torch.tensor([all_features.index(item) for item in np.array(gripf_df['term'][1:]) if item in all_features]).long()

model.GaitM = pya.models.LinearModel(input_dim=len(gaitm_df))
model.GaitM.linear.weight.data = torch.tensor(np.array(gaitm_df['estimate'][1:])).unsqueeze(0).float()
model.GaitM.linear.bias.data = torch.tensor(np.array(gaitm_df['estimate'].iloc[0])).float()
model.features_GaitM = torch.tensor([all_features.index(item) for item in np.array(gaitm_df['term'][1:]) if item in all_features]).long()

model.GripM = pya.models.LinearModel(input_dim=len(gripm_df))
model.GripM.linear.weight.data = torch.tensor(np.array(gripm_df['estimate'][1:])).unsqueeze(0).float()
model.GripM.linear.bias.data = torch.tensor(np.array(gripm_df['estimate'].iloc[0])).float()
model.features_GripM = torch.tensor([all_features.index(item) for item in np.array(gripm_df['term'][1:]) if item in all_features]).long()

model.VO2Max = pya.models.LinearModel(input_dim=len(vo2max_df))
model.VO2Max.linear.weight.data = torch.tensor(np.array(vo2max_df['estimate'][1:])).unsqueeze(0).float()
model.VO2Max.linear.bias.data = torch.tensor(np.array(vo2max_df['estimate'].iloc[0])).float()
model.features_VO2Max = torch.tensor([all_features.index(item) for item in np.array(vo2max_df['term'][1:]) if item in all_features]).long()

#### Linear model

In [11]:
base_model_m = pya.models.LinearModel(input_dim=4)

base_model_m.linear.weight.data = torch.tensor(np.array([0.1390346, 0.1787371, 0.1593873, 0.5228411])).unsqueeze(0).float()
base_model_m.linear.bias.data = torch.tensor(np.array([0.0])).float()

model.base_model_m = base_model_m

base_model_f = pya.models.LinearModel(input_dim=4)

base_model_f.linear.weight.data = torch.tensor(np.array([0.1044232, 0.1742083, 0.2278776, 0.4934908])).unsqueeze(0).float()
base_model_f.linear.bias.data = torch.tensor(np.array([0.0])).float()

model.base_model_f = base_model_f

## Load reference values

#### From CSV file

In [12]:
reference_df_f = pd.read_csv('FemaleMedians.csv', index_col=0)
reference_f = reference_df_f.loc[1, model.features[:-3]]
reference_df_m = pd.read_csv('MaleMedians.csv', index_col=0)
reference_m = reference_df_m.loc[1, model.features[:-3]]
reference = (reference_f + reference_m)/2
model.reference_values =  list(reference) + [1] + [65] + [65] #65yo F with 65GrimAge

## Load preprocess and postprocess objects

In [13]:
model.preprocess_name = None
model.preprocess_dependencies = None

In [14]:
model.postprocess_name = None
model.postprocess_dependencies = None

## Check all clock parameters

In [15]:
pya.utils.print_model_details(model)


Model Attributes:

training: True
metadata: {'approved_by_author': '⌛',
 'citation': 'McGreevy, Kristen M., et al. "DNAmFitAge: biological age '
             'indicator incorporating physical fitness." Aging (Albany NY) '
             '15.10 (2023): 3904.',
 'clock_name': 'dnamfitage',
 'data_type': 'methylation',
 'doi': 'https://doi.org/10.18632/aging.204538',
 'notes': 'Reference values is mean between male and female training medians',
 'research_only': None,
 'species': 'Homo sapiens',
 'version': None,
 'year': 2023}
reference_values: [0.521913219528255, 0.28125954210819004, 0.927523008548927, 0.01455467410155745, 0.041014116925727054, 0.12647568639954998, 0.7148617994059816, 0.6786637301838809, 0.909376031310397, 0.1136806555747305, 0.45398237911395245, 0.0544492346267719, 0.7738429377348031, 0.8480746411296824, 0.7667083937960659, 0.0159858833215953, 0.7183128068669931, 0.06813828137044395, 0.939547714031041, 0.8290646522154059, 0.01727972597475225, 0.0697125677059708, 0.36662

## Basic test

In [16]:
torch.manual_seed(42)
input = torch.randn(10, len(model.features), dtype=float)
model.eval()
model.to(float)
pred = model(input)
pred

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], dtype=torch.float64, grad_fn=<IndexPutBackward0>)

## Save torch model

In [17]:
torch.save(model, f"../weights/{model.metadata['clock_name']}.pt")

## Clear directory
<a id="10"></a>

In [18]:
# Function to remove a folder and all its contents
def remove_folder(path):
    try:
        shutil.rmtree(path)
        print(f"Deleted folder: {path}")
    except Exception as e:
        print(f"Error deleting folder {path}: {e}")

# Get a list of all files and folders in the current directory
all_items = os.listdir('.')

# Loop through the items
for item in all_items:
    # Check if it's a file and does not end with .ipynb
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)
        print(f"Deleted file: {item}")
    # Check if it's a folder
    elif os.path.isdir(item):
        remove_folder(item)

Deleted file: Grip_noAge_Females.csv
Deleted file: Grip_noAge_Males.csv
Deleted file: Gait_noAge_Females.csv
Deleted file: VO2maxModel.csv
Deleted file: AllCpGs.json
Deleted file: Gait_noAge_Males.csv
Deleted folder: DNAmFitAge
Deleted file: download.r
Deleted file: FemaleMedians.csv
Deleted file: MaleMedians.csv
