In [1]:
import os
import shutil

import joblib
import numpy as np
import pandas as pd
import pyaging as pya
import torch
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import ARDRegression, ElasticNet

In [2]:
# Fetch the HistoneClocks repository; later cells read model pickles and gene metadata
# from paths inside it. Skip the clone when the checkout is already present (e.g. when
# the cell is re-run), and fail here if git reports an error rather than letting a later
# cell stumble over missing files.
if os.path.isdir("HistoneClocks"):
    clone_status = 0
else:
    clone_status = os.system("git clone https://github.com/rsinghlab/HistoneClocks.git")
if clone_status != 0:
    raise RuntimeError(f"git clone of HistoneClocks failed (exit status {clone_status})")
# Leave the status as the cell's last expression so it is still displayed.
clone_status

Cloning into 'HistoneClocks'...


0

In [3]:
# Histone mark handled by this notebook; it selects which model files are loaded.
histone = 'H3K4me1'

# NOTE(review): joblib.load unpickles these files, and unpickling can execute arbitrary
# code. They come from the cloned HistoneClocks checkout, so only run this cell against
# a source you trust.
models_dir = 'HistoneClocks/results/models/'

feature_selector_path = models_dir + histone + '_feature_selector.pkl'
feature_selector = joblib.load(feature_selector_path)

dim_reduction_path = models_dir + histone + '_dim_reduction.pkl'
dim_reduction = joblib.load(dim_reduction_path)

ard_model_path = models_dir + histone + '_model.pkl'
ard_model = joblib.load(ard_model_path)

# Gene table: keep only rows whose 'chr' value is one of chromosomes 1-22 or X,
# then index the table by gene_id.
genes = pd.read_csv('HistoneClocks/metadata/Ensembl-105-EnsDb-for-Homo-sapiens-genes.csv')
chromosomes = ['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '3', '4', '5', '6', '7', '8', '9', 'X']
genes = genes[genes['chr'].isin(chromosomes)]
genes.index = genes.gene_id

# Genes whose feature-selector coefficient is non-zero form the feature list.
# The mask is applied positionally, so it must have exactly one entry per remaining
# gene; check that explicitly to get a clear error if the table and model disagree.
selected_mask = np.abs(feature_selector.coef_) > 0
if selected_mask.shape != (len(genes),):
    raise ValueError(
        f"feature selector has coefficients of shape {selected_mask.shape}, "
        f"but the filtered gene table has {len(genes)} rows"
    )
features = genes.gene_id[selected_mask].tolist()

In [4]:
# Copy the fitted parameters out of the loaded models as float32 tensors.
# `rotation` is the transpose of the dimensionality-reduction components matrix.
rotation = torch.tensor(dim_reduction.components_.T, dtype=torch.float32)
# Regression coefficients, and the intercept wrapped in a one-element tensor.
model_weights = torch.tensor(ard_model.coef_, dtype=torch.float32)
intercept = torch.tensor([ard_model.intercept_], dtype=torch.float32)

In [5]:
# Size the model from the rotation matrix: first dimension is the input size,
# second dimension is the number of components.
num_features, num_components = rotation.size(0), rotation.size(1)

# Create the model, then overwrite its parameters with the extracted values.
model = pya.models.PCARDModel(
    input_dim=num_features,
    pc_dim=num_components,
)

model.linear.bias.data = intercept
# unsqueeze(0) adds a leading dimension of size 1 to the coefficient tensor.
model.linear.weight.data = model_weights.unsqueeze(dim=0)
model.rotation.data = rotation

In [6]:
# Bundle saved as the clock's weights file: no pre-/post-processing entries are set
# (all four are None), followed by the feature list and the model's state dict.
weights_dict = {
    **dict.fromkeys(
        ('preprocessing', 'preprocessing_helper', 'postprocessing', 'postprocessing_helper')
    ),
    'features': features,
    'weight_dict': model.state_dict(),
}

# Descriptive metadata saved alongside the weights: species, data type, and the
# publication the clock comes from.
metadata_dict = {
    'species': 'Homo sapiens',
    'data_type': 'histone_mark',
    'year': 2023,
    'citation': 'de Lima Camillo, Lucas Paulo, et al. "Histone mark age of human tissues and cells." bioRxiv (2023): 2023-08.',
    'doi': 'https://doi.org/10.1101/2023.08.21.554165',
    'notes': 'This is still a preprint, so the model might change',
}

In [7]:
# Write the weights bundle and the metadata to sibling directories under the same
# file stem. The stem is derived from `histone` (lower-cased, 'H3K4me1' -> 'h3k4me1')
# so the output name cannot drift from the mark whose models were loaded.
clock_name = histone.lower()
torch.save(weights_dict, f'../weights/{clock_name}.pt')
torch.save(metadata_dict, f'../metadata/{clock_name}.pt')

In [8]:
# Remove the temporary HistoneClocks checkout. shutil.rmtree deletes the tree
# in-process, so the cell does not depend on a Unix `rm` being available and raises
# if the deletion fails; the isdir guard makes re-running the cell a no-op.
if os.path.isdir("HistoneClocks"):
    shutil.rmtree("HistoneClocks")

0