# Steps to run the deep learning epigenetic clock

Author: Jose Jaime Martinez Magana

Day: 20 February 2023

This script will run the deep learning epigenetic clocks in python.
See the AltumAge GitHub repository (https://github.com/rsinghlab/AltumAge) for a complete description of the deep learning epigenetic clock.

To run these clocks, the beta matrices should first be normalized with BMIQ.

In [None]:
# if your server uses the slurm job handler, the next line requests an interactive session with 32G of memory
# skip (or comment out) the next line if you are not running on a slurm cluster
srun --pty --mem=32G -p interactive bash
# if your server uses modules to handle packages and conda/anaconda/miniconda is installed, load your conda
# skip (or comment out) the next line if conda is already available in your shell
module load miniconda
# activate the conda environment that holds the epigenetic clock dependencies
conda activate epigenetic_clocks

In [None]:
# fetch the AltumAge model, its scaler and the CpG list from github
# change the path below to the directory where the files should be saved
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/databases/epigenetic_clocks
# every dependency lives in the same folder of the repository, so loop over the file names
for dependency in AltumAge.h5 scaler.pkl multi_platform_cpgs.pkl; do
    wget -O "${dependency}" "https://github.com/rsinghlab/AltumAge/raw/main/example_dependencies/${dependency}"
done

In [None]:
# opening python
python

# running the deep learning clock
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import linear_model, preprocessing
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

# directory where the AltumAge files were saved with wget
clock_dir = '/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/databases/epigenetic_clocks'

# loading the AltumAge scaler from the local copy that was downloaded
# (the github "blob" URL serves an HTML page, not the pickle, so it cannot be unpickled)
# NOTE: unpickling runs arbitrary code, only load pickle files from a source you trust
scaler = pd.read_pickle(clock_dir + '/scaler.pkl')

# loading AltumAge
AltumAge = tf.keras.models.load_model(clock_dir + '/AltumAge.h5')

# loading cpgs sites (the order of this array is the input order expected by the model)
cpgs = np.array(pd.read_pickle(clock_dir + '/multi_platform_cpgs.pkl'))

# loading the beta matrix that we previously generated for glint
data = pd.read_table("/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/databases/qced/glint/qced_data_v02152023_datafile_beta.txt")

# if you use the beta matrix transpose the data
# AltumAge requires cpgs as columns and samples as rows
data_transposed = data.transpose()

# subsetting to the AltumAge cpgs that are present in the beta matrix
data_transposed_filtered = data_transposed.filter(cpgs)

# stop early if nothing matched, otherwise every input would be imputed
# and the predictions would be meaningless
if data_transposed_filtered.shape[1] == 0:
    raise ValueError(
        "None of the AltumAge CpGs were found in the beta matrix; "
        "check that the CpG IDs are the row names of the input file."
    )

# build the model input with exactly the AltumAge cpgs, in the AltumAge order
# cpgs absent from the beta matrix become all-NaN columns
# (merging and dropping duplicates would append the absent cpgs at the end
# and break the column order the network was trained with)
data_merged = data_transposed.reindex(columns=cpgs)

# detecting missing values
n_missing_cpgs = len(cpgs) - data_transposed_filtered.shape[1]
print(f"{n_missing_cpgs} of {len(cpgs)} AltumAge CpGs are absent from the beta matrix")
print(data_merged.isnull().sum())

# scale with the scaler distributed with AltumAge
# do not fit a new scaler on this dataset: the model weights were trained on
# values transformed by the AltumAge scaler
data_merged_scal = pd.DataFrame(scaler.transform(data_merged), columns=data_merged.columns, index=data_merged.index)

# imputing missing values with zero after scaling
# NOTE(review): zero corresponds to the training center of each CpG only if the
# AltumAge scaler centers the data (e.g. RobustScaler) - confirm
data_merged_imp = data_merged_scal.fillna(0)

# we should revise the prediction accuracy of AltumAge because we are using
# imputation to fill all missing columns with zero

# running AltumAge
pred_age_AltumAge = AltumAge.predict(data_merged_imp).flatten()
# making pandas dataframe with one row per sample and a named column
pred_age_AltumAge_df = pd.DataFrame(pred_age_AltumAge, index=data_merged_imp.index, columns=["AltumAge"])

# saving output
pred_age_AltumAge_df.to_csv("/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/results/epigenetic_age/altumage.csv")

# exit python
exit()