In [2]:
import os
import sys
from IPython.display import HTML, display

# Set TF_ENABLE_ONEDNN_OPTS to '0' in this process's environment.
os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})
# os.environ['CUDA_HOME'] = '/u/local/cuda/12.3'
# os.environ['PATH'] = f"/u/local/cuda/12.3/bin:{os.environ['PATH']}"

# Widen the notebook container; adjust the width % as desired.
notebook_width_css = "<style>.container { width:90% !important; }</style>"
display(HTML(notebook_width_css))

In [None]:
# import os
# from datetime import datetime
# # import pydot
# # import graphviz
# # import pipdeptree

# # !pipdeptree
# # !pip freeze > requirements.txt
# # Step 1: Create the requirements directory and filenames
# # os.makedirs("requirements", exist_ok=True)
# current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
# filename_basic = f"requirements_{current_time}.txt"
# filename_detailed = f"requirements-detailed_{current_time}.txt"
# filename_svg = f"dependencies_{current_time}.svg"
# filepath_basic = os.path.join("requirements", filename_basic)
# filepath_detailed = os.path.join("requirements", filename_detailed)
# filepath_svg = os.path.join("requirements", filename_svg)

# # Step 2: Run the commands and save the output to files without printing to the cell
# !pipdeptree 2>/dev/null | grep -E '^\w+' > {filepath_basic} 2>/dev/null
# !pipdeptree --freeze > {filepath_detailed} 2> /dev/null
# !pipdeptree --graph-output dot 2> /dev/null | dot -Tsvg -o {filepath_svg} 2> /dev/null

In [3]:
import inspect
import json
import pickle
import shutil

import cupy as cp
#import cudf
#import dask_cudf
#from dask_cuda import LocalCUDACluster
#import dask
#from dask.distributed import Client, wait
import pyarrow.feather as fth
#import dask.dataframe as dd
import torch
import pandas as pd
import pyaging as pya
from pygam import LinearGAM, LogisticGAM, s
import numpy as np
import scipy.sparse as sp
from scipy import stats, sparse
import seaborn as sns
import matplotlib.pyplot as plt
from numba import jit, prange
import matplotlib.gridspec as gridspec
from mepylome import Manifest
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.gam.api import GLMGam, BSplines
from statsmodels.stats.anova import anova_lm
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.experimental import enable_iterative_imputer  # Enable experimental features first
from sklearn.impute import KNNImputer, IterativeImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from patsy import dmatrix
#import tensorflow as tf
#tf.get_logger().setLevel('ERROR')
#from tensorflow.keras.layers import Input, Dense
#from tensorflow.keras.models import Model
from scipy.stats import pearsonr

In [4]:
# Instantiate pyaging's GrimAge2 model object; the cells below attach its metadata,
# feature list, sub-model layers, base model and reference values.
model = pya.models.GrimAge2()

In [5]:
# Descriptive metadata for the clock; 'clock_name' is also used later to name the saved weights file.
grimage2_metadata = dict(
    clock_name='grimage2',
    data_type='methylation',
    species='Homo sapiens',
    year=2022,
    approved_by_author='⌛',
    citation='Lu, Ake T., et al. "DNA methylation GrimAge version 2." Aging (Albany NY) 14.23 (2022): 9484.',
    doi="https://doi.org/10.18632/aging.204434",
    research_only=True,
    notes=None,
)
model.metadata.update(grimage2_metadata)

In [6]:
# Supporting files for GrimAge2: per-subcomponent coefficients, the final
# coefficient table, and the annotation file that provides the 'gold' reference values.
urls = [
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2_subcomponents.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/datMiniAnnotation3_Gold.csv",
]
# Named `download_dir` rather than `dir` so the built-in dir() is not shadowed
# for the remainder of the kernel session.
download_dir = "."
logger = pya.logger.Logger()

for url in urls:
    pya.utils.download(url, download_dir, logger, indent_level=1)

|-----------> Data found in ./grimage2_subcomponents.csv
|-----------> Data found in ./grimage2.csv
|-----------> Data found in ./datMiniAnnotation3_Gold.csv


In [None]:
# Read the subcomponent coefficient table and the final GrimAge2 coefficient table.
# Both files are indexed by their first column.
df = pd.read_csv('grimage2_subcomponents.csv', index_col=0)
df_grimage = pd.read_csv('grimage2.csv', index_col=0)

# Feature list = sorted unique predictor names from the 'var' column with the first
# two entries dropped, followed by 'Female' and 'Age' as the last two features.
# NOTE(review): presumably the two dropped entries are the non-CpG names that sort
# first — confirm against the CSV contents.
sorted_predictors = np.unique(df['var'])
all_features = [*sorted_predictors[2:].tolist(), 'Female', 'Age']
model.features = all_features

In [None]:
# Function to load weights for subcomponents
def load_model_weights(y_pred, model_attr):
    """Build one linear sub-model from the subcomponent table and attach it to ``model``.

    Reads the module-level ``df`` (columns 'Y.pred', 'var', 'beta'), ``all_features``
    and ``model``. For the rows whose 'Y.pred' equals ``y_pred``, the first row's
    'beta' is used as the bias and the remaining rows' 'beta' values as the weights.

    Args:
        y_pred: Value of the 'Y.pred' column selecting the subcomponent's rows.
        model_attr: Attribute name under which the layer is stored on ``model``;
            the matching column indices are stored as ``features_<model_attr>``.

    Raises:
        ValueError: If no row matches ``y_pred``, or if the number of predictors
            found in ``all_features`` differs from the number of weights (which
            would otherwise silently pair weights with the wrong columns).
    """
    # Select the subcomponent's rows once instead of re-evaluating the mask per use.
    rows = df.loc[df['Y.pred'] == y_pred]
    if rows.empty:
        raise ValueError(f"No rows with Y.pred == {y_pred!r} in the subcomponent table")

    # Name -> first position lookup; same result as list.index() without the linear scan.
    feature_position = {}
    for position, name in enumerate(all_features):
        feature_position.setdefault(name, position)

    indices = torch.tensor(
        [feature_position[name] for name in rows['var'] if name in feature_position],
        dtype=torch.long,
    )

    betas = rows['beta']
    # Row 0 supplies the bias; rows 1.. supply the weights, in table order.
    weights = torch.tensor(betas.iloc[1:].values).unsqueeze(0).float()
    bias = torch.tensor(betas.iloc[0]).float()

    # Indices are filtered by name but weights are sliced by position, so the two
    # must have the same length for weight i to line up with column indices[i].
    if weights.shape[1] != len(indices):
        raise ValueError(
            f"Subcomponent {y_pred!r}: {len(indices)} predictors found in all_features "
            f"but {weights.shape[1]} weights in the table"
        )

    model_layer = pya.models.LinearModel(input_dim=len(indices))
    model_layer.linear.weight.data = weights
    model_layer.linear.bias.data = bias
    setattr(model, model_attr, model_layer)
    setattr(model, f'features_{model_attr}', indices)

# Pairs of ('Y.pred' label in the subcomponent table, attribute name on the model).
# One linear sub-model is loaded per pair, in the order listed.
components = dict(
    [
        ('DNAmPACKYRS', 'PACKYRS'),
        ('DNAmadm', 'ADM'),
        ('DNAmB2M', 'B2M'),
        ('DNAmCystatin_C', 'CystatinC'),
        ('DNAmGDF_15', 'GDF15'),
        ('DNAmleptin', 'Leptin'),
        ('DNAmpai_1', 'PAI1'),
        ('DNAmTIMP_1', 'TIMP1'),
        ('DNAmlog.CRP', 'LogCRP'),
        ('DNAmlog.A1C', 'A1C'),
    ]
)

for y_pred, model_attr in components.items():
    load_model_weights(y_pred, model_attr)

# Base model: a single linear layer with one weight per row of the GrimAge2
# coefficient table ('beta' column, in table order) and a bias fixed at zero.
grimage_betas = df_grimage['beta'].tolist()
base_weight = torch.tensor(grimage_betas).unsqueeze(0).float()
base_bias = torch.tensor([0]).float()

base_model = pya.models.LinearModel(input_dim=len(df_grimage))
base_model.linear.weight.data = base_weight
base_model.linear.bias.data = base_bias
model.base_model = base_model

In [None]:
# Reference ('gold') value for every feature except the last two, looked up by
# feature name in the annotation table.
reference_df = pd.read_csv('datMiniAnnotation3_Gold.csv', index_col=0)
methylation_features = model.features[:-2]
gold_values = reference_df.loc[methylation_features, 'gold'].tolist()

# The last two features are 'Female' and 'Age'; use 1 and 65 for them.
model.reference_values = gold_values + [1, 65]  # Example: 65-year-old female

In [None]:
# Weights are written to ../weights/<clock_name>.pt; create the folder on first use.
weights_dir = "../weights"
os.makedirs(weights_dir, exist_ok=True)

# Serialize the whole model object (not just a state dict).
weights_path = os.path.join(weights_dir, f"{model.metadata['clock_name']}.pt")
torch.save(model, weights_path)

In [None]:
# Model features that have no matching column in the methylation data.
# NOTE(review): methylation_data_subset_pd is not defined in any visible cell —
# it must be loaded before this cell for Restart & Run All to succeed.
data_columns = methylation_data_subset_pd.columns
missing_features = [name for name in model.features if name not in data_columns]
missing_features

In [None]:
# Build the model input and run the clock.
#
# The input must have exactly one column per entry of model.features, in that order:
# load_model_weights stores positions within the full feature list (model.features_*),
# which are presumably used by the model's forward pass to pick columns by position
# (confirm against pyaging's GrimAge2.forward). Dropping absent columns would shift
# every later column onto the wrong coefficient, so absent features are instead
# filled with the corresponding model.reference_values entry.
# NOTE(review): methylation_data_subset_pd is not defined in any visible cell.
available_features = [feature for feature in model.features if feature in methylation_data_subset_pd.columns]
absent_features = [feature for feature in model.features if feature not in methylation_data_subset_pd.columns]

input_frame = methylation_data_subset_pd.reindex(columns=model.features)
if absent_features:
    # NOTE(review): if 'Female' or 'Age' are absent they receive the example
    # defaults (1 and 65) from model.reference_values — supply real values instead.
    print(f"Filling {len(absent_features)} absent feature(s) with reference values")
    reference_by_feature = dict(zip(model.features, model.reference_values))
    input_frame = input_frame.fillna(
        {feature: reference_by_feature[feature] for feature in absent_features}
    )
input_data_subset = input_frame.values

# Use float64 for both the input and the model: a float32 input against
# float64 parameters fails with a dtype mismatch in the linear layers.
input_tensor_subset = torch.tensor(input_data_subset, dtype=torch.float64)
model.eval()
model.to(torch.float64)
pred = model(input_tensor_subset)
print(pred)

In [None]:
def remove_folder(path):
    """Recursively delete the directory at ``path`` and report the outcome.

    Prints a confirmation on success. Any exception raised by the deletion is
    caught and printed rather than propagated, so callers can keep going.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")

# Snapshot of the entry names (files and folders) in the current working directory.
all_items = os.listdir(os.curdir)

In [None]:
# Clean up the working directory: delete every file that is not a notebook and
# every sub-directory. WARNING: this removes ALL directories next to the notebook
# (including hidden ones), so only run it from a dedicated scratch folder.
for item in os.listdir('.'):
    if os.path.isfile(item) and not item.endswith('.ipynb'):
        try:
            os.remove(item)
        except OSError as e:
            # Report and continue so one undeletable file does not abort the cleanup.
            print(f"Error deleting file {item}: {e}")
    elif os.path.isdir(item):
        # Use the helper from the previous cell: it reports failures (e.g. a
        # symlinked directory, which shutil.rmtree refuses) instead of raising.
        remove_folder(item)