In [1]:
import os
# Point CUDA_HOME at the CUDA 12.3 install and put its bin directory first on PATH.
cuda_root = '/u/local/cuda/12.3'
os.environ['CUDA_HOME'] = cuda_root
os.environ['PATH'] = f"{cuda_root}/bin:{os.environ['PATH']}"

In [None]:
# Report the pip and conda versions visible to the shell, then install the
# libmamba solver into the base conda environment and select it as the solver.
!pip --version
!conda --version
!conda install -n base conda-libmamba-solver -y
# NOTE(review): unlike the lines above, this one has no leading "!", so it only
# runs if IPython dispatches it to a conda magic — confirm that is intended.
conda config --set solver libmamba

pip 24.2 from /u/home/c/cobeaman/.local/lib/python3.9/site-packages/pip (python 3.9)
conda 23.1.0
done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Solving environment: failed with repodata from current_repodata.json, will retry with next repodata source.
done
Solving environment: - 

In [None]:
!pip install \
    --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \
    "cudf-cu12>=24.8.1,<=24.8.1" "dask-cudf-cu12>=24.8.1,<=24.8.1" \
    "dask-cuda>=24.8.1,<=24.8.1"

In [None]:
pip install torch pandas numpy pyaging

In [2]:
import os
import inspect
import shutil
import json
import torch
import pandas as pd
import pyaging as pya
import numpy as np

In [None]:
# Read the two methylation tables; the first CSV column becomes the row index.
subset_csv_path = '/u/project/ophoff/cobeaman/Tools/DNAmGrimAgeGitHub/input/mymetharray_subset_2458_r_653_c_08092024_082626.csv'
final_csv_path = '/u/project/ophoff/cobeaman/Tools/DNAmGrimAgeGitHub/input/mymetharray_final_2458_r_731788_c_08092024_081841.csv'

methylation_data_subset = pd.read_csv(subset_csv_path, index_col=0)
methylation_data_final = pd.read_csv(final_csv_path, index_col=0)

In [None]:
# Recode 'Female' as an integer flag in both tables: 1 where the stored value
# equals 1, 0 for anything else (the column is overwritten in place).
for frame in (methylation_data_final, methylation_data_subset):
    frame['Female'] = frame['Female'].eq(1).astype(int)

# Rows with missing values are deliberately not dropped at this step.

In [None]:
# Turn each DataFrame into an AnnData object, declaring 'Female' and 'age' as
# metadata columns and requesting the 'knn' imputer strategy.
adata_final, adata_subset = (
    pya.pp.df_to_adata(frame, metadata_cols=['Female', 'age'], imputer_strategy='knn')
    for frame in (methylation_data_final, methylation_data_subset)
)

In [None]:
# Predict age using GrimAge2 for both datasets.
# The return value of the first call is discarded; the second call is the
# cell's last expression, so whatever it returns is shown as the cell output.
pya.pred.predict_age(adata_final, ['GrimAge2'])
pya.pred.predict_age(adata_subset, ['GrimAge2'])

In [None]:
# Map each output CSV name to the AnnData object whose predictions it stores.
prediction_outputs = {
    'GrimAge2_predictions_final.csv': adata_final,
    'GrimAge2_predictions_subset.csv': adata_subset,
}

# Show the first rows of the 'GrimAge2' column for each dataset.
for adata in prediction_outputs.values():
    print(adata.obs[['GrimAge2']].head())

# Write the 'GrimAge2' column of each dataset to its CSV in the working directory.
for csv_name, adata in prediction_outputs.items():
    adata.obs[['GrimAge2']].to_csv(csv_name)

In [None]:
# Instantiate pyaging's GrimAge2 model class; the following cells attach
# metadata, the feature list, per-component layers and reference values to it.
model = pya.models.GrimAge2()

In [9]:
# Descriptive metadata for the clock (name, data type, species, publication).
grimage2_metadata = {
    "clock_name": 'grimage2',
    "data_type": 'methylation',
    "species": 'Homo sapiens',
    "year": 2022,
    "approved_by_author": '⌛',
    "citation": "Lu, Ake T., et al. \"DNA methylation GrimAge version 2.\" Aging (Albany NY) 14.23 (2022): 9484.",
    "doi": "https://doi.org/10.18632/aging.204434",
    "research_only": True,
    "notes": None
}
# Merge into the model's existing metadata mapping, overwriting matching keys.
model.metadata.update(grimage2_metadata)

In [None]:
# Destination folder and logger handed to every download call.
# NOTE(review): `dir` shadows the Python built-in of the same name; the name is
# kept unchanged in case other cells refer to it.
dir = "."
logger = pya.logger.Logger()

# Supporting files: subcomponent coefficients, top-level coefficients, and the
# gold-standard reference annotation.
urls = [
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2_subcomponents.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/grimage2.csv",
    "https://pyaging.s3.amazonaws.com/supporting_files/datMiniAnnotation3_Gold.csv",
]

# Fetch each file in list order.
for url in urls:
    pya.utils.download(url, dir, logger, indent_level=1)


In [None]:
# Coefficient tables: `df` holds the per-subcomponent rows ('Y.pred', 'var',
# 'beta' are read from it below), `df_grimage` the top-level 'beta' values.
df = pd.read_csv('grimage2_subcomponents.csv', index_col=0)
df_grimage = pd.read_csv('grimage2.csv', index_col=0)

# Sorted unique 'var' names with the first two entries skipped, followed by
# the two covariates.
# NOTE(review): the [2:] slice presumably drops two non-feature labels that
# sort first — confirm against the CSV contents.
sorted_var_names = np.unique(df['var'])
all_features = [*sorted_var_names[2:].tolist(), 'Female', 'Age']
model.features = all_features

In [11]:
# Function to load weights for subcomponents
def load_model_weights(y_pred, model_attr):
    """Build the linear layer of one subcomponent and attach it to ``model``.

    Args:
        y_pred: Value of the 'Y.pred' column of ``df`` that selects the rows
            belonging to this subcomponent.
        model_attr: Attribute name under which the layer is stored on
            ``model``. The matching feature indices are stored next to it as
            ``features_<model_attr>``.

    The 'beta' of the first selected row is used as the bias and the 'beta'
    values of all remaining rows as the weights. The indices are the positions
    in ``all_features`` of the selected 'var' names that occur in that list.
    """
    # Filter the coefficient table once instead of once per use.
    component_rows = df.loc[df['Y.pred'] == y_pred]
    betas = component_rows['beta']

    # Name -> first position lookup; avoids a linear list scan per feature.
    position_of = {}
    for position, feature_name in enumerate(all_features):
        position_of.setdefault(feature_name, position)

    indices = torch.tensor(
        [position_of[item] for item in component_rows['var'] if item in position_of]
    ).long()

    model_layer = pya.models.LinearModel(input_dim=len(indices))
    # Positional split: row 0 is the bias term, rows 1.. are the weights.
    model_layer.linear.weight.data = torch.tensor(betas.iloc[1:].values).unsqueeze(0).float()
    model_layer.linear.bias.data = torch.tensor(betas.iloc[0]).float()

    setattr(model, model_attr, model_layer)
    setattr(model, f'features_{model_attr}', indices)

# 'Y.pred' label in the coefficient table -> attribute name on the model.
components = {
    'DNAmPACKYRS': 'PACKYRS',
    'DNAmadm': 'ADM',
    'DNAmB2M': 'B2M',
    'DNAmCystatin_C': 'CystatinC',
    'DNAmGDF_15': 'GDF15',
    'DNAmleptin': 'Leptin',
    'DNAmpai_1': 'PAI1',
    'DNAmTIMP_1': 'TIMP1',
    'DNAmlog.CRP': 'LogCRP',
    'DNAmlog.A1C': 'A1C',
}

# Build and attach one linear layer per subcomponent, in dictionary order.
for y_pred in components:
    model_attr = components[y_pred]
    load_model_weights(y_pred, model_attr)

# Top-level linear layer: one weight per row of `df_grimage` and a zero bias.
grimage_betas = torch.tensor(df_grimage['beta'].tolist())

base_model = pya.models.LinearModel(input_dim=len(df_grimage))
base_model.linear.weight.data = grimage_betas.unsqueeze(0).float()
base_model.linear.bias.data = torch.zeros(1).float()

model.base_model = base_model

In [None]:
# Reference table indexed by feature name; its 'gold' column supplies one
# reference value for every model feature except the last two.
reference_df = pd.read_csv('datMiniAnnotation3_Gold.csv', index_col=0)
methylation_features = model.features[:-2]
gold_values = reference_df.loc[methylation_features]['gold'].tolist()

# The last two features get fixed reference values instead.
model.reference_values = gold_values + [1, 65]  # Example: 65-year-old female

In [None]:
# No preprocessing step is configured for this model.
model.preprocess_name = model.preprocess_dependencies = None
# Postprocessing is 'cox_to_years', with no dependencies.
model.postprocess_name = 'cox_to_years'
model.postprocess_dependencies = None

In [None]:
# Print pyaging's summary of the assembled model as a check before saving.
pya.utils.print_model_details(model)

In [None]:
# Serialize the whole model object to ../weights/<clock_name>.pt.
weights_path = f"../weights/{model.metadata['clock_name']}.pt"
# torch.save does not create missing parent folders, so make sure it exists.
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
torch.save(model, weights_path)

In [None]:
# Extract the model's feature columns, in model order, from both tables.
# NOTE(review): model.features ends with 'Age', while the AnnData cell lists
# 'age' as a column of these tables — confirm the column name used here exists.
input_data_final = methylation_data_final[model.features].values
input_data_subset = methylation_data_subset[model.features].values

# Put the model in inference mode. `model.to(float)` casts its parameters to
# float64, so the input tensors below are built as float64 as well; float32
# inputs would fail against float64 weights.
model.eval()
model.to(float)

# Run the model on the full dataset.
input_tensor_final = torch.tensor(input_data_final, dtype=torch.float64)
pred = model(input_tensor_final)
print(pred)

# Run the model on the subset dataset (the original re-used the full-dataset
# tensor here, so the subset was never actually scored).
input_tensor_subset = torch.tensor(input_data_subset, dtype=torch.float64)
pred = model(input_tensor_subset)
print(pred)

In [None]:
def remove_folder(path):
    """Delete the directory tree at ``path`` on a best-effort basis.

    Success and failure are both reported with ``print``; an exception raised
    while deleting is reported and swallowed rather than propagated.
    Returns ``None``.
    """
    try:
        shutil.rmtree(path)
    except Exception as err:
        print(f"Error deleting folder {path}: {err}")
    else:
        print(f"Deleted folder: {path}")

# Get a list of all files and folders in the current directory
# NOTE(review): `all_items` is not read by any later visible cell; the cleanup
# loop calls os.listdir('.') again on its own.
all_items = os.listdir('.')

In [None]:
# Clean the working directory of scratch artifacts (downloaded supporting
# files, cache folders) while keeping:
#   * notebook files (*.ipynb), and
#   * the prediction CSVs written earlier ('GrimAge2_predictions_*'), which
#     the previous blanket delete removed together with the scratch files.
for item in os.listdir('.'):
    if item.startswith('GrimAge2_predictions_'):
        continue  # analysis results, not scratch data

    if os.path.isdir(item):
        if os.path.islink(item):
            # shutil.rmtree refuses symbolic links; drop only the link itself.
            os.unlink(item)
        else:
            shutil.rmtree(item)
    elif os.path.isfile(item) and not item.endswith('.ipynb'):
        os.remove(item)