In [1]:
import os
from os.path import join
from itertools import chain
import argparse
import yaml

import pandas as pd
import numpy as np

import sklearn
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.ensemble
import sklearn.manifold
import sklearn.linear_model
import sklearn.svm

import torch
import torch.utils.data

import dredda.model as model

import dredda.data as data
import dredda.test as test
import dredda.model as model
import dredda.train as train
from dredda.cloud_files import remote_files_manifest, remote_files_checksum, download_if_not_exist,local_prefix
from dredda.helpers import seed_all
# Later cells use relative paths (e.g. "train_dir/notebook-test", "download/dataset")
# that are resolved against the working directory set here.
# NOTE(review): the target is relative to the current directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
os.chdir("../")

  from .autonotebook import tqdm as notebook_tqdm


# Reproduce the prediction list with the pretrained model

Specify the parameters

In [2]:
from argparse import Namespace

# Run parameters, gathered in one namespace so they can be dumped to YAML below.
args = Namespace(
    ckpt_fp=join(local_prefix, "checkpoint/CRISPRi_LINCS-model-epoch_best-20210320.pt"),
    lincs_level5_fs_table_fp=join(
        local_prefix,
        "dataset/CRISPRi_LINCS_processed/level5_ae_expectation_logtrans_fs_1000-368.hdf",
    ),
    source_dataset_name="CRISPRi",
    target_dataset_name="LINCS",
    out_dir="train_dir/notebook-test",
    reference_score_fp=None,
)
test_out_dir = join(args.out_dir, "test")

# Refuse to reuse an output directory left by a previous run.
if os.path.isdir(test_out_dir):
    raise ValueError(f"{test_out_dir} already exists")
os.makedirs(test_out_dir)

# Record the parameters next to the results; mode "x" fails if the file is already there.
args_fp = join(test_out_dir, "test--args.yaml")
with open(args_fp, "x") as args_file:
    yaml.dump(vars(args), args_file)

Load data

In [3]:
# Read the feature table from the HDF file named in the run parameters and keep
# its transpose; the transposed frame is what the model and evaluator cells use.
level5_fs_df = pd.read_hdf(args.lincs_level5_fs_table_fp)
level5_fs_df_T = level5_fs_df.transpose()

Create model

In [4]:
# Build the network with one input feature per column of the transposed feature
# table, then restore the pretrained weights from the checkpoint file.
model_fp = args.ckpt_fp
model_new = model.FCModelDualBranchAE(n_in_features=level5_fs_df_T.shape[1])
# map_location="cpu" allows a checkpoint that was saved from GPU tensors to be read
# on a machine without CUDA. load_state_dict copies the values into the model's
# existing parameters, so the model's own device placement is not changed.
# NOTE(review): depending on the torch version, torch.load may unpickle arbitrary
# objects -- only load checkpoints obtained from a trusted source.
model_new.load_state_dict(torch.load(model_fp, map_location="cpu"))

<All keys matched successfully>

Get Prediction

In [5]:
# The trainer takes two parameter collections: the parameters of `ae_encoder_t`
# on their own, and the parameters of every other sub-module chained together.
# NOTE(review): what each group is used for, and the meaning of the "source" / True
# arguments to predict(), are defined in dredda.train -- confirm there.
target_encoder_params = model_new.ae_encoder_t.parameters()
remaining_modules = (
    model_new.ae_encoder_s,
    model_new.ae_decoder,
    model_new.feature,
    model_new.class_classifier,
    model_new.domain_classifier,
)
remaining_params = chain(*[module.parameters() for module in remaining_modules])

trainer_new = train.DualBranchDATrainer(
    model_new,
    args.source_dataset_name,
    args.target_dataset_name,
    target_encoder_params,
    remaining_params,
)

# predict() returns a sequence; only its first element is kept as the raw scores.
prediction_outputs = trainer_new.predict(level5_fs_df_T.values, "source", True)
level5_pred_raw = prediction_outputs[0]

def pred_post_process(pred, feature_df):
    """Turn raw per-class scores into a row-normalised prediction table.

    Parameters
    ----------
    pred : 2-D array
        Raw scores, one row per sample and one column per class. At least four
        columns are required because ``class_2`` and ``class_3`` are read.
    feature_df : pandas.DataFrame
        Only its index is used; it becomes the row index of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``class_0`` .. ``class_{k-1}`` plus ``prediction_expected_classes``,
        with a softmax taken across each row over all of those columns.

    Notes
    -----
    NOTE(review): ``prediction_expected_classes`` is the sum of the *raw*
    ``class_2`` and ``class_3`` scores and is then included in the softmax with
    the other columns, so each row sums to 1 over k + 1 columns. This ordering is
    kept unchanged so existing results stay reproducible -- confirm it is intended.
    """
    from scipy.special import softmax

    pred_df = pd.DataFrame(
        pred,
        index=feature_df.index,
        columns=["class_%d" % (i) for i in range(pred.shape[1])],
    )
    pred_df["prediction_expected_classes"] = pred_df.loc[
        :, ["class_2", "class_3"]
    ].sum(axis=1)

    # One vectorised softmax over axis 1 replaces a per-row DataFrame.apply call.
    # The frame is rebuilt explicitly so the result is always a DataFrame, no matter
    # what type softmax returns for a single pandas row.
    normalized = softmax(pred_df.to_numpy(), axis=1)
    return pd.DataFrame(normalized, index=pred_df.index, columns=pred_df.columns)

# Label the raw scores with the feature table's index and normalise each row.
level5_pred_df = pred_post_process(pred=level5_pred_raw, feature_df=level5_fs_df_T)

Evaluate and save prediction

In [6]:
# Reference files read by the evaluator, all located under one dataset root.
root_dir = "download/dataset"
siginfo_fp, exp_results_fp, deccode_plurip_fp = (
    join(root_dir, relative_fp)
    for relative_fp in (
        "LINCS/GSE70138_Broad_LINCS_sig_info_2017-03-06.txt",
        "exp_results_single/exp_results_single.csv",
        "others/deccode_plurip.csv",
    )
)

# Everything the evaluator needs: the predictions, the features they were made
# from, and the reference files above (the reference score path may be None).
evaluator_inputs = dict(
    full_prediction_df=level5_pred_df,
    features_df=level5_fs_df_T,
    siginfo_fp=siginfo_fp,
    ref_score_fp=args.reference_score_fp,
    exp_results_fp=exp_results_fp,
    deccode_plurip_fp=deccode_plurip_fp,
)
lre = test.PredictionEvaluator(**evaluator_inputs)

# Write the prediction table into the run's output directory.
lre.save_prediction(out_dir=test_out_dir)

# Kept as the last expression so the returned scores are shown as the cell output.
lre.save_evaluation_score(out_dir=test_out_dir)

{'entropy_ratio': 0.4007786226992918, 'mean_reciprocal_rank_exp_results': 0.009967780499349432, 'nDCG@50_exp_results': 0.0499856902407718, 'nDCG@100_exp_results': 0.07967856834212413, 'nDCG@150_exp_results': 0.10728093149426379, 'nDCG@200_exp_results': 0.10728093149426379, 'mean_reciprocal_rank_deccode': 0.019108043498413763, 'nDCG@50_deccode': 0.10132434387953139, 'nDCG@100_deccode': 0.13341365300561292, 'nDCG@150_deccode': 0.13341365300561292, 'nDCG@200_deccode': 0.18450497091673512}
