In [1]:
import anndata as ad
from mpmath import zeros

In [2]:
# Read de_test_pred.h5ad from the neurips-2023 data directory into an AnnData object.
# The path is relative to the working directory the notebook is run from.
de_test_pred = ad.read_h5ad("../../data/neurips-2023-data/de_test_pred.h5ad")

In [14]:
# Read output_mean_across_celltypes.h5ad from the perturbench output directory.
# NOTE(review): presumably the predictions of the "mean_across_celltypes" method — confirm.
preds = ad.read_h5ad("../../data/perturbench_data/output/output_mean_across_celltypes.h5ad")

In [23]:
def anndata_to_dataframe(adata, layer_name="clipped_sign_log10_pval", metadata_cols=None):
    """Flatten one layer of an AnnData-like object into a single wide DataFrame.

    The result has the selected ``adata.obs`` metadata columns first, followed
    by one column per entry of ``adata.var_names`` holding the values of
    ``adata.layers[layer_name]``. Rows keep the order of ``adata.obs`` but the
    index is reset to a fresh ``0..n-1`` range.

    Parameters
    ----------
    adata
        Object exposing ``obs`` (DataFrame), ``layers`` (mapping of name to
        2-D matrix) and ``var_names``.
    layer_name
        Key of the layer in ``adata.layers`` to export.
    metadata_cols
        Names of the ``adata.obs`` columns to keep. ``None`` (the default)
        selects the original fixed set: ``cell_type``, ``sm_name``,
        ``sm_lincs_id``, ``SMILES``, ``split`` and ``control``.

    Returns
    -------
    pandas.DataFrame
        Metadata columns followed by the layer's value columns.

    Raises
    ------
    KeyError
        If a requested metadata column is missing from ``adata.obs`` or
        ``layer_name`` is not present in ``adata.layers``.
    """
    # Imported locally so the function does not rely on a notebook-level pandas import.
    import pandas as pd

    if metadata_cols is None:
        metadata_cols = ['cell_type', 'sm_name', 'sm_lincs_id', 'SMILES', 'split', 'control']

    # Work on a copy so the dtype conversion below never touches adata.obs.
    metadata = adata.obs[list(metadata_cols)].copy()

    # Turn every categorical column into plain strings.
    for col in metadata.select_dtypes(include=["category"]).columns:
        metadata[col] = metadata[col].astype(str)

    values = adata.layers[layer_name]
    # Layers may be stored as sparse matrices; densify anything exposing
    # `toarray` so the DataFrame constructor receives a regular 2-D array.
    if hasattr(values, "toarray"):
        values = values.toarray()

    data = pd.DataFrame(
        values,
        columns=adata.var_names,
        index=adata.obs.index,
    )

    return pd.concat([metadata, data], axis=1).reset_index(drop=True)

In [44]:
import pandas as pd
import anndata as ad

## VIASH START
# Parameters for running this cell by hand. Paths are relative to the notebook's
# working directory, following the "../../data/..." convention of the other cells,
# so no user-specific home directory is baked into the notebook.
# NOTE(review): the VIASH START/END markers presumably delimit a block that viash
# overwrites when the code is packaged as a component — confirm.
par = {
  "de_test_h5ad": "../../data/neurips-2023-data/de_test_pred.h5ad",
  "layer": "clipped_sign_log10_pval",
  "id_map": "../../data/neurips-2023-data/id_map.csv",
  "output": "../../data/perturbench_data/output/latent_additive_output.h5ad",
}
meta = {
    "functionality_name": "latent_additive",
}
## VIASH END

# Load the test-set AnnData (de_test_h5ad is read here instead of de_train_h5ad),
# the id map CSV, and the gene names taken from the AnnData's var_names.
de_test_h5ad = ad.read_h5ad(par["de_test_h5ad"])
id_map = pd.read_csv(par["id_map"])
gene_names = list(de_test_h5ad.var_names)

In [45]:
# Flatten the configured layer of de_test_h5ad into one wide table:
# obs metadata columns followed by one column per var_name.
de_test = anndata_to_dataframe(adata=de_test_h5ad, layer_name=par["layer"])

In [46]:
# Build a "<cell_type>_<sm_name>" join key on both tables.
de_test["combined_key"] = de_test["cell_type"].astype(str) + "_" + de_test["sm_name"].astype(str)
id_map["combined_key"] = id_map["cell_type"].astype(str) + "_" + id_map["sm_name"].astype(str)

# Pick the de_test rows in exactly the order id_map lists them.
# `.loc` raises KeyError when an id_map key has no matching row in de_test.
filtered_data = de_test.set_index("combined_key").loc[id_map["combined_key"]]

# If a key occurs more than once in de_test, `.loc` returns every matching row,
# so the selection gets longer than id_map and rows no longer line up with
# id_map["id"]. Fail loudly here instead of producing misaligned output.
if len(filtered_data) != len(id_map):
    raise ValueError(
        f"Selected {len(filtered_data)} rows from de_test for {len(id_map)} id_map entries; "
        "de_test contains duplicated (cell_type, sm_name) combinations."
    )

# Drop the key index so rows are positionally aligned with id_map.
filtered_data = filtered_data.reset_index(drop=True)

In [47]:
# Prepare each component of the result object, then assemble it in one call.
prediction_layers = {"prediction": filtered_data[gene_names].values}
output_obs = pd.DataFrame(index=id_map["id"])  # one row per id_map id
output_var = pd.DataFrame(index=gene_names)  # one column per gene name
output_uns = {
    "dataset_id": de_test_h5ad.uns["dataset_id"],
    "method_id": meta["functionality_name"],
}

output = ad.AnnData(
    layers=prediction_layers,
    obs=output_obs,
    var=output_var,
    uns=output_uns,
)

# Write the result to the configured output path as a gzip-compressed h5ad file.
output.write_h5ad(par["output"], compression="gzip")



In [51]:
# Score files of two methods, read from the perturbench output directory.
mean_scores_path = "../../data/perturbench_data/output/output_scores_mean.h5ad"
la_scores_path = "../../data/perturbench_data/output/output_scores_latent_additive.h5ad"

mean_scores = ad.read_h5ad(mean_scores_path)
la_scores = ad.read_h5ad(la_scores_path)



In [54]:
# Read the score file of the "zeros" method from the perturbench output directory.
zeros_scores = ad.read_h5ad("../../data/perturbench_data/output/output_scores_zeros.h5ad")



In [55]:
# Display the .uns metadata of the "zeros" scores: dataset id, method id, metric ids and metric values.
zeros_scores.uns

{'dataset_id': 'neurips-2023-data',
 'method_id': 'zeros',
 'metric_ids': array(['mean_rowwise_rmse', 'mean_rowwise_mae'], dtype=object),
 'metric_values': array([0.91792511, 0.63508651])}

In [52]:
# Display the .uns metadata of the "mean_across_celltypes" scores: dataset id, method id, metric ids and metric values.
mean_scores.uns

{'dataset_id': 'neurips-2023-data',
 'method_id': 'mean_across_celltypes',
 'metric_ids': array(['mean_rowwise_rmse', 'mean_rowwise_mae'], dtype=object),
 'metric_values': array([0.89247379, 0.64371648])}

In [53]:
# Display the .uns metadata of the "latent_additive" scores.
# In the outputs recorded in this notebook, both of its metric values
# (mean_rowwise_rmse and mean_rowwise_mae) are higher than those of the
# "zeros" and "mean_across_celltypes" methods.
la_scores.uns

{'dataset_id': 'neurips-2023-data',
 'method_id': 'latent_additive',
 'metric_ids': array(['mean_rowwise_rmse', 'mean_rowwise_mae'], dtype=object),
 'metric_values': array([1.16177953, 0.82232145])}