In [2]:
import pickle
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Directory searched for the pickled pair lists
# (pairs_obl<suffix>.pkl and input_pairs_obl<suffix>.pkl).
PAIRS_DIR = "."
# Input directories globbed for raw feature CSV files.
NODEWISE_DIR = "2_outputs/nodewise/"
PAIRWISE_DIR = "2_outputs/pairwise/"
# Output directories for the processed feature CSVs; the cells below create
# them (including parents) when they do not exist yet.
PAIRWISE_PROCESSED_DIR = "./3_outputs/pairwise_processed/"
NODEWISE_PROCESSED_DIR = "./3_outputs/nodewise_processed/"

## Prepare Nodewise and Pairwise Feature Files

In [3]:
def reshape_pairwise_feature(
    pairs_file,
    feature_file,
    out_file,
):
    """Reshape a flat pairwise feature file into a labelled matrix CSV.

    The pickled pairs supply the labels: the sorted unique first elements
    become the row index and the sorted unique second elements become the
    column names. All values read from ``feature_file`` are laid out
    row-major into a ``len(rows) x len(columns)`` table, which is written
    to ``out_file`` (index included).

    Args:
        pairs_file: Path to a pickle holding a sequence of pairs; only
            ``pair[0]`` and ``pair[1]`` are read.
        feature_file: Path to a CSV whose first line is treated as a header.
        out_file: Destination path for the reshaped CSV.

    Raises:
        ValueError: If the number of values in ``feature_file`` does not
            equal ``len(unique firsts) * len(unique seconds)``.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point this at pairs files you trust.
    with open(pairs_file, "rb") as f:
        pairs = pickle.load(f)

    # np.unique returns the labels sorted; the counts are not needed here.
    p0s = np.unique([p[0] for p in pairs])
    p1s = np.unique([p[1] for p in pairs])

    val_df = pd.read_csv(feature_file)
    values = val_df.values

    # Fail early with a message naming both inputs instead of numpy's
    # generic "cannot reshape array" error.
    expected_size = len(p0s) * len(p1s)
    if values.size != expected_size:
        raise ValueError(
            f"{feature_file} holds {values.size} values, but the pairs in "
            f"{pairs_file} define a {len(p0s)} x {len(p1s)} matrix "
            f"({expected_size} values)"
        )

    # NOTE(review): the labels are sorted, so this assumes the values in
    # feature_file are ordered by sorted first element, then sorted second
    # element - confirm against whatever produced the feature file.
    feature_df = pd.DataFrame(
        values.reshape((len(p0s), len(p1s))), columns=p1s, index=p0s
    )
    feature_df.to_csv(out_file)


def reshape_pairwise_feature2(
    pairs_file,
    feature_file,
    out_file,
):
    """Attach row and column labels to a headerless feature matrix CSV.

    Row labels are the sorted unique first elements of the pickled entries;
    column labels are taken from the second element of the first entry.
    The matrix in ``feature_file`` is read without a header row, relabelled,
    and written to ``out_file`` (index included).

    Args:
        pairs_file: Path to a pickle holding a sequence of entries; each
            entry's ``[0]`` is read, and the first entry's ``[1]`` provides
            the column labels.
        feature_file: Path to a CSV with no header line.
        out_file: Destination path for the labelled CSV.
    """
    with open(pairs_file, "rb") as handle:
        input_pairs = pickle.load(handle)

    # np.unique yields the row labels in sorted order.
    row_labels = np.unique([entry[0] for entry in input_pairs])
    first_entry = input_pairs[0]
    column_labels = first_entry[1]

    matrix = pd.read_csv(feature_file, header=None)

    # Relabel rows first, then columns; a length mismatch raises ValueError.
    labelled = matrix.set_axis(row_labels, axis=0).set_axis(column_labels, axis=1)
    labelled.to_csv(out_file)

In [4]:
# Re-write every nodewise feature CSV into the processed output directory.
feature_input_dir = Path(NODEWISE_DIR)
out_dir = Path(NODEWISE_PROCESSED_DIR)
out_dir.mkdir(exist_ok=True, parents=True)

feature_files = list(feature_input_dir.glob("*.csv"))

for feature_file in tqdm(feature_files):
    m_df = pd.read_csv(feature_file)
    # NOTE(review): to_csv writes the default integer index as an extra
    # leading column, so the output is not a byte-for-byte copy of the
    # input - confirm downstream readers expect that column.
    m_df.to_csv(out_dir / feature_file.name)

100%|██████████| 9/9 [00:08<00:00,  1.11it/s]


In [5]:
# Reshape every pairwise feature CSV into a labelled matrix in the processed
# output directory, one pass per target suffix.
feature_input_dir, pairs_dir, out_dir = (
    Path(PAIRWISE_DIR),
    Path(PAIRS_DIR),
    Path(PAIRWISE_PROCESSED_DIR),
)
out_dir.mkdir(exist_ok=True, parents=True)

for suffix in ("_disease", "_pathway"):
    feature_files = list(feature_input_dir.glob(f"*{suffix}.csv"))

    for feature_file in tqdm(feature_files):
        # Personalized-PageRank files are handled by the second reshaper and
        # use the "input_pairs" pickle; every other feature uses the first
        # reshaper with the "pairs" pickle.
        uses_pagerank_layout = feature_file.name.startswith("personalized_pagerank")
        fun = (
            reshape_pairwise_feature2
            if uses_pagerank_layout
            else reshape_pairwise_feature
        )
        pairs_file = pairs_dir / (
            f"input_pairs_obl{suffix}.pkl"
            if uses_pagerank_layout
            else f"pairs_obl{suffix}.pkl"
        )
        out_file = out_dir / feature_file.name
        fun(pairs_file=pairs_file, feature_file=feature_file, out_file=out_file)

100%|██████████| 8/8 [02:02<00:00, 15.35s/it]
100%|██████████| 8/8 [09:03<00:00, 67.92s/it]
