Memory performance comparison (single-core)
----

In this notebook, we produce Suppl. Tab. 3 displaying the single-core memory performance differences
between CellRank and Palantir on 100k cells.

# Preliminaries

## Dependencies

1. Please consult the [analysis_files/README.md](analysis_files/README.md) on how to run the memory performance benchmarks.

## Import packages

In [1]:
# import standard packages
from pathlib import Path
import pickle
import os
import sys

import numpy as np
import pandas as pd

## Set up paths

In [2]:
sys.path.insert(0, "../..")  # this depends on the notebook depth and must be adapted per notebook

from paths import DATA_DIR, FIG_DIR

## Set global parameters

In [3]:
root =  DATA_DIR / 'benchmarking' / 'memory_analysis_1_core'

palantir_path = root / "palantir"
cellrank_path = root / "gpcca"

## Load the data

In [4]:
res = {'CellRank (fate probs.)': [], 'CellRank (initial/terminal)': [], 'Palantir (fate probs.)': []}

for fname in os.listdir(palantir_path):
    with open(palantir_path / fname, 'rb') as fin:
        data = pickle.load(fin)
    res['Palantir (fate probs.)'].append(max(data) / 1024)
    
for fname in os.listdir(cellrank_path):
    if not fname.endswith(".pickle"):
        continue
    with open(cellrank_path / fname, 'rb') as fin:
        data = pickle.load(fin)
    # add macrostates and kernel memory together
    res['CellRank (initial/terminal)'].append(max((max(data['macro_mem']), max(data['kernel_mem']))) / 1024)
    res['CellRank (fate probs.)'].append(max(data['ap_mem']) / 1024)

### Clean the index

In [5]:
df = pd.DataFrame(res)
df.index = np.arange(1, 11)
df.index.name = 'subset'
df.round(2)

Unnamed: 0_level_0,CellRank (fate probs.),CellRank (initial/terminal),Palantir (fate probs.)
subset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,14.3,12.82,79.8
2,14.19,12.79,78.21
3,14.27,12.81,80.37
4,14.16,12.81,89.37
5,14.16,12.8,80.48
6,14.27,12.83,79.34
7,14.24,12.78,80.13
8,14.25,12.77,80.55
9,14.17,12.78,88.45
10,14.16,12.82,88.33


# Generate the table

## Calculate mean and standard deviation across the splits

In [6]:
tall_df = df.melt(value_vars=df.columns, var_name='algorithm', value_name='memory')

mean = tall_df.groupby('algorithm').mean().T
mean.index.name = 'size'
mean.columns = [f"{c} mean" for c in mean.columns]

std = tall_df.groupby('algorithm').std().T
std.index.name = 'size'
std.columns = [f"{c} std" for c in std.columns]

stats = pd.concat([mean, std], axis=1)
stats.index = [100_000]
stats.index.name = '#cells (thousands)'
stats = stats.round(2)
stats

Unnamed: 0_level_0,CellRank (fate probs.) mean,CellRank (initial/terminal) mean,Palantir (fate probs.) mean,CellRank (fate probs.) std,CellRank (initial/terminal) std,Palantir (fate probs.) std
#cells (thousands),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100000,14.22,12.8,82.5,0.05,0.02,4.35


## Reorder the dataframe

In [7]:
order = ['CellRank (initial/terminal)', 'CellRank (fate probs.)', 'Palantir (fate probs.)']

In [8]:
stats = stats[[f"{c} {s}" for c in order for s in ('mean', 'std')]]
stats

Unnamed: 0_level_0,CellRank (initial/terminal) mean,CellRank (initial/terminal) std,CellRank (fate probs.) mean,CellRank (fate probs.) std,Palantir (fate probs.) mean,Palantir (fate probs.) std
#cells (thousands),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100000,12.8,0.02,14.22,0.05,82.5,4.35


## Save the results

In [9]:
stats.to_csv(DATA_DIR / "benchmarking_results" / "suppl_tab_memory_benchmark_1_core" / "statistics.csv")