In [None]:
import os

import pandas as pd
import numpy as np

In [None]:
import scanpy as sc
import quicat
from scipy import sparse as sp
import matplotlib.pyplot as plt
import seaborn as sb

In [None]:
# Root folder that holds the benchmark logs and receives the exported figures.
# The original absolute path remains the default; set the QUICAT_REPORT_DIR
# environment variable to run the notebook from another checkout or machine.
# os.path.join(..., '') guarantees the trailing separator that the f-string
# paths in the cells below rely on (they concatenate without adding one).
report_dir = os.path.join(
    os.environ.get('QUICAT_REPORT_DIR')
    or '/home/daniele/Code/github_synced/barcoding/quicat_paper_code/reports/',
    '',
)

In [None]:
# Resolution (dots per inch) handed to every plt.savefig call in the plotting cells.
dpi = 300

In [None]:
# Sub-directories of report_dir. The trailing slash matters: log paths are
# built by plain f-string concatenation (report_dir + sub-directory + file name).
synt = 'dna/synthetic/'
yogesh = 'dna/yogesh/'
# NOTE(review): sc_synth is not referenced in the cells visible here —
# presumably used elsewhere; confirm or remove.
sc_synth = 'single_cell/synthetic/'

In [None]:
def kb_to_gb(kb_value):
    """Convert a size in kilobytes to gigabytes (binary: 1 GB = 1024 * 1024 kB).

    ``kb_value`` may be a number or a numeric string; the result is a float.
    """
    kb_per_gb = 1024 ** 2
    size_kb = float(kb_value)
    return size_kb / kb_per_gb

def convert_to_seconds(time_str):
    """Convert an elapsed-time string to a number of seconds.

    Accepts both layouts printed on the "Elapsed (wall clock) time" line of
    GNU ``time -v``: ``m:ss`` (seconds usually carry a fractional part, e.g.
    ``"5:23.45"``) and ``h:mm:ss`` for longer runs (e.g. ``"1:02:03"``).
    The fractional part is optional in either layout.

    Parameters
    ----------
    time_str : str
        Elapsed time; surrounding whitespace is ignored.

    Returns
    -------
    float
        Total elapsed time in seconds.

    Raises
    ------
    ValueError
        If the string has fewer than two or more than three ``:``-separated
        fields, or a field is not numeric.
    """
    fields = time_str.strip().split(':')
    if not 2 <= len(fields) <= 3:
        raise ValueError(f"Unrecognised elapsed-time format: {time_str!r}")

    *leading, seconds = fields
    total_seconds = float(seconds)
    # `leading` is [minutes] or [hours, minutes]; walk it from the right so the
    # field next to the seconds is weighted as minutes, the next one as hours.
    for weight, field in zip((60, 3600), reversed(leading)):
        total_seconds += int(field) * weight
    return total_seconds

In [None]:
def extract_info(file_path):
    """Read run time and peak memory from a benchmark log file.

    Each line is checked for the labels 'Elapsed (wall clock) time' and
    'Maximum resident set size (kbytes)'. The text after the first ": " on a
    matching line is converted with ``convert_to_seconds`` or ``kb_to_gb``.

    Returns a dict that may contain 'Elapsed time' (seconds) and
    'Peak memory' (GB). A key is absent when its label never appears; if a
    label appears several times, the last occurrence wins.
    """
    def _value_of(entry):
        # Text following the first ": " separator, without the newline.
        return entry.split(": ")[1].strip()

    info = {}
    with open(file_path) as log:
        for entry in log:
            if 'Elapsed (wall clock) time' in entry:
                info['Elapsed time'] = convert_to_seconds(_value_of(entry))
            elif 'Maximum resident set size (kbytes)' in entry:
                info['Peak memory'] = kb_to_gb(_value_of(entry))
    return info

In [None]:
# Results container filled by the cells below:
# tool name -> dataset label -> dict returned by extract_info
# (keys 'Elapsed time' in seconds and 'Peak memory' in GB).
benchmark_dict = {}

### bartab

In [None]:
# Metrics parsed from the bartab logs, keyed by dataset label.
benchmark_dict['bartab'] = {
    'dna synthethic': extract_info(f'{report_dir}{synt}bartab_dna_synthetic.log'),
    'dna Goyal': extract_info(f'{report_dir}{yogesh}bartab_dna_yogesh.log'),
}

### pycashier

In [None]:
# Metrics parsed from the pycashier logs, keyed by dataset label.
benchmark_dict['pycashier'] = {
    'dna synthethic': extract_info(f'{report_dir}{synt}pycashier_dna_synthetic.log'),
    'dna Goyal': extract_info(f'{report_dir}{yogesh}pycashier_dna_yogesh.log'),
}

### quicat

In [None]:
# Metrics parsed from the quicat logs, keyed by dataset label.
benchmark_dict['quicat'] = {
    'dna synthethic': extract_info(f'{report_dir}{synt}quicat_dna_synthetic.log'),
    'dna Goyal': extract_info(f'{report_dir}{yogesh}quicat_dna_yogesh.log'),
}

In [None]:
# Show the collected metrics (bare last expression -> rendered as cell output).
benchmark_dict

### plot

In [None]:
# Fill colour (hex) used for each tool's bars in all figures below.
color_mapping = dict(
    pycashier='#332288',
    quicat='#DDCC77',
    bartab='#CC6677',
)

In [None]:
# Flatten the nested {tool: {dataset: metrics}} mapping into one row per
# (tool, dataset) pair, then wrap the rows in a DataFrame for plotting.
data_flattened = [
    [tool_name, dataset_name, run['Elapsed time'], run['Peak memory']]
    for tool_name, runs in benchmark_dict.items()
    for dataset_name, run in runs.items()
]
df = pd.DataFrame(
    data_flattened,
    columns=['Tool', 'Dataset', 'Elapsed time (s)', 'Peak memory (GB)'],
)


In [None]:
# Distinct dataset labels (one bar group each) and tool names (one bar per
# group each), taken from the corresponding columns of df.
datasets, tools = (df[column].unique() for column in ('Dataset', 'Tool'))

In [None]:
# Grouped bar chart of wall-clock run time: one group per dataset, one bar per tool.
fig, ax = plt.subplots(figsize=(8, 6))
bar_width = 0.2
index = np.arange(len(datasets))
for i, tool in enumerate(tools):
    # Align this tool's values to the order of `datasets` explicitly, so every
    # bar sits above its own tick label even if the rows of `df` are ordered
    # differently or a dataset is missing for this tool (a missing value
    # becomes NaN and no bar is drawn). Relies on (Tool, Dataset) pairs being
    # unique, which holds because df is built from a nested dict.
    heights = (
        df.loc[df['Tool'] == tool]
        .set_index('Dataset')['Elapsed time (s)']
        .reindex(datasets)
        .to_numpy()
    )
    # Shift each tool by one bar width so the bars of a group stand side by side.
    ax.bar(index + i * bar_width, heights, bar_width, label=tool,
           color=color_mapping[tool], edgecolor='black', alpha=0.3)

ax.set_xlabel('Dataset')
ax.set_ylabel('Elapsed Time (s)')
# Centre the tick under each group of bars.
ax.set_xticks(index + bar_width * (len(tools) - 1) / 2)
ax.set_xticklabels(datasets)
ax.legend()

fig.tight_layout()
out_path = f'{report_dir}figures/fig2/barplot_runtime.pdf'
# savefig does not create missing folders; make sure the target exists on a fresh checkout.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
fig.savefig(out_path, dpi=dpi, bbox_inches='tight')
plt.show()

In [None]:
# Run-time bar chart with a broken y-axis: the same bars are drawn on two
# stacked panels sharing the x-axis. The lower panel (ax2) shows the short
# runs, the upper panel (ax1) shows the tops of the long runs, and the range
# in between is cut out.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 6), gridspec_kw={'height_ratios': [1, 2]})

bar_width = 0.2
index = np.arange(len(datasets))

# Limits of the axis break, in seconds. These are hand-picked values;
# NOTE(review): if the largest run time * 1.2 drops below `upper_panel_min`,
# the upper panel's limits end up reversed — revisit when the data change.
lower_panel_max = 150
upper_panel_min = 250

for i, tool in enumerate(tools):
    # Align this tool's values to the order of `datasets` explicitly, so every
    # bar sits above its own tick label even if the rows of `df` are ordered
    # differently or a dataset is missing for this tool (NaN -> no bar).
    # Relies on (Tool, Dataset) pairs being unique, which holds because df is
    # built from a nested dict.
    heights = (
        df.loc[df['Tool'] == tool]
        .set_index('Dataset')['Elapsed time (s)']
        .reindex(datasets)
        .to_numpy()
    )

    # Draw identical bars on both panels; the y-limits decide which part is visible.
    for panel in (ax1, ax2):
        panel.bar(index + i * bar_width, heights, bar_width, label=tool,
                  color=color_mapping[tool], edgecolor='black', alpha=0.3)

ax1.set_ylim(upper_panel_min, df['Elapsed time (s)'].max() * 1.2)
ax2.set_ylim(0, lower_panel_max)

# Hide the facing spines so the two panels read as a single interrupted axis.
ax1.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)

# Short diagonal strokes at the corners where the panels meet, drawn in axes
# coordinates (0..1) and allowed to extend past the axes box (clip_on=False).
d = .005
kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False, lw=1)

ax1.plot((-d, +d), (-d, +d), **kwargs)          # bottom-left corner of the upper panel
ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)    # bottom-right corner of the upper panel

kwargs.update(transform=ax2.transAxes)
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)        # top-left corner of the lower panel
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # top-right corner of the lower panel

ax2.set_xlabel('Dataset')
# Centre the tick under each group of bars.
ax2.set_xticks(index + bar_width * (len(tools) - 1) / 2)
ax2.set_xticklabels(datasets)

ax1.set_ylabel('Elapsed Time (s)')
ax2.set_ylabel('Elapsed Time (s)')

ax1.legend(loc='upper left', bbox_to_anchor=(0, 1.))

fig.tight_layout()
out_path = f'{report_dir}figures/fig2/barplot_runtime_y_axis_split.pdf'
# savefig does not create missing folders; make sure the target exists on a fresh checkout.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
fig.savefig(out_path, dpi=dpi, bbox_inches='tight')

plt.show()


In [None]:
# Grouped bar chart of peak memory: one group per dataset, one bar per tool.
fig, ax = plt.subplots(figsize=(8, 6))

bar_width = 0.2
index = np.arange(len(datasets))
for i, tool in enumerate(tools):
    # Align this tool's values to the order of `datasets` explicitly, so every
    # bar sits above its own tick label even if the rows of `df` are ordered
    # differently or a dataset is missing for this tool (NaN -> no bar).
    # Relies on (Tool, Dataset) pairs being unique, which holds because df is
    # built from a nested dict.
    heights = (
        df.loc[df['Tool'] == tool]
        .set_index('Dataset')['Peak memory (GB)']
        .reindex(datasets)
        .to_numpy()
    )
    # Shift each tool by one bar width so the bars of a group stand side by side.
    ax.bar(index + i * bar_width, heights, bar_width, label=tool,
           color=color_mapping[tool], edgecolor='black', alpha=0.3)

ax.set_xlabel('Dataset')
# 'GB' (gigabytes), matching the DataFrame column; the previous 'Gb' reads as gigabits.
ax.set_ylabel('Peak memory (GB)')
# Centre the tick under each group of bars.
ax.set_xticks(index + bar_width * (len(tools) - 1) / 2)
ax.set_xticklabels(datasets)
ax.legend()

fig.tight_layout()
out_path = f'{report_dir}figures/fig2/barplot_peak_memory.pdf'
# savefig does not create missing folders; make sure the target exists on a fresh checkout.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
fig.savefig(out_path, dpi=dpi, bbox_inches='tight')
plt.show()