In [None]:
import os
# Restrict OpenMP to a single thread for this process.
# NOTE(review): presumably placed ahead of the numeric imports below so that
# OpenMP-backed libraries see the value when they load, and so worker
# processes do not oversubscribe cores -- confirm.
os.environ["OMP_NUM_THREADS"] = "1"

import numpy as np
import pandas as pd
import sys,os
import random
import copy

import matplotlib.pyplot as plt
import seaborn as sns

from utils.eval import find_best_matches, generate_exprs

from methods import NMF, PCA, sparse_PCA, moCluster, MOFA2, iClusterPlus

from methods.utils import interpret_results, resultsHandler

from pathlib import Path
import multiprocessing as mp


# Grid of simulated data sets.
# Scenario labels; each one is used as a sub-folder and file-name prefix.
SCENARIOS = list('ABC')
# Values for the `n_genes=` part of the file names, stored as strings because
# they are also used directly as path components.
N_GENES_LIST = [str(size) for size in (5, 50, 500)]


In [None]:
# Run the selected method(s) on every scenario / n_genes data set.
for METHOD in [NMF]:  # NMF, MOFA2, moCluster, sparse_PCA, iClusterPlus
    for SCENARIO in SCENARIOS:
        for N_GENES in N_GENES_LIST:

            # -- input locations --
            data_path = f'/home/hartung/data/simulated/{SCENARIO}'
            FILE = f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap=yes.exprs_z.tsv'
            file_path = os.path.join(data_path, FILE)
            ground_truth_file = os.path.join(
                data_path,
                f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap=yes.biclusters.tsv',
            )

            # -- result folder: <results>/<second component of the method's dotted name>/<scenario>/<n_genes> --
            output_path = os.path.join(
                '/home/hartung/data/simulated/results',
                METHOD.__name__.split('.')[1],
                SCENARIO,
                N_GENES,
            )

            # scenario A files carry the 'overlap=no' tag instead of 'overlap=yes'
            if SCENARIO == 'A':
                file_path, ground_truth_file = (
                    path.replace('overlap=yes', 'overlap=no')
                    for path in (file_path, ground_truth_file)
                )

            # -- argument sets, one per run, as produced by the method module --
            combinations = METHOD.generate_arg_list(file_path, output_path, ground_truth_file)

            # -- execute all runs in a single-worker pool --
            with mp.Pool(processes=1) as pool:
                pool.map(METHOD.run_simulated, combinations)



## Collect results

In [None]:
# Settings for simulated data
N_GENES_LIST = ['5', '50', '500']
SCENARIOS = ['A', 'B', 'C']

# Evaluate every stored run and write one score table per method.
#
# `scores` accumulates the records of all methods. `method_scores` holds only
# the records of the method currently being processed, so each
# <method_name>_ABC.tsv contains that method's results and nothing else.
# (Previously a single table with every method's rows was written once, after
# the loops, under the name of the last method only.)
scores = []
for METHOD in [MOFA2, moCluster, NMF, sparse_PCA, iClusterPlus]:  # MOFA2, moCluster, NMF, sparse_PCA, iClusterPlus
    # second component of the method's dotted name; used in the result paths
    method_name = METHOD.__name__.split('.')[1]
    method_scores = []
    for SCENARIO in SCENARIOS:
        for N_GENES in N_GENES_LIST:

            #### prep file paths
            # data files
            data_path = f'/local/DESMOND2_data_simulated/simulated/{SCENARIO}'
            FILE = f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap=yes.exprs_z.tsv'
            file_path = os.path.join(data_path, FILE)
            # output folder
            output_path = f'/home/bba1401/data/unpast_simluated/{method_name}/{SCENARIO}/{N_GENES}'
            # ground truth file
            ground_truth_file = os.path.join(data_path, f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap=yes.biclusters.tsv')

            # scenario A files carry the 'overlap=no' tag instead of 'overlap=yes'
            if SCENARIO == 'A':
                file_path = file_path.replace('overlap=yes', 'overlap=no')
                ground_truth_file = ground_truth_file.replace('overlap=yes', 'overlap=no')

            #### prep params
            combinations = METHOD.generate_arg_list(file_path, output_path, ground_truth_file)
            print(len(combinations), 'combinations')
            for i, comb in enumerate(combinations):
                # coarse progress indicator
                if not i % 1000:
                    print(i)
                try:
                    performance, runtime = resultsHandler.evaluate_simulated(**comb)
                    # 'run' and 'seed' both record the run's random state
                    method_scores.append({**performance, 'runtime': runtime, 'parameters': comb['output_path'], 'scenario': SCENARIO, 'gsize':N_GENES, 'run':comb['random_state'], 'seed':comb['random_state']})
                except Exception as e:
                    # Best effort: a run that cannot be evaluated is reported
                    # (with enough context to locate it) and skipped.
                    print(f'{method_name} scenario={SCENARIO} n_genes={N_GENES} combination {i}: {e!r}')
                    continue

    scores.extend(method_scores)
    pd.DataFrame(method_scores).to_csv(f'/home/bba1401/data/unpast_simluated/{method_name}/{method_name}_ABC.tsv', sep='\t')