In [None]:
import os
os.environ["OMP_NUM_THREADS"] = "1"

import numpy as np
import pandas as pd
import sys,os
import random
import copy

import matplotlib.pyplot as plt
import seaborn as sns

from utils.eval import find_best_matches, generate_exprs

from methods import NMF, PCA, sparse_PCA, moCluster, MOFA2

from methods.utils import interpret_results, resultsHandler

from pathlib import Path
import multiprocessing as mp


# --- Settings for the simulated-data benchmark ------------------------------
# Kept as strings because they are spliced into file and directory names.
N_GENES_LIST = ['5', '50', '500']
SCENARIOS = ['A', 'B', 'C']

# Locations of the simulated inputs and of the per-method result folders.
DATA_ROOT = '/home/hartung/data/simulated'
RESULTS_ROOT = os.path.join(DATA_ROOT, 'results')

# Worker processes used for each parameter sweep.
N_WORKERS = 38

for METHOD in [sparse_PCA]:  # NMF, MOFA2, moCluster, sparse_PCA
    # Second component of the module's dotted name is used as the result
    # sub-folder (assumes names of the form 'methods.<name>' -- TODO confirm).
    method_name = METHOD.__name__.split('.')[1]

    for SCENARIO in SCENARIOS:
        # Scenario A reads the 'overlap=no' files; every other scenario reads
        # the 'overlap=yes' files.
        overlap = 'no' if SCENARIO == 'A' else 'yes'
        data_path = os.path.join(DATA_ROOT, SCENARIO)

        for N_GENES in N_GENES_LIST:

            #### prep file paths
            file_stem = f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap={overlap}'
            # data file
            FILE = f'{file_stem}.exprs_z.tsv'
            file_path = os.path.join(data_path, FILE)
            # ground truth file
            ground_truth_file = os.path.join(data_path, f'{file_stem}.biclusters.tsv')
            # output folder
            output_path = os.path.join(RESULTS_ROOT, method_name, SCENARIO, N_GENES)

            #### prep params
            combinations = METHOD.generate_arg_list(file_path, output_path, ground_truth_file)

            #### run algorithm
            # One pool per (method, scenario, n_genes) setting; each worker
            # receives a single element of `combinations`.
            with mp.Pool(N_WORKERS) as pool:
                pool.map(METHOD.run_simulated, combinations)



Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=6/random_state=0/alpha=0.5/ridge_alpha=0.01/max_iter=5000/method=cd/tol=1e-08...
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=9/random_state=0/alpha=1/ridge_alpha=0.01/max_iter=5000/method=lars/tol=1e-08...Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=8/random_state=0/alpha=0.5/ridge_alpha=0.001/max_iter=5000/method=lars/tol=1e-08...

Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=9/random_state=0/alpha=0.5/ridge_alpha=0.001/max_iter=5000/method=lars/tol=1e-08...
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=0/alpha=0.5/ridge_alpha=0.01/max_iter=5000/method=lars/tol=1e-09...
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=0/alpha=1.1/ridge_alpha=0.01/max_iter=5000/method=lars/tol=1e-09...
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_co

Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=3/alpha=1.1/ridge_alpha=0.001/max_iter=5000/method=lars/tol=1e-08...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=6/random_state=3/alpha=0.5/ridge_alpha=0.001/max_iter=5000/method=cd/tol=1e-08.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=3/alpha=5/ridge_alpha=0.01/max_iter=5000/method=cd/tol=1e-09...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=3/alpha=5/ridge_alpha=0.01/max_iter=5000/method=cd/tol=1e-09.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=3/alpha=50/ridge_alpha=0.01/max_iter=5000/method=lars/tol=1e-08...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=0/alpha=1/ridge_alpha=0.01/max_iter=5000/method=cd/tol=1e-08.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=4/random_st

Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=9/random_state=4/alpha=5/ridge_alpha=0.01/max_iter=5000/method=lars/tol=1e-08.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=9/random_state=4/alpha=10/ridge_alpha=0.1/max_iter=5000/method=cd/tol=1e-09...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=9/random_state=4/alpha=10/ridge_alpha=0.1/max_iter=5000/method=cd/tol=1e-09.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=4/alpha=0.5/ridge_alpha=0.1/max_iter=5000/method=lars/tol=1e-08...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=3/alpha=0.5/ridge_alpha=0.001/max_iter=5000/method=cd/tol=1e-09.
Running /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=10/random_state=4/alpha=1/ridge_alpha=0.001/max_iter=5000/method=lars/tol=1e-09...
Saved /home/hartung/data/simulated/results/sparse_PCA/A/5/n_components=5/random_state=

In [None]:
# Best evaluation score per (method, scenario, n_genes) setting.
# A value of None means no combination of that setting could be evaluated.
best_scores = {}

for METHOD in [NMF, sparse_PCA]:  # MOFA2
    # Same naming rule as used for the result folders when the runs are produced.
    method_name = METHOD.__name__.split('.')[1]

    for SCENARIO in SCENARIOS:
        # Scenario A reads the 'overlap=no' files, the others 'overlap=yes'.
        overlap = 'no' if SCENARIO == 'A' else 'yes'
        data_path = f'/home/hartung/data/simulated/{SCENARIO}'

        for N_GENES in N_GENES_LIST:

            # Rebuild the paths for THIS setting. Without this the loop
            # variables would be ignored and whatever values the names last
            # held in the kernel would be evaluated over and over.
            file_stem = f'{SCENARIO}.n_genes={N_GENES},m=4,std=1,overlap={overlap}'
            file_path = os.path.join(data_path, f'{file_stem}.exprs_z.tsv')
            ground_truth_file = os.path.join(data_path, f'{file_stem}.biclusters.tsv')
            output_path = os.path.join('/home/hartung/data/simulated/results', method_name, SCENARIO, N_GENES)

            combinations = METHOD.generate_arg_list(file_path, output_path, ground_truth_file)

            scores = []
            n_total = 0
            n_failed = 0
            last_error = None
            for comb in combinations:
                n_total += 1
                try:
                    score, runtime = resultsHandler.evaluate_simulated(**comb)
                except Exception as err:
                    # Best effort: a combination that cannot be evaluated is
                    # skipped, but counted so the gap is visible. Catching
                    # Exception (not a bare except) keeps Ctrl-C working.
                    n_failed += 1
                    last_error = err
                    continue
                scores.append(score)

            if n_failed:
                print(f'{method_name}/{SCENARIO}/{N_GENES}: {n_failed} of {n_total} '
                      f'combinations could not be evaluated (last error: {last_error!r})')

            # default=None avoids a ValueError when nothing could be evaluated.
            best_scores[(method_name, SCENARIO, N_GENES)] = max(scores, default=None)

# Last expression of the cell, so the summary is displayed.
best_scores

In [4]:
# Score every parameter combination and display the best score.
# NOTE(review): `combinations` is not defined in this cell; it relies on the
# value left in the kernel by an earlier cell -- confirm which one is intended.
scores = []
for comb in combinations:
    # evaluate_simulated yields a (score, runtime) pair; only the score is kept.
    evaluation = resultsHandler.evaluate_simulated(**comb)
    score, runtime = evaluation
    scores.append(score)
best_score = max(scores)
best_score

output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=1/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=2/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=3/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=4/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=5/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=6/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=7/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=8/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=9/n_cluster=4/random_state=0
output_path /home/hartung/data/simulated/results/MOFA2/B/50/n_factors=10/n_cluster=4/random_state=0
output_pa

0.7333333333333334