## get best parameters

In [6]:
# best average rank in BRCA
#
# For every method, rank its parameter sets by mean PAM50 separately on TCGA
# and METABRIC, add the two ranks, and print the parameter set with the
# lowest combined rank.

import pandas as pd
from methods import NMF, sparse_PCA, moCluster, MOFA2, iClusterPlus

# Directory holding one `<method>_<dataset>.tsv` result table per run.
RESULTS_DIR = '/cosybio/project/hartung/unpast/unpast_real'


def strip_random_state(parameters):
    """Return the ';'-separated parameter string without `random_state=...` entries."""
    return ';'.join(item for item in parameters.split(';') if not item.startswith('random_state='))


def rank_parameter_sets(path, score_column='PAM50'):
    """Rank the parameter sets stored in one result table.

    Parameters
    ----------
    path : str
        Tab-separated result file; must contain a 'parameters' column and
        `score_column`.
    score_column : str
        Column whose per-parameter-set mean decides the ranking.

    Returns
    -------
    pandas.Series
        Indexed by parameter string (random_state removed); value 0 is the
        parameter set with the highest mean score.
    """
    results = pd.read_csv(path, sep='\t', index_col=0).dropna(subset=['parameters'])
    results = results.assign(parameters=results['parameters'].map(strip_random_state))
    # Rows that differed only in random_state now share a key and are averaged.
    mean_scores = results.groupby('parameters').mean().sort_values(score_column, ascending=False)
    return pd.Series(range(len(mean_scores)), index=mean_scores.index, name='rank')


for method in [NMF, moCluster, MOFA2, iClusterPlus, sparse_PCA]:
    method_name = method.__name__.split('.')[-1]
    print('method_name', method_name)

    # Inner join: only parameter sets evaluated on BOTH datasets are compared.
    # Summing over the union would give a set that is missing from one
    # dataset a single rank and therefore an unfairly low total.
    ranks = pd.concat(
        {
            dataset: rank_parameter_sets(f'{RESULTS_DIR}/{method_name}_{dataset}.tsv')
            for dataset in ('TCGA', 'METABRIC')
        },
        axis=1,
        join='inner',
    )
    if ranks.empty:
        print('no parameter set was evaluated on both datasets')
        continue

    # sort_index() makes ties deterministic: idxmin() returns the first minimum.
    params = ranks.sum(axis=1).sort_index().idxmin()

    # partition() keeps values intact even if they contain '='.
    param_dict = {
        key: value
        for key, _, value in (item.partition('=') for item in params.split(';') if item)
    }
    print(param_dict)

method_name NMF
{'k': '8', 'init': 'nndsvda', 'tol': '0.0001', 'transposed': 'False', 'alpha_W': '-0.1', 'alpha_H': '0.0', 'shuffle': 'False', 'solver': 'cd', 'beta_loss': 'frobenius', 'max_iter': '1000'}
method_name moCluster
{'n_dimensions': '5', 'n_cluster': '13', 'solver': 'svd', 'center': 'True', 'method': 'globalScore', 'option': 'uniform', 'scale': 'False', 'k': '1'}
method_name MOFA2
{'n_factors': '12', 'n_cluster': '9', 'ard_weights': 'True', 'ard_factors': 'False', 'likelihood': 'gaussian', 'spikeslab_weights': 'True', 'spikeslab_factors': 'False'}
method_name iClusterPlus
{'lambda_n': '10', 'n_cluster': '12', 'lambda_scale': '1', 'iter_max': '20', 'eps': '0.0001', 'type': 'gaussian', 'burnin_n': '200', 'draw_n': '200', 'sdev': '0.05'}
method_name sparse_PCA
{'n_components': '9', 'alpha': '1', 'ridge_alpha': '0.001', 'max_iter': '1000', 'method': 'cd', 'tol': '1e-08'}


In [3]:
# highest PAM50 in each dataset
#
# For every method and every dataset, print the parameter set with the
# highest mean PAM50 (averaged over random states).

import ast

import pandas as pd
from methods import NMF, sparse_PCA, moCluster, MOFA2, iClusterPlus

# Directory holding one `<method>_<dataset>.tsv` result table per run.
RESULTS_DIR = '/cosybio/project/hartung/unpast/unpast_real'


def strip_random_state(parameters):
    """Return the ';'-separated parameter string without `random_state=...` entries."""
    return ';'.join(item for item in parameters.split(';') if not item.startswith('random_state='))


def parse_parameters(parameters):
    """Convert a 'key=value;key=value' string into a dict.

    Values that are valid Python literals (numbers, True/False, None, ...)
    are decoded; everything else is kept as the raw string.
    `ast.literal_eval` is used instead of `eval` so that a value can never
    resolve to a name living in the notebook namespace or execute code.
    """
    parsed = {}
    for item in parameters.split(';'):
        if not item:
            continue
        # partition() splits on the first '=' only, so values may contain '='.
        key, _, raw_value = item.partition('=')
        try:
            parsed[key] = ast.literal_eval(raw_value)
        except (ValueError, SyntaxError, TypeError):
            # Not a literal (e.g. 'nndsvda'): keep the text as-is.
            parsed[key] = raw_value
    return parsed


for method in [NMF, moCluster, MOFA2, iClusterPlus, sparse_PCA]:
    method_name = method.__name__.split('.')[-1]
    print('method_name', method_name)
    for dataset in ['TCGA', 'METABRIC']:
        print(dataset)

        results = pd.read_csv(f'{RESULTS_DIR}/{method_name}_{dataset}.tsv', sep='\t', index_col=0)
        results = results.dropna(subset=['parameters'])

        # Drop random_state so that repeated runs of one configuration share a key.
        results = results.assign(parameters=results['parameters'].map(strip_random_state))

        # Average the repeated runs, then take the configuration with the best mean PAM50.
        mean_scores = results.groupby('parameters').mean()
        params = mean_scores.sort_values('PAM50', ascending=False).index[0]

        print(parse_parameters(params))

method_name NMF
TCGA
{'k': 8, 'init': 'nndsvda', 'tol': 0.0001, 'transposed': False, 'alpha_W': -0.1, 'alpha_H': 0.0, 'shuffle': False, 'solver': 'cd', 'beta_loss': 'frobenius', 'max_iter': 1000}
METABRIC
{'k': 3, 'init': 'nndsvda', 'tol': 0.0001, 'transposed': False, 'alpha_W': -0.1, 'alpha_H': 0.0, 'shuffle': False, 'solver': 'cd', 'beta_loss': 'frobenius', 'max_iter': 200}
method_name moCluster
TCGA
{'n_dimensions': 4, 'n_cluster': 10, 'solver': 'fast', 'center': True, 'method': 'globalScore', 'option': 'inertia', 'scale': False, 'k': 1}
METABRIC
{'n_dimensions': 15, 'n_cluster': 20, 'solver': 'fast', 'center': True, 'method': 'globalScore', 'option': 'lambda1', 'scale': False, 'k': 0.1}
method_name MOFA2
TCGA
{'n_factors': 2, 'n_cluster': 7, 'ard_weights': True, 'ard_factors': False, 'likelihood': 'gaussian', 'spikeslab_weights': True, 'spikeslab_factors': False}
METABRIC
{'n_factors': 19, 'n_cluster': 11, 'ard_weights': True, 'ard_factors': False, 'likelihood': 'gaussian', 'spikes

In [15]:
# highest in simulated
#
# For every method, print the parameter set with the highest mean
# 'performance' on the simulated ABC data (averaged over random states).

import ast

import pandas as pd
from methods import NMF, sparse_PCA, moCluster, MOFA2, iClusterPlus


def strip_random_state(parameters):
    """Return the ';'-separated parameter string without `random_state=...` entries."""
    return ';'.join(item for item in parameters.split(';') if not item.startswith('random_state='))


def parse_parameters(parameters):
    """Convert a 'key=value;key=value' string into a dict.

    Values that are valid Python literals (numbers, True/False, None, ...)
    are decoded; everything else is kept as the raw string.
    `ast.literal_eval` is used instead of `eval` so that a value can never
    resolve to a name living in the notebook namespace or execute code.
    """
    parsed = {}
    for item in parameters.split(';'):
        if not item:
            continue
        # partition() splits on the first '=' only, so values may contain '='.
        key, _, raw_value = item.partition('=')
        try:
            parsed[key] = ast.literal_eval(raw_value)
        except (ValueError, SyntaxError, TypeError):
            # Not a literal (e.g. 'nndsvd'): keep the text as-is.
            parsed[key] = raw_value
    return parsed


for method in [NMF, moCluster, MOFA2, iClusterPlus, sparse_PCA]:
    method_name = method.__name__.split('.')[-1]
    print('method_name', method_name)

    # NOTE(review): the directory is spelled 'unpast_simluated' here; presumably
    # that matches the name on disk -- confirm before "correcting" it.
    results = pd.read_csv(f'/cosybio/project/hartung/unpast/unpast_simluated/{method_name}_ABC.tsv', sep='\t', index_col=0)
    results = results.dropna(subset=['parameters'])

    # Drop random_state so that repeated runs of one configuration share a key.
    results = results.assign(parameters=results['parameters'].map(strip_random_state))

    # Average the repeated runs, then take the configuration with the best mean performance.
    mean_scores = results.groupby('parameters').mean()
    params = mean_scores.sort_values(['performance'], ascending=False).index[0]

    print(parse_parameters(params))

method_name NMF
{'k': 3, 'init': 'nndsvd', 'tol': 0.0001, 'transposed': False, 'alpha_W': 0.2, 'alpha_H': 0.0, 'shuffle': True, 'solver': 'cd', 'beta_loss': 'frobenius', 'max_iter': 1000}
method_name moCluster
{'n_dimensions': 8, 'n_cluster': 8, 'solver': 'svd', 'center': True, 'method': 'globalScore', 'option': 'uniform', 'scale': False, 'k': 0.1}
method_name MOFA2
{'n_factors': 18, 'n_cluster': 5, 'ard_weights': True, 'ard_factors': False, 'likelihood': 'gaussian', 'spikeslab_weights': True, 'spikeslab_factors': False}
method_name iClusterPlus
{'lambda_n': 10, 'n_cluster': 4, 'lambda_scale': 1, 'iter_max': 20, 'eps': 0.0001, 'type': 'gaussian', 'burnin_n': 200, 'draw_n': 200, 'sdev': 0.05}
method_name sparse_PCA
{'n_components': 2, 'alpha': 1, 'ridge_alpha': 0.1, 'max_iter': 1000, 'method': 'cd', 'tol': 1e-08}


## get results with parameters

In [1]:
# For every method: rank its parameter sets by mean PAM50 on METABRIC and on
# TCGA, sum the two ranks per parameter set, and print the set with the lowest sum.
# NOTE(review): `best_parameters` is imported but not used in the lines visible
# here -- presumably it is needed further down; confirm or remove.
import best_parameters
import pandas as pd
from methods import NMF, sparse_PCA, moCluster, MOFA2, iClusterPlus

for method in [NMF, moCluster, MOFA2, iClusterPlus, sparse_PCA]:
    # Last dotted component of the object's __name__; used to build the file names below.
    method_name = method.__name__.split('.')[-1]
    print('method_name', method_name)
    
    # Tab-separated result tables, first column used as the index.
    df_meta = pd.read_csv(f'/cosybio/project/hartung/unpast/unpast_real/{method_name}_METABRIC.tsv', sep='\t', index_col=0)
    df_tcga = pd.read_csv(f'/cosybio/project/hartung/unpast/unpast_real/{method_name}_TCGA.tsv', sep='\t', index_col=0)
    
    # Discard rows whose 'parameters' entry is missing.
    df_meta = df_meta.dropna(subset='parameters')
    df_tcga = df_tcga.dropna(subset='parameters')
    
    # remove random state from params: drop every 'random_state=...' item from the
    # ';'-separated string so rows differing only in random_state share one key
    df_meta['parameters'] = df_meta['parameters'].map(lambda x: ';'.join([y for y in x.split(';') if not y.startswith('random_state=')]))
    df_tcga['parameters'] = df_tcga['parameters'].map(lambda x: ';'.join([y for y in x.split(';') if not y.startswith('random_state=')]))
    
    # merge permutations: average all columns over rows with the same parameter string
    df_meta = df_meta.groupby('parameters').mean()
    df_tcga = df_tcga.groupby('parameters').mean()
    
    # Rank 0 = highest mean PAM50; ties get consecutive ranks in whatever order the sort leaves them.
    df_meta = df_meta.sort_values('PAM50', ascending=False)
    df_meta['rank'] = list(range(len(df_meta.index)))
    
    df_tcga = df_tcga.sort_values('PAM50', ascending=False)
    df_tcga['rank'] = list(range(len(df_tcga.index)))
    
    # Stack both tables and sum every column (including 'rank') per parameter string.
    # NOTE(review): a parameter string present in only one table contributes a
    # single rank to the sum and can therefore look better than it is -- verify
    # that both tables always contain the same parameter sets.
    df = pd.concat([df_tcga, df_meta]).groupby('parameters').sum()
    
    # Parameter string with the lowest summed rank.
    params = df.sort_values('rank', ascending=True).index[0]
    # Split 'key=value' items into a dict; values stay strings, empty items are skipped.
    d = {y[0]: y[1] for y in [x.split('=') for x in params.split(';') if len(x)]}
    print(d)