In [None]:
import os
# Set OMP_NUM_THREADS to "1" in this process's environment before the remaining
# imports below run. Presumably this keeps OpenMP-backed numeric libraries
# single-threaded per run -- TODO confirm the intent with the method wrappers.
os.environ["OMP_NUM_THREADS"] = "1"
import best_parameters

import pandas as pd

from methods import MOFA2, NMF, sparse_PCA, moCluster, iClusterPlus
from utils.eval import calculate_perfromance
from methods.utils.miscellaneous import combination_to_string

from pathlib import Path
import multiprocessing as mp

import numpy as np
import pandas as pd
import sys, os


# --- GSE4302: run each selected method with the optimized parameter set ---
# Expression matrix and sample annotation are tab-separated, indexed by the first column.
p = '/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.RMA.maxRowVariance.log2exprs.tsv'
exprs = pd.read_csv(p, sep='\t', index_col=0)

anno = pd.read_csv("/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.annot.tsv", sep="\t", index_col=0)

# The only reference group: samples whose "Th2 asthma" annotation equals 1.
th2_high_mask = anno["Th2 asthma"] == 1
known_groups = {"Th2 asthma": {"Th2-high": set(anno.loc[th2_high_mask, :].index.values)}}

# Fraction of annotated samples that belong to the Th2-high group.
n_th2_high = len(known_groups["Th2 asthma"]["Th2-high"])
freqs = {'Th2-high': n_th2_high / anno.shape[0]}

results_root = '/cosybio/project/hartung/unpast/unpast_asthma/results'

METHODS = [NMF] # [sparse_PCA, NMF, moCluster, MOFA2, iClusterPlus]
for METHOD in METHODS:
    # Module objects are used as methods; the last dotted component is the method name.
    method_name = METHOD.__name__.split('.')[-1]

    # Per-method accumulators, one record per seed.
    performances, best_matches_list = [], []

    print('method_name:', method_name)
    comb = getattr(best_parameters.OptimizedBRCAForAsthmaARI, method_name)

    # moCluster is run once; every other method is repeated over five seeds.
    seeds = [1] if method_name == 'moCluster' else [1, 2, 3, 4, 5]

    for seed in seeds:
        out_dir = os.path.join(results_root, method_name, 'GSE4302', str(seed))
        print(out_dir)

        # Point the parameter set at this dataset, output folder and seed.
        comb['exprs_file'] = p
        comb['output_path'] = out_dir
        comb['random_state'] = seed

        result_m, runtime_m = METHOD.run_real(comb)
        result_path = os.path.join(comb['output_path'], 'result.tsv')
        result_m.to_csv(result_path, sep='\t')

        # Score the result against the known Th2-high group over all annotated samples.
        all_samples = set(anno.index.values)
        performance, best_matches = calculate_perfromance(result_m, known_groups, all_samples) #, adjust_pvals="BH", pval_cutoff=1

        performance = performance.to_dict()
        best_matches = best_matches.to_dict()

        # Tag both records with the seed and the parameter string of this run.
        params_string = combination_to_string(comb)
        for record in (performance, best_matches):
            record.update({'run': comb['random_state'], 'parameters': params_string})

        performances.append(performance)
        best_matches_list.append(best_matches)

    # One summary table per method for the performances and for the best matches.
    performance_table = pd.DataFrame.from_records(performances)
    performance_table.to_csv(os.path.join(results_root, f'{method_name}_GSE4302.tsv'), sep="\t")
    best_matches_table = pd.DataFrame.from_records(best_matches_list)
    best_matches_table.to_csv(os.path.join(results_root, f'{method_name}_GSE4302_best_matches.tsv'), sep="\t")
    



# --- GSE89809 (epithelial): run each selected method with the optimized parameter set ---
# Expression matrix and sample annotation are tab-separated, indexed by the first column.
p = '/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE89809_epithelial.RMA.maxRowVariance.log2exprs.tsv'
exprs = pd.read_csv(p, sep='\t', index_col=0)

anno = pd.read_csv("/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE89809_epithelial.annot.tsv",sep="\t",index_col=0)

# The only reference group: samples whose "Th2 asthma" annotation equals 1.
known_groups = {"Th2 asthma":{"Th2-high": set(anno.loc[anno["Th2 asthma"]==1,:].index.values)}}

# Fraction of annotated samples that belong to the Th2-high group.
freqs  = {'Th2-high':len(known_groups["Th2 asthma"]["Th2-high"])/anno.shape[0]}

METHODS = [NMF] # [moCluster, sparse_PCA, iClusterPlus, MOFA2]
for METHOD in METHODS:
    method_name = METHOD.__name__.split('.')[-1]

    # Start from empty accumulators for every method. Without this reset the
    # lists still hold the records appended by the GSE4302 loop earlier in this
    # cell (and by any previous method), and those rows would be written into
    # the GSE89809 summary tables below.
    performances = []
    best_matches_list = []

    comb = getattr(best_parameters.OptimizedBRCAForAsthmaARI, method_name)
    seeds = [1, 2, 3, 4, 5]

    # moCluster is run once; every other method is repeated over five seeds.
    if method_name == 'moCluster':
        seeds = [1]

    for seed in seeds:
        out_dir = os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results', method_name, 'GSE89809', str(seed))

        # Point the parameter set at this dataset, output folder and seed.
        comb['exprs_file'] = p
        comb['output_path'] = out_dir
        comb['random_state'] = seed
        result_m, runtime_m = METHOD.run_real(comb)
        result_m.to_csv(os.path.join(comb['output_path'], 'result.tsv'), sep='\t')

        # Score the result against the known Th2-high group over all annotated samples.
        performance, best_matches = calculate_perfromance(result_m, known_groups, set(anno.index.values)) # , adjust_pvals="BH", pval_cutoff=1

        performance = performance.to_dict()
        best_matches = best_matches.to_dict()

        # Tag both records with the seed and the parameter string of this run.
        params_string = combination_to_string(comb)

        performance.update({'run': comb['random_state'], 'parameters': params_string})
        best_matches.update({'run': comb['random_state'], 'parameters': params_string})

        performances.append(performance)
        best_matches_list.append(best_matches)

    # One summary table per method for the performances and for the best matches.
    pd.DataFrame.from_records(performances).to_csv(os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results', f'{method_name}_GSE89809.tsv'), sep="\t")
    pd.DataFrame.from_records(best_matches_list).to_csv(os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results', f'{method_name}_GSE89809_best_matches.tsv'), sep="\t")


# Run with default parameters

In [None]:
import os
# Set OMP_NUM_THREADS to "1" in this process's environment before the remaining
# imports below run. Presumably this keeps OpenMP-backed numeric libraries
# single-threaded per run -- TODO confirm the intent with the method wrappers.
os.environ["OMP_NUM_THREADS"] = "1"
import best_parameters

import pandas as pd

from methods import MOFA2, NMF, sparse_PCA, moCluster, iClusterPlus


from pathlib import Path
import multiprocessing as mp

import numpy as np
import pandas as pd
import sys, os


# --- GSE4302: run each selected method with the default parameter set ---
# Expression matrix and sample annotation are tab-separated, indexed by the first column.
p = '/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.RMA.maxRowVariance.log2exprs.tsv'
exprs = pd.read_csv(p, sep='\t', index_col=0)

anno = pd.read_csv("/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.annot.tsv", sep="\t", index_col=0)
anno.head(3)

# The only reference group: samples whose "Th2 asthma" annotation equals 1.
th2_high_mask = anno["Th2 asthma"] == 1
known_groups = {"Th2 asthma": {"Th2-high": set(anno.loc[th2_high_mask, :].index.values)}}

# Fraction of annotated samples that belong to the Th2-high group.
n_th2_high = len(known_groups["Th2 asthma"]["Th2-high"])
freqs = {'Th2-high': n_th2_high / anno.shape[0]}

default_results_root = '/cosybio/project/hartung/unpast/unpast_asthma/results_default'

METHODS = [NMF, sparse_PCA] # []
for METHOD in METHODS:
    # Module objects are used as methods; the last dotted component is the method name.
    method_name = METHOD.__name__.split('.')[-1]

    # Per-method accumulators, one record per seed.
    performances, best_matches_list = [], []

    print('method_name:', method_name)
    comb = getattr(best_parameters.DefaultAsthma, method_name)

    # moCluster is run once; every other method is repeated over five seeds.
    seeds = [1] if method_name == 'moCluster' else [1, 2, 3, 4, 5]

    for seed in seeds:
        out_dir = os.path.join(default_results_root, method_name, 'GSE4302', str(seed))

        # Point the parameter set at this dataset, output folder and seed.
        comb['exprs_file'] = p
        comb['output_path'] = out_dir
        comb['random_state'] = seed
        result_m, runtime_m = METHOD.run_real(comb)

        # Score the result against the known Th2-high group over all annotated samples.
        all_samples = set(anno.index.values)
        performance, best_matches = calculate_perfromance(result_m, known_groups, all_samples)

        performances.append(performance.to_dict())
        best_matches_list.append(best_matches.to_dict())

    # One summary table per method for the performances and for the best matches.
    performance_table = pd.DataFrame.from_records(performances)
    performance_table.to_csv(os.path.join(default_results_root, f'{method_name}_GSE4302_default_params.tsv'), sep="\t")
    best_matches_table = pd.DataFrame.from_records(best_matches_list)
    best_matches_table.to_csv(os.path.join(default_results_root, f'{method_name}_GSE4302_best_matches_default_params.tsv'), sep="\t")
    





# --- GSE89809 (epithelial): run each selected method with the default parameter set ---
# Expression matrix and sample annotation are tab-separated, indexed by the first column.
p = '/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE89809_epithelial.RMA.maxRowVariance.log2exprs.tsv'
exprs = pd.read_csv(p, sep='\t', index_col=0)

anno = pd.read_csv("/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE89809_epithelial.annot.tsv",sep="\t",index_col=0)

# The only reference group: samples whose "Th2 asthma" annotation equals 1.
known_groups = {"Th2 asthma":{"Th2-high": set(anno.loc[anno["Th2 asthma"]==1,:].index.values)}}

# Fraction of annotated samples that belong to the Th2-high group.
freqs  = {'Th2-high':len(known_groups["Th2 asthma"]["Th2-high"])/anno.shape[0]}

METHODS = [NMF, sparse_PCA] # []
for METHOD in METHODS:
    method_name = METHOD.__name__.split('.')[-1]

    # Start from empty accumulators for every method. Without this reset the
    # lists still hold the records appended by the GSE4302 loop earlier in this
    # cell (and by any previous method), and those rows would be written into
    # the GSE89809 summary tables below.
    performances = []
    best_matches_list = []

    comb = getattr(best_parameters.DefaultAsthma, method_name)
    seeds = [1, 2, 3, 4, 5]

    # moCluster is run once; every other method is repeated over five seeds.
    if method_name == 'moCluster':
        seeds = [1]

    for seed in seeds:
        out_dir = os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results_default', method_name, 'GSE89809', str(seed))

        # Point the parameter set at this dataset, output folder and seed.
        comb['exprs_file'] = p
        comb['output_path'] = out_dir
        comb['random_state'] = seed
        result_m, runtime_m = METHOD.run_real(comb)

        # Score the result against the known Th2-high group over all annotated samples.
        performance, best_matches = calculate_perfromance(result_m, known_groups, set(anno.index.values))

        performances.append(performance.to_dict())
        best_matches_list.append(best_matches.to_dict())

    # One summary table per method for the performances and for the best matches.
    pd.DataFrame.from_records(performances).to_csv(os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results_default', f'{method_name}_GSE89809_default_params.tsv'), sep="\t")
    pd.DataFrame.from_records(best_matches_list).to_csv(os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results_default', f'{method_name}_GSE89809_best_matches_default_params.tsv'), sep="\t")



## Calculate performance

In [None]:
import numpy as np
import pandas as pd
import sys,os
import random
import copy
from time import time

import matplotlib.pyplot as plt
import seaborn as sns


from utils.method import read_bic_table
from utils.eval import calculate_perfromance

### Reading annotation and expression,
preparing inputs for calculate_perfromance()

In [None]:
# Load the GSE4302 expression matrix and its sample annotation.
# Both files are tab-separated and use their first column as the index.
exprs_file = "/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.RMA.maxRowVariance.log2exprs.tsv"
tsv_options = {"sep": "\t", "index_col": 0}
exprs = pd.read_csv(exprs_file, **tsv_options)

anno = pd.read_csv(
    "/cosybio/project/hartung/unpast/unpast_asthma/asthma_GSE4302.annot.tsv",
    **tsv_options,
)
# Preview the first three annotation rows as the cell output.
anno.head(3)

In [None]:
# One classification ("Th2 asthma") with a single class label ("Th2-high").
classifications = {"Th2 asthma": ["Th2-high"]}

# Samples whose "Th2 asthma" annotation equals 1 make up the Th2-high group.
th2_high_mask = anno["Th2 asthma"] == 1
th2_high_samples = set(anno.loc[th2_high_mask, :].index.values)
known_groups = {"Th2 asthma": {"Th2-high": th2_high_samples}}
known_groups

In [None]:
# Fraction of annotated samples that belong to the Th2-high group.
n_th2_high = len(known_groups["Th2 asthma"]["Th2-high"])
n_samples = anno.shape[0]
freqs = {'Th2-high': n_th2_high / n_samples}
freqs

### calculate performances

In [None]:
from methods import MOFA2, NMF, sparse_PCA, moCluster, iClusterPlus


from pathlib import Path
import multiprocessing as mp

import numpy as np
import pandas as pd
import sys, os




# Run every method on GSE4302 with the OptimizedBRCA parameter set.
#
# The expression file is taken from `exprs_file` (set in the data-loading cell
# of this section) instead of the leftover variable `p`: when the notebook is
# run top to bottom, `p` was last assigned the GSE89809 expression file, while
# the output folders written here are labelled GSE4302.
METHODS = [sparse_PCA, NMF, moCluster, MOFA2, iClusterPlus] # []
for METHOD in METHODS:
    method_name = METHOD.__name__.split('.')[-1]

    print('method_name:', method_name)
    comb = getattr(best_parameters.OptimizedBRCA, method_name)
    comb['exprs_file'] = exprs_file

    # All runs of one method share this dataset-level output folder.
    dataset_dir = os.path.join('/cosybio/project/hartung/unpast/unpast_asthma/results', method_name, 'GSE4302')

    if method_name == 'moCluster':
        # moCluster is run once, directly into the dataset folder, and no
        # 'random_state' is set for it here.
        out_dir = dataset_dir
        comb['output_path'] = out_dir
        result_m, runtime_m = METHOD.run_real(comb)
    else:
        # Every other method is repeated for seeds 1..5, one subfolder per seed.
        for seed in range(1, 6):
            out_dir = os.path.join(dataset_dir, str(seed))
            comb['output_path'] = out_dir
            comb['random_state'] = seed
            result_m, runtime_m = METHOD.run_real(comb)

    