In [1]:
import os
import random
import sys

import pandas as pd

sys.path.insert(0, '..')

from unpast.run_unpast import unpast
from unpast.utils.method import prepare_input_matrix
from unpast.utils.consensus import make_consensus_biclusters
from unpast.utils.io import write_bic_table, read_bic_table

In [2]:
# Locations of the results and of the input expression data.
# Both end with "/" because later cells build file names by plain concatenation.
out_dir = "../unpast_results/asthma/"
data_dir = "../data/asthma/"

# A single master seed makes the whole analysis reproducible:
# it determines the per-run seeds below and is reused for the consensus step.
analysis_seed = 42
n_runs = 5

# Seed the global `random` generator, then draw one integer seed per run.
random.seed(analysis_seed)
seeds = [random.randint(0, 1000000) for _ in range(n_runs)]
print("generate %s seeds:" % n_runs, seeds)

generate  5  seeds [670487, 116739, 26225, 777572, 288389]


# GSE4302

In [3]:
# Path to the GSE4302 expression table (read as a tab-separated file further down).
# os.path.join is used instead of `data_dir + "/..."`: data_dir already ends with
# a separator, so plain concatenation produced a doubled "/" in the path.
exprs_file = os.path.join(data_dir, "asthma_GSE4302.RMA.maxRowVariance.log2exprs.tsv.gz")
# Dataset label: passed to unpast() and used as the prefix of the consensus output file.
basename = "asthma_GSE4302"

In [4]:
# Run UnPaSt once per pre-generated seed and collect the result of every run.
biclusters = []
for run_idx in range(n_runs):
    seed = seeds[run_idx]
    run_result = unpast(
        exprs_file,
        basename,
        out_dir=out_dir,
        seed=seed,
        verbose=False,
    )
    # Prefix each row label with the seed of its run so that labels from
    # different runs stay distinguishable once the results are pooled.
    label_prefix = "seed=" + str(seed) + "_"
    run_result.index = [label_prefix + str(label) for label in run_result.index.values]
    biclusters.append(run_result)

In [5]:
# Load the raw expression table, then filter and z-score transform it;
# z-scores are limited to [-3, 3] (ceiling=3).
exprs_raw = pd.read_csv(exprs_file, sep="\t", index_col=0)
exprs_z = prepare_input_matrix(exprs_raw, min_n_samples=5, ceiling=3)

# Combine the biclusters from all runs into a consensus set,
# seeded with the master analysis seed.
consensus_biclusters = make_consensus_biclusters(
    biclusters,
    exprs_z,
    seed=analysis_seed,
    verbose=False,
)

# NOTE(review): "biclsuters" looks like a misspelling of "biclusters". It is kept
# unchanged because the recorded output uses this exact file name and other code
# may read the file under it.
bic_fname = "".join([
    out_dir,
    basename,
    ".seed=",
    str(analysis_seed),
    ".kmeans.consensus.biclsuters.tsv",
])
write_bic_table(consensus_biclusters, bic_fname)
bic_fname

'../unpast_results/asthma/asthma_GSE4302.seed=42.kmeans.consensus.biclsuters.tsv'

# GSE89809

In [7]:
# Path to the GSE89809 (epithelial) expression table (read as a tab-separated file further down).
# os.path.join is used instead of `data_dir + "/..."`: data_dir already ends with
# a separator, so plain concatenation produced a doubled "/" in the path.
exprs_file = os.path.join(data_dir, "asthma_GSE89809_epithelial.RMA.maxRowVariance.log2exprs.tsv.gz")
# Dataset label: passed to unpast() and used as the prefix of the consensus output file.
basename = "asthma_GSE89809"

In [8]:
# Run UnPaSt once per pre-generated seed and collect the result of every run.
biclusters = []
for run_idx in range(n_runs):
    seed = seeds[run_idx]
    run_result = unpast(
        exprs_file,
        basename,
        out_dir=out_dir,
        seed=seed,
        verbose=False,
    )
    # Prefix each row label with the seed of its run so that labels from
    # different runs stay distinguishable once the results are pooled.
    label_prefix = "seed=" + str(seed) + "_"
    run_result.index = [label_prefix + str(label) for label in run_result.index.values]
    biclusters.append(run_result)

In [9]:
# Load the raw expression table, then filter and z-score transform it;
# z-scores are limited to [-3, 3] (ceiling=3).
exprs_raw = pd.read_csv(exprs_file, sep="\t", index_col=0)
exprs_z = prepare_input_matrix(exprs_raw, min_n_samples=5, ceiling=3)

# Combine the biclusters from all runs into a consensus set,
# seeded with the master analysis seed.
consensus_biclusters = make_consensus_biclusters(
    biclusters,
    exprs_z,
    seed=analysis_seed,
    verbose=False,
)

# NOTE(review): "biclsuters" looks like a misspelling of "biclusters". It is kept
# unchanged because the recorded output uses this exact file name and other code
# may read the file under it.
bic_fname = "".join([
    out_dir,
    basename,
    ".seed=",
    str(analysis_seed),
    ".kmeans.consensus.biclsuters.tsv",
])
write_bic_table(consensus_biclusters, bic_fname)
bic_fname

'../unpast_results/asthma/asthma_GSE89809.seed=42.kmeans.consensus.biclsuters.tsv'