In [63]:
import sys 
import os
import subprocess
import numpy as np 
import time
from itertools import product 
import glob
import re
import pandas as pd 

# Per-option output directory below ../output/run_clixo.
# NOTE(review): `option` is not assigned anywhere in this cell; it has to be
# bound earlier in the notebook/kernel for this line to run — confirm.
output_dir = '../output/run_clixo/option{}'.format(option)
if not os.path.exists(output_dir):
    # Create the directory (and any missing parents) on first use.
    os.makedirs(output_dir)
# Directory named after the STRING interaction subnetwork extraction step;
# it is only defined here, not read in this cell.
stringdb_dir = '../output/extracting_string_interactions_subnetworks/'

In [64]:
def get_alpha_beta(l):
    """
    Generate (alpha, beta) name pairs formatted with one decimal place.

    Each item of ``l`` is read at positions 0 (alpha) and 1 (beta) and both
    values are rendered with the ``'{:.1f}'`` format, so the generator yields
    tuples of strings. Once ``l`` is exhausted, one final ``(None, None)``
    tuple is yielded as an end-of-sequence sentinel.

    """
    one_decimal = '{:.1f}'.format
    for pair in l:
        yield (one_decimal(pair[0]), one_decimal(pair[1]))

    # Sentinel marking the end of the parameter combinations.
    yield (None, None)

def read_clixo_stats(fn):
    """
    Read summary counts from the '#'-prefixed lines of the file ``fn``.

    Only lines starting with '#' are inspected. When such a line contains one
    of the known labels, its last whitespace-separated token is converted with
    ``int`` and stored under the matching key. Labels are tested in the order
    listed below and the first one found in a line wins.

    Returns a dict with the keys 'num_valid_clusters', 'largest_cluster',
    'num_edges_in_clustergraph' and 'num_clusters'; a key whose label never
    appears keeps the value ``np.nan``.
    """
    # (label searched for in the line, key of the result dict)
    labels = (
        ('Num valid clusters', 'num_valid_clusters'),
        ('Largest cluster', 'largest_cluster'),
        ('Num edges in clusterGraph', 'num_edges_in_clustergraph'),
        ('Num clusters', 'num_clusters'),
    )
    stats = {
        'num_valid_clusters': np.nan,
        'largest_cluster': np.nan,
        'num_edges_in_clustergraph': np.nan,
        'num_clusters': np.nan,
    }
    with open(fn) as handle:
        for row in handle:
            if not row.startswith('#'):
                continue
            for label, key in labels:
                if label in row:
                    stats[key] = int(row.strip().split()[-1])
                    break

    return stats

In [66]:
# Statistics of every result file, keyed by the (option, alpha, beta) values
# parsed from the file's path.
clixo_stats = {}
# Raw string: `\.` must reach the regex engine as an escaped dot; in a normal
# string literal it is an invalid escape sequence that triggers a warning.
pattern = r'option([0-9]).*alpha([0-9]*\.[0-9]*)_beta([0-9]*\.[0-9]*)'
# sorted() makes the processing order independent of the directory listing order.
for fn in sorted(glob.glob('../output/run_clixo/option*/*')):

    params = re.search(pattern, fn)
    if params is None:
        # The path does not carry option/alpha/beta in its name (some other
        # file in the directory); skip it instead of failing on None.groups().
        continue

    # NOTE: this rebinds the notebook-level names `option`, `alpha`, `beta`
    # to floats parsed from the path.
    option, alpha, beta = [float(x) for x in params.groups()]

    stats = read_clixo_stats(fn)
    clixo_stats[(option, alpha, beta)] = stats

In [67]:
# One row per entry of `clixo_stats`. The tuple keys form the index, so move
# them into regular columns (named level_0..level_2 by reset_index) and give
# those columns their parameter names.
clixo_stats_df = (
    pd.DataFrame.from_dict(clixo_stats, orient='index')
    .reset_index()
    .rename(columns={'level_0': 'option',
                     'level_1': 'alpha',
                     'level_2': 'beta'})
)

In [68]:
# Bare last expression of the cell: renders the statistics table as output.
clixo_stats_df

Unnamed: 0,option,alpha,beta,num_valid_clusters,largest_cluster,num_edges_in_clustergraph,num_clusters
0,2.0,0.1,0.5,59,1196,715323,59.0
1,2.0,0.1,0.6,381,835,361627,381.0
2,2.0,0.1,0.7,799,510,156491,799.0
3,2.0,0.1,0.8,1425,115,35240,1425.0
4,2.0,0.1,0.9,1584,49,28182,1584.0
5,2.0,0.1,1.0,1696,49,26022,1696.0
6,2.0,0.1,1.1,1696,49,26022,1696.0
7,2.0,0.2,0.5,14,1199,720712,14.0
8,2.0,0.2,0.6,213,801,329422,213.0
9,2.0,0.2,0.7,245,526,165721,245.0
