In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet, ElasticNetCV, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
np.warnings.filterwarnings("ignore")
import statsmodels.api as sm
np.warnings.resetwarnings()
import os

In [2]:
def getElasticNetBestHyperparams(X, y):
    l1_ratios = [.1, .5, .7, .9, .95, .99, 1]
    min_mse = 1
    best_l1_ratio = 1
    best_alpha = 1
    params_list = []
    for ratio in l1_ratios:
#         encv = ElasticNetCV(l1_ratio = ratio, n_alphas = 100, cv = 5, verbose = 1, precompute = True, max_iter=2500, n_jobs = -1)
        encv = ElasticNetCV(l1_ratio = ratio, n_alphas = 10, cv = 3, verbose = 1, precompute = True, max_iter=2500, n_jobs = -1)
        encv.fit(X, y)
        n_nonzeros = (encv.coef_ != 0).sum()
        _mse = np.mean(encv.mse_path_, axis=1)[np.where(encv.alphas_ == encv.alpha_)[0][0]]
        if (ratio == l1_ratios[0] or _mse < min_mse):
            min_mse = _mse
            best_l1_ratio = ratio
            best_alpha = encv.alpha_
        print("ratio(%e) -- n: %d -- alpha: %f -- mse: %f" % (ratio, n_nonzeros, encv.alpha_, _mse))
        if n_nonzeros == 0:
            break
        params_list.append(tuple([n_nonzeros, _mse, ratio, encv.alpha_]))
    # select the simplest model whose mean-squared error is 'not so bad'
    sorted_params_list = sorted(params_list)
    for param_tuple in sorted_params_list:
        if param_tuple[1] - min_mse <= 0.1:
            print("for target_gene " + y.name + ", nonzero_coeffs_num, MSE, l1-ratio, alpha:")
            print(param_tuple)
            return param_tuple
    return tuple([0, 0, 0, 0])

In [3]:
def getElasticNetSelectedFeatures(X, y, alpha, l1_ratio):
    enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=2000)
    enet.fit(X, y)
    selected_features = []
    for ix in range(enet.coef_.shape[0]):
        if (enet.coef_[ix] != 0):
            selected_features.append(ix)
    return list(X.columns[selected_features])

In [4]:
def getOlsSelectedFeatures(X, y, threshold):
    X = sm.add_constant(X)
    ols_model_results = sm.OLS(y, X).fit()
    p_values = ols_model_results.pvalues
    p_values = p_values.where(p_values < threshold)
    p_values.dropna(inplace=True)
    return list(p_values.index)[1:]

In [5]:
def getRidgeRegressionBestHyperparams(X, y):
    ridge_cv = RidgeCV(alphas=np.linspace(.0001,1,100), cv=10)  # check this
    ridge_cv.fit(X, y)
    best_alpha = ridge_cv.alpha_
    y_pred = ridge_cv.predict(X)
    rmse_train = np.sqrt(mean_squared_error(y, y_pred))
    r2_train = ridge_cv.score(X, y)
    return tuple([best_alpha, rmse_train, r2_train])  # (best alpha, rmse_train, r2_train)

In [6]:
def getAdjustedR2(R2, num_samples, num_features):
    adjusted_R2 = (1-R2) * (num_samples-1) / (num_samples-num_features-1)
    return adjusted_R2

In [7]:
def trainRidgeRegressionModel(X, y):
    # hold-out a small percentage of the samples as test set
    num_total_samples = X.shape[0]
    num_features = X.shape[1]
    test_set_size = int(0.2*num_total_samples)
    num_training_samples = num_total_samples - test_set_size
    test_row_ix = X.index[list(np.random.choice(a=num_total_samples, replace=False, size=test_set_size))]
    X_test = X.loc[test_row_ix]
    y_test = y[test_row_ix]
    X_train = X.drop(test_row_ix, axis=0, inplace=False)
    y_train = y.drop(test_row_ix, inplace=False)
    alpha, rmse_train, r2_train = getRidgeRegressionBestHyperparams(X_train, y_train)
    adj_r2_train = getAdjustedR2(r2_train, X_train.shape[0], X_train.shape[1])
    ridge_reg_model = Ridge(fit_intercept=True, alpha=alpha)
    ridge_reg_model.fit(X_train, y_train)
    r2_test = ridge_reg_model.score(X_test, y_test)
    adj_r2_test = getAdjustedR2(r2_test, X_test.shape[0], X_test.shape[1])
    y_test_pred = ridge_reg_model.predict(X_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
    return tuple([alpha, rmse_train, r2_train, adj_r2_train, rmse_test, r2_test, adj_r2_test, test_set_size])

In [8]:
def runLinearRegressionPipeline(X, y, output_file_name):
    temp_file = 'temp.txt'
    eve_genes = ['hb', 'Kr', 'gt']
    # create new file for this run
    with open(temp_file, "w") as fp:
        # feature selection using Elastic Net
        _, _, l1_ratio, alpha = getElasticNetBestHyperparams(X, y)
        fp.write("l1_ratio:" + str(l1_ratio) + "\n");
        fp.write("alpha:" + str(alpha) + "\n")
        if l1_ratio == 0:
            print("no features selected for " + output_file_name)
            fp.write("0 features selected by ElasticNet")
        else:
            select_features_1 = getElasticNetSelectedFeatures(X, y, alpha, l1_ratio)
            # if target gene is 'eve'
            if y.name == 'eve':
                for gene in eve_genes:
                    if gene not in select_features_1:
                        select_features_1.append(gene)
            fp.write("select_features_1:")
            for feature in select_features_1:
                fp.write(feature + ",")
            fp.write("\n")
            X_reduced_1 = X[select_features_1]
            # feature selection using OLS
            select_features_2 = getOlsSelectedFeatures(X_reduced_1, y, 0.05)
            if len(select_features_2) == 0:
                X_reduced_2 = X_reduced_1
            else:
                if y.name == 'eve':  # if target gene is 'eve'
                    for gene in eve_genes:
                        if gene not in select_features_2:
                            select_features_2.append(gene)
                X_reduced_2 = X_reduced_1[select_features_2]
            fp.write("select_features_2:")
            for feature in select_features_2:
                fp.write(feature + ",")
            fp.write("\n")
            # train Ridge regression model with final set of features
            alpha, rmse_train, r2_train, adj_r2_train, rmse_test, r2_test, adj_r2_test, test_set_size = trainRidgeRegressionModel(X_reduced_2, y)
            fp.write("ridge_alpha:" + str(alpha) + "\n")
            fp.write("rmse_train:" + str(rmse_train) + "\n")
            fp.write("r2_train:" + str(r2_train) + "\n")
            fp.write("adj_r2_train:" + str(adj_r2_train) + "\n")
            fp.write("test_set_size:" + str(test_set_size) + "\n")
            fp.write("rmse_test:" + str(rmse_test) + "\n")
            fp.write("r2_test:" + str(r2_test) + "\n")
            fp.write("adj_r2_test:" + str(adj_r2_test) + "\n")
    os.rename(temp_file, output_file_name)

In [9]:
def modellingUsingCompleteEmbryo(cells_genes_df, genes_set, path):
    for target_gene in genes_set:
        print("modelling using complete embryo for target gene " + target_gene)
        y = cells_genes_df[target_gene].copy(deep=True)
        X = cells_genes_df.drop({target_gene}, axis=1)
        output_file_name = path + target_gene + "_complete.txt"
        # check if file already exists, if it does, then continue to next target_gene
        if (os.path.isfile(output_file_name)):
            print(output_file_name + " already exists!")
            continue
        runLinearRegressionPipeline(X, y, output_file_name)

In [10]:
def modellingPerCluster(cells_genes_df, cell_cluster_labels, gene_cluster_map, path):
    cells_genes_df["cluster_labels"] = cell_cluster_labels
    for target_gene, clusters in gene_cluster_map.items():
        X_cluster = cells_genes_df[cells_genes_df["cluster_labels"].isin(clusters)]
        print("modelling using cluster(s) " + ','.join(clusters) + " for target gene " + target_gene)
        y = X_cluster[target_gene].copy(deep=True)
        X = X_cluster.drop({target_gene, "cluster_labels"}, axis=1)
        print('original df shape: ', cells_genes_df.shape)
        print('cluster df shape: ', X_cluster.shape)
        output_file_name = path + target_gene + "_cluster_" + '_'.join(clusters) + ".txt"
        if (os.path.isfile(output_file_name)):
            print(output_file_name + " already exists!")
            continue
        runLinearRegressionPipeline(X, y, output_file_name)

In [11]:
def getTopGenesPerCluster():
    cluster_top_genes = {}
    cluster_top_genes_file = "cluster_top_genes.csv"
    with open(cluster_top_genes_file, "r") as fp:
        for line in fp:
            cluster_no = int(line.split(", ")[0].strip())
            top_genes = line.split(", ")[1:]
            top_genes = [gene.strip() for gene in top_genes]
            cluster_top_genes[cluster_no] = top_genes
    return cluster_top_genes

In [12]:
def getClusterLabels(X):
    cell_cluster_labels = []
    for cell_name in list(X.index):
        cell_cluster_labels.append(cell_name.split("_")[1])
    return cell_cluster_labels

In [13]:
def getTargetGenesWithClusters():
    target_gene_cluster_map = {
        'twe': ['4', '2'],
        'sna': ['4', '2'],
        'htl': ['4'],
        'tin': ['4'],
        'eve': ['3'],
        'brk': ['3', '5'],
        'vnd': ['3'],
        'rho': ['6', '11'],
        'sli': ['3', '6'],
        'pnt': ['3', '5'],
        'ind': ['3'],
        'sog': ['3', '8'],
        'ths': ['3'],
        'zen': ['6', '10'],
        'pnr': ['6', '3'],
        'shn': ['4', '3'],
        'tup': ['6', '10'],
        'ush': ['6', '10']
    }
    return target_gene_cluster_map

In [14]:
def main():
    data_file_name = "dge_normalized.txt"
    gene_sc_df = pd.read_csv(data_file_name, delimiter='\t', header=0)
    cells_genes_df = gene_sc_df.T
    cell_cluster_labels = getClusterLabels(cells_genes_df)
    cluster_top_genes = getTopGenesPerCluster()
    genes_set = set()
    gene_cluster_map = {}
    for cluster, top_genes in cluster_top_genes.items():
        genes_set.update(top_genes)
        for gene in top_genes:
            if gene not in gene_cluster_map:
                gene_cluster_map[gene] = [str(cluster)]
            else:
                gene_cluster_map[gene].append(str(cluster))
    modellingUsingCompleteEmbryo(cells_genes_df, genes_set)
    modellingPerCluster(cells_genes_df, cell_cluster_labels, gene_cluster_map)

In [16]:
def new_main():
    data_file_name = "dge_normalized.txt"
    gene_sc_df = pd.read_csv(data_file_name, delimiter='\t', header=0)
    cells_genes_df = gene_sc_df.T
    cell_cluster_labels = getClusterLabels(cells_genes_df)
    target_gene_cluster_map = getTargetGenesWithClusters()
    genes_set = set(target_gene_cluster_map.keys())
    # run the pipeline for target gene using all the cells of the embryo
    modellingUsingCompleteEmbryo(cells_genes_df, genes_set, "using_embryo_2/")
    # run the pipeline for target gene using cells from the designated clusters
    modellingPerCluster(cells_genes_df, cell_cluster_labels, target_gene_cluster_map, "using_clusters_2/")

In [17]:
new_main()

modelling using complete embryo for target gene ths


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.7min finished


ratio(1.000000e-01) -- n: 231 -- alpha: 1.145137 -- mse: 2.056864


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(5.000000e-01) -- n: 38 -- alpha: 0.493425 -- mse: 2.086462


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(7.000000e-01) -- n: 37 -- alpha: 0.352446 -- mse: 2.087072


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(9.000000e-01) -- n: 34 -- alpha: 0.274125 -- mse: 2.088204


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(9.500000e-01) -- n: 33 -- alpha: 0.259697 -- mse: 2.088458


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(9.900000e-01) -- n: 33 -- alpha: 0.249204 -- mse: 2.088653


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(1.000000e+00) -- n: 33 -- alpha: 0.246712 -- mse: 2.088701
for target_gene ths, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(33, 2.0884576900023162, 0.95, 0.25969720364617715)
modelling using complete embryo for target gene pnr


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(1.000000e-01) -- n: 116 -- alpha: 1.298463 -- mse: 1.228623


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(5.000000e-01) -- n: 87 -- alpha: 0.259693 -- mse: 1.223757


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(7.000000e-01) -- n: 85 -- alpha: 0.185495 -- mse: 1.223656


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.000000e-01) -- n: 84 -- alpha: 0.144274 -- mse: 1.223663


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.500000e-01) -- n: 84 -- alpha: 0.136680 -- mse: 1.223672


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.900000e-01) -- n: 84 -- alpha: 0.131158 -- mse: 1.223682


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(1.000000e+00) -- n: 84 -- alpha: 0.129846 -- mse: 1.223684
for target_gene pnr, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(84, 1.2236631841358452, 0.9, 0.14427371309149167)
modelling using complete embryo for target gene tup


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(1.000000e-01) -- n: 56 -- alpha: 1.453965 -- mse: 1.092551


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(5.000000e-01) -- n: 37 -- alpha: 0.290793 -- mse: 1.080984


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(7.000000e-01) -- n: 37 -- alpha: 0.207709 -- mse: 1.080305


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(9.000000e-01) -- n: 37 -- alpha: 0.161552 -- mse: 1.080052


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(9.500000e-01) -- n: 36 -- alpha: 0.153049 -- mse: 1.080025


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(9.900000e-01) -- n: 36 -- alpha: 0.146865 -- mse: 1.080026


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(1.000000e+00) -- n: 36 -- alpha: 0.145397 -- mse: 1.080027
for target_gene tup, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(36, 1.0800246932915873, 0.95, 0.15304899977968636)
modelling using complete embryo for target gene rho


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.4min finished


ratio(1.000000e-01) -- n: 237 -- alpha: 1.298175 -- mse: 2.772386


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(5.000000e-01) -- n: 34 -- alpha: 0.559367 -- mse: 2.755155


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(7.000000e-01) -- n: 32 -- alpha: 0.399548 -- mse: 2.752137


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(9.000000e-01) -- n: 32 -- alpha: 0.310759 -- mse: 2.752270


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(9.500000e-01) -- n: 32 -- alpha: 0.294404 -- mse: 2.752383


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(9.900000e-01) -- n: 32 -- alpha: 0.282509 -- mse: 2.752492


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(1.000000e+00) -- n: 32 -- alpha: 0.279683 -- mse: 2.752522
for target_gene rho, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(32, 2.7521367368598675, 0.7, 0.39954773811105437)
modelling using complete embryo for target gene pnt


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.5min finished


ratio(1.000000e-01) -- n: 102 -- alpha: 1.842555 -- mse: 2.633587


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(5.000000e-01) -- n: 82 -- alpha: 0.368511 -- mse: 2.635649


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.1min finished


ratio(7.000000e-01) -- n: 81 -- alpha: 0.263222 -- mse: 2.636958


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(9.000000e-01) -- n: 81 -- alpha: 0.204728 -- mse: 2.638008


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(9.500000e-01) -- n: 81 -- alpha: 0.193953 -- mse: 2.638229


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.8min finished


ratio(9.900000e-01) -- n: 80 -- alpha: 0.186117 -- mse: 2.638395


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.6min finished


ratio(1.000000e+00) -- n: 79 -- alpha: 0.184256 -- mse: 2.638435
for target_gene pnt, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(79, 2.6384349741801749, 1, 0.1842555392068827)
modelling using complete embryo for target gene zen


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(1.000000e-01) -- n: 103 -- alpha: 0.918039 -- mse: 0.805954


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(5.000000e-01) -- n: 76 -- alpha: 0.183608 -- mse: 0.796036


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(7.000000e-01) -- n: 74 -- alpha: 0.131148 -- mse: 0.795430


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.000000e-01) -- n: 73 -- alpha: 0.102004 -- mse: 0.795174


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.500000e-01) -- n: 73 -- alpha: 0.096636 -- mse: 0.795133


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.900000e-01) -- n: 73 -- alpha: 0.092731 -- mse: 0.795101


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(1.000000e+00) -- n: 72 -- alpha: 0.091804 -- mse: 0.795094
for target_gene zen, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(72, 0.79509433090674853, 1, 0.091803913387566202)
modelling using complete embryo for target gene htl


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(1.000000e-01) -- n: 203 -- alpha: 0.917811 -- mse: 1.154260


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(5.000000e-01) -- n: 35 -- alpha: 0.395473 -- mse: 1.158958


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(7.000000e-01) -- n: 34 -- alpha: 0.282481 -- mse: 1.160707


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(9.000000e-01) -- n: 33 -- alpha: 0.219707 -- mse: 1.161683


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(9.500000e-01) -- n: 33 -- alpha: 0.208144 -- mse: 1.161886


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(9.900000e-01) -- n: 33 -- alpha: 0.199734 -- mse: 1.162037


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(1.000000e+00) -- n: 33 -- alpha: 0.197736 -- mse: 1.162074
for target_gene htl, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(33, 1.1616831225998394, 0.9, 0.21970706236231879)
modelling using complete embryo for target gene twe


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(1.000000e-01) -- n: 1 -- alpha: 1.698730 -- mse: 0.541660


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.4min finished


ratio(5.000000e-01) -- n: 1 -- alpha: 0.339746 -- mse: 0.541705


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.5min finished


ratio(7.000000e-01) -- n: 1 -- alpha: 0.242676 -- mse: 0.541709


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.5min finished


ratio(9.000000e-01) -- n: 0 -- alpha: 0.188748 -- mse: 0.541711
for target_gene twe, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(1, 0.54166026998583883, 0.1, 1.6987298950129852)
modelling using complete embryo for target gene sli


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(1.000000e-01) -- n: 101 -- alpha: 2.339495 -- mse: 2.617391


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(5.000000e-01) -- n: 75 -- alpha: 0.467899 -- mse: 2.634144


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  3.6min finished


ratio(7.000000e-01) -- n: 72 -- alpha: 0.334214 -- mse: 2.637225


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(9.000000e-01) -- n: 70 -- alpha: 0.259944 -- mse: 2.639135


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(9.500000e-01) -- n: 70 -- alpha: 0.246263 -- mse: 2.639520


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(9.900000e-01) -- n: 70 -- alpha: 0.236313 -- mse: 2.639806


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(1.000000e+00) -- n: 70 -- alpha: 0.233949 -- mse: 2.639873
for target_gene sli, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(70, 2.6391353040632688, 0.9, 0.25994387803040664)
modelling using complete embryo for target gene vnd


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.7min finished


ratio(1.000000e-01) -- n: 82 -- alpha: 1.123585 -- mse: 1.019111


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(5.000000e-01) -- n: 67 -- alpha: 0.224717 -- mse: 1.020660


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(7.000000e-01) -- n: 67 -- alpha: 0.160512 -- mse: 1.021596


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.000000e-01) -- n: 66 -- alpha: 0.124843 -- mse: 1.022275


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.500000e-01) -- n: 66 -- alpha: 0.118272 -- mse: 1.022406


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(9.900000e-01) -- n: 66 -- alpha: 0.113493 -- mse: 1.022502


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(1.000000e+00) -- n: 66 -- alpha: 0.112358 -- mse: 1.022525
for target_gene vnd, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(66, 1.0222753514378968, 0.9, 0.12484272817285622)
modelling using complete embryo for target gene shn


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(1.000000e-01) -- n: 106 -- alpha: 1.878001 -- mse: 2.629248


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.9min finished


ratio(5.000000e-01) -- n: 90 -- alpha: 0.375600 -- mse: 2.662711


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(7.000000e-01) -- n: 90 -- alpha: 0.268286 -- mse: 2.667855


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.5min finished


ratio(9.000000e-01) -- n: 88 -- alpha: 0.208667 -- mse: 2.671347


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(9.500000e-01) -- n: 88 -- alpha: 0.197684 -- mse: 2.672040


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.7min finished


ratio(9.900000e-01) -- n: 88 -- alpha: 0.189697 -- mse: 2.672572


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(1.000000e+00) -- n: 88 -- alpha: 0.187800 -- mse: 2.672699
for target_gene shn, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(88, 2.6713473069390634, 0.9, 0.20866681625249109)
modelling using complete embryo for target gene sna


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(1.000000e-01) -- n: 76 -- alpha: 1.427040 -- mse: 1.256797


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(5.000000e-01) -- n: 55 -- alpha: 0.285408 -- mse: 1.245293


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(7.000000e-01) -- n: 54 -- alpha: 0.203863 -- mse: 1.245353


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.000000e-01) -- n: 53 -- alpha: 0.158560 -- mse: 1.245825


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.500000e-01) -- n: 52 -- alpha: 0.150215 -- mse: 1.245950


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.900000e-01) -- n: 52 -- alpha: 0.144145 -- mse: 1.246056


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(1.000000e+00) -- n: 52 -- alpha: 0.142704 -- mse: 1.246089
for target_gene sna, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(52, 1.2459501108391535, 0.95, 0.15021473497657595)
modelling using complete embryo for target gene ush


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(1.000000e-01) -- n: 139 -- alpha: 1.051147 -- mse: 1.156323


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(5.000000e-01) -- n: 117 -- alpha: 0.210229 -- mse: 1.171920


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(7.000000e-01) -- n: 115 -- alpha: 0.150164 -- mse: 1.173940


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.000000e-01) -- n: 114 -- alpha: 0.116794 -- mse: 1.175228


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.500000e-01) -- n: 114 -- alpha: 0.110647 -- mse: 1.175483


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.900000e-01) -- n: 114 -- alpha: 0.106176 -- mse: 1.175674


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(1.000000e+00) -- n: 114 -- alpha: 0.105115 -- mse: 1.175719
for target_gene ush, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(114, 1.1752280973386, 0.9, 0.11679412226406369)
modelling using complete embryo for target gene tin


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(1.000000e-01) -- n: 289 -- alpha: 0.871545 -- mse: 1.435812


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.7min finished


ratio(5.000000e-01) -- n: 245 -- alpha: 0.174309 -- mse: 1.460392


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(7.000000e-01) -- n: 47 -- alpha: 0.268241 -- mse: 1.459588


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.000000e-01) -- n: 47 -- alpha: 0.208632 -- mse: 1.458824


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.500000e-01) -- n: 46 -- alpha: 0.197651 -- mse: 1.458693


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.7min finished


ratio(9.900000e-01) -- n: 46 -- alpha: 0.189665 -- mse: 1.458575


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.6min finished


ratio(1.000000e+00) -- n: 46 -- alpha: 0.187769 -- mse: 1.458547
for target_gene tin, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(46, 1.4585469011623176, 1, 0.18776874375677421)
modelling using complete embryo for target gene ind


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(1.000000e-01) -- n: 487 -- alpha: 0.608400 -- mse: 1.456743


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.1min finished


ratio(5.000000e-01) -- n: 81 -- alpha: 0.262152 -- mse: 1.446965


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(7.000000e-01) -- n: 78 -- alpha: 0.187251 -- mse: 1.446564


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished


ratio(9.000000e-01) -- n: 79 -- alpha: 0.145640 -- mse: 1.446472


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.1min finished


ratio(9.500000e-01) -- n: 79 -- alpha: 0.137974 -- mse: 1.446458


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(9.900000e-01) -- n: 79 -- alpha: 0.132400 -- mse: 1.446450


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.9min finished


ratio(1.000000e+00) -- n: 79 -- alpha: 0.131076 -- mse: 1.446448
for target_gene ind, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(78, 1.4465641636856859, 0.7, 0.1872510770488364)
modelling using complete embryo for target gene eve


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(1.000000e-01) -- n: 117 -- alpha: 1.538603 -- mse: 2.182983


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(5.000000e-01) -- n: 96 -- alpha: 0.307721 -- mse: 2.202442


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(7.000000e-01) -- n: 93 -- alpha: 0.219800 -- mse: 2.207079


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(9.000000e-01) -- n: 93 -- alpha: 0.170956 -- mse: 2.210168


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.3min finished


ratio(9.500000e-01) -- n: 93 -- alpha: 0.161958 -- mse: 2.210785


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.5min finished


ratio(9.900000e-01) -- n: 93 -- alpha: 0.155414 -- mse: 2.211247


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


ratio(1.000000e+00) -- n: 93 -- alpha: 0.153860 -- mse: 2.211358
for target_gene eve, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(93, 2.2070793532205819, 0.7, 0.21980045471998513)
modelling using complete embryo for target gene sog


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


ratio(1.000000e-01) -- n: 284 -- alpha: 1.182830 -- mse: 2.585005


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(5.000000e-01) -- n: 48 -- alpha: 0.509666 -- mse: 2.639706


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(7.000000e-01) -- n: 47 -- alpha: 0.364047 -- mse: 2.645630


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(9.000000e-01) -- n: 46 -- alpha: 0.283148 -- mse: 2.650110


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.500000e-01) -- n: 46 -- alpha: 0.268245 -- mse: 2.651045


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished


ratio(9.900000e-01) -- n: 46 -- alpha: 0.257407 -- mse: 2.651666


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(1.000000e+00) -- n: 46 -- alpha: 0.254833 -- mse: 2.651810
for target_gene sog, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(46, 2.6501099371156975, 0.9, 0.28314784912560698)
modelling using complete embryo for target gene brk


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished


ratio(1.000000e-01) -- n: 277 -- alpha: 0.987169 -- mse: 1.770148


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(5.000000e-01) -- n: 30 -- alpha: 0.425358 -- mse: 1.787169


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(7.000000e-01) -- n: 31 -- alpha: 0.303827 -- mse: 1.791134


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.000000e-01) -- n: 30 -- alpha: 0.236310 -- mse: 1.793269


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.500000e-01) -- n: 30 -- alpha: 0.223873 -- mse: 1.793657


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.900000e-01) -- n: 30 -- alpha: 0.214827 -- mse: 1.793947


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(1.000000e+00) -- n: 30 -- alpha: 0.212679 -- mse: 1.794017
for target_gene brk, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(30, 1.7871687073320726, 0.5, 0.42535821155048542)
modelling using cluster(s) 4,2 for target gene twe
original df shape:  (1297, 8925)
cluster df shape:  (347, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   23.2s finished


ratio(1.000000e-01) -- n: 1 -- alpha: 2.971907 -- mse: 0.555712


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   26.2s finished


ratio(5.000000e-01) -- n: 1 -- alpha: 0.594381 -- mse: 0.555339


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   35.5s finished


ratio(7.000000e-01) -- n: 1 -- alpha: 0.424558 -- mse: 0.555300


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.7s finished


ratio(9.000000e-01) -- n: 1 -- alpha: 0.330212 -- mse: 0.555277


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.7s finished


ratio(9.500000e-01) -- n: 1 -- alpha: 0.312832 -- mse: 0.555273


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.9s finished


ratio(9.900000e-01) -- n: 0 -- alpha: 0.300193 -- mse: 0.555270
for target_gene twe, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(1, 0.55527310213460879, 0.95, 0.31283236250804825)
modelling using cluster(s) 4,2 for target gene sna
original df shape:  (1297, 8925)
cluster df shape:  (347, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   27.6s finished


ratio(1.000000e-01) -- n: 175 -- alpha: 1.331645 -- mse: 1.248764


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   22.4s finished


ratio(5.000000e-01) -- n: 127 -- alpha: 0.266329 -- mse: 1.291358


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   23.5s finished


ratio(7.000000e-01) -- n: 121 -- alpha: 0.190235 -- mse: 1.303293


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.3s finished


ratio(9.000000e-01) -- n: 30 -- alpha: 0.318771 -- mse: 1.310068


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   31.9s finished


ratio(9.500000e-01) -- n: 30 -- alpha: 0.301994 -- mse: 1.310372


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.8s finished


ratio(9.900000e-01) -- n: 30 -- alpha: 0.289792 -- mse: 1.310693


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   26.7s finished


ratio(1.000000e+00) -- n: 30 -- alpha: 0.286894 -- mse: 1.310780
for target_gene sna, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(30, 1.3100677124488047, 0.9, 0.31877142970563171)
modelling using cluster(s) 4 for target gene htl
original df shape:  (1297, 8925)
cluster df shape:  (298, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   26.2s finished


ratio(1.000000e-01) -- n: 66 -- alpha: 3.104721 -- mse: 2.041798


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   19.7s finished


ratio(5.000000e-01) -- n: 48 -- alpha: 0.620944 -- mse: 2.072700


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   21.7s finished


ratio(7.000000e-01) -- n: 47 -- alpha: 0.443532 -- mse: 2.078216


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   32.4s finished


ratio(9.000000e-01) -- n: 44 -- alpha: 0.344969 -- mse: 2.081993


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   22.8s finished


ratio(9.500000e-01) -- n: 44 -- alpha: 0.326813 -- mse: 2.082786


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   23.0s finished


ratio(9.900000e-01) -- n: 44 -- alpha: 0.313608 -- mse: 2.083356


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.0s finished


ratio(1.000000e+00) -- n: 44 -- alpha: 0.310472 -- mse: 2.083493
for target_gene htl, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(44, 2.0819929556854047, 0.9, 0.3449689948895342)
modelling using cluster(s) 4 for target gene tin
original df shape:  (1297, 8925)
cluster df shape:  (298, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.9s finished


ratio(1.000000e-01) -- n: 27 -- alpha: 4.177193 -- mse: 2.032390


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   19.5s finished


ratio(5.000000e-01) -- n: 19 -- alpha: 0.835439 -- mse: 2.050896


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   20.9s finished


ratio(7.000000e-01) -- n: 19 -- alpha: 0.596742 -- mse: 2.057298


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.3s finished


ratio(9.000000e-01) -- n: 19 -- alpha: 0.464133 -- mse: 2.062420


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.5s finished


ratio(9.500000e-01) -- n: 19 -- alpha: 0.439705 -- mse: 2.063474


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.1s finished


ratio(9.900000e-01) -- n: 19 -- alpha: 0.421939 -- mse: 2.064272


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   26.0s finished


ratio(1.000000e+00) -- n: 19 -- alpha: 0.417719 -- mse: 2.064466
for target_gene tin, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(19, 2.0508955622154992, 0.5, 0.83543868198862847)
modelling using cluster(s) 3 for target gene eve
original df shape:  (1297, 8925)
cluster df shape:  (632, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   32.2s finished


ratio(1.000000e-01) -- n: 175 -- alpha: 1.897519 -- mse: 2.739305


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   30.3s finished


ratio(5.000000e-01) -- n: 20 -- alpha: 0.817616 -- mse: 2.813691


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   33.7s finished


ratio(7.000000e-01) -- n: 19 -- alpha: 0.584011 -- mse: 2.808560


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   40.8s finished


ratio(9.000000e-01) -- n: 19 -- alpha: 0.454231 -- mse: 2.806037


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   41.6s finished


ratio(9.500000e-01) -- n: 19 -- alpha: 0.430324 -- mse: 2.805606


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   42.3s finished


ratio(9.900000e-01) -- n: 19 -- alpha: 0.412937 -- mse: 2.805304


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   42.6s finished


ratio(1.000000e+00) -- n: 19 -- alpha: 0.408808 -- mse: 2.805233
for target_gene eve, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(19, 2.8052333137238605, 1, 0.4088079993075458)
modelling using cluster(s) 3,5 for target gene brk
original df shape:  (1297, 8925)
cluster df shape:  (714, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   34.5s finished


ratio(1.000000e-01) -- n: 260 -- alpha: 1.230836 -- mse: 1.911883


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.6s finished


ratio(5.000000e-01) -- n: 46 -- alpha: 0.530351 -- mse: 1.921308


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.0s finished


ratio(7.000000e-01) -- n: 43 -- alpha: 0.378822 -- mse: 1.922082


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   31.0s finished


ratio(9.000000e-01) -- n: 42 -- alpha: 0.294640 -- mse: 1.922768


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.3s finished


ratio(9.500000e-01) -- n: 41 -- alpha: 0.279132 -- mse: 1.922920


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.1s finished


ratio(9.900000e-01) -- n: 40 -- alpha: 0.267854 -- mse: 1.923044


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.9s finished


ratio(1.000000e+00) -- n: 40 -- alpha: 0.265176 -- mse: 1.923073
for target_gene brk, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(40, 1.9230443215522197, 0.99, 0.26785410681843358)
modelling using cluster(s) 3 for target gene vnd
original df shape:  (1297, 8925)
cluster df shape:  (632, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   23.3s finished


ratio(1.000000e-01) -- n: 52 -- alpha: 1.838139 -- mse: 1.329655


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.9s finished


ratio(5.000000e-01) -- n: 49 -- alpha: 0.367628 -- mse: 1.348758


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.5s finished


ratio(7.000000e-01) -- n: 49 -- alpha: 0.262591 -- mse: 1.353050


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.3s finished


ratio(9.000000e-01) -- n: 47 -- alpha: 0.204238 -- mse: 1.355753


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.5s finished


ratio(9.500000e-01) -- n: 47 -- alpha: 0.193488 -- mse: 1.356236


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   28.9s finished


ratio(9.900000e-01) -- n: 47 -- alpha: 0.185671 -- mse: 1.356596


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.1s finished


ratio(1.000000e+00) -- n: 47 -- alpha: 0.183814 -- mse: 1.356671
for target_gene vnd, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(47, 1.3557533999365143, 0.9, 0.20423762902575562)
modelling using cluster(s) 6,11 for target gene rho
original df shape:  (1297, 8925)
cluster df shape:  (59, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.3s finished


ratio(1.000000e-01) -- n: 106 -- alpha: 2.381466 -- mse: 1.324136


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    8.6s finished


ratio(5.000000e-01) -- n: 15 -- alpha: 1.026142 -- mse: 1.094822


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    8.0s finished


ratio(7.000000e-01) -- n: 15 -- alpha: 0.732959 -- mse: 1.021298


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.0s finished


ratio(9.000000e-01) -- n: 10 -- alpha: 0.570079 -- mse: 0.981644


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    8.7s finished


ratio(9.500000e-01) -- n: 10 -- alpha: 0.540075 -- mse: 0.974165


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.7s finished


ratio(9.900000e-01) -- n: 10 -- alpha: 0.518254 -- mse: 0.969013


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    8.6s finished


ratio(1.000000e+00) -- n: 10 -- alpha: 0.513071 -- mse: 0.967569
for target_gene rho, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(10, 0.96756918475190623, 1, 0.51307124377652269)
modelling using cluster(s) 3,6 for target gene sli
original df shape:  (1297, 8925)
cluster df shape:  (684, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(1.000000e-01) -- n: 143 -- alpha: 2.197492 -- mse: 2.644573


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   46.2s finished


ratio(5.000000e-01) -- n: 96 -- alpha: 0.439498 -- mse: 2.698799


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   45.7s finished


ratio(7.000000e-01) -- n: 94 -- alpha: 0.313927 -- mse: 2.708645


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   50.7s finished


ratio(9.000000e-01) -- n: 93 -- alpha: 0.244166 -- mse: 2.714939


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   51.2s finished


ratio(9.500000e-01) -- n: 93 -- alpha: 0.231315 -- mse: 2.716224


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.0min finished


ratio(9.900000e-01) -- n: 93 -- alpha: 0.221969 -- mse: 2.717203


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   49.9s finished


ratio(1.000000e+00) -- n: 93 -- alpha: 0.219749 -- mse: 2.717445
for target_gene sli, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(93, 2.7149393693075168, 0.9, 0.24416577074113263)
modelling using cluster(s) 3,5 for target gene pnt
original df shape:  (1297, 8925)
cluster df shape:  (714, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   46.7s finished


ratio(1.000000e-01) -- n: 341 -- alpha: 1.266298 -- mse: 2.817300


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   39.8s finished


ratio(5.000000e-01) -- n: 58 -- alpha: 0.545631 -- mse: 2.798520


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   44.4s finished


ratio(7.000000e-01) -- n: 56 -- alpha: 0.389737 -- mse: 2.794654


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   49.2s finished


ratio(9.000000e-01) -- n: 52 -- alpha: 0.303129 -- mse: 2.792658


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   50.5s finished


ratio(9.500000e-01) -- n: 52 -- alpha: 0.287174 -- mse: 2.792319


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   55.2s finished


ratio(9.900000e-01) -- n: 52 -- alpha: 0.275571 -- mse: 2.792079


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   53.8s finished


ratio(1.000000e+00) -- n: 52 -- alpha: 0.272816 -- mse: 2.792017
for target_gene pnt, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(52, 2.7920167173884685, 1, 0.27281571587784914)
modelling using cluster(s) 3 for target gene ind
original df shape:  (1297, 8925)
cluster df shape:  (632, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   44.6s finished


ratio(1.000000e-01) -- n: 167 -- alpha: 1.734252 -- mse: 2.290381


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   39.4s finished


ratio(5.000000e-01) -- n: 124 -- alpha: 0.346850 -- mse: 2.296959


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   39.7s finished


ratio(7.000000e-01) -- n: 122 -- alpha: 0.247750 -- mse: 2.306950


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   42.3s finished


ratio(9.000000e-01) -- n: 119 -- alpha: 0.192695 -- mse: 2.313375


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   42.5s finished


ratio(9.500000e-01) -- n: 119 -- alpha: 0.182553 -- mse: 2.314511


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   43.4s finished


ratio(9.900000e-01) -- n: 119 -- alpha: 0.175177 -- mse: 2.315336


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   43.1s finished


ratio(1.000000e+00) -- n: 119 -- alpha: 0.173425 -- mse: 2.315529
for target_gene ind, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(119, 2.3133747478218232, 0.9, 0.1926947134384264)
modelling using cluster(s) 3,8 for target gene sog
original df shape:  (1297, 8925)
cluster df shape:  (698, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.3s finished


ratio(1.000000e-01) -- n: 268 -- alpha: 1.420481 -- mse: 2.352408


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   23.4s finished


ratio(5.000000e-01) -- n: 33 -- alpha: 0.612067 -- mse: 2.335948


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.2s finished


ratio(7.000000e-01) -- n: 31 -- alpha: 0.437191 -- mse: 2.336313


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.0s finished


ratio(9.000000e-01) -- n: 28 -- alpha: 0.340037 -- mse: 2.337182


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.9s finished


ratio(9.500000e-01) -- n: 28 -- alpha: 0.322140 -- mse: 2.337437


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.2s finished


ratio(9.900000e-01) -- n: 28 -- alpha: 0.309125 -- mse: 2.337653


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.5s finished


ratio(1.000000e+00) -- n: 28 -- alpha: 0.306033 -- mse: 2.337711
for target_gene sog, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(28, 2.3371818702995628, 0.9, 0.34003710853258462)
modelling using cluster(s) 3 for target gene ths
original df shape:  (1297, 8925)
cluster df shape:  (632, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.6s finished


ratio(1.000000e-01) -- n: 347 -- alpha: 1.119880 -- mse: 2.125709


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.5s finished


ratio(5.000000e-01) -- n: 66 -- alpha: 0.482542 -- mse: 2.247733


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   27.9s finished


ratio(7.000000e-01) -- n: 63 -- alpha: 0.344673 -- mse: 2.263129


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.5s finished


ratio(9.000000e-01) -- n: 61 -- alpha: 0.268079 -- mse: 2.274027


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.2s finished


ratio(9.500000e-01) -- n: 61 -- alpha: 0.253969 -- mse: 2.276324


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.5s finished


ratio(9.900000e-01) -- n: 61 -- alpha: 0.243708 -- mse: 2.278050


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   29.6s finished


ratio(1.000000e+00) -- n: 61 -- alpha: 0.241271 -- mse: 2.278466
for target_gene ths, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(347, 2.125709428026751, 0.1, 1.1198803012347904)
modelling using cluster(s) 6,10 for target gene zen
original df shape:  (1297, 8925)
cluster df shape:  (77, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   24.4s finished


ratio(1.000000e-01) -- n: 193 -- alpha: 0.891033 -- mse: 2.658315


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.1s finished


ratio(5.000000e-01) -- n: 82 -- alpha: 0.178207 -- mse: 2.633316


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.9s finished


ratio(7.000000e-01) -- n: 71 -- alpha: 0.127290 -- mse: 2.668563


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.9s finished


ratio(9.000000e-01) -- n: 69 -- alpha: 0.099004 -- mse: 2.722021


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   11.0s finished


ratio(9.500000e-01) -- n: 68 -- alpha: 0.093793 -- mse: 2.749526


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.5s finished


ratio(9.900000e-01) -- n: 73 -- alpha: 0.041776 -- mse: 2.798784


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.2s finished


ratio(1.000000e+00) -- n: 72 -- alpha: 0.041358 -- mse: 2.803388
for target_gene zen, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(69, 2.7220212393506125, 0.9, 0.099003706761385576)
modelling using cluster(s) 6,3 for target gene pnr
original df shape:  (1297, 8925)
cluster df shape:  (684, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   43.7s finished


ratio(1.000000e-01) -- n: 106 -- alpha: 1.934035 -- mse: 1.526107


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   32.1s finished


ratio(5.000000e-01) -- n: 75 -- alpha: 0.386807 -- mse: 1.538776


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   31.0s finished


ratio(7.000000e-01) -- n: 73 -- alpha: 0.276291 -- mse: 1.544689


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   35.5s finished


ratio(9.000000e-01) -- n: 73 -- alpha: 0.214893 -- mse: 1.548358


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   35.5s finished


ratio(9.500000e-01) -- n: 72 -- alpha: 0.203583 -- mse: 1.549071


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   35.8s finished


ratio(9.900000e-01) -- n: 72 -- alpha: 0.195357 -- mse: 1.549658


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   35.8s finished


ratio(1.000000e+00) -- n: 72 -- alpha: 0.193403 -- mse: 1.549791
for target_gene pnr, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(72, 1.5490706840995667, 0.95, 0.203582589246776)
modelling using cluster(s) 4,3 for target gene shn
original df shape:  (1297, 8925)
cluster df shape:  (930, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(1.000000e-01) -- n: 86 -- alpha: 2.362997 -- mse: 2.526482


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished


ratio(5.000000e-01) -- n: 63 -- alpha: 0.472599 -- mse: 2.548356


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(7.000000e-01) -- n: 61 -- alpha: 0.337571 -- mse: 2.553609


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished


ratio(9.000000e-01) -- n: 60 -- alpha: 0.262555 -- mse: 2.557328


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.500000e-01) -- n: 59 -- alpha: 0.248737 -- mse: 2.558058


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(9.900000e-01) -- n: 59 -- alpha: 0.238687 -- mse: 2.558606


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.3min finished


ratio(1.000000e+00) -- n: 59 -- alpha: 0.236300 -- mse: 2.558739
for target_gene shn, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(59, 2.5580579724285624, 0.95, 0.2487365784592219)
modelling using cluster(s) 6,10 for target gene tup
original df shape:  (1297, 8925)
cluster df shape:  (77, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   14.1s finished


ratio(1.000000e-01) -- n: 63 -- alpha: 3.346531 -- mse: 1.224193


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.8s finished


ratio(5.000000e-01) -- n: 26 -- alpha: 0.669306 -- mse: 1.250076


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.2s finished


ratio(7.000000e-01) -- n: 20 -- alpha: 0.478076 -- mse: 1.272593


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.4s finished


ratio(9.000000e-01) -- n: 18 -- alpha: 0.371837 -- mse: 1.299046


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.3s finished


ratio(9.500000e-01) -- n: 17 -- alpha: 0.352266 -- mse: 1.305331


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.3s finished


ratio(9.900000e-01) -- n: 17 -- alpha: 0.338033 -- mse: 1.311034


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.2s finished


ratio(1.000000e+00) -- n: 17 -- alpha: 0.334653 -- mse: 1.312579
for target_gene tup, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(17, 1.3053307509883683, 0.95, 0.35226641810014986)
modelling using cluster(s) 6,10 for target gene ush
original df shape:  (1297, 8925)
cluster df shape:  (77, 8925)


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.4s finished


ratio(1.000000e-01) -- n: 213 -- alpha: 0.042832 -- mse: 1.690448


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.5s finished


ratio(5.000000e-01) -- n: 94 -- alpha: 0.008566 -- mse: 2.007874


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.2s finished


ratio(7.000000e-01) -- n: 78 -- alpha: 0.061189 -- mse: 2.068369


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.5s finished


ratio(9.000000e-01) -- n: 71 -- alpha: 0.047591 -- mse: 2.139635


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.8s finished


ratio(9.500000e-01) -- n: 69 -- alpha: 0.097135 -- mse: 2.182194


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.7s finished


ratio(9.900000e-01) -- n: 65 -- alpha: 0.093211 -- mse: 2.213634


..............................[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   11.1s finished


ratio(1.000000e+00) -- n: 82 -- alpha: 0.004283 -- mse: 2.221990
for target_gene ush, nonzero_coeffs_num, MSE, l1-ratio, alpha:
(213, 1.6904481070230908, 0.1, 0.042831955025569239)


  return np.dot(wresid, wresid) / self.df_resid


In [None]:
# please ignore this cell; it's only for testing purposes
def main2():
    data_file_name = "dge_normalized.txt"
    gene_sc_df = pd.read_csv(data_file_name, delimiter='\t', header=0)
    cells_genes_df = gene_sc_df.T
    cluster_top_genes = {1:["Act87E"]}
    print("calling modellingUsingCompleteEmbryo")
    modellingUsingCompleteEmbryo(cells_genes_df, cluster_top_genes)