In [3]:
import numpy as np
import pandas as pd
from scipy import stats
import os

from common import OUTPUTPATH
from models import linear_pcc
import data

In [4]:
# estimators here
STATE = np.random.RandomState(seed=1000)
linear_pcc.fit(data.x1, data.y)

<modules.multilabel.ProbabilisticClassifierChain at 0x7f81b127de20>

In [5]:
num_importance = 100 # defult values show all variables
from sklearn.inspection import permutation_importance

def individual_importance_dataframe(est, X, y, scoring='neg_log_loss', num_importance=100000, n_repeats=30):
    """
    This algorithm use linear estimators and get the importance variables where p < 0.05
    To easy comparison, we set importance score: 1- p
    """
    r = permutation_importance(est, X, y, n_repeats=n_repeats, scoring=scoring)
    non_zero_indx = np.where(r.importances_mean > 0)
    indx = r.importances_mean[non_zero_indx].argsort()[::-1]
    important_variables = X.columns[non_zero_indx][indx]
    importance_scores = r.importances_mean[non_zero_indx][indx]
    coef = est.coef_[0][non_zero_indx][indx]
    df = pd.DataFrame({'variables': important_variables[:num_importance], 'coef': coef[:num_importance], 'importance': importance_scores[:num_importance]})
    return df

def linear_importance_dataframe(est, X, y, scoring = 'neg_log_loss', num_importance=100000, n_repeats=30):
    """
    Combine all morphologies plots together
    """
    data1 = pd.concat([X, y], axis=1)
    cnt = None
    for i in range(3):
        col_indx = -4 + i

        estimator, X, y = est[i], data1.iloc[:, :col_indx], data.y.iloc[:, i]
        temp_df = individual_importance_dataframe(est=estimator, X=X, y=y, scoring=scoring, num_importance=num_importance, n_repeats=n_repeats)
        if not cnt: 
            df = temp_df
            cnt = 1
        else:
            df = pd.concat([df, temp_df], axis=1)

    upper_columns = ['Sphere', 'Worm', 'Vesicle']
    lower_columns = ['variables', 'coef', 'importance']
    df.columns = pd.MultiIndex.from_product([upper_columns, lower_columns], names=['Phase', 'Property'])
    return df

show all importance dataframe for Sphere, Worm, Vesicle

In [6]:
linear_importance_dataframe(linear_pcc.fitted_, data.abbrev_x1, data.y, num_importance=100000)



Phase,Sphere,Sphere,Sphere,Worm,Worm,Worm,Vesicle,Vesicle,Vesicle
Property,variables,coef,importance,variables,coef,importance,variables,coef,importance
0,mw_tot_cre,0.000738,7.980805,mw_tot_cre,-0.00042,6.162994,mw_tot_cre,-0.000204,2.397071
1,mv_tot_cre,-0.046841,3.783763,mv_tot_cre,0.024149,1.550817,mv_tot_cre,0.013208,1.116525
2,dp_cre,-0.054974,3.760046,dp_cre,0.026126,1.390675,dp_cre,0.01538,1.063644
3,apol_cna,0.231271,1.052951,mw_tot_cna,-0.000266,0.1645097,mw_tot_cna,-0.000157,0.06239715
4,psa_cna,-0.043151,0.3281867,apol_cna,-0.065693,0.1423314,psa_cna,0.009357,0.02956265
5,mv_tot_cna,0.044544,0.2380768,mv_tot_cna,0.02516,0.08463098,mw_cna,-0.003235,0.01557988
6,dp_cna,0.044611,0.2025216,conc,0.075894,0.04805958,apol_cna,-0.012553,0.009531407
7,psa_cre,-0.169942,0.1204407,psa_cna,0.010289,0.03302468,temp,0.013764,0.007668041
8,conc,-0.1072,0.06043887,psa_cre,0.061203,0.02716029,conc,0.011315,0.004278364
9,mw_tot_cna,-0.000139,0.04058973,mw_cna,0.004199,0.02570507,mv_tot_cna,0.004351,0.004225794


In [8]:
top5_linear_dataframe = linear_importance_dataframe(linear_pcc.fitted_, data.abbrev_x1, data.y, num_importance=5)
top5_linear_dataframe.to_csv(os.path.join(OUTPUTPATH, 'Top5_linear_dataframe.csv'))
top5_linear_dataframe




Phase,Sphere,Sphere,Sphere,Worm,Worm,Worm,Vesicle,Vesicle,Vesicle
Property,variables,coef,importance,variables,coef,importance,variables,coef,importance
0,mw_tot_cre,0.000738,8.007907,mw_tot_cre,-0.00042,6.121813,mw_tot_cre,-0.000204,2.3736
1,mv_tot_cre,-0.046841,3.777106,mv_tot_cre,0.024149,1.568045,mv_tot_cre,0.013208,1.14004
2,dp_cre,-0.054974,3.74852,dp_cre,0.026126,1.410971,dp_cre,0.01538,1.081454
3,apol_cna,0.231271,1.060032,mw_tot_cna,-0.000266,0.170789,mw_tot_cna,-0.000157,0.061926
4,psa_cna,-0.043151,0.334054,apol_cna,-0.065693,0.149355,psa_cna,0.009357,0.030507
