In [1]:
#!/usr/bin/env python

'''
DESCRIPTION
-----------
    LocalOutlierFactor with trained model

    NOTE(review): the cells visible in this notebook compute clustering
    metrics from saved prediction CSVs and do not export any LOF figure;
    this header may be left over from another script -- confirm.

RETURN
------
    {DATASET}_lof_seen.png : png file
        Similarity scores of seen label
    {DATASET}_lof_unseen.png : png file
        Similarity score of unseen label

EXPORTED FILE(s) LOCATION
-------------------------
    ./reports/retrieval/{EXPERIMENT}/{DATASET}_lof_seen.png
    ./reports/retrieval/{EXPERIMENT}/{DATASET}_lof_unseen.png
'''

# importing default libraries
import os, argparse, sys
# sys.path.append('./')
# abspath() receives the literal string '__file__' (not the __file__ variable),
# so it resolves to "<cwd>/__file__"; the two dirname() calls therefore yield
# the parent directory of the current working directory.
# NOTE(review): because the next line changes the cwd to that parent,
# re-running this cell moves ROOT_DIR one more level up each time.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))
# Make ROOT_DIR the working directory so the relative './...' paths used by
# later cells resolve against it.
os.chdir(ROOT_DIR)
# Put ROOT_DIR on the import path so the 'scripts' package below can be found.
sys.path.append(ROOT_DIR)
# importing scripts in scripts folder
from scripts import config as src

**** scripts/config.py IMPORTED!!!
**** PROJECT FOLDER ,  /home/pgundogdu/projects/signalization_prior_knowledge_based_nn


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.manifold import TSNE
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score, adjusted_rand_score, adjusted_mutual_info_score, fowlkes_mallows_score
import warnings
# Blanket filter: every warning category is silenced from here on.
warnings.filterwarnings('ignore')
import glob

# Font-size presets (in points) referenced by the rc settings below.
TINY_SIZE = 8
SMALL_SIZE = 10
MEDIUM_SIZE = 16
BIGGER_SIZE = 20

# Apply all matplotlib font-size defaults in a single rcParams update.
plt.rcParams.update({
    'font.size': MEDIUM_SIZE,             # default text size
    'axes.titlesize': 12,                 # axes title
    'axes.labelsize': 12,                 # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,        # x tick labels
    'ytick.labelsize': TINY_SIZE,         # y tick labels
    'legend.fontsize': SMALL_SIZE,        # legend entries
    'legend.title_fontsize': SMALL_SIZE,  # legend title
    'figure.titlesize': MEDIUM_SIZE,      # figure-level title
})

In [40]:
# Scratch inspection: step through every design / experiment_index pair and
# keep the matching rows of df_result in df_temp.
# NOTE(review): df_result is not assigned in any cell above this one in the
# visible notebook (it is loaded further down), so this cell depends on
# kernel state from an earlier run -- confirm the intended cell order.
for i_design in df_result['design'].unique():
    print(i_design)
    for i_exp in df_result['experiment_index'].unique():
        print(i_exp)
        df_temp = df_result.loc[
            (df_result['design'] == i_design)
            & (df_result['experiment_index'] == i_exp)
        ]
        

1_layer_dense100_
0
     prediction  ground_truth    cell_out  experiment_index             design
0             0             1  cell_out_4                 0  1_layer_dense100_
1             0             1  cell_out_4                 0  1_layer_dense100_
2             0             1  cell_out_4                 0  1_layer_dense100_
3             0             1  cell_out_4                 0  1_layer_dense100_
4             0             1  cell_out_4                 0  1_layer_dense100_
..          ...           ...         ...               ...                ...
140           0             3  cell_out_4                 0  1_layer_dense100_
141           0             3  cell_out_4                 0  1_layer_dense100_
142           0             3  cell_out_4                 0  1_layer_dense100_
143           0             3  cell_out_4                 0  1_layer_dense100_
144           2             0  cell_out_4                 0  1_layer_dense100_

[145 rows x 5 columns]
1
     p

In [43]:
def generate_metrics(dataframe):
    """Score clustering predictions per (design, experiment) pair.

    For every combination of a unique ``design`` value and a unique
    ``experiment_index`` value that has at least one row in ``dataframe``,
    the ``prediction`` column is compared against the ``ground_truth``
    column with six clustering metrics, and the arithmetic mean of those
    six scores is added as a seventh ``'mean'`` entry.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``design``, ``experiment_index``,
        ``ground_truth`` and ``prediction``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns ``score``, ``expr`` (the
        experiment index), ``metric`` and ``design``. Rows are grouped by
        metric in the order homogeneity, completeness, v_measure, ari, ami,
        fowlkes_mallows, mean; within a metric they follow the
        design-then-experiment iteration order. An input without rows gives
        an empty frame with the same four columns.
    """
    # The scorer names are resolved here, at call time, from the notebook's
    # sklearn.metrics imports.
    scorers = (
        ('homogeneity', homogeneity_score),
        ('completeness', completeness_score),
        ('v_measure', v_measure_score),
        ('ari', adjusted_rand_score),
        ('ami', adjusted_mutual_info_score),
        ('fowlkes_mallows', fowlkes_mallows_score),
    )

    # One row bucket per metric (dicts keep insertion order), so the final
    # frame stays grouped metric by metric.
    rows_by_metric = {label: [] for label, _ in scorers}
    rows_by_metric['mean'] = []

    experiments = dataframe['experiment_index'].unique()
    for i_design in dataframe['design'].unique():
        for i_exp in experiments:
            # Filter the frame that was passed in, never a notebook-level global.
            df_temp = dataframe[(dataframe['design'] == i_design)
                                & (dataframe['experiment_index'] == i_exp)]
            if df_temp.empty:
                # This design was not run for this experiment index; there is
                # nothing to score.
                continue

            truth = df_temp['ground_truth']
            predicted = df_temp['prediction']

            # Compute each metric once and reuse the values for the mean.
            scores = []
            for label, scorer in scorers:
                value = scorer(truth, predicted)
                scores.append(value)
                rows_by_metric[label].append([value, i_exp, label, i_design])
            rows_by_metric['mean'].append([np.mean(scores), i_exp, 'mean', i_design])

    # Build the result once, after all pairs are scored, so an input with no
    # rows still returns a (empty) frame instead of failing.
    result = [row for rows in rows_by_metric.values() for row in rows]
    return pd.DataFrame(result, columns=['score', 'expr', 'metric', 'design'])

In [51]:
# Load the predictions stored in clustering_cell_out_2.csv, score them, and
# show the per-design average of each metric (rows: design, columns: metric).
df_result = pd.read_csv('./models/exper_mouse/LeavePGroupsOut/clustering_cell_out_2.csv')
df_metric = generate_metrics(df_result)
df_metric.groupby(['design', 'metric'])['score'].mean().unstack('metric')

metric,ami,ari,completeness,fowlkes_mallows,homogeneity,mean,v_measure
design,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1_layer_dense100,0.511283,0.526387,0.631886,0.843868,0.521718,0.59292,0.522377
1_layer_metabolic_signaling,0.551741,0.541291,0.899618,0.863148,0.559895,0.661577,0.553768
1_layer_metabolic_signaling+100dense,0.428619,0.428572,0.70885,0.821556,0.440751,0.544982,0.441541
1_layer_ppi100,0.52338,0.536094,0.784093,0.86961,0.53657,0.630136,0.531069
1_layer_ppitf100,0.44899,0.462438,0.804713,0.836454,0.46055,0.578174,0.455898
1_layer_signaling,0.297188,0.289314,0.811446,0.790991,0.298833,0.464838,0.301256
1_layer_signaling+100dense,0.306194,0.292515,0.812556,0.785993,0.311131,0.469809,0.310465
2_layer_metabolic_signaling,0.472208,0.485482,0.796084,0.870487,0.480368,0.597288,0.479098
2_layer_signaling,0.326461,0.315159,0.746324,0.820792,0.340835,0.480932,0.33602


In [54]:
# Load the predictions stored in clustering_cell_out_4.csv, score them, and
# show the per-design average of each metric (rows: design, columns: metric).
df_result = pd.read_csv('./models/exper_mouse/LeavePGroupsOut/clustering_cell_out_4.csv')
df_metric = generate_metrics(df_result)
df_metric.groupby(['design', 'metric'])['score'].mean().unstack('metric')

metric,ami,ari,completeness,fowlkes_mallows,homogeneity,mean,v_measure
design,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1_layer_dense100_,0.717164,0.661862,0.845254,0.804528,0.657726,0.735965,0.729257
1_layer_metabolic_signaling+100dense_,0.672116,0.627008,0.809856,0.788132,0.613355,0.699475,0.686381
1_layer_metabolic_signaling_,0.773883,0.736237,0.919449,0.851586,0.69638,0.79325,0.781966
1_layer_ppi100_,0.746848,0.697715,0.930009,0.83568,0.657902,0.770664,0.75583
1_layer_ppitf100_,0.709219,0.633256,0.855844,0.801619,0.637398,0.726209,0.719918
1_layer_signaling+100dense_,0.705861,0.636867,0.884702,0.804918,0.62769,0.729304,0.71579
1_layer_signaling_,0.698619,0.63176,0.835992,0.786173,0.62067,0.71359,0.708325
2_layer_metabolic_signaling_,0.719913,0.67682,0.859716,0.81726,0.651876,0.742725,0.730766
2_layer_signaling_,0.690117,0.605318,0.90012,0.788937,0.617207,0.717235,0.701709


In [52]:
# Load the predictions stored in clustering_cell_out_6.csv, score them, and
# show the per-design average of each metric (rows: design, columns: metric).
df_result = pd.read_csv('./models/exper_mouse/LeavePGroupsOut/clustering_cell_out_6.csv')
df_metric = generate_metrics(df_result)
df_metric.groupby(['design', 'metric'])['score'].mean().unstack('metric')

metric,ami,ari,completeness,fowlkes_mallows,homogeneity,mean,v_measure
design,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1_layer_dense100,0.678906,0.567342,0.825051,0.713203,0.612342,0.681594,0.692719
1_layer_metabolic_signaling,0.714786,0.613209,0.8465,0.743555,0.650478,0.716291,0.729216
1_layer_metabolic_signaling+100dense,0.730499,0.620828,0.833369,0.741079,0.676619,0.724169,0.742618
1_layer_ppi100,0.769236,0.695699,0.878954,0.798343,0.712999,0.772491,0.779712
1_layer_ppitf100,0.739452,0.636897,0.87563,0.756144,0.670297,0.738513,0.752658
1_layer_signaling,0.678168,0.536688,0.831598,0.69468,0.604262,0.672927,0.692168
1_layer_signaling+100dense,0.755511,0.698849,0.8615,0.801716,0.708051,0.76539,0.766714
2_layer_metabolic_signaling,0.728402,0.606735,0.852014,0.733972,0.668084,0.721735,0.741202
2_layer_signaling,0.71591,0.630475,0.83706,0.754917,0.651175,0.719756,0.729


In [53]:
# Load the predictions stored in clustering_cell_out_8.csv, score them, and
# show the per-design average of each metric (rows: design, columns: metric).
df_result = pd.read_csv('./models/exper_mouse/LeavePGroupsOut/clustering_cell_out_8.csv')
df_metric = generate_metrics(df_result)
df_metric.groupby(['design', 'metric'])['score'].mean().unstack('metric')

metric,ami,ari,completeness,fowlkes_mallows,homogeneity,mean,v_measure
design,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1_layer_dense100,0.691546,0.543818,0.807224,0.658959,0.635782,0.67419,0.707813
1_layer_metabolic_signaling,0.701941,0.560554,0.810362,0.675723,0.655567,0.687327,0.719817
1_layer_metabolic_signaling+100dense,0.705245,0.557495,0.82108,0.673916,0.653598,0.688971,0.72249
1_layer_ppi100,0.667677,0.506093,0.780046,0.634031,0.618664,0.64863,0.68527
1_layer_ppitf100,0.733982,0.619704,0.810209,0.707194,0.704569,0.721003,0.75036
1_layer_signaling,0.65659,0.509742,0.81863,0.650035,0.597627,0.651149,0.674273
1_layer_signaling+100dense,0.675536,0.545133,0.772858,0.66112,0.642207,0.665152,0.694058
2_layer_metabolic_signaling,0.635363,0.46165,0.764497,0.603514,0.589064,0.618739,0.658348
2_layer_signaling,0.714033,0.572402,0.832811,0.687775,0.664617,0.700353,0.730479
