# Examples of feature set usage

In [None]:
import os,sys
sys.path.append(os.path.abspath(".."))
from features_set import features_set
import pandas as pd

<b>Structure of a module</b>
  
![Structure of a module](../res/images/module.png)

## Binary classes

In [None]:
# Settings for the binary-outcome example; they are unpacked into features_set() in the next cell.
parameters = dict(
    # csv/xls file holding the features
    feature_path="../data/features/features.xlsx",
    # csv/xls file holding the outcome
    outcome_path="../data/features/extended_clinical_df.xlsx",
    # name of the column with the patient id
    patient_column='Patient',
    # name of the column with the patient id in the clinical data file
    patient_in_outcome_column='PatientID',
    # name of the outcome column
    outcome_column='1yearsurvival',
)

In [None]:
# initialize feature set: the `parameters` dict from the previous cell is
# unpacked into keyword arguments of features_set
fs = features_set(**parameters)

In [None]:
# excluding patients with unknown outcome (in case they are represented)
# NOTE(review): axis=0 presumably selects rows (patients), as in pandas — confirm in features_set
fs.handle_nan(axis=0)

In [None]:
# preview the first 5 rows of the feature/outcome dataframe held by the feature set
# NOTE(review): this reads an underscore-prefixed (private) attribute of features_set
fs._feature_outcome_dataframe.head(5)

In [None]:
# visualization of feature values distribution in classes (in .html report)
# only the first 100 entries of fs._feature_column are passed
fs.plot_distribution(fs._feature_column[:100])

<b>Example of plotted distributions of feature values in classes:</b>
![Distributions of feature values in classes](../res/images/distr.png)

In [None]:
# visualization of feature mutual (Spearman) correlation coefficient matrix (in .html report)
# only the first 100 entries of fs._feature_column are passed
fs.plot_correlation_matrix(fs._feature_column[:100])

<b>Example of feature correlation matrix:</b>
![Feature correlation matrix](../res/images/corr.png)

In [None]:
# visualization of Mann-Whitney Bonferroni corrected p-values for binary classes test (in .html report)
# only the first 100 entries of fs._feature_column are passed
fs.plot_MW_p(fs._feature_column[:100])

<b>Example of Mann-Whitney p-values:</b>
![Mann-Whitney p-values](../res/images/p_MW.png)

In [None]:
# visualization of univariate ROC-curves (in .html report)
# only the first 100 entries of fs._feature_column are passed
fs.plot_univariate_roc(fs._feature_column[:100])

<b>Example of univariate feature ROC:</b>
![Univariate feature ROC curves](../res/images/roc.png)

In [None]:
# calculation of basic statistics for each feature (in .xlsx):
# number of NaN, mean, std, min, max; if applicable: MW-p, univariate ROC AUC, volume correlation
# NOTE(review): volume_feature presumably names the feature used for the volume correlation — confirm
fs.calculate_basic_stats(volume_feature='original_shape_VoxelVolume')

In [None]:
# checking the excel table
# NOTE(review): this file is presumably the one written by calculate_basic_stats in the
# previous cell — confirm the output path
print('Basic statistics for each feature')
# the read table is the cell's last expression, so the notebook displays it
pd.read_excel('../data/features/features_basic_stats.xlsx')

In [None]:
# volume analysis, using the same volume feature name as passed to calculate_basic_stats
fs.volume_analysis(volume_feature='original_shape_VoxelVolume')

<b>Example of volume precision-recall curve:</b>
![Volume precision-recall curve](../res/images/vol_prc.png)

<b>Example of volume Spearman correlation coefficients:</b>
![Volume Spearman correlation coefficients](../res/images/vol_corr.png)

## Multi-class

In [None]:
# Settings for the multi-class example: same files and id columns as the binary
# example, with 'Overall.Stage' as the outcome column.
parameters = dict(
    # csv/xls file holding the features
    feature_path="../data/features/features.xlsx",
    # csv/xls file holding the outcome
    outcome_path="../data/features/extended_clinical_df.xlsx",
    # name of the column with the patient id
    patient_column='Patient',
    # name of the column with the patient id in the clinical data file
    patient_in_outcome_column='PatientID',
    # name of the outcome column
    outcome_column='Overall.Stage',
)

In [None]:
# initialize feature set from the multi-class parameters
# (this rebinds `fs`, replacing the binary-class feature set)
fs = features_set(**parameters)

In [None]:
# show the rows whose 'Overall.Stage' outcome is missing
fs._feature_outcome_dataframe.loc[
    lambda frame: frame['Overall.Stage'].isnull()
]

In [None]:
# same NaN handling call as in the binary example
# NOTE(review): presumably excludes patients with unknown outcome — confirm in features_set
fs.handle_nan(axis=0)

In [None]:
# distribution plot for the first 100 entries of fs._feature_column, no class list given
fs.plot_distribution(fs._feature_column[:100])

In [None]:
# same plot, with a second argument ['I', 'IIIb']
# NOTE(review): presumably restricts the plot to these two 'Overall.Stage' classes — confirm
fs.plot_distribution(fs._feature_column[:100], ['I', 'IIIb'])

In [None]:
# feature correlation matrix for the first 100 entries of fs._feature_column
fs.plot_correlation_matrix(fs._feature_column[:100])

In [None]:
# Mann-Whitney p-values plot, called with the class list ['I', 'IIIb']
# NOTE(review): presumably the two classes compared by the test — confirm
fs.plot_MW_p(fs._feature_column[:100], ['I', 'IIIb'])

In [None]:
# univariate ROC plot, called with the class list ['I', 'IIIb']
# NOTE(review): presumably the two classes the ROC is computed for — confirm
fs.plot_univariate_roc(fs._feature_column[:100], ['I', 'IIIb'])

In [None]:
# basic statistics for each feature, same call as in the binary example
fs.calculate_basic_stats(volume_feature='original_shape_VoxelVolume')

In [None]:
# volume analysis, same call as in the binary example
fs.volume_analysis(volume_feature='original_shape_VoxelVolume')

In [None]:
# checking the excel table (same path as read in the binary example)
# NOTE(review): presumably rewritten by the calculate_basic_stats call above — confirm
print('Basic statistics for each feature')
pd.read_excel('../data/features/features_basic_stats.xlsx')