In [39]:
# Enable IPython's autoreload extension in mode 2.
# NOTE: `%load_ext` prints an "already loaded" notice when this cell is re-run in the same kernel.
%load_ext autoreload
%autoreload 2
# Replace the path below with the location of the fatigue database file if needed
# (presumably read from the environment by the `pu_criterion` package — TODO confirm)
%env DATABASE_PATH=data/fatigue_database.csv

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
env: DATABASE_PATH=data/fatigue_database.csv


In [37]:
from pu_criterion.experiment import ExperimentPU
from pu_criterion.non_pu_classification import LogRegression, LDA
from pu_criterion.PU import PUClassifier
from pu_criterion.classification import LinearLogisticRegression, LinearLogisticRegression2, RandomForestClass, LinearDiscriminantClassifier
from pu_criterion.propensity import LogProbitPropensity, LogisticPropensity, GumbelPropensity
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
import numpy as np
from pu_criterion.visualisation import plot_results, criterion_2d, scatter
import matplotlib.pyplot as plt

# PU learning application to fatigue database

## Features

Choice of features for:
+ the classification model (fatigue criterion)
+ the propensity model (affecting the observations)

In [38]:
# --- Inputs of the classification model (fatigue criterion) ---
# Dang Van variables
cfeatures_DV = ['p_n', 'tau_n']
# Additional variables
cfeatures = [
    'tresca_a_m_0_mean_n',
    'sllbt_a_a_25_max_n',
    'sttc_a_m_25_max_n',
    'Tau0_25_mean',
    'thickness_0_mean',
]

# --- Inputs of the propensity model ---
# Equivalent test severity + initial severity
efeatures = ['fm', 'f0']

## Experiment parameters

### PU learning model
The PU learning model is defined by:
+ a classification model (default `LinearLogisticRegression`, note that `LinearDiscriminantClassifier` is also possible)
+ a propensity model (default `LogisticPropensity`, other possible choices are `LogProbitPropensity` and `GumbelPropensity`)

### Sampling parameters
+ subsampling_factor: to subsample the majority class (default: `None`, i.e. no subsampling)
+ upsampling_factor: to oversample the minority class (default: `1`, i.e. no upsampling)

### Random state
Represents the seed for the random operations involved in the experiment, namely the train/test partition. Note that experiments are repeated multiple times with different seeds to assess the consistency of the results.

### Execution time
For a PU learning model with `LinearLogisticRegression` as classifier and `LogisticPropensity` for propensity, each experiment normally takes less than $1$ minute. (Execution time is higher for the other propensity models.)

## Commands to launch multiple experiments and save the outputs

The estimated model, the set of features used and the performances obtained are stored in the folder passed to `exp.save(...)`. Of course, feel free to adapt it to your needs.

In [34]:
# Sampling configuration shared by every run.
# upsampling_factor = 1 is the default (no upsampling) -> you can try to increase it
# (but it does not seem to improve performances).
upsampling_factor = 1
subsampling_factor = None  # no subsampling of the majority class

# Output folder pattern: <n cfeatures>c_<n efeatures>e_<seed>_<upsampling factor>_balanced
save_dir_template = 'experiments/my_experiment/{}c_{}e_{}_{}_balanced/'

# One full experiment (PU model + baseline + scoring + save) per seed.
for random_state in tqdm(range(100)):
    exp = ExperimentPU(
        cfeatures,
        efeatures,
        random_state,
        subsampling_factor,
        LinearLogisticRegression,
        LogisticPropensity,
        False,  # NOTE(review): meaning of this positional flag is not visible here — confirm
        baseline_model=LogRegression,
        da=False,
        upsample=upsampling_factor,
    )
    exp.fit(n_inits=10, verbose=False)
    exp.fit_baseline()
    exp.scores()
    out_dir = save_dir_template.format(len(cfeatures), len(efeatures), random_state, upsampling_factor)
    exp.save(out_dir)


  0%|                                                   | 0/100 [00:00<?, ?it/s][A
  1%|▍                                        | 1/100 [00:39<1:04:54, 39.34s/it][A
  2%|▊                                        | 2/100 [01:17<1:02:56, 38.54s/it][A
  3%|█▏                                       | 3/100 [01:54<1:01:08, 37.82s/it][A
  4%|█▋                                         | 4/100 [02:20<53:03, 33.16s/it][A
  5%|██▏                                        | 5/100 [02:47<49:11, 31.07s/it][A
  6%|██▌                                        | 6/100 [03:20<49:44, 31.75s/it][A
  7%|███                                        | 7/100 [03:56<51:09, 33.01s/it][A
  8%|███▍                                       | 8/100 [04:38<55:00, 35.87s/it][A
  9%|███▊                                       | 9/100 [05:15<54:58, 36.25s/it][A
 10%|████▏                                     | 10/100 [05:44<51:14, 34.16s/it][A
 11%|████▌                                     | 11/100 [06:23<52:39, 35.50

## Commands to launch one experiment and get the results

In [55]:
# Single run: PU model (LinearLogisticRegression + LogisticPropensity) with a LogRegression baseline.
exp = ExperimentPU(
    cfeatures,
    efeatures,
    0,      # presumably the random state (third positional argument) — TODO confirm
    None,   # presumably the subsampling factor (None: no subsampling) — TODO confirm
    LinearLogisticRegression,
    LogisticPropensity,
    False,  # NOTE(review): meaning of this positional flag is not visible here — confirm
    baseline_model=LogRegression,
    da=False,
    w=1.,
    upsample=1,
)
exp.fit(verbose=True, n_inits=10)  # prints the likelihood reached at each of the 10 initializations
exp.fit_baseline()
exp.scores()
# Optional: uncomment to persist this run.
# NOTE(review): the trailing `2, 30` do not match the `0` and `upsample=1` used above — adjust before use.
# exp.save('Models/{}c_{}e_{}_{}_balanced/'.format(len(cfeatures), len(efeatures), 2, 30))

Likelihood at initialization 1: -0.065
Likelihood at initialization 2: -0.064
Likelihood at initialization 3: -0.069
Likelihood at initialization 4: -0.063
Likelihood at initialization 5: -0.061
Likelihood at initialization 6: -0.065
Likelihood at initialization 7: -0.061
Likelihood at initialization 8: -0.066
Likelihood at initialization 9: -0.061
Likelihood at initialization 10: -0.062
Best likelihood over 10 initializations : -0.061


### Performance evaluation for the above experiment

Please note that the prediction threshold used for the `Precision`, `Recall` and `F1` metrics was not calibrated. Hence, differences in these three metrics between models are not meaningful.

To compare the results, please rely on the `ROC AUC` and `PR AUC` metrics, which are not sensitive to this default threshold.

In [49]:
# Class prediction results for Dang Van
# (presumably the criterion based on the Dang Van variables in `cfeatures_DV` — TODO confirm)
exp.DV_cscores

{'ROC AUC': 0.8238863341069717,
 'PR AUC': 0.18780958665073394,
 'Recall': 0.41411042944785276,
 'Precision': 0.13513513513513514,
 'F1': 0.2037735849056604}

In [50]:
# Label prediction results for Dang Van
# NOTE(review): "label" presumably refers to the observed label, as opposed to the true class — confirm
exp.DV_scores

{'ROC AUC': 0.8056255420317593,
 'PR AUC': 0.05327239205474168,
 'Recall': 0.6142857142857143,
 'Precision': 0.04223968565815324,
 'F1': 0.07904411764705882}

In [51]:
# Class prediction results for standard classifier
# (presumably the `baseline_model=LogRegression` fitted by `exp.fit_baseline()` — TODO confirm)
exp.base_cscores

{'ROC AUC': 0.8650607995892556,
 'PR AUC': 0.3597984729642183,
 'Recall': 0.6349693251533742,
 'Precision': 0.2134020618556701,
 'F1': 0.3194444444444444}

In [52]:
# Label prediction results for standard classifier
# NOTE(review): this attribute is named `base_perfs`, whereas the Dang Van counterpart is `DV_scores`
exp.base_perfs

{'ROC AUC': 0.879463724991776,
 'PR AUC': 0.1292916773588844,
 'Recall': 0.7785714285714286,
 'Precision': 0.057308096740273394,
 'F1': 0.10675808031341821}

In [53]:
# Class prediction results for PU learning
# (presumably the PU model fitted by `exp.fit(...)` — TODO confirm)
exp.cscores

{'ROC AUC': 0.6750905706936025,
 'PR AUC': 0.07939547128213242,
 'Recall': 0.9263803680981595,
 'Precision': 0.03794446538509863,
 'F1': 0.07290283645141822}

In [54]:
# Label prediction results for PU learning
# Compare with the other models on 'ROC AUC' / 'PR AUC' only:
# 'Recall', 'Precision' and 'F1' rely on an uncalibrated prediction threshold.
exp.perfs

{'ROC AUC': 0.5741719997607584,
 'PR AUC': 0.07981365981769739,
 'Recall': 0.5642857142857143,
 'Precision': 0.01518646674356017,
 'F1': 0.029576937476600527}