## Active Learning

In [1]:
from skactiveml.pool import RandomSampling, UncertaintySampling, QueryByCommittee
from skactiveml.utils import unlabeled_indices, labeled_indices, MISSING_LABEL

In [2]:
from research_aml_elliptic.src.experiments.general_functions.elliptic_data_preprocessing import run_elliptic_preprocessing_pipeline

Root directory:  /Users/kevinaraujo/repos/dissertation/PPCA-UnB-Dissertation/models/notebooks/research_aml_elliptic


In [3]:
def recreate_original_df():
    """Rebuild one combined feature frame and label series from the train/test split.

    Runs ``run_elliptic_preprocessing_pipeline`` and stitches its four outputs
    back together: features and labels are joined column-wise per split, then
    the train rows are stacked on top of the test rows.

    Note: ``last_train_time_step``, ``last_time_step``, ``only_labeled`` and
    ``pd`` are read from the surrounding notebook scope, so they must be
    defined before this function is called.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the combined frame without the
        ``'class'`` column and ``y`` is the ``'class'`` column itself.
        Assumes the label objects returned by the pipeline are named
        ``'class'`` -- TODO confirm against the preprocessing module.
    """
    split = run_elliptic_preprocessing_pipeline(
        last_train_time_step=last_train_time_step,
        last_time_step=last_time_step,
        only_labeled=only_labeled,
    )
    X_train, X_test, y_train, y_test = split

    # Re-attach the labels to their features, split by split.
    train_part = pd.concat([X_train, y_train], axis=1)
    test_part = pd.concat([X_test, y_test], axis=1)

    # Train rows first, then test rows; the original indices are kept as-is.
    combined = pd.concat([train_part, test_part])

    features = combined.drop(['class'], axis=1)
    labels = combined['class']
    return features, labels

### Identifying Query Strategies

In [4]:
# One seed shared by every query strategy so that a fresh
# "Restart Kernel -> Run All" selects the same instances each time.
QUERY_STRATEGY_SEED = 139

# Maps a human-readable strategy name to a configured scikit-activeml
# pool-based query strategy instance.
query_strategies = {
    # Random Sampling strategy
    # Selects instances randomly from the unlabeled pool
    'Random Sampling': RandomSampling(random_state=QUERY_STRATEGY_SEED),

    # Uncertainty Sampling strategy using Entropy
    # Selects instances with the highest entropy (uncertainty) in the predicted class probabilities
    'Entropy': UncertaintySampling(method='entropy', random_state=QUERY_STRATEGY_SEED),

    # Uncertainty Sampling strategy using Margin Sampling
    # Selects instances with the smallest margin between the predicted probabilities of the two most likely classes
    'Margin': UncertaintySampling(method='margin_sampling', random_state=QUERY_STRATEGY_SEED),

    # Query-by-Committee strategy using KL Divergence
    # Selects instances based on the disagreement among a committee of models
    # The disagreement is measured by the Kullback-Leibler (KL) divergence between the predicted class probabilities of the committee members
    'Query-by-Committee-KL': QueryByCommittee(method='KL_divergence', random_state=QUERY_STRATEGY_SEED),
}