In [2]:
import os 
import sys
import pandas as pd 
from sklearn.model_selection import train_test_split
from uvars import givemesomecredit_dir,tML_dir
sys.path.append(tML_dir)
from pagluon import agluon_pipeline

  from .autonotebook import tqdm as notebook_tqdm


1. Regression:
For a regression task, we might want to apply post-processing such as clipping the predictions to ensure they stay within a reasonable range.

Example 1: Regression with Post-Processing (Clipping)

In [None]:
# Toy regression dataset: two numeric feature columns plus a numeric 'target' column.
train_data = pd.DataFrame({'feature1': [1, 2, 3], 'feature2': [4, 5, 6], 'target': [10, 15, 20]})
valid_data = pd.DataFrame({'feature1': [4, 5], 'feature2': [7, 8], 'target': [25, 30]})
# The frame to predict on carries the feature columns only (no 'target').
test = pd.DataFrame({'feature1': [6, 7], 'feature2': [9, 10]})

# Run the pipeline as a regression task and request clipping through custom_thresholds.
agluon_pipeline(
    expname='ag_tun',
    # Trailing slash added so this call uses the same output directory spelling
    # as every other agluon_pipeline call in this notebook ('experiments/').
    agluon_dir='experiments/',
    presents=['best_quality'],
    problem_type='regression',
    eval_metric='rmse',
    verbosity=3,
    sample_weight=None,
    train_data=train_data,
    valid_data=valid_data,
    test=test,
    fcols=['feature1', 'feature2'],  # feature columns present in all three frames
    tcol='target',                   # label column present in train/valid only
    time_limit=180,
    custom_thresholds={'clip': [0, 100]}  # intended to clip regression predictions to [0, 100]
)


Example 2: Regression without Post-Processing:

In [None]:
# Regression run with no custom_thresholds argument, i.e. no clipping is requested.
# Uses the train_data / valid_data / test frames already defined in the notebook.
regression_plain_settings = {
    'expname': 'ag_tun',
    'agluon_dir': 'experiments/',
    'presents': ['best_quality'],
    'problem_type': 'regression',
    'eval_metric': 'rmse',
    'verbosity': 3,
    'sample_weight': None,
    'train_data': train_data,
    'valid_data': valid_data,
    'test': test,
    'fcols': ['feature1', 'feature2'],
    'tcol': 'target',
    'time_limit': 180,
}
agluon_pipeline(**regression_plain_settings)


2. Binary Classification:
For a binary classification task, post-processing is often unnecessary, since the predicted probabilities are either used directly or compared against a decision threshold. You can still supply a custom threshold to control how probabilities are converted into class labels.

Example 1: Binary Classification with Custom Thresholding (e.g., 0.7)

In [None]:
# Toy binary-classification dataset: 'target' holds 0/1 labels.
train_data = pd.DataFrame({
    'feature1': [1, 2, 3],
    'feature2': [4, 5, 6],
    'target': [0, 1, 0],
})
valid_data = pd.DataFrame({
    'feature1': [4, 5],
    'feature2': [7, 8],
    'target': [1, 0],
})
# Frame to predict on: feature columns only.
test = pd.DataFrame({
    'feature1': [6, 7],
    'feature2': [9, 10],
})

# Binary run that passes a custom decision threshold of 0.7.
# NOTE(review): the threshold is supplied under the key 'multi_label' even though
# this is a binary task, and calibrate_decision_threshold=True is passed alongside
# it; confirm against agluon_pipeline how the two options interact.
binary_threshold_settings = {
    'expname': 'ag_tun',
    'agluon_dir': 'experiments/',
    'presents': ['best_quality'],
    'problem_type': 'binary',
    'eval_metric': 'roc_auc',
    'verbosity': 3,
    'sample_weight': None,
    'train_data': train_data,
    'valid_data': valid_data,
    'test': test,
    'fcols': ['feature1', 'feature2'],
    'tcol': 'target',
    'time_limit': 180,
    'calibrate_decision_threshold': True,
    'custom_thresholds': {'multi_label': 0.7},
}
agluon_pipeline(**binary_threshold_settings)


Example 2: Binary Classification without Custom Thresholding:

In [None]:
# Binary run with neither calibrate_decision_threshold nor custom_thresholds passed.
# Uses the train_data / valid_data / test frames already defined in the notebook.
binary_plain_settings = {
    'expname': 'ag_tun',
    'agluon_dir': 'experiments/',
    'presents': ['best_quality'],
    'problem_type': 'binary',
    'eval_metric': 'roc_auc',
    'verbosity': 3,
    'sample_weight': None,
    'train_data': train_data,
    'valid_data': valid_data,
    'test': test,
    'fcols': ['feature1', 'feature2'],
    'tcol': 'target',
    'time_limit': 180,
}
agluon_pipeline(**binary_plain_settings)


3. Multiclass Classification:
In a multiclass classification task, the model typically outputs a probability for each class. Post-processing such as a custom probability threshold can be useful here, especially if you want to adjust the decision boundaries.

Example 1: Multiclass Classification with Custom Thresholding:

# Toy multiclass dataset: 'target' takes the integer labels 0, 1 and 2.
train_data = pd.DataFrame({
    'feature1': [1, 2, 3],
    'feature2': [4, 5, 6],
    'target': [0, 1, 2],
})
valid_data = pd.DataFrame({
    'feature1': [4, 5],
    'feature2': [7, 8],
    'target': [2, 0],
})
# Frame to predict on: feature columns only.
test = pd.DataFrame({
    'feature1': [6, 7],
    'feature2': [9, 10],
})

# Multiclass run that passes a 0.7 threshold through custom_thresholds['multi_label'].
multiclass_threshold_settings = {
    'expname': 'ag_tun',
    'agluon_dir': 'experiments/',
    'presents': ['best_quality'],
    'problem_type': 'multiclass',
    'eval_metric': 'accuracy',
    'verbosity': 3,
    'sample_weight': None,
    'train_data': train_data,
    'valid_data': valid_data,
    'test': test,
    'fcols': ['feature1', 'feature2'],
    'tcol': 'target',
    'time_limit': 180,
    'custom_thresholds': {'multi_label': 0.7},
}
agluon_pipeline(**multiclass_threshold_settings)


Example 2: Multiclass Classification without Custom Thresholding:

# Multiclass run with no custom_thresholds argument.
# Uses the train_data / valid_data / test frames already defined in the notebook.
multiclass_plain_settings = {
    'expname': 'ag_tun',
    'agluon_dir': 'experiments/',
    'presents': ['best_quality'],
    'problem_type': 'multiclass',
    'eval_metric': 'accuracy',
    'verbosity': 3,
    'sample_weight': None,
    'train_data': train_data,
    'valid_data': valid_data,
    'test': test,
    'fcols': ['feature1', 'feature2'],
    'tcol': 'target',
    'time_limit': 180,
}
agluon_pipeline(**multiclass_plain_settings)
