In [5]:
import import_ipynb

In [2]:
def get_dataset_path(configurations, config_name, site, sample_size, data_path, is_train=True, is_balanced=False):
    """
    Build the CSV path of a site's dataset under ``data_path``.

    Training paths come from the configuration's ``dataset_template``, filled
    in with ``sample_size`` and placed in a ``sample_size`` sub-directory.
    Test paths ignore the configuration and the sample size and point at a
    fixed 1000-sample file: the simple random one by default, or the
    stratified balanced one when ``is_balanced`` is true.
    """
    if not is_train:
        # Fixed evaluation sets; `configurations` is never consulted here.
        if is_balanced:
            test_name = "stratified_balanced_1000_rev1.csv"
        else:
            test_name = "simple_random_1000_rev1.csv"
        relative_path = "1000/" + test_name
    else:
        # Training file name is derived from the per-configuration template.
        template = configurations[config_name]["dataset_template"]
        relative_path = f"{sample_size}/{template.format(sample_size=sample_size)}"

    return f"{data_path}/{site}/{relative_path}"

In [3]:
def initialize_model(configurations, config_name):
    """
    Build a model instance for the named configuration.

    The entry's "model" callable is invoked with the entry's "fixed_params"
    mapping unpacked as keyword arguments, and the result is returned.
    """
    entry = configurations[config_name]
    model_factory = entry["model"]
    fixed_params = entry["fixed_params"]
    return model_factory(**fixed_params)

In [4]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate_model(model, X_test, y_test):
    """
    Evaluate the model on the test set.

    Returns a tuple ``(oa_score, f1)`` where ``oa_score`` is the overall
    accuracy and ``f1`` is the weighted-average F1 score, both computed from
    ``model.predict(X_test)`` against ``y_test``.
    """
    # Only hard predictions are needed for the two returned metrics. The
    # previous version also computed probability/decision scores that were
    # never used, which could fail for models lacking both `predict_proba`
    # and `decision_function` (or exposing a single probability column).
    y_pred = model.predict(X_test)
    oa_score = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")

    return oa_score, f1
