In [7]:
from bernn import TrainAEThenClassifierHoldout
from ax.service.managed_loop import optimize


In [14]:
# Replace argparse with a simple class to simulate arguments
class Args:
    """Attribute container standing in for an argparse namespace.

    Each option is stored as an instance attribute initialised to the default
    listed in ``__init__``. Any default can be replaced at construction time
    with a keyword argument, e.g. ``Args(n_trials=10, device='cpu')``.

    Args:
        **overrides: option name -> value pairs replacing the defaults.

    Raises:
        TypeError: if a keyword does not match an existing option name
            (guards against silently ignored typos).
    """

    def __init__(self, **overrides):
        self.random_recs = 0  # TODO to deprecate, no longer used
        self.predict_tests = 0
        # self.balanced_rec_loader = 0
        self.early_stop = 50
        self.early_warmup_stop = -1
        self.train_after_warmup = 0
        self.threshold = 0.0
        self.n_epochs = 1000
        self.n_trials = 100
        self.device = 'cuda:0'
        self.rec_loss = 'l1'
        self.tied_weights = 0
        self.random = 1
        self.variational = 0
        self.zinb = 0  # TODO resolve problems, do not use
        self.use_mapping = 1  # Use batch mapping for reconstruct
        self.bdisc = 1
        self.n_repeats = 5
        self.dloss = 'inverseTriplet'  # one of revDANN, DANN, inverseTriplet, revTriplet
        self.csv_file = 'unique_genes.csv'
        self.best_features_file = ''  # best_unique_genes.tsv
        self.bad_batches = ''  # 0;23;22;21;20;19;18;17;16;15
        self.remove_zeros = 0
        self.n_meta = 0
        self.embeddings_meta = 0
        self.features_to_keep = 'features_proteins.csv'
        self.groupkfold = 1
        self.dataset = 'alzheimer'
        self.bs = 32  # Batch size
        self.path = '../data/Alzheimer/'
        self.exp_id = 'default_ae_then_classifier'
        self.strategy = 'CU_DEM'  # only for Alzheimer dataset
        self.n_agg = 5  # Number of trailing values to get stable valid values
        self.n_layers = 2  # N layers for classifier
        self.log1p = 1  # log1p the data? Should be 0 with zinb
        self.pool = 1  # only for Alzheimer dataset
        self.kan = 1
        self.update_grid = 1
        self.use_l1 = 1
        self.clip_val = 1.0
        self.log_metrics = 1
        self.log_plots = 1
        self.prune_network = 1.0

        # Apply caller-supplied values last so they win over the defaults;
        # only names that already exist as options are accepted.
        for option, value in overrides.items():
            if not hasattr(self, option):
                raise TypeError(f"Args got an unexpected option {option!r}")
            setattr(self, option, value)

args = Args()


In [15]:
# Build the trainer from the notebook-level `args`. Logging/plotting switches,
# group k-fold and pooling are forwarded from `args`; the remaining flags are
# fixed for this notebook.
train = TrainAEThenClassifierHoldout(
    args, args.path, fix_thres=-1, load_tb=False,
    log_metrics=args.log_metrics, keep_models=False,
    log_inputs=False, log_plots=args.log_plots,
    log_tb=False, log_neptune=True, log_mlflow=True,
    groupkfold=args.groupkfold,
    pools=bool(args.pool),  # follow args.pool instead of a hard-coded True
)

# train.train()
# List of hyperparameters getting optimized.
# `value_type` is spelled out for every entry so Ax does not have to infer it
# (and log an INFO message per parameter); the types match what Ax infers from
# the bounds/values: float bounds -> "float", int bounds -> "int".
parameters = [
    # NOTE(review): nu spans 1e-4..1e2 but is sampled on a linear scale — confirm intended.
    {"name": "nu", "type": "range", "bounds": [1e-4, 1e2], "value_type": "float", "log_scale": False},
    {"name": "lr", "type": "range", "bounds": [1e-4, 1e-2], "value_type": "float", "log_scale": True},
    {"name": "wd", "type": "range", "bounds": [1e-8, 1e-5], "value_type": "float", "log_scale": True},
    # {"name": "l1", "type": "range", "bounds": [1e-8, 1e-5], "log_scale": True},
    # {"name": "lr_b", "type": "range", "bounds": [1e-6, 1e-1], "log_scale": True},
    # {"name": "wd_b", "type": "range", "bounds": [1e-8, 1e-5], "log_scale": True},
    {"name": "smoothing", "type": "range", "bounds": [0., 0.2], "value_type": "float"},
    {"name": "margin", "type": "range", "bounds": [0., 10.], "value_type": "float"},
    {"name": "warmup", "type": "range", "bounds": [1, 1000], "value_type": "int"},
    {"name": "disc_b_warmup", "type": "range", "bounds": [1, 2], "value_type": "int"},

    {"name": "dropout", "type": "range", "bounds": [0.0, 0.5], "value_type": "float"},
    # {"name": "ncols", "type": "range", "bounds": [20, 10000]},
    {"name": "scaler", "type": "choice", "value_type": "str",
        "values": ['standard_per_batch', 'standard', 'robust', 'robust_per_batch']},  # original note: scaler should be "no" for zinb
    # {"name": "layer3", "type": "range", "bounds": [32, 512]},
    {"name": "layer2", "type": "range", "bounds": [32, 512], "value_type": "int"},
    {"name": "layer1", "type": "range", "bounds": [512, 1024], "value_type": "int"},
    # {"name": "layer2", "type": "range", "bounds": [32, 64]},
    # {"name": "layer1", "type": "range", "bounds": [64, 128]},
]

# Some hyperparameters are not always required. They are set to a default value in Train.train()
if args.dloss in ['revTriplet', 'revDANN', 'DANN', 'inverseTriplet', 'normae']:
    # gamma = 0 will ensure DANN is not learned
    parameters.append(
        {"name": "gamma", "type": "range", "bounds": [1e-2, 1e2], "value_type": "float", "log_scale": True})
if args.variational:
    # beta = 0 because useless outside a variational autoencoder
    parameters.append(
        {"name": "beta", "type": "range", "bounds": [1e-2, 1e2], "value_type": "float", "log_scale": True})
if args.zinb:
    # zeta = 0 because useless outside a zinb autoencoder
    parameters.append(
        {"name": "zeta", "type": "range", "bounds": [1e-2, 1e2], "value_type": "float", "log_scale": True})
if args.kan and args.use_l1:
    # reg_entropy is only searched when both the KAN and L1 options are enabled
    parameters.append(
        {"name": "reg_entropy", "type": "range", "bounds": [1e-4, 1e-2], "value_type": "float", "log_scale": True})
if args.use_l1:
    # l1 is only searched when the L1 option is enabled
    parameters.append(
        {"name": "l1", "type": "range", "bounds": [1e-4, 1e-2], "value_type": "float", "log_scale": True})
if args.prune_network:
    # prune_threshold is only searched when pruning is enabled
    parameters.append(
        {"name": "prune_threshold", "type": "range", "bounds": [1e-3, 3e-3], "value_type": "float", "log_scale": True})

# Run the Ax managed loop: each trial calls train.train with one sampled
# parameterization and the 'mcc' objective is maximised (minimize=False).
best_parameters, values, experiment, model = optimize(
    parameters=parameters,
    evaluation_function=train.train,
    objective_name='mcc',
    minimize=False,
    total_trials=args.n_trials,
    random_seed=41,
)


[INFO 05-13 16:40:11] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter nu. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 05-13 16:40:11] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter lr. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 05-13 16:40:11] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter wd. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 05-13 16:40:11] ax.service.utils.instantiation: Inferred value type of ParameterType.FLOAT for parameter smoothing. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO

{'nu': 57.62209361696243, 'lr': 0.0002866874793641167, 'wd': 2.1707730937174506e-06, 'smoothing': 0, 'margin': 9.637901186943054, 'warmup': 363, 'disc_b_warmup': 1, 'dropout': 0.15570732951164246, 'layer2': 227, 'layer1': 956, 'gamma': 0.013416354045760274, 'reg_entropy': 0.005716725144156548, 'l1': 0.00115086353941168, 'prune_threshold': 0.001444340628738408, 'scaler': 'robust_per_batch', 'beta': 0, 'zeta': 0, 'thres': 0}
See results using: tensorboard --logdir=logs/ae_then_classifier_holdout/c5daecba-b7e6-43d8-9057-5ca1ae38ea75 --port=6006
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ADLab/BERNN/e/BE-420
Rep: 0
Best Loss Epoch 0, Losses: 0.9252324146883828, Domain Losses: 13.952556814466204, Domain Accuracy: 0.05133928571428571
Best Loss Epoch 1, Losses: 0.8122090995311737, Domain Losses: 13.686608927590507, Domain Accuracy: 0.05915178571428571
Best Loss Epoch 2, Losses: 0.7699982055595943, Domain Losses: 13.280011177062988, Domain Accuracy: 0.0479

KeyboardInterrupt: 