# Libraries

In [1]:
# Change the kernel's working directory (IPython `cd` automagic). The `src.*`
# imports and the relative './data/training/' folder used later appear to rely on it.
# NOTE(review): hard-coded absolute home-directory path; adjust it before running on another machine.
cd /home/tvangraft/tudelft/thesis/metaengineering

/home/tvangraft/tudelft/thesis/metaengineering


In [2]:
from src.pipeline.dataloader import DataLoader
from src.orchestrator.orchestrator import SklearnOrchestrator

from src.settings.strategy import Strategy
from src.settings.tier import Tier

from src.utils.utils import build_config

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.svm import SVR
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_dl_config_for_strategy(tier: Tier) -> dict:
    """Return the extra data-loader keyword arguments for a tier.

    Despite the function name, the lookup is keyed on the tier, not on the
    strategy. The result is rebuilt on every call, so callers receive their
    own dict and lists, and it is intended to be unpacked with ``**`` into
    ``build_config``.

    Args:
        tier: The ``Tier`` member whose configuration is requested.

    Returns:
        A dict with the keys ``additional_filters`` and
        ``additional_transforms``; for ``Tier.TIER1`` it also contains
        ``additional_frames``.

    Raises:
        ValueError: If no configuration is defined for ``tier``. Previously
            ``None`` was returned, which only failed later with an opaque
            ``TypeError`` when the result was unpacked with ``**``.
    """
    lookup = {
        Tier.TIER0: dict(
            additional_filters=["is_precursor",],
            additional_transforms=["log_fold_change_protein",]
        ),
        Tier.TIER1: dict(
            additional_frames=["interaction_frame",],
            additional_filters=[
                "is_precursor",
                "has_at_least_n_interaction",
            ],
            additional_transforms=[
                "log_fold_change_protein",
                "ppi_coo_matrix",
            ]
        )
    }

    # Fail loudly here instead of handing None to a `**` unpacking call site.
    if tier not in lookup:
        raise ValueError(
            f"No data-loader configuration defined for tier {tier!r}; "
            f"supported tiers: {list(lookup)}"
        )
    return lookup[tier]

In [4]:
# Candidate settings for each model. The double-underscore keys look like
# sklearn-style nested parameter paths (`<step>__<sub-step>__<param>`); the
# pipeline they address is built elsewhere in the project.
# A PCA sweep ('regressor__pca__n_components': [0.1, 0.25, 0.5, 0.75, 0.99])
# is currently disabled for every model.

# Disabled candidate, kept for reference:
# svr_grid = {
#     'regressor__regressor': SVR(),
#     'regressor__regressor__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
#     'regressor__preprocessor__num__scaler': [MinMaxScaler(), StandardScaler(), RobustScaler()],
# }

random_forest_grid = {
    'regressor__regressor': RandomForestRegressor(),
    'regressor__regressor__n_estimators': [10, 25, 50, 75, 100],
    'regressor__regressor__criterion': ['squared_error', 'friedman_mse'],
    'regressor__regressor__max_depth': [5, 10, 20],
    'regressor__preprocessor__num__scaler': [MinMaxScaler(), StandardScaler(), RobustScaler()],
}

elastic_net_grid = {
    'regressor__regressor': ElasticNet(),
    'regressor__regressor__l1_ratio': [0.01, 0.25, 0.5, 0.75, 1],
    'regressor__regressor__tol': [0.01],
    'regressor__preprocessor__num__scaler': [MinMaxScaler(), StandardScaler(), RobustScaler()],
}

# Model name -> its candidate settings; only the enabled models are listed.
params = {
    'RandomForestRegressor': random_forest_grid,
    'ElasticNet': elastic_net_grid,
}

# Every (tier, strategy) combination below is trained.
strategies = [Strategy.ALL, Strategy.ONE_VS_ALL, Strategy.METABOLITE_CENTRIC]
tiers = [Tier.TIER0, Tier.TIER1]

In [None]:
# Point the loader at the training data; the path is relative to the current
# working directory.
DataLoader.DATA_FOLDER = './data/training/'

# One fresh orchestrator per (tier, strategy) combination.
for tier in tiers:
    for strategy in strategies:
        orchestrator: SklearnOrchestrator = SklearnOrchestrator()

        # Tier-specific loader options, rebuilt for each combination.
        loader_options = get_dl_config_for_strategy(tier)
        orchestrator_args = build_config(
            strategy=strategy,
            tier=tier,
            params=params,
            forced_training=False,
            forced_testing=False,
            **loader_options,
        )

        orchestrator.prepare_orchestrator(*orchestrator_args)
        orchestrator.run()