# Libraries

In [1]:
import os

# Run from the project checkout so the `src.*` imports and the relative data
# paths used later in the notebook resolve. The location is machine-specific:
# set METAENGINEERING_ROOT to override the default on another machine.
# An explicit os.chdir is used instead of the bare `cd` automagic, which only
# works in single-line cells and when no variable named `cd` exists.
PROJECT_ROOT = (
    os.environ.get("METAENGINEERING_ROOT")
    or "/home/tvangraft/tudelft/thesis/metaengineering"
)
os.chdir(PROJECT_ROOT)
print(os.getcwd())

/home/tvangraft/tudelft/thesis/metaengineering


In [2]:
from src.pipeline.dataloader import DataLoader
from src.orchestrator.orchestrator import Orchestrator

from src.settings.strategy import Strategy
from src.settings.tier import Tier

from src.utils.utils import build_config

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.svm import SVR
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_dl_config_for_strategy(tier: Tier):
    """Return the extra data-loader keyword arguments configured for ``tier``.

    Each call builds a fresh dict of ``additional_*`` lists; the notebook
    unpacks it into ``build_config``. A tier without an entry yields ``None``.
    """
    configs_by_tier = {
        Tier.TIER0: {
            "additional_filters": ["is_precursor"],
            "additional_transforms": ["log_fold_change_protein"],
        },
        Tier.TIER1: {
            "additional_frames": ["interaction_frame"],
            "additional_filters": ["is_precursor", "has_at_least_n_interaction"],
            "additional_transforms": ["log_fold_change_protein", "ppi_coo_matrix"],
        },
    }
    return configs_by_tier.get(tier)

In [4]:
# Seed for estimators that draw random numbers (the random forest bootstraps its
# samples), so repeated grid searches produce comparable scores.
RANDOM_SEED = 42


def _scaler_candidates():
    """Return a fresh list of the numeric scalers to search over."""
    return [MinMaxScaler(), StandardScaler(), RobustScaler()]


# Search space per model name. Keys use the double-underscore nested-parameter
# naming; presumably they address steps of the pipeline assembled by
# build_config / Orchestrator — confirm against those modules.
# The PCA component-count search is currently disabled for every model:
#   'regressor__pca__n_components': [0.1, 0.25, 0.5, 0.75, 0.99]
params = {
    'SVR': {
        'regressor__regressor': SVR(),
        'regressor__regressor__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'regressor__preprocessor__num__scaler': _scaler_candidates(),
    },
    'RandomForestRegressor': {
        'regressor__regressor': RandomForestRegressor(random_state=RANDOM_SEED),
        'regressor__regressor__n_estimators': [10, 25, 50, 75, 100],
        'regressor__regressor__criterion': ['squared_error', 'friedman_mse'],
        'regressor__regressor__max_depth': [5, 10, 20],
        'regressor__preprocessor__num__scaler': _scaler_candidates(),
    },
    'ElasticNet': {
        'regressor__regressor': ElasticNet(),
        'regressor__regressor__l1_ratio': [0.01, 0.25, 0.5, 0.75, 1],
        'regressor__regressor__tol': [0.01],
        'regressor__preprocessor__num__scaler': _scaler_candidates(),
    },
}

# Every strategy is evaluated on every tier by the run loop.
strategies = [Strategy.ALL, Strategy.ONE_VS_ALL, Strategy.METABOLITE_CENTRIC]
tiers = [Tier.TIER0, Tier.TIER1]

In [5]:
# Point the loader at the training data, then run every tier/strategy pair
# (tiers in the outer position, strategies in the inner one).
DataLoader.DATA_FOLDER = './data/training/'

for tier, strategy in [(t, s) for t in tiers for s in strategies]:
    orchestrator = Orchestrator()
    # The loader options are requested anew for each run, so no list objects
    # are shared between configurations.
    run_config = build_config(
        strategy=strategy,
        tier=tier,
        params=params,
        forced_training=False,
        forced_testing=False,
        **get_dl_config_for_strategy(tier)
    )
    orchestrator.prepare_orchestrator(*run_config)
    orchestrator.run()

Result for Strategy.ALL_all already exists
     Unnamed: 0  mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0             0       0.092504      0.008441         0.026047        0.003230   
1             1       0.077147      0.010093         0.019912        0.002954   
2             2       0.064018      0.008031         0.028832        0.006011   
3             3       0.066155      0.004979         0.020286        0.002943   
4             4       0.064538      0.006565         0.017313        0.003264   
..          ...            ...           ...              ...             ...   
112         112       0.130593      0.013060         0.013295        0.002640   
113         113       0.142476      0.009388         0.014203        0.003045   
114         114       0.146542      0.008727         0.014829        0.002841   
115         115       0.150475      0.005567         0.015463        0.002335   
116         116       0.132754      0.017819         0.010757     

In [8]:

# Prepare a single run: Strategy.ALL on Tier.TIER0, with forced training and
# forced testing both switched off.
orchestrator = Orchestrator()
tier0_all_config = build_config(
    strategy=Strategy.ALL,
    tier=Tier.TIER0,
    params=params,
    forced_training=False,
    forced_testing=False,
    **get_dl_config_for_strategy(Tier.TIER0)
)
orchestrator.prepare_orchestrator(*tier0_all_config)

In [6]:
# Execute the orchestrator configured in the preceding cell.
# NOTE(review): this cell's recorded execution count (6) is lower than the
# preparation cell's (8), so the captured output below may come from an earlier
# orchestrator instance — re-run top to bottom on a fresh kernel to confirm.
orchestrator.run()

Result for Strategy.ALL_all already exists
     Unnamed: 0  mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0             0       0.092504      0.008441         0.026047        0.003230   
1             1       0.077147      0.010093         0.019912        0.002954   
2             2       0.064018      0.008031         0.028832        0.006011   
3             3       0.066155      0.004979         0.020286        0.002943   
4             4       0.064538      0.006565         0.017313        0.003264   
..          ...            ...           ...              ...             ...   
112         112       0.130593      0.013060         0.013295        0.002640   
113         113       0.142476      0.009388         0.014203        0.003045   
114         114       0.146542      0.008727         0.014829        0.002841   
115         115       0.150475      0.005567         0.015463        0.002335   
116         116       0.132754      0.017819         0.010757     

  0%|          | 0/29 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
  3%|▎         | 1/29 [00:01<00:33,  1.21s/it]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(