In [13]:
import pandas as pd
from yaml import load
from yaml import CLoader as Loader
from sklearn.preprocessing import OneHotEncoder

import os, sys
os.chdir('/home/phyto/Abil/')  # Provide your path here

sys.path.insert(0, './abil/')
from abil.tune import tune
# `upsample` is called when preparing the training data; without this import
# the notebook raises NameError on a fresh kernel.
# NOTE(review): assumes `upsample` is defined in abil/functions.py - confirm.
from abil.functions import upsample

In [14]:
# Set up the tuner (`m`) that the training cells below call.
# NOTE: `Loader` is PyYAML's full (non-safe) C loader, which can construct
# arbitrary Python objects - only load configuration files you trust.
with open('./examples/configuration/2-phase.yml', 'r') as f:
    model_config = load(f, Loader=Loader)

predictors = model_config['predictors']
# Read your target + environmental data:
d = pd.read_csv(model_config['local_root'] + model_config['training'])
target = "Emiliania huxleyi"


# In this example we introduce pseudo-absences: missing target values are
# replaced by 0 and the frame is then passed through `upsample`.
d[target] = d[target].fillna(0)
d = upsample(d, target, ratio=10)

# Drop any rows that are still missing the target or one of the predictors:
d = d.dropna(subset=[target])
d = d.dropna(subset=predictors)

# Here we randomly sample data to speed up training.
# - `random_state` makes the subset repeatable across kernel restarts.
# - The size is capped at len(d) because DataFrame.sample raises ValueError
#   when asked for more rows than exist (replace=False).
n_train = 1000
d = d.sample(min(n_train, len(d)), random_state=42)

X_train = d[predictors]
y = d[target]

m = tune(X_train, y, model_config)

In [15]:
# 1-phase RF classifier: only the classifier stage is trained.
m.train(classifier=True, model="rf")

training classifier
Fitting 3 folds for each of 16 candidates, totalling 48 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/rf/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/rf/scoring/Emiliania huxleyi_clf.sav
[0.64985402 0.61787879 0.71212121]
clf balanced accuracy 0.66
execution time: 7.152557373046875e-07 seconds


In [5]:
# 1-phase RF regressor: only the regressor stage is trained.
m.train(regressor=True, model="rf")

training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__n_estimators': [100], 'regressor__estimator__max_features': [4], 'regressor__estimator__max_depth': [50], 'regressor__estimator__min_samples_leaf': [0.5, 1], 'regressor__estimator__max_samples': [0.5, 1]}
Fitting 3 folds for each of 4 candidates, totalling 12 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/rf/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/rf/scoring/Emiliania huxleyi_reg.sav
reg rRMSE: 751%
reg rMAE: 184%
reg R2: -0.12
reg tau: nan
execution time: 4.76837158203125e-07 seconds


In [6]:
# 2-phase RF regressor: classifier and regressor are both requested in one call.
m.train(regressor=True, classifier=True, model="rf")

training classifier
Fitting 3 folds for each of 16 candidates, totalling 48 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/rf/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/rf/scoring/Emiliania huxleyi_clf.sav
[0.56732617 0.63793103 0.63793103]
clf balanced accuracy 0.61
training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__n_estimators': [100], 'regressor__estimator__max_features': [4], 'regressor__estimator__max_depth': [50], 'regressor__estimator__min_samples_leaf': [0.5, 1], 'regressor__estimator__max_samples': [0.5, 1]}
Fitting 3 folds for each of 4 candidates, totalling 12 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/rf/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/rf/scoring/Emiliania huxleyi_reg.sav
reg rRMSE: 2585%
r

In [7]:
# 1-phase KNN classifier: only the classifier stage is trained.
m.train(classifier=True, model="knn")

training classifier
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/knn/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/knn/scoring/Emiliania huxleyi_clf.sav
[0.51724138 0.5        0.5       ]
clf balanced accuracy 0.51
execution time: 9.5367431640625e-07 seconds


In [8]:
# 1-phase KNN regressor: only the regressor stage is trained.
m.train(regressor=True, model="knn")

training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__max_samples': [0.2], 'regressor__estimator__max_features': [0.2], 'regressor__estimator__estimator__leaf_size': [25], 'regressor__estimator__estimator__n_neighbors': [5]}
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/knn/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/knn/scoring/Emiliania huxleyi_reg.sav
reg rRMSE: 743%
reg rMAE: 172%
reg R2: -0.45
reg tau: 0.15
execution time: 7.152557373046875e-07 seconds


In [9]:
# 2-phase KNN regressor: classifier and regressor are both requested in one call.
m.train(regressor=True, classifier=True, model="knn")

training classifier
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/knn/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/knn/scoring/Emiliania huxleyi_clf.sav
[0.51724138 0.5        0.5       ]
clf balanced accuracy 0.51
training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__max_samples': [0.2], 'regressor__estimator__max_features': [0.2], 'regressor__estimator__estimator__leaf_size': [25], 'regressor__estimator__estimator__n_neighbors': [5]}
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/knn/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/knn/scoring/Emiliania huxleyi_reg.sav
reg rRMSE: 2229%
reg rMAE: 1306%
reg R2: -0.39
reg 

1 fits failed out of a total of 3.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/phyto/Abil/abil/functions.py", line 145, in fit
    check_is_fitted(self.regressor)
  File "/home/phyto/anaconda3/envs/my-geopandas-env-2/lib/python3.11/site-packages/sklearn/utils/validation.py", line 1462, in check_is_fitted
    raise NotFittedError(msg % {"name": type(estimator).__name__})
sklearn.exceptions.NotFittedError: This TransformedTargetRegressor instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

During handling of the above exception, another exceptio

In [10]:
# 1-phase XGB classifier: only the classifier stage is trained.
m.train(classifier=True, model="xgb")

training classifier
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/xgb/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/xgb/scoring/Emiliania huxleyi_clf.sav
[0.5 0.5 0.5]
clf balanced accuracy 0.5
execution time: 4.76837158203125e-07 seconds


In [11]:
# 1-phase XGB regressor: only the regressor stage is trained.
m.train(regressor=True, model="xgb")

training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__learning_rate': [0.05], 'regressor__estimator__n_estimators': [100], 'regressor__estimator__max_depth': [7], 'regressor__estimator__subsample': [0.8], 'regressor__estimator__colsample_bytree': [0.5], 'regressor__estimator__gamma': [1], 'regressor__estimator__reg_alpha': [0.1]}
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/xgb/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/xgb/scoring/Emiliania huxleyi_reg.sav
reg rRMSE: 738%
reg rMAE: 100%
reg R2: 0.02
reg tau: 0.16
execution time: 9.5367431640625e-07 seconds


In [12]:
# 2-phase XGB regressor: classifier and regressor are both requested in one call.
m.train(regressor=True, classifier=True, model="xgb")

training classifier
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to:/home/phyto/Abil/examples/ModelOutput/xgb/model/Emiliania huxleyi_clf.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/xgb/scoring/Emiliania huxleyi_clf.sav
[0.5 0.5 0.5]
clf balanced accuracy 0.5
training regressor
{'R2': 'r2', 'MAE': 'neg_mean_absolute_error', 'RMSE': 'neg_root_mean_squared_error', 'tau': make_scorer(tau_scoring), 'tau_p': make_scorer(tau_scoring_p)}
{'regressor__estimator__learning_rate': [0.05], 'regressor__estimator__n_estimators': [100], 'regressor__estimator__max_depth': [7], 'regressor__estimator__subsample': [0.8], 'regressor__estimator__colsample_bytree': [0.5], 'regressor__estimator__gamma': [1], 'regressor__estimator__reg_alpha': [0.1]}
Fitting 3 folds for each of 1 candidates, totalling 3 fits
exported model to: /home/phyto/Abil/examples/ModelOutput/xgb/model/Emiliania huxleyi_reg.sav
exported scoring to: /home/phyto/Abil/examples/ModelOutput/xgb/