In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_regression
import sys
from yaml import load
from yaml import CLoader as Loader

# Put the project's functions directory first on the module search path so
# the two project-local imports below can be resolved.
# NOTE(review): this is a hard-coded absolute path to one user's home
# directory; the notebook will not run elsewhere unless the project lives at
# the same location. Presumably `tune` and `functions` are modules inside
# that directory -- confirm.
sys.path.insert(0, '/home/phyto/planktonSDM/functions/')
from tune import tune 
from functions import example_data

In [2]:
# Model setup: read the YAML configuration, generate the example data set,
# and build the tuning object used by every training cell below.

# NOTE(review): hard-coded absolute path -- only valid on the author's machine.
config_path = '/home/phyto/planktonSDM/configuration/example_model_config.yml'

# NOTE(review): `Loader` here is yaml.CLoader (the non-safe loader). That is
# acceptable for a trusted local config file; do not point this at untrusted
# YAML without switching to a safe loader.
with open(config_path, 'r') as f:
    model_config = load(f, Loader=Loader)

# Example data: 500 samples with 5 features and noise=20, seeded from the
# configuration file so repeated runs draw the same data.
X, y = example_data(
    y_name="Coccolithus pelagicus",
    n_samples=500,
    n_features=5,
    noise=20,
    random_state=model_config['seed'],
)

# `m` is reused by all subsequent cells via m.train(...).
m = tune(X, y, model_config)

In [3]:
'''
1-phase Random forest 
'''
# Tune and fit the random forest ("rf") model on the data held by `m`.
# In the recorded run this searched 24 candidates with 3-fold
# cross-validation (72 fits) and printed the "reg" rRMSE / rMAE / R2 scores.
# NOTE(review): the candidate grid is presumably taken from model_config --
# confirm in tune.train.
m.train(model="rf")

Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 53%
reg rMAE: 40%
reg R2: -0.02
execution time: 8.763795137405396 seconds


In [4]:
'''
2-phase Random forest 
note: for the 2-phase model we need to define both the classifier and the regressor
'''
# zir=True requests the 2-phase variant. In the recorded run this performs
# the same 24-candidate search as the 1-phase cell, followed by a second
# 12-candidate search, and reports "zir" scores in addition to "reg" scores.
# NOTE(review): the classifier and regressor settings are presumably read
# from model_config -- confirm.
# NOTE(review): in the recorded output the "zir" scores are identical to the
# "reg" scores (53% / 40% / -0.02); verify that the 2-phase prediction is
# really what is being scored.
m.train(model="rf", zir=True)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 53%
reg rMAE: 40%
reg R2: -0.02
Fitting 3 folds for each of 12 candidates, totalling 36 fits
zir rRMSE: 53%
zir rMAE: 40%
zir R2: -0.02
execution time: 14.657129764556885 seconds


In [5]:
'''
Adding log transformation
'''
# log="yes": train the random forest with the log transformation enabled.
# NOTE(review): presumably the transformation is applied to the target `y`
# -- confirm in tune.train.
m.train(model="rf", log="yes")

# log="both": the recorded run performs two 72-fit searches before printing
# a single set of scores -- presumably one search with and one without the
# log transformation, keeping the better model (TODO confirm).
print("both models:")
m.train(model="rf", log="both")

Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 41%
reg R2: -0.03
execution time: 8.961717128753662 seconds
both models:
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 53%
reg rMAE: 40%
reg R2: -0.02
execution time: 17.13132405281067 seconds


In [6]:
'''
1-phase Gradient boosting with XGBoost:
'''
# Tune and fit the XGBoost ("xgb") model on the data held by `m`.
# In the recorded run only 1 candidate was evaluated (3 fits), so no real
# hyperparameter search takes place for this model with the current setup.
m.train(model="xgb")

Fitting 3 folds for each of 1 candidates, totalling 3 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 42%
reg R2: -0.05
execution time: 0.30060482025146484 seconds


In [7]:
'''
2-phase Gradient boosting with XGBoost:
'''
# zir=True requests the 2-phase variant: the recorded run fits the 1-candidate
# search twice and reports both "reg" and "zir" scores.
# NOTE(review): the recorded "zir" scores are identical to the "reg" scores
# (54% / 42% / -0.05); verify that the 2-phase prediction is really what is
# being scored.
m.train(model="xgb", zir=True)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 42%
reg R2: -0.05
Fitting 3 folds for each of 1 candidates, totalling 3 fits
zir rRMSE: 54%
zir rMAE: 42%
zir R2: -0.05
execution time: 0.5986864566802979 seconds


In [8]:
'''
1-phase bagged nearest neighbors
'''
# Tune and fit the bagged nearest-neighbours ("knn") model on the data held
# by `m`. In the recorded run only 1 candidate was evaluated (3 fits).
m.train(model="knn")

Fitting 3 folds for each of 1 candidates, totalling 3 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 41%
reg R2: -0.04
execution time: 1.5303997993469238 seconds


In [9]:
'''
2-phase bagged nearest neighbors
'''
# zir=True requests the 2-phase variant: the recorded run fits the 1-candidate
# search twice and reports both "reg" and "zir" scores, which differ slightly
# from each other for this model.
m.train(model="knn", zir=True)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 40%
reg R2: -0.05
Fitting 3 folds for each of 1 candidates, totalling 3 fits
zir rRMSE: 54%
zir rMAE: 41%
zir R2: -0.04
execution time: 4.25219988822937 seconds
