In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_regression
import sys
from yaml import load
from yaml import CLoader as Loader

# Put the project's functions directory first on sys.path so the two local
# imports below can be resolved (presumably tune.py and functions.py live there).
# NOTE(review): this absolute path is machine-specific; the notebook will not
# import on another machine unless the same directory exists.
sys.path.insert(0, '/home/phyto/planktonSDM/functions/')
from tune import tune 
from functions import example_data

In [2]:
# Model setup: read the YAML configuration, generate the example data set,
# and build the tuner object `m` that the training cells below operate on.

# NOTE(review): absolute, machine-specific config path.
# NOTE(review): CLoader is PyYAML's C-accelerated *non-safe* loader. That is
# acceptable for a trusted local config file; prefer CSafeLoader if the file
# could ever come from an untrusted source.
with open('/home/phyto/planktonSDM/configuration/example_model_config.yml', 'r') as f:
    model_config = load(f, Loader=Loader)

# The seed comes from the configuration so the example data is reproducible.
X, y = example_data(
    y_name="Coccolithus pelagicus",
    n_samples=500,
    n_features=5,
    noise=20,
    random_state=model_config['seed'],
)

m = tune(X, y, model_config)

In [3]:
# 1-phase Random forest classifier
m.train(model="rf", classifier=True)

test
training classifier
Fitting 3 folds for each of 12 candidates, totalling 36 fits
{'fit_time': array([0.10586643, 0.10485506, 0.10603714]), 'score_time': array([0.00701594, 0.00693727, 0.00685215]), 'test_accuracy': array([0.5, 0.5, 0.5])}
clf balanced accuray 0.5
execution time: 2.384185791015625e-07 seconds


In [4]:
# 1-phase Random forest regressor
m.train(model="rf", regressor=True)

test
training regressor
Fitting 3 folds for each of 24 candidates, totalling 72 fits
reg rRMSE: 49%
reg rMAE: 36%
reg R2: 0.13
execution time: 2.384185791015625e-07 seconds


In [6]:
# 1-phase Random forest regressor - testing log transformations (log="both")
m.train(model="rf", regressor=True, log="both")

test
training regressor
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Fitting 3 folds for each of 24 candidates, totalling 72 fits
reg rRMSE: 49%
reg rMAE: 36%
reg R2: 0.13
execution time: 2.384185791015625e-07 seconds


In [7]:
# 2-phase Random forest regressor: classifier and regressor are both enabled
m.train(model="rf", classifier=True, regressor=True)

test
training classifier
Fitting 3 folds for each of 12 candidates, totalling 36 fits
{'fit_time': array([0.11016083, 0.10908222, 0.10904837]), 'score_time': array([0.00721884, 0.00712371, 0.00718117]), 'test_accuracy': array([0.5, 0.5, 0.5])}
clf balanced accuray 0.5
training regressor
Fitting 3 folds for each of 24 candidates, totalling 72 fits
reg rRMSE: 49%
reg rMAE: 36%
reg R2: 0.13
zir rRMSE: 49%
zir rMAE: 36%
zir R2: 0.13
execution time: 2.384185791015625e-07 seconds


In [None]:
# 1-phase XGBoost regressor
m.train(model="xgb", regressor=True)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 41%
reg R2: -0.03
execution time: 8.961717128753662 seconds
both models:
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Fitting 3 folds for each of 24 candidates, totalling 72 fits
finished tuning model
reg rRMSE: 53%
reg rMAE: 40%
reg R2: -0.02
execution time: 17.13132405281067 seconds


In [None]:
# 1-phase KNN regressor
# This cell previously passed model="xgb" (copied from the XGBoost cell above),
# so it re-trained XGBoost instead of KNN despite its label.
# NOTE(review): confirm "knn" is the exact key tune.train expects for KNN.
m.train(model="knn", regressor=True)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
finished tuning model
reg rRMSE: 54%
reg rMAE: 42%
reg R2: -0.05
execution time: 0.30060482025146484 seconds
