In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_regression
import sys
from yaml import load
from yaml import CLoader as Loader

# Put the project's functions directory at the front of the module search path.
# This must run before the two imports below, which are resolved from it.
# NOTE(review): the absolute path is machine-specific -- the notebook will not
# import these modules on another machine without editing this line.
sys.path.insert(0, '/home/phyto/planktonSDM/functions/')
# Project-local helpers (presumably tune.py and functions.py in that directory).
from tune import tune 
from functions import example_data

In [2]:
# Model setup: read the YAML configuration, build the example dataset and
# create the tuning object `m` used by the training cells.

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
config_path = '/home/phyto/planktonSDM/configuration/example_model_config.yml'

# NOTE(review): Loader/CLoader is PyYAML's unsafe loader and can construct
# arbitrary Python objects -- only use it on trusted configuration files.
with open(config_path, 'r') as config_file:
    model_config = load(config_file, Loader=Loader)

# Example predictors/target; the random seed is taken from the configuration.
X, y = example_data(
    y_name="Coccolithus pelagicus",
    n_samples=500,
    n_features=5,
    noise=20,
    random_state=model_config['seed'],
)

m = tune(X, y, model_config)

In [3]:
# 1-phase random forest classifier: only the classifier is trained.
# NOTE(review): the "execution time" printed by train() is ~1e-06 s even though
# 48 fits are run -- the timer inside tune.train probably measures the wrong
# interval; verify in the tune module.
m.train(
    model="rf",
    classifier=True,
)

training classifier
Fitting 3 folds for each of 16 candidates, totalling 48 fits
clf balanced accuray 0.78
execution time: 7.152557373046875e-07 seconds


In [4]:
# 1-phase random forest regressor: only the regressor is trained.
m.train(
    model="rf",
    regressor=True,
)

training regressor
Fitting 3 folds for each of 16 candidates, totalling 48 fits
reg rRMSE: 24%
reg rMAE: 17%
reg R2: 0.79
execution time: 9.5367431640625e-07 seconds


In [5]:
# 1-phase random forest regressor - testing log transformations.
# NOTE(review): log="both" runs two grid searches (see the two "Fitting ..."
# lines in the output); presumably one with and one without a log-transformed
# target -- confirm in tune.train.
m.train(
    model="rf",
    regressor=True,
    log="both",
)

training regressor
Fitting 3 folds for each of 16 candidates, totalling 48 fits
Fitting 3 folds for each of 16 candidates, totalling 48 fits
reg rRMSE: 24%
reg rMAE: 17%
reg R2: 0.79
execution time: 7.152557373046875e-07 seconds


In [6]:
# 2-phase random forest: the classifier is trained first, then the regressor,
# and a combined ("zir") score is reported in addition to the regressor scores.
m.train(
    model="rf",
    classifier=True,
    regressor=True,
)

training classifier
Fitting 3 folds for each of 16 candidates, totalling 48 fits
clf balanced accuray 0.78
training regressor
Fitting 3 folds for each of 16 candidates, totalling 48 fits
reg rRMSE: 24%
reg rMAE: 17%
reg R2: 0.79
zir rRMSE: 27%
zir rMAE: 16%
zir R2: 0.73
execution time: 7.152557373046875e-07 seconds


In [7]:
# 1-phase XGBoost regressor.
m.train(
    model="xgb",
    regressor=True,
)

training regressor
Fitting 3 folds for each of 1 candidates, totalling 3 fits
reg rRMSE: 43%
reg rMAE: 34%
reg R2: 0.33
execution time: 9.5367431640625e-07 seconds


In [8]:
# 1-phase KNN regressor.
m.train(
    model="knn",
    regressor=True,
)

training regressor
Fitting 3 folds for each of 1 candidates, totalling 3 fits
reg rRMSE: 37%
reg rMAE: 27%
reg R2: 0.51
execution time: 7.152557373046875e-07 seconds
