In [None]:
using DataFrames, MLJ

In [None]:
# Load the Boston data via `@load_boston`, then materialize the features as a
# DataFrame and the target as a plain Vector. The temporaries stay local to the
# `let` block; the trailing `;` suppresses the cell output.
X, y = let
    feats, targ = @load_boston
    DataFrame(feats), Vector(targ)
end;

In [None]:
# Load the ridge regressor from MLJLinearModels and chain it after a Standardizer.
rr_model = @load RidgeRegressor pkg=MLJLinearModels
rr_pipe = @pipeline(Standardizer, rr_model)

In [None]:
# List the field names of `rr_model`.
# NOTE(review): `fieldnames` expects a type, so this assumes `@load` returned the
# model type rather than an instance — confirm for the installed MLJ version.
fieldnames(rr_model)

In [None]:
# Load the random forest regressor from DecisionTree and chain it after a
# Standardizer.
# NOTE(review): `add=true` presumably installs the providing package into the
# active environment when it is missing — confirm.
rf_model = @load RandomForestRegressor pkg=DecisionTree add=true
rf_pipe = @pipeline(Standardizer, rf_model)

In [None]:
# Ranges for two hyperparameters of the random forest nested inside `rf_pipe`.
r = [
    range(rf_pipe, :(random_forest_regressor.max_depth); lower=1, upper=5),
    range(rf_pipe, :(random_forest_regressor.n_trees); lower=10, upper=50),
]

# Wrap the pipeline in a tuning model scored with `rms`, fit it on the full data,
# and display the best model found.
tuned2 = TunedModel(; model=rf_pipe, range=r, measure=rms);
m = fit!(machine(tuned2, X, y))
fitted_params(m).best_model

In [None]:
# Tuning model over an explicit list of candidate pipelines (ridge vs. random
# forest), compared with `rms`; `train_best=true` is passed through unchanged.
tuned_model = TunedModel(; models=[rr_pipe, rf_pipe], measure=rms, train_best=true)

In [None]:
# Bind the tuning model to the data and fit it; `m` holds the fitted machine.
m = fit!(machine(tuned_model, X, y))

In [None]:
# Display the fitted parameters of the machine `m`.
fitted_params(m)

In [None]:
# Fetch the report of machine `m` and print the measurement stored on its best
# history entry (the trailing `;` suppresses the extra cell output).
r = report(m)
@show r.best_history_entry.measurement;

In [None]:
# Display the `best_model` entry of the report `r`.
r.best_model

In [None]:
# Display the `best_report` entry of the report `r`.
r.best_report

# Testing `@pipeline`

In [None]:
using DataFrames, MLJ, VegaLite
# Reload the Boston data: features as a DataFrame, target as a plain Vector.
# Temporaries are scoped to the `let`; the trailing `;` suppresses cell output.
X, y = let
    feats, targ = @load_boston
    DataFrame(feats), Vector(targ)
end;

In [None]:
# Load the ridge regressor provided by MLJLinearModels.
rr_model = @load RidgeRegressor pkg=MLJLinearModels

In [None]:
# Pipeline that standardizes the features, fits the ridge regressor, and
# transforms the target with a Box-Cox transformer (not a log/exp pair).
rr_pipe = @pipeline(Standardizer,
                    rr_model,
                    target=UnivariateBoxCoxTransformer)
                    # NOTE(review): an explicit `inverse=exp` was tried here and left
                    # disabled; presumably the transformer supplies its own inverse —
                    # confirm.

In [None]:
# Bind the Box-Cox pipeline to the data, then train it; the fitted machine is the
# cell's displayed value.
m = machine(rr_pipe, X, y)
m |> fit!

In [None]:
# Display the fitted parameters of the pipeline machine `m`.
fitted_params(m)

In [None]:
# Store (and display) the report of the pipeline machine `m`.
r = report(m)

In [None]:
# Display the feature table.
X

In [None]:
# Fit a stand-alone Box-Cox transformer on the target, then keep the transformed
# target (`a`) next to the original (`b`) for comparison.
boxcox = UnivariateBoxCoxTransformer()
mach = machine(boxcox, y) |> fit!
a, b = (MLJ.transform(mach, y), y)

In [None]:
# Vertically stacked histograms of the Box-Cox-transformed target (column `a`) and
# the original target (column `b`).
# The encodings reference the DataFrame columns by symbol; the previous `x=a` /
# `x=b` passed the Julia vectors themselves instead of naming fields of the piped
# table. `bin=true` groups the continuous values so that `count()` yields a
# histogram rather than one bar per distinct value.
DataFrame(a=a, b=b) |>
[
    @vlplot(:bar, x={:a, bin=true}, y="count()");
    @vlplot(:bar, x={:b, bin=true}, y="count()")
]


# Testing a simple workflow

- Import the dataset, clean it up, and fix the scitypes
- Unpack predictors and response variable
- Load a ridge regressor model
- Create a pipeline with the following steps:
    - Standardize all continuous predictors
    - One hot encode all multiclass predictors
    - Transform the target using `log`
    - Back-transform predictions using `exp` (the inverse of the target transformation)

In [1]:
using CSV, DataFrames, MLJ, MLJLinearModels

In [None]:
# Read the lichen training CSV (with "NA" treated as missing), drop incomplete
# rows, keep and rename the columns of interest, and coerce scitypes
# (Count => Continuous, Textual => Multiclass). Intermediate tables stay local to
# the `let` block.
# NOTE(review): the path is specific to one machine — adjust before running elsewhere.
lichen_training = let
    raw = CSV.read(
        "C:/Users/julio/Downloads/lichen_training.csv", DataFrame; missingstring="NA"
    )
    complete = dropmissing(raw)
    kept = select(
        complete, "Total.lichen", "basal_area", "PL", "mean_Long", "mean_Lat", "SX", "SNR"
    )
    renamed = rename(kept, ["biomass", "basalarea", "pl", "lon", "lat", "sx", "snr"])
    coerce(renamed, Count => Continuous, Textual => Multiclass)
end

schema(lichen_training)

In [None]:
# Split the table: the `:biomass` column becomes the response `y`, every remaining
# column goes to the predictor table `X`.
y, X = unpack(lichen_training, name -> name == :biomass, name -> true);

In [None]:
# Testing OneHotEncoder
# hot = OneHotEncoder()
# m = fit!(machine(hot, X))
# transform(m, X)

In [None]:
# Ridge regression pipeline: standardize the continuous predictors, one-hot encode
# the multiclass predictors, and fit on a log-transformed target.
# The target functions receive the whole target vector, so they must broadcast;
# passing bare `log` / `exp` calls `log(::Vector)`, which has no method.
# NOTE(review): `log` requires strictly positive biomass values — confirm the data.
model = @load RidgeRegressor pkg=MLJLinearModels
pipe = @pipeline(Standardizer,
                 OneHotEncoder,
                 model,
                 target = y -> log.(y),
                 inverse = z -> exp.(z))

In [None]:
# Bind the pipeline to the predictors `X` and response `y` (not yet trained).
mach = machine(pipe, X, y)

In [None]:
# Train the pipeline machine in place.
fit!(mach)