Let's obtain the California Housing dataset and divide it into training and prediction sets. The training set itself will be split 80/20 into training and validation sets.

In [None]:
from joltml import Experiment, Xgboost, Sklearn
from sklearn.datasets import fetch_california_housing
# Logging is done implicitly
# Load the California Housing data as one DataFrame and drop incomplete rows.
dataset = fetch_california_housing(as_frame=True)["frame"].dropna()
# The first 16000 rows (all columns) form the training set.
training_set = dataset.iloc[:16000]
# The remaining rows, without the last column, are kept for prediction.
prediction_set = dataset.iloc[16000:, :-1]


Next, we create a joltml `Experiment` object to start our experiment. In this experiment, we add an Xgboost model via the joltml `Xgboost` wrapper class, fit a regression, and then predict using the trained model. Evaluation metrics are written to the file `experiment_1/jobid/fits_book.json`, where `jobid` is a randomly generated string unless you specify one.

In [None]:
import pandas as pd
# Create the experiment under an explicit id, then run each step separately.
experiment = Experiment(training_set, experiment_id='experiment_1')
experiment.add_models([Xgboost()])
experiment.regression(target_names=['MedHouseVal'], splits=0.2)
# Write results and default evaluation metrics
y = experiment.predict(prediction_set)



We could do the model addition, regression and prediction in just one line:

In [None]:
experiment = Experiment(training_set)
# add_models -> regression -> predict, chained into a single statement.
y = experiment.add_models([Xgboost()]).regression(target_names=['MedHouseVal'], splits=[0.8, 0.2]).predict(prediction_set)

Instead of using the `Xgboost` wrapper, we could use the `Sklearn` wrapper and specify `ElasticNet()` as the model.

In [None]:
from sklearn.linear_model import ElasticNet
experiment = Experiment(training_set)
# The scikit-learn estimator is passed to joltml through the Sklearn wrapper.
y = (
    experiment
    .add_models([Sklearn(ElasticNet())])
    .regression(target_names=['MedHouseVal'], splits=[0.8, 0.2])
    .predict(prediction_set)
)

We could also add multiple ML wrappers and get joltml to use all of them in one go.

In [None]:
from sklearn.linear_model import ElasticNet, LinearRegression, Lasso

experiment = Experiment(training_set, experiment_id="trial1")
# One Xgboost model plus three wrapped scikit-learn linear models.
models = [
    Xgboost(),
    Sklearn(ElasticNet()),
    Sklearn(LinearRegression()),
    Sklearn(Lasso()),
]
y = (
    experiment
    .add_models(models)
    .regression(target_names=['MedHouseVal'], splits=[0.8, 0.2])
    .predict(prediction_set)
)


And here are 10 models fitted together in one line:

In [None]:
from sklearn.linear_model import ElasticNet,\
LinearRegression, Ridge, RidgeCV, SGDRegressor, Lars, Lasso,\
LassoLars, ARDRegression

experiment = Experiment(training_set, experiment_id="trial2")
# scikit-learn estimator classes, in the order they are added after Xgboost.
sklearn_estimators = (
    ElasticNet,
    LinearRegression,
    Ridge,
    RidgeCV,
    SGDRegressor,
    Lars,
    Lasso,
    LassoLars,
    ARDRegression,
)
# Each estimator is instantiated with its defaults and wrapped in Sklearn.
models = [Xgboost()] + [Sklearn(estimator()) for estimator in sklearn_estimators]
y = (
    experiment
    .add_models(models)
    .regression(target_names=['MedHouseVal'], splits=[0.8, 0.2])
    .predict(prediction_set)
)


Enough with Xgboost and sklearn? Let's try training a simple `torch` neural network on a simple dataset: points generated from a linear relation `y = a*X + b`.

In [None]:
from joltml import Experiment, Pytorch
import torch
import torch.nn as nn
import pandas as pd

# Slope and intercept of the synthetic linear relation y = a*X + b.
a, b = 1, 0.5

# 1000 evenly spaced inputs on [0, 10] and their exact targets.
X = torch.linspace(0, 10, steps=1000)
y = a * X + b

# One row per (X, y) pair.
data = pd.DataFrame(zip(X, y), columns=['X', 'y'])
# First 800 rows (with target) for training; the rest, minus the last
# column, for prediction.
training_set = data.iloc[:800]
prediction_set = data.iloc[800:, :-1]

class RegressionModel(nn.Module):
    """Linear model ``a*x + b`` with two learnable one-element parameters."""

    def __init__(self):
        super().__init__()
        # Both parameters start from a standard-normal draw; nn.Parameter
        # tracks gradients by default.
        self.a = nn.Parameter(torch.randn(1, dtype=torch.float))
        self.b = nn.Parameter(torch.randn(1, dtype=torch.float))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``a*x + b`` for the input tensor ``x``."""
        scaled = self.a * x
        return scaled + self.b


experiment = Experiment(training_set, experiment_id='linear_relation')
# The torch module is handed to joltml through the Pytorch wrapper.
y = (
    experiment
    .add_models([Pytorch(RegressionModel())])
    .regression(target_names=['y'], splits=[0.8, 0.2])
    .predict(prediction_set)
)


Let's train a `torch` neural network on the California housing dataset, and compute the MAE. This will take a few minutes.

In [None]:
from joltml import Experiment, Pytorch
import torch.nn as nn
import numpy as np
from sklearn.datasets import fetch_california_housing
from joltml.joltmeter import RegressionMetrics

# Load the California Housing data as one DataFrame and drop incomplete rows.
dataset = fetch_california_housing(as_frame=True)["frame"]
dataset = dataset.dropna()
# DataFrame.astype returns a new frame rather than casting in place, so the
# result must be assigned back for the float32 cast to take effect.
dataset = dataset.astype(np.float32)
# Shuffle the rows before splitting.
dataset = dataset.sample(frac=1)
# 90% of the shuffled rows are used for training; the rest are held out.
training_size = int(0.9 * len(dataset))
training_set = dataset.iloc[:training_size]
# Held-out rows: every column but the last as inputs, the last as targets.
prediction_set = dataset.iloc[training_size:, :-1]
prediction_y = dataset.iloc[training_size:, -1]

# Input width: every column of the training frame except one.
n_inputs = training_set.shape[1] - 1

# Fully connected network narrowing n_inputs -> 24 -> 12 -> 6 -> 1, with a
# ReLU after every layer except the last.
model = nn.Sequential(
    nn.Linear(n_inputs, 24), nn.ReLU(),
    nn.Linear(24, 12), nn.ReLU(),
    nn.Linear(12, 6), nn.ReLU(),
    nn.Linear(6, 1),
)

experiment = Experiment(training_set)
y = (
    experiment
    .add_models([Pytorch(model)])
    .regression(target_names=['MedHouseVal'], splits=[0.8, 0.2])
    .predict(prediction_set)
)
# Take the first entry of the result, detach it and convert it to a NumPy
# array, then score it against the held-out targets.
predicted = y[0].detach().numpy()
print(RegressionMetrics.mean_absolute_error.evaluate(predicted, prediction_y))

Let's try the diabetes dataset.

In [None]:
from sklearn.datasets import load_diabetes
# Logging is done implicitly
# https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset
dataset = load_diabetes(as_frame=True)["frame"].dropna()
# First 350 rows for training; the rest, minus the last column, for prediction.
training_set = dataset.iloc[:350]
prediction_set = dataset.iloc[350:, :-1]

experiment = Experiment(training_set, experiment_id='diabetes_1')
experiment.add_models([Xgboost(n_estimators=400)])
experiment.regression(target_names=['target'], splits=0.2)
# Write results and default evaluation metrics
y = experiment.predict(prediction_set)




So far, we've been doing regression tasks. Let's do classification tasks: the multi-class Iris dataset.

In [None]:
from sklearn.datasets import load_iris
from joltml import Experiment, Xgboost
from joltml.joltmeter import ClassificationMulticlassMetrics

# Logging is done implicitly
# https://scikit-learn.org/stable/datasets/toy_dataset.html#iris-dataset
dataset = load_iris(as_frame=True)["frame"].dropna()
# Shuffle the rows before splitting.
dataset = dataset.sample(frac=1)
# First 100 shuffled rows for training; the rest are held out, with the last
# column kept separately as the targets.
training_set = dataset.iloc[:100]
prediction_set = dataset.iloc[100:, :-1]
prediction_targets = dataset.iloc[100:, -1]

experiment = Experiment(training_set, experiment_id='iris_1')
experiment.add_models([Xgboost(objective='multi:softprob', num_class=3)])
y = (
    experiment
    .classification(
        target_names=['target'],
        splits=0.2,
        metrics=[ClassificationMulticlassMetrics.precision],
    )
    .predict(prediction_set)
)

`joltml` applies `optuna` to perform hyperparameter optimization. Here is an example.

In [None]:
from joltml import Experiment, Xgboost, Sklearn
from sklearn.datasets import fetch_california_housing
# Logging is done implicitly
dataset = fetch_california_housing(as_frame=True)["frame"].dropna()
# First 16000 rows for training; the rest, minus the last column, for prediction.
training_set = dataset.iloc[:16000]
prediction_set = dataset.iloc[16000:, :-1]

# Search space: each key is a hyperparameter name, each value describes its
# type and either the allowed values or the numeric bounds.
params = {
    'booster': {
        'type': 'categorical',
        'values': ['gbtree', 'gblinear', 'dart'],
    },
    'lambda': {'type': 'float', 'minimum': 1e-8, 'maximum': 1.0},
    'alpha': {'type': 'float', 'minimum': 1e-8, 'maximum': 1.0},
}

experiment = Experiment(training_set, experiment_id='trial1')
experiment.add_models([Xgboost()])
experiment.regression_optimize(
    target_names=['MedHouseVal'],
    splits=0.2,
    n_trials=10,
    params=params,
)
# Write results and default evaluation metrics
y = experiment.predict(prediction_set)
