In [1]:
from dotenv import load_dotenv
load_dotenv('../.env')
load_dotenv('../.env.secret')
import os

os.environ['MLFLOW_TRACKING_URI'] = 'http://localhost:5000'
os.environ['POSTGRES_SERVER'] = 'localhost'

import pandas as pd
from fleet.utils import data
from torch.utils.data import DataLoader
from torch import nn
import torch
import datetime
from fleet.model_builder import splitters
from fleet.model_builder.constants import TrainingStep
from fleet.base_schemas import TorchModelSpec
from fleet.model_functions import fit
from fleet.torch_.schemas import TorchTrainingConfig
from fleet.model_builder import optimizers
from fleet.model_builder.dataset import Collater

model_path = '../tests/data/yaml/multiclass_classification_model.yaml'
csv_path = '../tests/data/csv/iris.csv'

model_spec = TorchModelSpec.from_yaml(model_path)
df = pd.read_csv(csv_path)

# Creates the "step" column on the dataframe with the specified splitting.
splitters.apply_split_indexes(df, split_type="random", split_target="60-20-20")

# Converts the dataframe to numpy outputs after applying featurizers and transforms.
df = data.build_columns_numpy(dataset_config=model_spec.dataset, df=df)

# Instantiate torch data loader from preprocessed dataframe
dataset = data.MarinerTorchDataset(
    data=df,
    dataset_config=model_spec.dataset,
)
dataloader = DataLoader(dataset, batch_size=4, collate_fn=Collater())

# Example of a torch dataset batch
next(iter(dataloader))

tensor([[0.2090],
        [0.3825],
        [0.2493],
        [0.8298]], grad_fn=<AddmmBackward0>)

In [6]:
now = datetime.datetime.now()

# Trains the model, uploading metrics to MLFlow
# NOT logging models because checkpoint_config is absent in train_config
result = fit(
    spec=model_spec,
    train_config=TorchTrainingConfig(
        epochs=10,
        batch_size=20,
        optimizer=optimizers.AdamOptimizer(),
    ),
    mlflow_model_name=f"Test-model-{now}",
    mlflow_experiment_name=f"Test-experiment-{now}",
    datamodule_args={
        'split_type': 'random',
        'split_target': '60-20-20',
    },
    dataset=df
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type       | Params
--------------------------------------
0 | _model | ModuleDict | 371   
--------------------------------------
371       Trainable params
0         Non-trainable params
371       Total params
0.001     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=10` reached.


In [7]:
# Getting metrics published to mlflow.
from mlflow.tracking.client import MlflowClient

client = MlflowClient()
runs = client.search_runs(experiment_ids=[result.mlflow_experiment_id])

runs[0]

<Run: data=<RunData: metrics={'epoch': 9.0,
 'train/accuracy/species': 0.36666667461395264,
 'train/f1/species': 0.36666667461395264,
 'train/loss/species': 1.085148811340332,
 'train/precision/species': 0.36666667461395264,
 'train/recall/species': 0.36666667461395264,
 'val/accuracy/species': 0.2666666805744171,
 'val/f1/species': 0.2666666805744171,
 'val/loss/species': 1.1473380327224731,
 'val/precision/species': 0.2666666805744171,
 'val/recall/species': 0.2666666805744171}, params={}, tags={'mlflow.runName': 'shivering-doe-64',
 'mlflow.source.name': '/home/vlma/.cache/pypoetry/virtualenvs/mariner-yEMAJoJH-py3.9/lib/python3.9/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'vlma'}>, info=<RunInfo: artifact_uri='s3://dev-mariner-datasets/59/c959d7b440164177afea2fbc156a0762/artifacts', end_time=1685727277113, experiment_id='59', lifecycle_stage='active', run_id='c959d7b440164177afea2fbc156a0762', run_name='shivering-doe-64', run_uuid='c959d7b4