In [1]:
from fleet.base_schemas import BaseFleetModelSpec, TorchModelSpec
from fleet.torch_.schemas import MonitoringConfig, TorchTrainingConfig
from fleet.model_builder import optimizers
from fleet.dataset_schemas import is_regression
# YAML model spec (parsed below via TorchModelSpec.from_yaml_str) for a
# two-branch regressor that predicts the numeric `tpsa` column:
#   * Branch 1: `smiles` is passed through MolToGraphFeaturizer, then three
#     GCNConv layers (26 -> 64 -> 64 -> 64) each followed by ReLU, then a
#     GlobalPooling layer with aggr "sum".
#   * Branch 2: the numeric `mwt` column goes through Linear1 (1 -> 10).
# Combiner concatenates both branches on dim -1 and LinearJoined (74 -> 1)
# is declared as the target's outModule.
# NOTE(review): in_features 74 equals 64 + 10, which presumably relies on the
# pooling layer keeping the 64-channel width — confirm against GlobalPooling.
# NOTE(review): references are written both as `$name` and `${name}`; this
# looks interchangeable for the spec parser, but verify.
small_regressor = """name: GNNExample
framework: torch
dataset:
  name: Small Zinc dataset
  targetColumns:
    - name: tpsa
      dataType:
        domainKind: numeric
      outModule: LinearJoined
  featureColumns:
    - name: smiles
      dataType:
        domainKind: smiles
    - name: mwt
      dataType:
        domainKind: numeric
  featurizers:
    - name: MolToGraphFeaturizer
      type: fleet.model_builder.featurizers.MoleculeFeaturizer
      forwardArgs:
        mol: $smiles
      constructorArgs:
        allow_unknown: false
        sym_bond_list: true
        per_atom_fragmentation: false
spec:
  layers:
    - name: GCN1
      type: torch_geometric.nn.GCNConv
      constructorArgs:
        in_channels: 26
        out_channels: 64
      forwardArgs:
        x: ${MolToGraphFeaturizer.x}
        edge_index: ${MolToGraphFeaturizer.edge_index}
    - name: GCN1_Activation
      type: torch.nn.ReLU
      constructorArgs:
        inplace: False
      forwardArgs:
        input: ${GCN1}

    - name: GCN2
      type: torch_geometric.nn.GCNConv
      forwardArgs:
        x: ${GCN1_Activation}
        edge_index: ${MolToGraphFeaturizer.edge_index}
      constructorArgs:
        in_channels: 64
        out_channels: 64

    - name: GCN2_Activation
      type: torch.nn.ReLU
      constructorArgs:
        inplace: False
      forwardArgs:
        input: ${GCN2}

    - name: GCN3
      type: torch_geometric.nn.GCNConv
      forwardArgs:
        x: ${GCN2_Activation}
        edge_index: ${MolToGraphFeaturizer.edge_index}
      constructorArgs:
        in_channels: 64
        out_channels: 64

    - name: GCN3_Activation
      type: torch.nn.ReLU
      constructorArgs:
        inplace: False
      forwardArgs:
        input: ${GCN3}

    - name: AddPool
      type: fleet.model_builder.layers.GlobalPooling
      forwardArgs:
        x: ${GCN3_Activation}
        edge_index: ${MolToGraphFeaturizer.edge_index}
        batch: ${MolToGraphFeaturizer.batch}
      constructorArgs:
        aggr: "sum"
    # End of fst branch

    # Second branch would simply be linear layers in mwt
    - name: "Linear1"
      type: torch.nn.Linear
      constructorArgs:
        in_features: 1
        out_features: 10
      forwardArgs:
        input: ${mwt}

    - name: Combiner
      type: fleet.model_builder.layers.Concat
      constructorArgs:
        dim: -1
      forwardArgs:
        xs:
          - "${AddPool}"
          - "${Linear1}"

    - name: LinearJoined
      type: torch.nn.Linear
      forwardArgs:
        input: $Combiner
      constructorArgs:
        in_features: 74
        out_features: 1
"""
# Relative path: it is resolved against the kernel's current working directory.
csv_path = "tests/data/csv/zinc.csv"

# Parse the YAML spec defined above into a model spec object.
spec = TorchModelSpec.from_yaml_str(small_regressor)

# Checkpointing monitors the first target column declared in the spec.
target_column = spec.dataset.target_columns[0]
col_name = target_column.name

# The monitoring direction has to follow the metric: validation MSE should be
# minimized, whereas validation precision should be maximized. Previously the
# mode was always "min", which would keep the worst checkpoint whenever the
# precision branch was taken.
# NOTE(review): assumes MonitoringConfig accepts mode="max" — confirm.
if is_regression(target_column):
    monitor_mode = "min"
    monitor_metric = f"val/mse/{col_name}"
else:
    monitor_mode = "max"
    monitor_metric = f"val/precision/{col_name}"

train_config = TorchTrainingConfig(
    epochs=4,
    batch_size=32,
    checkpoint_config=MonitoringConfig(
        mode=monitor_mode,
        metric_key=monitor_metric,
    ),
    optimizer=optimizers.AdamOptimizer(),
)


In [3]:
from fleet.model_functions import fit
import pandas as pd

# Read the training data up front so the fit() call below only wires
# already-built objects together.
training_frame = pd.read_csv(csv_path)

# Train the model described by `spec` and record it under the given MLflow
# model and experiment names.
# NOTE(review): the output recorded for this cell shows fit() failing with
# RESOURCE_ALREADY_EXISTS for the experiment name, so re-running against a
# tracking store that already holds this experiment appears to raise — confirm
# whether fit() is expected to reuse an existing experiment.
# NOTE(review): "small regressora" may be a typo for "small regressor" — verify.
fit(
    spec=spec,
    # TODO: narrow the type accepted for train_config
    train_config=train_config,
    mlflow_model_name="small regressora",
    mlflow_experiment_name="small regressor training",
    dataset=training_frame,
)

ERROR    [model_functions.py:84] RestException('RESOURCE_ALREADY_EXISTS: Experiment(name=small regressor training) already exists. Error: (raised as a result of Query-invoked autoflush; consider using a session.no_autoflush block if this flush is occurring prematurely)\n(psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "experiments_name_key"\nDETAIL:  Key (name)=(small regressor training) already exists.\n\n[SQL: INSERT INTO experiments (name, artifact_location, lifecycle_stage, creation_time, last_update_time) VALUES (%(name)s, %(artifact_location)s, %(lifecycle_stage)s, %(creation_time)s, %(last_update_time)s) RETURNING experiments.experiment_id]\n[parameters: {\'name\': \'small regressor training\', \'artifact_location\': \'\', \'lifecycle_stage\': \'active\', \'creation_time\': 1684263213744, \'last_update_time\': 1684263213744}]\n(Background on this error at: https://sqlalche.me/e/14/gkpj)')


RuntimeError: Failed to create mlflow experiment