In [4]:
#!/usr/bin/env python

# # Simple Model Training Example
#
# This example is the API example for this Ludwig command line example
# (https://ludwig-ai.github.io/ludwig-docs/latest/examples/titanic/).

# Import required libraries
import logging
import os
import shutil

from ludwig.api import LudwigModel
from ludwig.datasets import titanic

# clean out prior results
shutil.rmtree("./results", ignore_errors=True)

# Download and prepare the dataset
training_set, test_set, _ = titanic.load(split=True)

config = {   'combiner': {   'activation': 'relu',
                    'bias_initializer': 'zeros',
                    'dropout': 0.0,
                    'fc_layers': None,
                    'flatten_inputs': False,
                    'norm': None,
                    'norm_params': None,
                    'num_fc_layers': 0,
                    'output_size': 256,
                    'residual': False,
                    'type': 'concat',
                    'use_bias': True,
                    'weights_initializer': 'xavier_uniform'},
    'defaults': {   'audio': {   'preprocessing': {   'audio_file_length_limit_in_s': 7.5,
                                                      'in_memory': True,
                                                      'missing_value_strategy': 'backfill',
                                                      'norm': None,
                                                      'num_fft_points': None,
                                                      'num_filter_bands': 80,
                                                      'padding_value': 0,
                                                      'type': 'fbank',
                                                      'window_length_in_s': 0.04,
                                                      'window_shift_in_s': 0.02,
                                                      'window_type': 'hamming'}},
                    'bag': {   'preprocessing': {   'fill_value': '<UNK>',
                                                    'lowercase': False,
                                                    'missing_value_strategy': 'fill_with_const',
                                                    'most_common': 10000,
                                                    'tokenizer': 'space'}},
                    'binary': {   'preprocessing': {   'missing_value_strategy': 'fill_with_false'}},
                    'category': {   'preprocessing': {   'fill_value': '<UNK>',
                                                         'lowercase': False,
                                                         'missing_value_strategy': 'fill_with_const',
                                                         'most_common': 10000}},
                    'date': {   'preprocessing': {   'datetime_format': None,
                                                     'fill_value': '',
                                                     'missing_value_strategy': 'fill_with_const'}},
                    'h3': {   'preprocessing': {   'fill_value': 576495936675512319,
                                                   'missing_value_strategy': 'fill_with_const'}},
                    'image': {   'preprocessing': {   'in_memory': True,
                                                      'infer_image_dimensions': True,
                                                      'infer_image_max_height': 256,
                                                      'infer_image_max_width': 256,
                                                      'infer_image_num_channels': True,
                                                      'infer_image_sample_size': 100,
                                                      'missing_value_strategy': 'backfill',
                                                      'num_processes': 1,
                                                      'resize_method': 'interpolate',
                                                      'scaling': 'pixel_normalization'}},
                    'number': {   'preprocessing': {   'fill_value': 0,
                                                       'missing_value_strategy': 'fill_with_const',
                                                       'normalization': None}},
                    'sequence': {   'preprocessing': {   'fill_value': '<UNK>',
                                                         'lowercase': False,
                                                         'max_sequence_length': 256,
                                                         'missing_value_strategy': 'fill_with_const',
                                                         'most_common': 20000,
                                                         'padding': 'right',
                                                         'padding_symbol': '<PAD>',
                                                         'tokenizer': 'space',
                                                         'unknown_symbol': '<UNK>',
                                                         'vocab_file': None}},
                    'set': {   'preprocessing': {   'fill_value': '<UNK>',
                                                    'lowercase': False,
                                                    'missing_value_strategy': 'fill_with_const',
                                                    'most_common': 10000,
                                                    'tokenizer': 'space'}},
                    'text': {   'preprocessing': {   'fill_value': '<UNK>',
                                                     'lowercase': True,
                                                     'max_sequence_length': 256,
                                                     'missing_value_strategy': 'fill_with_const',
                                                     'most_common': 20000,
                                                     'padding': 'right',
                                                     'padding_symbol': '<PAD>',
                                                     'pretrained_model_name_or_path': None,
                                                     'tokenizer': 'space_punct',
                                                     'unknown_symbol': '<UNK>',
                                                     'vocab_file': None}},
                    'timeseries': {   'preprocessing': {   'fill_value': '',
                                                           'missing_value_strategy': 'fill_with_const',
                                                           'padding': 'right',
                                                           'padding_value': 0,
                                                           'timeseries_length_limit': 256,
                                                           'tokenizer': 'space'}},
                    'vector': {   'preprocessing': {   'fill_value': '',
                                                       'missing_value_strategy': 'fill_with_const'}}},
    'input_features': [   {   'column': 'Pclass',
                              'name': 'Pclass',
                              'proc_column': 'Pclass_mZFLky',
                              'tied': None,
                              'type': 'category'},
                          {   'column': 'Sex',
                              'name': 'Sex',
                              'proc_column': 'Sex_mZFLky',
                              'tied': None,
                              'type': 'category'},
                          {   'column': 'Age',
                              'name': 'Age',
                              'preprocessing': {   'missing_value_strategy': 'fill_with_mean'},
                              'proc_column': 'Age_DF6VxJ',
                              'tied': None,
                              'type': 'number'},
                          {   'column': 'SibSp',
                              'name': 'SibSp',
                              'proc_column': 'SibSp_mZFLky',
                              'tied': None,
                              'type': 'number'},
                          {   'column': 'Parch',
                              'name': 'Parch',
                              'proc_column': 'Parch_mZFLky',
                              'tied': None,
                              'type': 'number'},
                          {   'column': 'Fare',
                              'name': 'Fare',
                              'preprocessing': {   'missing_value_strategy': 'fill_with_mean'},
                              'proc_column': 'Fare_DF6VxJ',
                              'tied': None,
                              'type': 'number'},
                          {   'column': 'Embarked',
                              'name': 'Embarked',
                              'proc_column': 'Embarked_mZFLky',
                              'tied': None,
                              'type': 'category'}],
    'model_type': 'ecd',
    'output_features': [   {   'column': 'Survived',
                               'decoder': 'generator',
                               'dependencies': [],
                               'loss': {   'class_similarities_temperature': 0,
                                           'class_weights': 1,
                                           'confidence_penalty': 0,
                                           'robust_lambda': 0,
                                           'type': 'sequence_softmax_cross_entropy',
                                           'unique': False,
                                           'weight': 1},
                               'name': 'Survived',
                               'preprocessing': {   'missing_value_strategy': 'drop_row'},
                               'proc_column': 'Survived_mZFLky',
                               'reduce_dependencies': 'sum',
                               'reduce_input': 'sum',
                               'type': 'text'}],
    'preprocessing': {   'oversample_minority': None,
                         'sample_ratio': 1.0,
                         'split': {'probabilities': [0.7, 0, 0.3]},
                         'undersample_majority': None},
    'trainer': {   'batch_size': 128,
                   'checkpoints_per_epoch': 0,
                   'decay': False,
                   'decay_rate': 0.96,
                   'decay_steps': 10000,
                   'early_stop': 3,
                   'epochs': 10,
                   'eval_batch_size': None,
                   'evaluate_training_set': True,
                   'gradient_clipping': {   'clipglobalnorm': 0.5,
                                            'clipnorm': None,
                                            'clipvalue': None},
                   'increase_batch_size_eval_metric': 'loss',
                   'increase_batch_size_eval_split': 'training',
                   'increase_batch_size_on_plateau': 0,
                   'increase_batch_size_on_plateau_max': 512,
                   'increase_batch_size_on_plateau_patience': 5,
                   'increase_batch_size_on_plateau_rate': 2.0,
                   'learning_rate': 0.001,
                   'learning_rate_scaling': 'linear',
                   'learning_rate_warmup_epochs': 1.0,
                   'optimizer': {   'amsgrad': False,
                                    'betas': (0.9, 0.999),
                                    'eps': 1e-08,
                                    'lr': 0.001,
                                    'type': 'adam',
                                    'weight_decay': 0.0},
                   'reduce_learning_rate_eval_metric': 'loss',
                   'reduce_learning_rate_eval_split': 'training',
                   'reduce_learning_rate_on_plateau': 1.0,
                   'reduce_learning_rate_on_plateau_patience': 1,
                   'reduce_learning_rate_on_plateau_rate': 0.5,
                   'regularization_lambda': 0.0,
                   'regularization_type': 'l2',
                   'should_shuffle': True,
                   'staircase': False,
                   'steps_per_checkpoint': 0,
                   'train_steps': None,
                   'type': 'trainer',
                   'validation_field': 'combined',
                   'validation_metric': 'loss'}}

# Define Ludwig model object that drive model training
model = LudwigModel(config=config, logging_level=logging.INFO, backend="local")

# initiate model training
(
    train_stats,  # dictionary containing training statistics
    preprocessed_data,  # tuple Ludwig Dataset objects of pre-processed training data
    output_directory,  # location of training results stored on disk
) = model.train(dataset=training_set, experiment_name="simple_experiment", model_name="simple_model")

# list contents of output directory
print("contents of output directory:", output_directory)
for item in os.listdir(output_directory):
    print("\t", item)

# batch prediction
model.predict(test_set, skip_save_predictions=False)


dataset is:      PassengerId  Survived  Pclass  \
0              1       0.0       3   
1              2       1.0       1   
2              3       1.0       3   
3              4       1.0       1   
4              5       0.0       3   
..           ...       ...     ...   
886          887       0.0       2   
887          888       1.0       1   
888          889       0.0       3   
889          890       1.0       1   
890          891       0.0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...   

E0810 20:43:54.489202000 4384081280 fork_posix.cc:76]                  Other threads are currently calling into gRPC, skipping fork() handlers
E0810 20:43:54.507318000 4384081280 fork_posix.cc:76]                  Other threads are currently calling into gRPC, skipping fork() handlers


ValueError: need at least one array to stack