In [76]:
import numpy as np
import pandas as pd
import tensorflow as tf
import ast

data_path = "datasets/example.csv"
model_dir = "models"

data_train = pd.read_csv(data_path)
data_train['symptoms'] = data_train['symptoms'].apply(lambda x: ast.literal_eval(x))
data_train['symptoms'] = tf.keras.preprocessing.sequence.pad_sequences(data_train['symptoms'])
y_train = data_train.pop("diagnosis")

CATEG_COLUMNS = ["gender"]
NUM_COLUMNS = ["age", "chol", "bp"]

In [77]:
def one_hot_cat_column(feature_name, vocab):
    return tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocab))

In [78]:
feature_columns = []
for feature_name in CATEG_COLUMNS:
    vocab = data_train[feature_name].unique()
    feature_columns.append(one_hot_cat_column(feature_name, vocab))
    
for feature_name in NUM_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int32))

symptoms = tf.feature_column.sequence_categorical_column_with_identity("symptoms", num_buckets=5000)
#symptoms_emb = tf.feature_column.embedding_column(symptoms, dimension=1)
#symptoms_seq = tf.keras.experimental.SequenceFeatures(symptoms_emb)
feature_columns.append(symptoms)

In [82]:
def make_input(x, y, n_epochs=None, shuffle=True):
    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(x), y))
        if shuffle:
            dataset = dataset.shuffle(len(y_train))
        dataset = dataset.repeat(n_epochs)
        dataset = dataset.batch(len(y_train))
        return dataset
    return input_fn

In [83]:
train_input = make_input(data_train, y_train)

In [None]:
linear_est = tf.estimator.BoostedTreesClassifier(feature_columns, n_classes=5000, n_batches_per_layer=1, train_in_memory=True)

model = linear_est.train(train_input, max_steps=100)



INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp9fc0wcvm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.


In [74]:
result = model.evaluate(train_input, steps=10)

for key, value in result.items():
  print(key, ":", value)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-03-15T12:19:40Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpl67jkq6d/model.ckpt-100
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/10]
INFO:tensorflow:Evaluation [2/10]
INFO:tensorflow:Evaluation [3/10]
INFO:tensorflow:Evaluation [4/10]
INFO:tensorflow:Evaluation [5/10]
INFO:tensorflow:Evaluation [6/10]
INFO:tensorflow:Evaluation [7/10]
INFO:tensorflow:Evaluation [8/10]
INFO:tensorflow:Evaluation [9/10]
INFO:tensorflow:Evaluation [10/10]
INFO:tensorflow:Inference Time : 28.07620s
INFO:tensorflow:Finished evaluation at 2021-03-15-12:20:08
INFO:tensorflow:Saving dict for global step 100: accuracy = 0.001021645, average_loss = 7.312817, global_step = 100, loss = 7.3128176
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 100: /tmp/tmpl67jkq6d/model.ckp