In [0]:
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
tf.random.set_seed(123)
import keras
from sklearn.model_selection import train_test_split

In [43]:
path = keras.utils.get_file("abalone.data", "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data")
path

'/root/.keras/datasets/abalone.data'

In [139]:
dataset = pd.read_csv(path, header=None,names=['Sex','Length','Diameter','Height','Whole_weight','Shucked_weight','Viscera_weight','Shell_weight','Rings']
                      ,na_values = "?", comment='\t',
                      sep=",", skipinitialspace=True)
dataset.head(5)

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [0]:
dataset['Rings']=dataset['Rings'].apply(lambda x: 0 if x<=14 else 1)
y=dataset.pop('Rings')
X=dataset.copy()

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [142]:
numerical_columns=['Length','Diameter','Height','Whole_weight','Shucked_weight','Viscera_weight','Shell_weight']
categorical_columns=['Sex']

feature_columns=[]

for value in numerical_columns:
  feature_columns.append(tf.feature_column.numeric_column(value,dtype=tf.float32))

for value in categorical_columns:
  unique_values=dataset[value].unique()
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(value,unique_values))

feature_columns

[NumericColumn(key='Length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Diameter', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Height', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Whole_weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Shucked_weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Viscera_weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Shell_weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 VocabularyListCategoricalColumn(key='Sex', vocabulary_list=('M', 'F', 'I'), dtype=tf.string, default_value=-1, num_oov_buckets=0)]

In [0]:
def dataframe_to_tf(X,y,shuffle=True):
  def input_function():
    dataset=tf.data.Dataset.from_tensor_slices((dict(X),y))
    dataset=dataset.shuffle(1000)
    dataset=dataset.batch(32).repeat(10)
    return dataset
  return input_function

In [0]:
training_data=dataframe_to_tf(X_train,y_train)
validation_data=dataframe_to_tf(X_test,y_test)

In [145]:
for x,y in dataframe_to_tf(X_train,y_train)().take(1):
  print('A batch of Length:', x['Length'].numpy())

A batch of Length: [0.59  0.56  0.29  0.505 0.515 0.54  0.675 0.54  0.495 0.52  0.33  0.64
 0.445 0.655 0.705 0.465 0.39  0.63  0.615 0.395 0.6   0.5   0.415 0.63
 0.665 0.21  0.55  0.245 0.415 0.685 0.46  0.615]


In [146]:
linear_classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_classifier.train(training_data)
classifier_result = linear_classifier.evaluate(validation_data)
print(pd.Series(classifier_result))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp9hw_71qk', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set

In [147]:
BoostedTreesClassifier = tf.estimator.BoostedTreesClassifier(feature_columns,n_batches_per_layer=1)
BoostedTreesClassifier.train(training_data, max_steps=100)
BoostedTreesClassifier_result = BoostedTreesClassifier.evaluate(validation_data)
print(pd.Series(BoostedTreesClassifier_result))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmps0j_0wsl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
