From 158d2f8258ffd5dfd2115f80ee80fff425411a65 Mon Sep 17 00:00:00 2001 From: Daniel Sparing Date: Thu, 4 Nov 2021 11:30:50 -0500 Subject: [PATCH 1/2] expand_dims to correctly test multi-hot Without expanding dimensions, multi-hot encoded columns are not correctly illustrated (they will be parsed as single-record multi-label, not single-label multi-record). (Another question is whether we need multi-hot for these columns instead of one-hot, as they are not multi-label?) --- site/en/tutorials/structured_data/preprocessing_layers.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/en/tutorials/structured_data/preprocessing_layers.ipynb b/site/en/tutorials/structured_data/preprocessing_layers.ipynb index a94fdaf0c54..1ba2212bc63 100644 --- a/site/en/tutorials/structured_data/preprocessing_layers.ipynb +++ b/site/en/tutorials/structured_data/preprocessing_layers.ipynb @@ -502,7 +502,7 @@ "test_type_layer = get_category_encoding_layer(name='Type',\n", " dataset=train_ds,\n", " dtype='string')\n", - "test_type_layer(test_type_col)" + "test_type_layer(tf.expand_dims(test_type_col, -1))" ] }, { @@ -527,7 +527,7 @@ " dataset=train_ds,\n", " dtype='int64',\n", " max_tokens=5)\n", - "test_age_layer(test_age_col)" + "test_age_layer(tf.expand_dims(test_age_col, -1))" ] }, { From 964bed4013f49c0c2e6e7b6e386feb48860aa84b Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 4 Nov 2021 10:00:26 -0700 Subject: [PATCH 2/2] Apply the expand to the dataframe instead. 
---
 .../structured_data/preprocessing_layers.ipynb | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/site/en/tutorials/structured_data/preprocessing_layers.ipynb b/site/en/tutorials/structured_data/preprocessing_layers.ipynb
index 1ba2212bc63..65762628eb5 100644
--- a/site/en/tutorials/structured_data/preprocessing_layers.ipynb
+++ b/site/en/tutorials/structured_data/preprocessing_layers.ipynb
@@ -295,9 +295,10 @@
       "outputs": [],
       "source": [
         "def df_to_dataset(dataframe, shuffle=True, batch_size=32):\n",
-        "  dataframe = dataframe.copy()\n",
-        "  labels = dataframe.pop('target')\n",
-        "  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))\n",
+        "  df = dataframe.copy()\n",
+        "  labels = df.pop('target')\n",
+        "  df = {key: value[:,tf.newaxis] for key, value in df.items()}\n",
+        "  ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))\n",
         "  if shuffle:\n",
         "    ds = ds.shuffle(buffer_size=len(dataframe))\n",
         "  ds = ds.batch(batch_size)\n",
@@ -502,7 +503,7 @@
         "test_type_layer = get_category_encoding_layer(name='Type',\n",
         "                                              dataset=train_ds,\n",
         "                                              dtype='string')\n",
-        "test_type_layer(tf.expand_dims(test_type_col, -1))"
+        "test_type_layer(test_type_col)"
       ]
     },
     {
@@ -527,7 +528,7 @@
         "                                             dataset=train_ds,\n",
         "                                             dtype='int64',\n",
         "                                             max_tokens=5)\n",
-        "test_age_layer(tf.expand_dims(test_age_col, -1))"
+        "test_age_layer(test_age_col)"
       ]
     },
     {