From: https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras import layers

In [2]:
tf.__version__

'2.8.0'

In [3]:
dataset_url = 'http://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'
csv_file = 'datasets/petfinder-mini/petfinder-mini.csv'

tf.keras.utils.get_file('petfinder_mini.zip', dataset_url,
                        extract=True, cache_dir='.')
dataframe = pd.read_csv(csv_file)

In [4]:
dataframe.head()

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,Description,PhotoAmt,AdoptionSpeed
0,Cat,3,Tabby,Male,Black,White,Small,Short,No,No,Healthy,100,Nibble is a 3+ month old ball of cuteness. He ...,1,2
1,Cat,1,Domestic Medium Hair,Male,Black,Brown,Medium,Medium,Not Sure,Not Sure,Healthy,0,I just found it alone yesterday near my apartm...,2,0
2,Dog,1,Mixed Breed,Male,Brown,White,Medium,Medium,Yes,No,Healthy,0,Their pregnant mother was dumped by her irresp...,7,3
3,Dog,4,Mixed Breed,Female,Black,Brown,Medium,Short,Yes,No,Healthy,150,"Good guard dog, very alert, active, obedience ...",8,2
4,Dog,1,Mixed Breed,Male,Black,No Color,Medium,Short,No,No,Healthy,0,This handsome yet cute boy is up for adoption....,3,2


In [5]:
# In the original dataset, `'AdoptionSpeed'` of `4` indicates
# a pet was not adopted.
dataframe['target'] = np.where(dataframe['AdoptionSpeed']==4, 0, 1)

# Drop unused features.
dataframe = dataframe.drop(columns=['AdoptionSpeed', 'Description'])

In [6]:
train, val, test = np.split(dataframe.sample(frac=1), [int(0.8*len(dataframe)), int(0.9*len(dataframe))])


In [7]:
print(len(train), 'training examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

9229 training examples
1154 validation examples
1154 test examples


In [8]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    df = dataframe.copy()
    labels = df.pop('target')
    df = {key: value[:, tf.newaxis] for key, value in dataframe.items()}
    ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds

In [9]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)

  df = {key: value[:, tf.newaxis] for key, value in dataframe.items()}
2022-08-16 10:09:52.746270: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of ages:', train_features['Age'])
print('A batch of targets:', label_batch )

Every feature: ['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt', 'target']
A batch of ages: tf.Tensor(
[[12]
 [36]
 [17]
 [60]
 [ 5]], shape=(5, 1), dtype=int64)
A batch of targets: tf.Tensor([1 1 0 1 1], shape=(5,), dtype=int64)


In [11]:
def get_normalization_layer(name, dataset):
    # Create a Normalization layer for the feature.
    normalizer = layers.Normalization(axis=None)

    # Prepare a Dataset that only yields the feature.
    feature_ds = dataset.map(lambda x, y: x[name])

    # Learn the statistics of the data.
    normalizer.adapt(feature_ds)

    return normalizer

In [12]:
photo_count_col = train_features['PhotoAmt']
layer = get_normalization_layer('PhotoAmt', train_ds)
layer(photo_count_col)

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[-0.1925312],
       [-0.5121148],
       [ 0.446636 ],
       [ 0.446636 ],
       [-0.1925312]], dtype=float32)>

In [13]:
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
    # Create a layer that turns strings into integer indices.
    if dtype == 'string':
        index = layers.StringLookup(max_tokens=max_tokens)
    # Otherwise, create a layer that turns integer values into integer indices.
    else:
        index = layers.IntegerLookup(max_tokens=max_tokens)

    # Prepare a `tf.data.Dataset` that only yields the feature.
    feature_ds = dataset.map(lambda x, y: x[name])

    # Learn the set of possible values and assign them a fixed integer index.
    index.adapt(feature_ds)

    # Encode the integer indices.
    encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())

    # Apply multi-hot encoding to the indices. The lambda function captures the
    # layer, so you can use them, or include them in the Keras Functional model later.
    return lambda feature: encoder(index(feature))

In [14]:
test_type_col = train_features['Type']
test_type_layer = get_category_encoding_layer(name='Type',
                                              dataset=train_ds,
                                              dtype='string')
test_type_layer(test_type_col)

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)>

In [15]:
test_age_col = train_features['Age']
test_age_layer = get_category_encoding_layer(name='Age',
                                             dataset=train_ds,
                                             dtype='int64',
                                             max_tokens=5)
test_age_layer(test_age_col)

<tf.Tensor: shape=(5, 5), dtype=float32, numpy=
array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.]], dtype=float32)>

In [16]:
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

  df = {key: value[:, tf.newaxis] for key, value in dataframe.items()}
  df = {key: value[:, tf.newaxis] for key, value in dataframe.items()}
  df = {key: value[:, tf.newaxis] for key, value in dataframe.items()}


In [17]:
all_inputs = []
encoded_features = []

# Numerical features.
for header in ['PhotoAmt', 'Fee']:
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, train_ds)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_inputs.append(numeric_col)
    encoded_features.append(encoded_numeric_col)

In [18]:
age_col = tf.keras.Input(shape=(1,), name='Age', dtype='int64')

encoding_layer = get_category_encoding_layer(name='Age',
                                             dataset=train_ds,
                                             dtype='int64',
                                             max_tokens=5)
encoded_age_col = encoding_layer(age_col)
all_inputs.append(age_col)
encoded_features.append(encoded_age_col)

In [19]:
categorical_cols = ['Type', 'Color1', 'Color2', 'Gender', 'MaturitySize',
                    'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Breed1']

for header in categorical_cols:
    categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
    encoding_layer = get_category_encoding_layer(name=header,
                                                 dataset=train_ds,
                                                 dtype='string',
                                                 max_tokens=5)
    encoded_categorical_col = encoding_layer(categorical_col)
    all_inputs.append(categorical_col)
    encoded_features.append(encoded_categorical_col)

In [20]:
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(all_inputs, output)

In [21]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

In [22]:
# Use `rankdir='LR'` to make the graph horizontal.
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [23]:
model.fit(train_ds, epochs=10, validation_data=val_ds)

Epoch 1/10


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd2a85fbb80>

In [24]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.7400346398353577


In [25]:
model.save('my_pet_classifier')

2022-08-16 10:10:06.792649: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: my_pet_classifier/assets


INFO:tensorflow:Assets written to: my_pet_classifier/assets


In [26]:
reloaded_model = tf.keras.models.load_model('my_pet_classifier')

In [27]:
reloaded_model.inputs

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'PhotoAmt')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'Fee')>,
 <KerasTensor: shape=(None, 1) dtype=int64 (created by layer 'Age')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Type')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Color1')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Color2')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Gender')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'MaturitySize')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'FurLength')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Vaccinated')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Sterilized')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Health')>,
 <KerasTensor: shape=(None, 1) dtype=string (created by layer 'Breed1')>]

In [28]:
sample = {
    'Type': 'Cat',
    'Age': 3,
    'Breed1': 'Tabby',
    'Gender': 'Male',
    'Color1': 'Black',
    'Color2': 'White',
    'MaturitySize': 'Small',
    'FurLength': 'Short',
    'Vaccinated': 'No',
    'Sterilized': 'No',
    'Health': 'Healthy',
    'Fee': 100,
    'PhotoAmt': 2,
}

input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
predictions = reloaded_model.predict(input_dict)
prob = tf.nn.sigmoid(predictions[0])

print(
    "This particular pet had a %.1f percent probability "
    "of getting adopted." % (100 * prob)
)

This particular pet had a 82.9 percent probability of getting adopted.


## PySpark

In [29]:
df = spark.createDataFrame(dataframe)

In [30]:
df.write.mode("overwrite").parquet("datasets/petfinder-mini")

22/08/16 10:10:28 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
22/08/16 10:10:43 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
                                                                                

In [31]:
df.show()

+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+
|Type|Age|              Breed1|Gender|Color1|  Color2|MaturitySize|FurLength|Vaccinated|Sterilized| Health|Fee|PhotoAmt|target|
+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+
| Cat|  3|               Tabby|  Male| Black|   White|       Small|    Short|        No|        No|Healthy|100|       1|     1|
| Cat|  1|Domestic Medium Hair|  Male| Black|   Brown|      Medium|   Medium|  Not Sure|  Not Sure|Healthy|  0|       2|     1|
| Dog|  1|         Mixed Breed|  Male| Brown|   White|      Medium|   Medium|       Yes|        No|Healthy|  0|       7|     1|
| Dog|  4|         Mixed Breed|Female| Black|   Brown|      Medium|    Short|       Yes|        No|Healthy|150|       8|     1|
| Dog|  1|         Mixed Breed|  Male| Black|No Color|      Medium|    Short|        No|        No|Healt

## Inference using Spark ML Model

In [32]:
import sparkext

In [33]:
df = spark.read.parquet("datasets/petfinder-mini")

In [34]:
df.show()

+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+
|Type|Age|              Breed1|Gender|Color1|  Color2|MaturitySize|FurLength|Vaccinated|Sterilized| Health|Fee|PhotoAmt|target|
+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+
| Cat|  3|               Tabby|  Male| Black|   White|       Small|    Short|        No|        No|Healthy|100|       1|     1|
| Cat|  1|Domestic Medium Hair|  Male| Black|   Brown|      Medium|   Medium|  Not Sure|  Not Sure|Healthy|  0|       2|     1|
| Dog|  1|         Mixed Breed|  Male| Brown|   White|      Medium|   Medium|       Yes|        No|Healthy|  0|       7|     1|
| Dog|  4|         Mixed Breed|Female| Black|   Brown|      Medium|    Short|       Yes|        No|Healthy|150|       8|     1|
| Dog|  1|         Mixed Breed|  Male| Black|No Color|      Medium|    Short|        No|        No|Healt

In [35]:
columns = df.columns
print(columns)

['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt', 'target']


In [36]:
# remove label column
columns.remove("target")
print(columns)

['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt']


In [37]:
import sparkext.tensorflow
my_model = sparkext.tensorflow.Model("my_pet_classifier") \
                .setInputCols(columns) \
                .setOutputCol("pred")

In [38]:
%%time
predictions = my_model.transform(df)
results = predictions.collect()

Loading model on driver from my_pet_classifier
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Age (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 Type (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 Color1 (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 Color2 (InputLayer)            [(None, 1)]          0           []                               
                                               



INFO:tensorflow:Assets written to: ram://6a259b38-a773-4c88-ae86-88e34d4bf373/assets


INFO:tensorflow:Assets written to: ram://6a259b38-a773-4c88-ae86-88e34d4bf373/assets
22/08/16 10:10:54 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.

CPU times: user 4.1 s, sys: 0 ns, total: 4.1 s
Wall time: 10.3 s


                                                                                

In [39]:
predictions.show()

[Stage 5:>                                                          (0 + 1) / 1]

+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+--------------+
|Type|Age|              Breed1|Gender|Color1|  Color2|MaturitySize|FurLength|Vaccinated|Sterilized| Health|Fee|PhotoAmt|target|          pred|
+----+---+--------------------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+--------------+
| Cat|  3|               Tabby|  Male| Black|   White|       Small|    Short|        No|        No|Healthy|100|       1|     1|   [1.4765713]|
| Cat|  1|Domestic Medium Hair|  Male| Black|   Brown|      Medium|   Medium|  Not Sure|  Not Sure|Healthy|  0|       2|     1|   [1.2110845]|
| Dog|  1|         Mixed Breed|  Male| Brown|   White|      Medium|   Medium|       Yes|        No|Healthy|  0|       7|     1|   [2.2120945]|
| Dog|  4|         Mixed Breed|Female| Black|   Brown|      Medium|    Short|       Yes|        No|Healthy|150|       8|     1|   [0.5492994]|

                                                                                

## Inference using Spark DL UDF

### Spark DataFrame column names match model input names

In [40]:
from pyspark.sql.functions import struct
from sparkext.tensorflow import model_udf

In [41]:
df = spark.read.parquet("datasets/petfinder-mini")

In [42]:
# df.show()

In [43]:
columns = df.columns
print(columns)

['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt', 'target']


In [44]:
# remove label column
columns.remove("target")
print(columns)

['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt']


In [45]:
# need to pass the list of columns into the model_udf
classify = model_udf("my_pet_classifier", input_columns=columns, batch_size=-1)

Loading model on driver from my_pet_classifier
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Age (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 Type (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 Color1 (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 Color2 (InputLayer)            [(None, 1)]          0           []                               
                                               

In [46]:
df.withColumn("preds", classify(struct(*columns))).show(truncate=10)



INFO:tensorflow:Assets written to: ram://002740fd-d4ff-44e5-bccd-75ac2ef5faf9/assets


INFO:tensorflow:Assets written to: ram://002740fd-d4ff-44e5-bccd-75ac2ef5faf9/assets
[Stage 7:>                                                          (0 + 1) / 1]

+----+---+----------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+----------+
|Type|Age|    Breed1|Gender|Color1|  Color2|MaturitySize|FurLength|Vaccinated|Sterilized| Health|Fee|PhotoAmt|target|     preds|
+----+---+----------+------+------+--------+------------+---------+----------+----------+-------+---+--------+------+----------+
| Cat|  3|     Tabby|  Male| Black|   White|       Small|    Short|        No|        No|Healthy|100|       1|     1|[1.4765...|
| Cat|  1|Domesti...|  Male| Black|   Brown|      Medium|   Medium|  Not Sure|  Not Sure|Healthy|  0|       2|     1|[1.2110...|
| Dog|  1|Mixed B...|  Male| Brown|   White|      Medium|   Medium|       Yes|        No|Healthy|  0|       7|     1|[2.2120...|
| Dog|  4|Mixed B...|Female| Black|   Brown|      Medium|    Short|       Yes|        No|Healthy|150|       8|     1|[0.5492...|
| Dog|  1|Mixed B...|  Male| Black|No Color|      Medium|    Short|        No|        No|Healthy|

                                                                                

In [47]:
%%time
results = df.withColumn("preds", classify(struct(*columns))).collect()

[Stage 8:>                                                          (0 + 2) / 2]

CPU times: user 98.4 ms, sys: 0 ns, total: 98.4 ms
Wall time: 4.39 s


                                                                                

### Simulate Spark DataFrame column names not matching model input names

In [48]:
from pyspark.sql.functions import struct
from sparkext.tensorflow import model_udf

In [49]:
df = spark.read.parquet("datasets/petfinder-mini")

In [50]:
df2 = df.withColumnRenamed("Type", "species") \
        .withColumnRenamed("Age", "years") \
        .withColumnRenamed("Breed1", "breed") \
        .withColumnRenamed("Gender", "sex") \
        .withColumnRenamed("Color1", "main_color") \
        .withColumnRenamed("Color2", "secondary_color") \
        .withColumnRenamed("MaturitySize", "full_size") \
        .withColumnRenamed("FurLength", "fur_length") \
        .withColumnRenamed("Vaccinated", "immunized") \
        .withColumnRenamed("Sterilized", "spayed") \
        .withColumnRenamed("Health", "health") \
        .withColumnRenamed("Fee", "cost") \
        .withColumnRenamed("PhotoAmt","photos") \
        .withColumnRenamed("target", "adopted")
df2.show(truncate=10)

+-------+-----+----------+------+----------+---------------+---------+----------+---------+--------+-------+----+------+-------+
|species|years|     breed|   sex|main_color|secondary_color|full_size|fur_length|immunized|  spayed| health|cost|photos|adopted|
+-------+-----+----------+------+----------+---------------+---------+----------+---------+--------+-------+----+------+-------+
|    Cat|    3|     Tabby|  Male|     Black|          White|    Small|     Short|       No|      No|Healthy| 100|     1|      1|
|    Cat|    1|Domesti...|  Male|     Black|          Brown|   Medium|    Medium| Not Sure|Not Sure|Healthy|   0|     2|      1|
|    Dog|    1|Mixed B...|  Male|     Brown|          White|   Medium|    Medium|      Yes|      No|Healthy|   0|     7|      1|
|    Dog|    4|Mixed B...|Female|     Black|          Brown|   Medium|     Short|      Yes|      No|Healthy| 150|     8|      1|
|    Dog|    1|Mixed B...|  Male|     Black|       No Color|   Medium|     Short|       No|      

In [51]:
spark_columns = df2.columns
spark_columns.remove('adopted')
spark_columns

['species',
 'years',
 'breed',
 'sex',
 'main_color',
 'secondary_color',
 'full_size',
 'fur_length',
 'immunized',
 'spayed',
 'health',
 'cost',
 'photos']

In [52]:
# User must provide ordered list of equivalent model input names 
model_columns = ['Type', 'Age', 'Breed1', 'Gender', 'Color1', 'Color2', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health', 'Fee', 'PhotoAmt']
model_columns

['Type',
 'Age',
 'Breed1',
 'Gender',
 'Color1',
 'Color2',
 'MaturitySize',
 'FurLength',
 'Vaccinated',
 'Sterilized',
 'Health',
 'Fee',
 'PhotoAmt']

In [53]:
# pass the list of model input names into the `model_udf` helper
classify = model_udf("my_pet_classifier", input_columns=model_columns)

Loading model on driver from my_pet_classifier
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Age (InputLayer)               [(None, 1)]          0           []                               
                                                                                                  
 Type (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 Color1 (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 Color2 (InputLayer)            [(None, 1)]          0           []                               
                                               

In [54]:
# pass the list of Spark columns as the Spark SQL selectors
df2.withColumn("preds", classify(struct(*spark_columns))).show(truncate=10)



INFO:tensorflow:Assets written to: ram://ef6ed5ee-b21b-4ca7-82c8-e4ae31cedeb3/assets


INFO:tensorflow:Assets written to: ram://ef6ed5ee-b21b-4ca7-82c8-e4ae31cedeb3/assets
[Stage 11:>                                                         (0 + 1) / 1]

+-------+-----+----------+------+----------+---------------+---------+----------+---------+--------+-------+----+------+-------+----------+
|species|years|     breed|   sex|main_color|secondary_color|full_size|fur_length|immunized|  spayed| health|cost|photos|adopted|     preds|
+-------+-----+----------+------+----------+---------------+---------+----------+---------+--------+-------+----+------+-------+----------+
|    Cat|    3|     Tabby|  Male|     Black|          White|    Small|     Short|       No|      No|Healthy| 100|     1|      1|[1.4765...|
|    Cat|    1|Domesti...|  Male|     Black|          Brown|   Medium|    Medium| Not Sure|Not Sure|Healthy|   0|     2|      1|[1.2110...|
|    Dog|    1|Mixed B...|  Male|     Brown|          White|   Medium|    Medium|      Yes|      No|Healthy|   0|     7|      1|[2.2120...|
|    Dog|    4|Mixed B...|Female|     Black|          Brown|   Medium|     Short|      Yes|      No|Healthy| 150|     8|      1|[0.5492...|
|    Dog|    1|Mixed

                                                                                