# tensorflow pistachio


In [1]:
import tensorflow as tf
print(tf.__version__)

2024-04-03 18:00:46.580148: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


## arff to csv





In [2]:
import pandas as pd 
from scipy.io import arff
import os 
label_mapping = {'Kirmizi_Pistachio': 0, 'Siit_Pistachio': 1}

def load_arff_file(input_arff: str) -> pd.DataFrame:
    """convert arff file to parquet"""
    if not os.path.exists(input_arff):
        raise ValueError(f"input file '{input_arff}' does not exist")
    print(f'loading arff file {input_arff}')
    data, meta = arff.loadarff(input_arff)
    print(f"arff metadata: {meta}")
    df = pd.DataFrame(data)
    df['Class'] = df['Class'].astype(str).map(label_mapping)
    
    return df
##################
arff_filename = './data/Pistachio_16_Features_Dataset.arff'
csv_filename = './data/pistachio_16.csv'
if not os.path.exists(csv_filename):
    df = load_arff_file(arff_filename)
    df.head()
    df.to_csv(csv_filename, index=False, header=True)
    print(f'wrote file to {csv_filename}')
else:
    print(f'{csv_filename} exists')


./data/pistachio_16.csv exists


## dataset


In [10]:
import numpy as np
def split_csv_data(infilename: str, train_filename: str, test_filename:str, test_fraction: float):
    df = pd.read_csv(infilename, header=0)
    columns = df.columns
    df['split_var'] = np.random.uniform(size=len(df))
    train_df = df.loc[df.split_var <= test_fraction][columns]
    test_df = df.loc[df.split_var > test_fraction][columns]
    train_df.to_csv(train_filename, index=False, header=True)
    test_df.to_csv(test_filename, index=False, header=True)
    print(f'wrote {len(train_df)} records to {train_filename}')
    print(f'wrote {len(test_df)} records to {test_filename}')
    
train_filename = './data/pistachio_train.csv'
test_filename = './data/pistachio_test.csv'

if not (os.path.exists(train_filename) and os.path.exists(test_filename)):
    split_csv_data(csv_filename, train_filename, test_filename, 0.2)
else:
    print(f'{train_filename} and {test_filename} exist')






./data/pistachio_train.csv and ./data/pistachio_test.csv exist


In [11]:
pistachio_train_batches = tf.data.experimental.make_csv_dataset(
    train_filename, batch_size=4,
    num_epochs=1,
    label_name="Class")
pistachio_test_batches = tf.data.experimental.make_csv_dataset(
    test_filename, batch_size=4,
    num_epochs=1,
    label_name="Class")

In [20]:
# batch = 0
for feature_batch, label_batch in pistachio_train_batches.take(2):
    # print(f'{batch}, {label_batch.shape}')
    # cat_batch = tf.stack([feature_batch['AREA'],feature_batch['PERIMETER']],axis=1)
    cat_batch = tf.stack([feature_batch[k] for k in feature_batch],axis=1)

    # batch += 1
    
    print("'label': {}".format(label_batch))
    print(cat_batch)
    print("features:")
    for key, value in feature_batch.items():
        print("  {!r:20s}: {}".format(key, value))

'label': [1 0 1 0]
tf.Tensor(
[[7.304000e+04 2.473311e+03 4.656840e+02 3.026644e+02 7.600000e-01
  3.049548e+02 6.975000e-01 1.047200e+05 5.385000e-01 1.538600e+00
  1.500000e-01 6.549000e-01 6.400000e-03 4.100000e-03 4.288000e-01
  6.598000e-01]
 [6.404600e+04 1.431364e+03 4.099027e+02 2.070709e+02 8.630000e-01
  2.855624e+02 9.182000e-01 6.975500e+04 6.562000e-01 1.979500e+00
  3.928000e-01 6.967000e-01 6.400000e-03 3.200000e-03 4.853000e-01
  9.607000e-01]
 [8.204300e+04 1.477527e+03 4.592950e+02 2.305736e+02 8.649000e-01
  3.232033e+02 9.653000e-01 8.499200e+04 7.758000e-01 1.992000e+00
  4.723000e-01 7.037000e-01 5.600000e-03 2.800000e-03 4.952000e-01
  9.864000e-01]
 [9.334000e+04 1.290415e+03 5.118364e+02 2.356817e+02 8.877000e-01
  3.447379e+02 9.710000e-01 9.612300e+04 7.512000e-01 2.171700e+00
  7.044000e-01 6.735000e-01 5.500000e-03 2.500000e-03 4.536000e-01
  9.852000e-01]], shape=(4, 16), dtype=float32)
features:
  'AREA'              : [73040. 64046. 82043. 93340.]
  'PER

2024-04-03 19:33:13.176990: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## Model

In [5]:
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization
from tensorflow.keras import Model

class PistachioModel(Model):
  def __init__(self):
    super().__init__()
    self.bn = BatchNormalization()
    self.d1 = Dense(10, activation='relu')
    self.d2 = Dense(10)
    self.lout = Dense(1, activation='sigmoid')

  def call(self, x):
    x = self.bn(x)
    x = self.d1(x)
    x = self.d2(x)
    return self.lout(x)

# Create an instance of the model
model = PistachioModel()

## Keras model.fit api

In [14]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy', 'auc'])







In [15]:
model.fit(pistachio_train_batches, epochs=10)

Epoch 1/10


1. The `call()` method of your layer may be crashing. Try to `__call__()` the layer eagerly on some test input first to see if it works. E.g. `x = np.random.random((3, 4)); y = layer(x)`
2. If the `call()` method is correct, then you may need to implement the `def build(self, input_shape)` method on your layer. It should create all variables used by the layer (e.g. by calling `layer.build()` on all its children layers).
Exception encoutered: ''-1''


KeyError: "Exception encountered when calling PistachioModel.call().\n\n\x1b[1m-1\x1b[0m\n\nArguments received by PistachioModel.call():\n  • x=OrderedDict([('AREA', 'tf.Tensor(shape=(None,), dtype=float32)'), ('PERIMETER', 'tf.Tensor(shape=(None,), dtype=float32)'), ('MAJOR_AXIS', 'tf.Tensor(shape=(None,), dtype=float32)'), ('MINOR_AXIS', 'tf.Tensor(shape=(None,), dtype=float32)'), ('ECCENTRICITY', 'tf.Tensor(shape=(None,), dtype=float32)'), ('EQDIASQ', 'tf.Tensor(shape=(None,), dtype=float32)'), ('SOLIDITY', 'tf.Tensor(shape=(None,), dtype=float32)'), ('CONVEX_AREA', 'tf.Tensor(shape=(None,), dtype=float32)'), ('EXTENT', 'tf.Tensor(shape=(None,), dtype=float32)'), ('ASPECT_RATIO', 'tf.Tensor(shape=(None,), dtype=float32)'), ('ROUNDNESS', 'tf.Tensor(shape=(None,), dtype=float32)'), ('COMPACTNESS', 'tf.Tensor(shape=(None,), dtype=float32)'), ('SHAPEFACTOR_1', 'tf.Tensor(shape=(None,), dtype=float32)'), ('SHAPEFACTOR_2', 'tf.Tensor(shape=(None,), dtype=float32)'), ('SHAPEFACTOR_3', 'tf.Tensor(shape=(None,), dtype=float32)'), ('SHAPEFACTOR_4', 'tf.Tensor(shape=(None,), dtype=float32)')])"

In [None]:
## custom training loop stuff

In [None]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)

In [None]:
@tf.function
def test_step(images, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)

In [None]:

for epoch in range(EPOCHS):
  # Reset the metrics at the start of the next epoch
  train_loss.reset_state()
  train_accuracy.reset_state()
  test_loss.reset_state()
  test_accuracy.reset_state()

  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result():0.2f}, '
    f'Accuracy: {train_accuracy.result() * 100:0.2f}, '
    f'Test Loss: {test_loss.result():0.2f}, '
    f'Test Accuracy: {test_accuracy.result() * 100:0.2f}'
  )