# tensorflow pistachio


In [3]:
import tensorflow as tf
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2024-04-02 19:16:01.273741: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


## arff to csv





In [4]:
import pandas as pd 
from scipy.io import arff
import os 
# Integer class ids for the two class names stored in the ARFF 'Class' attribute.
label_mapping = {'Kirmizi_Pistachio': 0, 'Siit_Pistachio': 1}


def load_arff_file(input_arff: str) -> pd.DataFrame:
    """Load an ARFF file into a DataFrame with integer class labels.

    The 'Class' column is translated from its class name to the integer id
    given by ``label_mapping``; every other column is returned as loaded.

    Args:
        input_arff: path of the ARFF file to read.

    Returns:
        A DataFrame with one row per ARFF record and an integer 'Class' column.

    Raises:
        ValueError: if ``input_arff`` does not exist, or if the 'Class' column
            contains a value that is not a key of ``label_mapping``.
    """
    if not os.path.exists(input_arff):
        raise ValueError(f"input file '{input_arff}' does not exist")
    print(f'loading arff file {input_arff}')
    data, meta = arff.loadarff(input_arff)
    print(f"arff metadata: {meta}")
    df = pd.DataFrame(data)

    # loadarff returns nominal attributes as bytes. Decode them explicitly:
    # astype(str) on bytes is pandas-version dependent and can yield
    # "b'Kirmizi_Pistachio'", which never matches a label_mapping key.
    class_names = df['Class'].map(
        lambda value: value.decode('utf-8') if isinstance(value, bytes) else str(value)
    )
    class_ids = class_names.map(label_mapping)

    # Fail loudly rather than writing NaN labels into the training data.
    unmapped = class_names[class_ids.isna()].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"unexpected class labels in '{input_arff}': {sorted(unmapped)}"
        )

    df['Class'] = class_ids.astype('int64')
    return df
##################
arff_filename = './data/Pistachio_16_Features_Dataset.arff'
csv_filename = './data/pistachio_16.csv'
# One-off conversion: the CSV acts as a cache, so the ARFF file is only parsed
# when the CSV has not been written yet.
if not os.path.exists(csv_filename):
    df = load_arff_file(arff_filename)
    # A bare `df.head()` inside a branch is never displayed (only the last
    # expression of a cell is), so print the preview explicitly.
    print(df.head())
    df.to_csv(csv_filename, index=False, header=True)
    print(f'wrote file to {csv_filename}')
else:
    print(f'{csv_filename} exists')


./data/pistachio_16.csv exists


In [None]:
## dataset

In [5]:
# Stream the converted CSV once (a single epoch) in batches of 4 rows,
# splitting the "Class" column off as the label.
pistachio_batches = tf.data.experimental.make_csv_dataset(
    file_pattern=csv_filename,
    batch_size=4,
    label_name="Class",
    num_epochs=1,
)




In [6]:
# Preview the first two batches: the labels, then one line per feature column.
for features, labels in pistachio_batches.take(2):
    print(f"'label': {labels}")
    print("features:")
    for column_name, column_values in features.items():
        print(f"  {column_name!r:20s}: {column_values}")

'label': [1 0 1 0]
features:
  'AREA'              : [99923. 72612. 93715. 69816.]
  'PERIMETER'         : [1296.055 1075.491 1297.    1637.181]
  'MAJOR_AXIS'        : [455.4228 431.4258 495.952  398.5653]
  'MINOR_AXIS'        : [280.7125 216.4973 242.0692 230.7142]
  'ECCENTRICITY'      : [0.7875 0.865  0.8728 0.8154]
  'EQDIASQ'           : [356.6874 304.06   345.4297 298.1484]
  'SOLIDITY'          : [0.9871 0.9797 0.9788 0.9256]
  'CONVEX_AREA'       : [101229.  74117.  95748.  75427.]
  'EXTENT'            : [0.7286 0.8003 0.8001 0.7451]
  'ASPECT_RATIO'      : [1.6224 1.9928 2.0488 1.7275]
  'ROUNDNESS'         : [0.7475 0.7889 0.7001 0.3273]
  'COMPACTNESS'       : [0.7832 0.7048 0.6965 0.7481]
  'SHAPEFACTOR_1'     : [0.0046 0.0059 0.0053 0.0057]
  'SHAPEFACTOR_2'     : [0.0028 0.003  0.0026 0.0033]
  'SHAPEFACTOR_3'     : [0.6134 0.4967 0.4851 0.5596]
  'SHAPEFACTOR_4'     : [0.9952 0.9898 0.9939 0.9667]
'label': [1 1 0 0]
features:
  'AREA'              : [100291.  88947.  

2024-04-02 19:16:06.651189: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## Model

In [3]:
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization
from tensorflow.keras import Model

class PistachioModel(Model):
  """Small fully connected binary classifier.

  Layers, in order: BatchNormalization -> Dense(10, relu) -> Dense(10)
  -> Dense(1, sigmoid). The single sigmoid unit means the output is one
  value in [0, 1] per input row.
  """

  def __init__(self):
    super().__init__()
    self.bn = BatchNormalization()
    self.d1 = Dense(10, activation='relu')
    # NOTE(review): no activation here, so d2 is a purely linear layer feeding
    # the (also linear-before-sigmoid) output layer — confirm this is intended.
    self.d2 = Dense(10)
    self.lout = Dense(1, activation='sigmoid')

  def call(self, x, training=None):
    """Run the forward pass.

    Args:
      x: input batch. NOTE(review): the layers are applied directly to `x`,
        so this presumably expects a dense float tensor rather than the
        per-column dict that make_csv_dataset yields — confirm against caller.
      training: forwarded to BatchNormalization, which behaves differently
        in training and inference. None lets Keras decide.

    Returns:
      The output of the final sigmoid layer.
    """
    # Pass the flag explicitly so batch-norm statistics are only updated
    # when the caller is actually training.
    x = self.bn(x, training=training)
    x = self.d1(x)
    x = self.d2(x)
    return self.lout(x)

# Create an instance of the model
model = PistachioModel()

In [None]:
# The model ends in a single sigmoid unit and the labels are 0/1, so the
# matching loss is binary cross-entropy on probabilities (not logits).
# SparseCategoricalCrossentropy(from_logits=True) would treat the one output
# as a 1-class logit vector, for which label 1 is out of range.
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Objects used by the custom training step. The model ends in a single sigmoid
# unit, so the loss and accuracy are the binary variants operating on
# probabilities (from_logits defaults to False).
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')

@tf.function
def train_step(images, labels):
  """Run one optimisation step on a batch and update the training metrics.

  Args:
    images: batch of model inputs (name kept from the original template;
      these are the feature rows passed to `model`).
    labels: batch of target labels for `images`.
  """
  with tf.GradientTape() as tape:
    # training=True matters here: the model contains BatchNormalization,
    # which behaves differently during training and inference.
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  # Accumulate running loss/accuracy; read them with .result().
  train_loss(loss)
  train_accuracy(labels, predictions)

In [None]:
# Objects used by the evaluation step. The loss object is stateless, so
# creating it in this cell is harmless; the model ends in a single sigmoid
# unit, hence the binary loss/accuracy on probabilities.
loss_object = tf.keras.losses.BinaryCrossentropy()
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')

@tf.function
def test_step(images, labels):
  """Evaluate one batch without updating weights and update the test metrics.

  Args:
    images: batch of model inputs (name kept from the original template;
      these are the feature rows passed to `model`).
    labels: batch of target labels for `images`.
  """
  # training=False matters here: the model contains BatchNormalization,
  # which must use its moving statistics during evaluation.
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  # Accumulate running loss/accuracy; read them with .result().
  test_loss(t_loss)
  test_accuracy(labels, predictions)

In [None]:

# Training configuration.
# NOTE(review): epoch count, batch size and split ratio are placeholder
# values — tune as needed.
EPOCHS = 5
BATCH_SIZE = 32
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# Build the train/test datasets from the converted CSV. The model applies its
# layers directly to its input, so each element is a dense float32 feature row
# plus a (1,)-shaped label matching the single sigmoid output.
pistachio_df = pd.read_csv(csv_filename).sample(frac=1.0, random_state=SPLIT_SEED)
feature_matrix = pistachio_df.drop(columns=['Class']).to_numpy(dtype='float32')
label_column = pistachio_df[['Class']].to_numpy(dtype='float32')
n_test = int(len(pistachio_df) * TEST_FRACTION)

test_ds = tf.data.Dataset.from_tensor_slices(
    (feature_matrix[:n_test], label_column[:n_test])
).batch(BATCH_SIZE)
train_ds = (
    tf.data.Dataset.from_tensor_slices(
        (feature_matrix[n_test:], label_column[n_test:])
    )
    # Reshuffle the training rows every epoch; the seed keeps runs repeatable.
    .shuffle(buffer_size=max(1, len(pistachio_df) - n_test), seed=SPLIT_SEED)
    .batch(BATCH_SIZE)
)

for epoch in range(EPOCHS):
  # Reset the metrics at the start of the next epoch
  train_loss.reset_state()
  train_accuracy.reset_state()
  test_loss.reset_state()
  test_accuracy.reset_state()

  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result():0.2f}, '
    f'Accuracy: {train_accuracy.result() * 100:0.2f}, '
    f'Test Loss: {test_loss.result():0.2f}, '
    f'Test Accuracy: {test_accuracy.result() * 100:0.2f}'
  )