# tensorflow pistachio


In [1]:
import tensorflow as tf
print(tf.__version__)

2024-04-30 02:03:21.324534: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


## arff to csv





In [2]:
import pandas as pd 
from scipy.io import arff
import os 
label_mapping = {'Kirmizi_Pistachio': 0, 'Siit_Pistachio': 1}

def load_arff_file(input_arff: str) -> pd.DataFrame:
    """convert arff file to parquet"""
    if not os.path.exists(input_arff):
        raise ValueError(f"input file '{input_arff}' does not exist")
    print(f'loading arff file {input_arff}')
    data, meta = arff.loadarff(input_arff)
    print(f"arff metadata: {meta}")
    df = pd.DataFrame(data)
    df['Class'] = df['Class'].astype(str).map(label_mapping)
    
    return df
##################

arff_filename = './data/Pistachio_16_Features_Dataset.arff'
csv_filename = './data/pistachio_16.csv'
if not os.path.exists(csv_filename):
    df = load_arff_file(arff_filename)
    df.head()
    df.to_csv(csv_filename, index=False, header=True)
    print(f'wrote file to {csv_filename}')
else:
    print(f'{csv_filename} exists')


loading arff file ./data/Pistachio_16_Features_Dataset.arff
arff metadata: Dataset: Pistachio_16_Features_Dataset
	AREA's type is numeric
	PERIMETER's type is numeric
	MAJOR_AXIS's type is numeric
	MINOR_AXIS's type is numeric
	ECCENTRICITY's type is numeric
	EQDIASQ's type is numeric
	SOLIDITY's type is numeric
	CONVEX_AREA's type is numeric
	EXTENT's type is numeric
	ASPECT_RATIO's type is numeric
	ROUNDNESS's type is numeric
	COMPACTNESS's type is numeric
	SHAPEFACTOR_1's type is numeric
	SHAPEFACTOR_2's type is numeric
	SHAPEFACTOR_3's type is numeric
	SHAPEFACTOR_4's type is numeric
	Class's type is nominal, range is ('Kirmizi_Pistachio', 'Siit_Pistachio')

wrote file to ./data/pistachio_16.csv


## dataset


In [3]:
import numpy as np
def split_csv_data(infilename: str, train_filename: str, test_filename:str, test_fraction: float):
    df = pd.read_csv(infilename, header=0)
    columns = df.columns
    df['split_var'] = np.random.uniform(size=len(df))
    train_df = df.loc[df.split_var <= test_fraction][columns]
    test_df = df.loc[df.split_var > test_fraction][columns]
    train_df.to_csv(train_filename, index=False, header=True)
    test_df.to_csv(test_filename, index=False, header=True)
    print(f'wrote {len(train_df)} records to {train_filename}')
    print(f'wrote {len(test_df)} records to {test_filename}')
    
train_filename = './data/pistachio_train.csv'
test_filename = './data/pistachio_test.csv'

if not (os.path.exists(train_filename) and os.path.exists(test_filename)):
    split_csv_data(csv_filename, train_filename, test_filename, 0.2)
else:
    print(f'{train_filename} and {test_filename} exist')






wrote 443 records to ./data/pistachio_train.csv
wrote 1705 records to ./data/pistachio_test.csv


In [60]:
def map_func(features, labels):
    return tf.transpose(tf.stack([features[k] for k in features])), tf.reshape(labels,[-1,1])

# use dataset.map to concatenate feature dictionary into tensor
pistachio_train_batches = tf.data.experimental.make_csv_dataset(
    train_filename, batch_size=4,
    num_epochs=1,
    label_name="Class").map(map_func)
pistachio_test_batches = tf.data.experimental.make_csv_dataset(
    test_filename, batch_size=4,
    num_epochs=1,
    label_name="Class").map(map_func)

In [57]:
batch = 0
for feature_batch, label_batch in pistachio_train_batches.take(2):
    # print(f'{batch}, {label_batch.shape}')
    # cat_batch = tf.stack([feature_batch['AREA'],feature_batch['PERIMETER']],axis=1)
    # cat_batch = tf.stack([feature_batch[k] for k in feature_batch],axis=1)

    # batch += 1
    
    print("'label': {}".format(label_batch))
    # print(cat_batch)
    print(f"features batch shape: {feature_batch.shape}")
    # print(feature_batch.shape)
    

'label': [[1]
 [1]
 [0]
 [0]]
features batch shape: (4, 16)
'label': [[1]
 [0]
 [0]
 [0]]
features batch shape: (4, 16)


2024-04-30 02:46:08.477719: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## Model

In [47]:
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization
from tensorflow.keras import Model

class PistachioModel(Model):
    def __init__(self, units: int=10):
        super().__init__()
        self._units = units
    

    def build(self, input_shape):
        self.bn = BatchNormalization(axis=0, input_shape=input_shape)
        self.d1 = Dense(self._units, activation='relu', input_shape=input_shape)
        self.d2 = Dense(self._units)
        self.lout = Dense(1, activation='sigmoid')
        


    def call(self, x):
        x = self.bn(x)
        x = self.d1(x)
        x = self.d2(x)
        return self.lout(x)

# Create an instance of the model
model = PistachioModel()

## Keras model.fit api

In [48]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy', 'auc'])







In [49]:
model.fit(pistachio_train_batches, epochs=10)

Epoch 1/10


ValueError: Exception encountered when calling PistachioModel.call().

[1mShapes used to initialize variables must be fully-defined (no `None` dimensions). Received: shape=(None,) for variable path='pistachio_model_10/batch_normalization_9/gamma'[0m

Arguments received by PistachioModel.call():
  • x=tf.Tensor(shape=(None, 16), dtype=float32)

## sequential model

In [66]:
model2 = tf.keras.models.Sequential([
  tf.keras.layers.BatchNormalization(), 
  tf.keras.layers.Dense(16, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(16),
  tf.keras.layers.Dense(1, activation='sigmoid')
])
model2.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy', 'auc'])

In [68]:
model2.fit(pistachio_train_batches, epochs=10)


Epoch 1/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 935us/step - accuracy: 0.7284 - auc: 0.8170 - loss: 0.5204
Epoch 2/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880us/step - accuracy: 0.7720 - auc: 0.8719 - loss: 0.4395
Epoch 3/10
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 30ms/step - accuracy: 0.7500 - auc: 1.0000 - loss: 0.2692

2024-04-30 02:52:53.956163: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-30 02:52:54.084876: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 881us/step - accuracy: 0.8066 - auc: 0.8861 - loss: 0.4293
Epoch 4/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 887us/step - accuracy: 0.7972 - auc: 0.8884 - loss: 0.4236
Epoch 5/10
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 29ms/step - accuracy: 1.0000 - auc: 1.0000 - loss: 0.2399

2024-04-30 02:52:54.215464: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-30 02:52:54.346195: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8084 - auc: 0.8767 - loss: 0.4463  
Epoch 6/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - accuracy: 0.7599 - auc: 0.8354 - loss: 0.5226 
Epoch 7/10
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 39ms/step - accuracy: 0.7500 - auc: 0.6667 - loss: 0.5059

2024-04-30 02:52:54.488375: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-30 02:52:54.622119: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 878us/step - accuracy: 0.7879 - auc: 0.8461 - loss: 0.4867
Epoch 8/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 894us/step - accuracy: 0.8098 - auc: 0.8833 - loss: 0.4322
Epoch 9/10
[1m  1/111[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 28ms/step - accuracy: 1.0000 - auc: 1.0000 - loss: 0.2679

2024-04-30 02:52:54.760318: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-30 02:52:54.891889: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 917us/step - accuracy: 0.7919 - auc: 0.8692 - loss: 0.4556
Epoch 10/10
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 932us/step - accuracy: 0.7603 - auc: 0.8445 - loss: 0.4759 


2024-04-30 02:52:55.023937: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-04-30 02:52:55.158823: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


<keras.src.callbacks.history.History at 0x7f8e2ac1f350>

In [69]:
model2.evaluate(pistachio_test_batches,verbose=2)


427/427 - 0s - 1ms/step - accuracy: 0.8639 - auc: 0.9313 - loss: 0.3545


2024-04-30 02:54:06.293289: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[0.35447990894317627, 0.8639296293258667, 0.9313317537307739]

In [84]:
for features, labels in pistachio_test_batches.take(1):
    predictions = model2(features)
    for p,l in zip(predictions, labels):
        print(f'predicted prob: {p}, label: {l}')

predicted prob: [0.8645645], label: [1]
predicted prob: [0.31095818], label: [0]
predicted prob: [0.94427866], label: [1]
predicted prob: [0.00063194], label: [0]


2024-04-30 03:04:05.021128: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## custom training loop stuff

In [None]:
@tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)

In [None]:
@tf.function
def test_step(images, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)

In [None]:

for epoch in range(EPOCHS):
  # Reset the metrics at the start of the next epoch
  train_loss.reset_state()
  train_accuracy.reset_state()
  test_loss.reset_state()
  test_accuracy.reset_state()

  for images, labels in train_ds:
    train_step(images, labels)

  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result():0.2f}, '
    f'Accuracy: {train_accuracy.result() * 100:0.2f}, '
    f'Test Loss: {test_loss.result():0.2f}, '
    f'Test Accuracy: {test_accuracy.result() * 100:0.2f}'
  )