In [0]:
'''
*    Author: Olufemi Onimole
*    Date: 2019
*    Code version: 0.1

Dataset Source:
Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.

@misc{Dua:2019 ,
author = "Dua, Dheeru and Graff, Casey",
year = "2017",
title = "{UCI} Machine Learning Repository",
url = "http://archive.ics.uci.edu/ml",
institution = "University of California, Irvine, School of Information and Computer Sciences" }
'''

!pip install --upgrade tensorflow

from __future__ import absolute_import, division, print_function, unicode_literals

import pandas as pd
import tensorflow as tf

Read csv file

In [0]:
csv_file = "/content/drive/My Drive/Colab Notebooks/Projects/Abalone/abalone.csv"
df = pd.read_csv(csv_file)

examine data

In [0]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


examine data types

In [0]:
df.dtypes

Sex                object
Length            float64
Diameter          float64
Height            float64
Whole weight      float64
Shucked weight    float64
Viscera weight    float64
Shell weight      float64
Rings               int64
dtype: object

convert category to numerical

In [0]:
df['Sex'] = pd.Categorical(df['Sex'])
df['Sex'] = df['Sex'].cat.codes
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,2,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,2,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,1,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


shuffle data

In [0]:
df = df.sample(frac=1).reset_index(drop=True)

Count unique category values

In [0]:
category_count = df['Rings'].max() + 1

split data into train, validation and test dataframes

In [0]:
# create test dataframe
dflen = len(df.index)
split_amount = int(dflen * .8)
df_train = df[:split_amount]
df_test = df[split_amount:]

# create train dataframe
dflen = len(df_train.index)
split_amount = int(dflen * .8)
df_train = df_train[:split_amount]

# create validation dataframe
df_val = df_train[split_amount:]

separate target

In [0]:
train_target = df_train.pop('Rings')
val_target = df_val.pop('Rings')
test_target = df_test.pop('Rings')

move dataframe to dataset

In [0]:
# create train dataset
train_dataset = tf.data.Dataset.from_tensor_slices((df_train.values, train_target.values))

# create validation dataset
val_dataset = tf.data.Dataset.from_tensor_slices((df_val.values, val_target.values))

# create test dataset
test_dataset = tf.data.Dataset.from_tensor_slices((df_test.values, test_target.values))


examine dataset

In [0]:
for feat, targ in train_dataset.take(5):
  print ('Features: {}, Target: {}:'.format(feat, targ))

Features: [1.     0.42   0.325  0.11   0.325  0.1245 0.0755 0.1025], Target: 7:
Features: [0.    0.51  0.385 0.135 0.632 0.282 0.145 0.17 ], Target: 8:
Features: [0.     0.7    0.525  0.19   1.6465 0.8545 0.307  0.3995], Target: 9:
Features: [0.     0.53   0.435  0.17   0.8155 0.2985 0.155  0.275 ], Target: 13:
Features: [1.     0.345  0.255  0.095  0.183  0.075  0.0385 0.06  ], Target: 6:


shuffle and batch datasets

In [0]:
train_dataset = train_dataset.shuffle(len(df)).batch(16, drop_remainder=True)
val_dataset = train_dataset.batch(16, drop_remainder=True)
test_dataset = train_dataset.batch(16, drop_remainder=True)

Create and train a model

In [0]:
def get_compiled_model():
  model = tf.keras.Sequential([
                               tf.keras.layers.Dense(1000, activation='relu'),
                               tf.keras.layers.Dense(1000, activation='relu'),
                               tf.keras.layers.Dense(1000, activation='relu'),
                               tf.keras.layers.Dense(1000, activation='relu'),
                               tf.keras.layers.Dropout(0.2),
                               tf.keras.layers.Dense(category_count, activation='softmax')
])
  
  model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  return model

In [0]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

model = get_compiled_model()
model.fit(train_dataset, validation_data=val_dataset, epochs=1000, verbose=2, callbacks=[callback])

model.save('/content/drive/My Drive/Colab Notebooks/Projects/Abalone/my_model.h5')



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/1000
167/167 - 5s - loss: 2.3714 - accuracy: 0.2058 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/1000
167/167 - 4s - loss: 2.1327 - accuracy: 0.2410 - val_loss: 2.0857 - val_accuracy: 0.2441
Epoch 3/1000
167/167 - 4s - loss: 2.0538 - accuracy: 0.2519 - val_loss: 1.9986 - val_accuracy: 0.2559
Epoch 4/1000
167/167 - 4s - loss: 2.0139 - accuracy: 0.2556 - val_loss: 1.9391 - val_accuracy: 0.2809
Epoch 5/1000
167/167 - 4s - loss: 2.0007 - accuracy: 0.2571 - val_loss: 1.9583 - val_accuracy: 0.2570
Epoch 6/1000
167/167 - 4s - loss: 1.9830 - accuracy: 0.2687 - val_loss: 1.9798 - val_accuracy: 0.2699
Epoch 7/1000
167/167 - 4s - loss: 1.9578 - accuracy: 0.2582 - val_loss: 1.8971 

In [0]:
model.evaluate(test_dataset)



[0.3136028558015823, 0.8882812]