In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input
# directory, then print one path per line in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Load the CSV data.**

In [None]:
# Read the fetal-health CSV from the notebook's input directory into a DataFrame.
fetal_health_data = pd.read_csv(
    filepath_or_buffer='../input/fetal-health-classification/fetal_health.csv'
)
# Cell output: (number of rows, number of columns).
fetal_health_data.shape

In [None]:
# Preview the first rows. fetal_health_data is already a DataFrame (it is the
# result of pd.read_csv), so it does not need to be re-wrapped in pd.DataFrame.
fetal_health_data.head()

**Define x (input) and y (output/label)**

In [None]:
# Model inputs: every column of the dataset except the 'fetal_health' label.
x = fetal_health_data.drop(columns=['fetal_health'])
# Cell output: (number of rows, number of input columns).
x.shape

In [None]:
# Model target: the 'fetal_health' column on its own, as a Series.
y = fetal_health_data.loc[:, 'fetal_health']
# Cell output: (number of rows,).
y.shape

**Split data (x and y) into training data and testing data.**

In [None]:
# Hold out 30% of the rows for testing.
# random_state pins the shuffle so that "Restart & Run All" reproduces the same
# split (and therefore comparable metrics) on every run.
# stratify=y keeps the proportion of each label value the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
# Cell output: shapes of the four resulting pieces.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

**Normalize x_train and x_test.**

In [None]:
# Names of the columns handed to the MinMaxScaler. No `remainder` is given to
# make_column_transformer, so its default applies (documented as 'drop'):
# only the columns listed here end up in the transformed output.
feature_columns = [
    'baseline value',
    'accelerations',
    'fetal_movement',
    'uterine_contractions',
    'light_decelerations',
    'severe_decelerations',
    'prolongued_decelerations',
    'abnormal_short_term_variability',
    'mean_value_of_short_term_variability',
    'percentage_of_time_with_abnormal_long_term_variability',
    'mean_value_of_long_term_variability',
    'histogram_width',
    'histogram_min',
    'histogram_max',
    'histogram_number_of_peaks',
    'histogram_number_of_zeroes',
    'histogram_mode',
    'histogram_mean',
    'histogram_median',
    'histogram_variance',
    'histogram_tendency',
]

# One transformer step: min-max scale each listed column.
ct = make_column_transformer((MinMaxScaler(), feature_columns))

# Learn the per-column minimum and maximum from the training split only, so
# that no information from the test split is used for scaling.
ct.fit(x_train)

In [None]:
# Apply the column transformer (already fitted on x_train) to the training inputs.
x_train_normalized = ct.transform(X=x_train)
# Cell output: (number of training rows, number of scaled columns).
x_train_normalized.shape

In [None]:
# Scale the test inputs with the transformer that was fitted on x_train.
# It is deliberately NOT re-fitted here, so test values are scaled with the
# training minima/maxima and are not guaranteed to lie inside [0, 1].
x_test_normalized = ct.transform(X=x_test)
# Cell output: (number of test rows, number of scaled columns).
x_test_normalized.shape

In [None]:
# Shape of a single scaled training sample, i.e. the matrix shape without its
# leading (row) dimension. This is the input shape the model has to accept.
x_train_normalized.shape[1:]

**Turn y_train and y_test into one-hot vectors.**

In [None]:
# tf.one_hot expects integer class indices. Subtracting 1 shifts the labels so
# that they can be used as indices into the 3 one-hot positions; the cast to
# int32 is made explicit instead of relying on implicit conversion of the
# pandas Series. astype raises if a label is missing/non-finite rather than
# silently producing a garbage index.
# NOTE(review): the label column looks like it is stored as floats (1.0, 2.0,
# 3.0) in the CSV — confirm.
y_train_indices = (y_train - 1).astype('int32').to_numpy()
y_test_indices = (y_test - 1).astype('int32').to_numpy()

y_train_one_hot = tf.one_hot(y_train_indices, depth=3)
y_test_one_hot = tf.one_hot(y_test_indices, depth=3)
# Cell output: shapes of the two one-hot label tensors.
y_train_one_hot.shape, y_test_one_hot.shape

**Use a test model (model_test) to estimate the best learning rate, using a learning-rate scheduler callback (lr_scheduler) that increases the learning rate every epoch.**

In [None]:
# Seed TensorFlow's global random generator so the sweep is repeatable.
tf.random.set_seed(42)

# Take the input width from the scaled training matrix instead of hard-coding
# it, so the model cannot drift out of sync with the column transformer output.
n_features = x_train_normalized.shape[1]

# Small fully connected classifier: two ReLU hidden layers and a 3-way softmax.
model_test = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=[n_features]),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(3, activation='softmax'),
])

model_test.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Learning-rate sweep: start at 1e-4 and multiply the rate by 10 every 20
# epochs. The scheduler overrides the optimizer's learning rate each epoch.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-4 * 10 ** (epoch / 20))

# Train silently (verbose=0); per-epoch metrics are kept in history_test.history.
history_test = model_test.fit(
    x_train_normalized,
    y_train_one_hot,
    validation_data=(x_test_normalized, y_test_one_hot),
    epochs=100,
    verbose=0,
    callbacks=[lr_scheduler],
)

In [None]:
# Plot every series recorded during the learning-rate sweep against the epoch
# index. The title and axis labels let the figure be read on its own; the
# trailing semicolon suppresses the text repr of the last expression.
ax = pd.DataFrame(history_test.history).plot(title='Learning-rate sweep: training history')
ax.set(xlabel='epoch', ylabel='value');

In [None]:
# Rebuild the learning rate used at each epoch with the same formula as the
# scheduler (1e-4 * 10 ** (epoch / 20)). The number of epochs is taken from the
# recorded loss history rather than hard-coded, so the x and y arrays always
# have the same length even if the sweep's epoch count is changed.
sweep_losses = history_test.history['loss']
lrs = 1e-4 * 10 ** (tf.range(len(sweep_losses)) / 20)

# Loss against learning rate, with the learning rate on a logarithmic axis.
plt.semilogx(lrs, sweep_losses)
plt.xlabel('learning rate')
plt.ylabel('training loss')
plt.title('Training loss vs. learning rate');

**The loss drops fastest (steepest downward slope) at a learning rate of about $10^{-4}$, so we use this as the learning rate for the real model.**

**Train the real model.**

In [None]:
# Seed TensorFlow's global random generator here as well (the sweep cell does
# the same), so re-running this cell gives a repeatable training run.
tf.random.set_seed(42)

# Take the input width from the scaled training matrix instead of hard-coding it.
n_features = x_train_normalized.shape[1]

# Same architecture as the learning-rate sweep model: two ReLU hidden layers
# followed by a 3-way softmax output.
model = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=[n_features]),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(3, activation='softmax'),
])

# Fixed learning rate of 1e-4 (the value chosen from the sweep).
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train silently (verbose=0); per-epoch metrics are kept in history.history.
history = model.fit(
    x_train_normalized,
    y_train_one_hot,
    validation_data=(x_test_normalized, y_test_one_hot),
    epochs=100,
    verbose=0,
)

**Get the validation accuracy.**

In [None]:
# Score the trained model on the held-out split. The model was compiled with
# the 'accuracy' metric, so the cell output is the loss followed by the accuracy.
model.evaluate(x=x_test_normalized, y=y_test_one_hot)

In [None]:
# Plot the per-epoch training and validation metrics of the real model. The
# title and axis labels let the figure be read on its own; the trailing
# semicolon suppresses the text repr of the last expression.
ax = pd.DataFrame(history.history).plot(title='Final model: training history')
ax.set(xlabel='epoch', ylabel='value');