# A - tensorflow.keras Intro mit Baumdaten

In [1]:
import keras

2023-12-13 21:30:54.174953: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import tensorflow as tf

# Display the installed TensorFlow version (the cell's last expression is shown
# as its output) so readers know which release produced the results below.
tf.version.VERSION

'2.14.0'

In [3]:
import numpy as np

# Seed every random number generator the training run may draw from, so that a
# fresh "Restart Kernel -> Run All" is as repeatable as possible.
# Seeding only NumPy and TensorFlow leaves Python's built-in `random` module
# unseeded; per the Keras documentation `set_random_seed` seeds Python, NumPy
# and TensorFlow in one call.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

## Transformierte Daten einlesen

In [4]:
from repml.fast import get_transformed_tree_data

# Number of components requested from the data helper; the resulting feature
# matrix has this many columns (see the shape printed in the next cell).
N_COMPONENTS = 100

# Load the pre-transformed tree data, already split into train and test parts.
# NOTE(review): the kind of transformation is decided inside `repml.fast` and is
# not visible here — confirm before documenting it further.
x_train, y_train, x_test, y_test = get_transformed_tree_data(
    components=N_COMPONENTS
)

In [5]:
# Sanity check: show the (rows, columns) shape of the training features.
x_train.shape

(220307, 100)

## Modell mit der Sequential API

In [6]:
# Feed-forward regression network built with the Sequential API:
#   Flatten -> Dense(50, relu) -> Dense(50, relu) -> Dense(1, no activation)
# The input is already a flat vector of `x_train.shape[1]` features, so Flatten
# only declares the input shape and passes the data through unchanged.
#
# The layers come from `tf.keras`, the same namespace the optimizer and the
# callbacks of this notebook are taken from, so every Keras object in the
# training pipeline originates from one package instead of mixing the
# stand-alone `keras` import with `tf.keras`.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(
            input_shape=[
                x_train.shape[1],
            ]
        ),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dense(1),
    ]
)

In [7]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 100)               0         
                                                                 
 dense (Dense)               (None, 50)                5050      
                                                                 
 dense_1 (Dense)             (None, 50)                2550      
                                                                 
 dense_2 (Dense)             (None, 1)                 51        
                                                                 
Total params: 7651 (29.89 KB)
Trainable params: 7651 (29.89 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# Step size used by the Adam optimizer.
LEARNING_RATE = 1e-3

# Configure training: minimise the mean squared error and additionally report
# the mean absolute error as a metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(
    loss="mse",
    metrics=["mae"],
    optimizer=optimizer,
)

## Logging mit TensorBoard

In [9]:
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

# TensorBoard logging for this trial.
# NOTE(review): update_freq=1 logs after every batch; the Keras docs warn that
# very frequent writes can slow training down — confirm this granularity is
# really wanted.
tb_callback = TensorBoard(
    log_dir="./tf_logs/tree-nn/trial-A",
    update_freq=1,
)

# Stop once the monitored value has not improved for 20 epochs and roll the
# model back to the best weights seen so far.
# NOTE(review): this monitors the *training* loss ("loss"), not "val_loss" —
# confirm that this is intended.
es_callback = EarlyStopping(
    monitor="loss",
    patience=20,
    restore_best_weights=True,
)

In [10]:
# Train for at most 350 epochs (the early-stopping callback may end the run
# sooner) in mini-batches of 64 samples; verbose=2 prints one summary line per
# epoch.
# NOTE(review): the test split is passed as validation data here and is also
# used for the final evaluation further down.
training_options = {
    "epochs": 350,
    "batch_size": 64,
    "verbose": 2,
    "validation_data": (x_test, y_test),
    "callbacks": [tb_callback, es_callback],
}
history = model.fit(x_train, y_train, **training_options)

Epoch 1/350
3443/3443 - 40s - loss: 403044.3750 - mae: 305.6706 - val_loss: 1951.5757 - val_mae: 31.7882 - 40s/epoch - 12ms/step
Epoch 2/350
3443/3443 - 35s - loss: 935.7590 - mae: 21.1920 - val_loss: 475.7950 - val_mae: 15.3573 - 35s/epoch - 10ms/step
Epoch 3/350
3443/3443 - 34s - loss: 374.6759 - mae: 13.6794 - val_loss: 322.3090 - val_mae: 12.5960 - 34s/epoch - 10ms/step
Epoch 4/350
3443/3443 - 51s - loss: 293.3372 - mae: 12.1369 - val_loss: 283.7797 - val_mae: 11.7386 - 51s/epoch - 15ms/step
Epoch 5/350
3443/3443 - 36s - loss: 265.0407 - mae: 11.4955 - val_loss: 276.5505 - val_mae: 11.8432 - 36s/epoch - 10ms/step
Epoch 6/350
3443/3443 - 49s - loss: 250.1312 - mae: 11.1495 - val_loss: 257.0118 - val_mae: 11.2612 - 49s/epoch - 14ms/step
Epoch 7/350
3443/3443 - 31s - loss: 241.7390 - mae: 10.9475 - val_loss: 243.8409 - val_mae: 10.8020 - 31s/epoch - 9ms/step
Epoch 8/350
3443/3443 - 38s - loss: 235.1051 - mae: 10.7713 - val_loss: 246.4159 - val_mae: 10.9744 - 38s/epoch - 11ms/step
Epoc

In [11]:
# Score the trained model on the test split without printing a progress bar.
# `evaluate` yields the compiled loss (MSE) followed by the metric (MAE).
test_scores = model.evaluate(x_test, y_test, verbose=0)
mse, mae = test_scores

In [12]:
# Report the test errors; the RMSE is derived from the MSE so that it is on the
# same scale as the MAE.
rmse = np.sqrt(mse)
report = "MSE: %.3f, RMSE: %.3f, MAE: %.3f" % (mse, rmse, mae)
print(report)

MSE: 186.110, RMSE: 13.642, MAE: 9.058


## Übung
Welche Veränderungen könnten für starkes Overfitting sorgen? (Das lässt sich theoretisch wie praktisch beantworten.)