In [7]:
import sys
sys.path.append('/Users/majid/git/housing/')


from collections import defaultdict, OrderedDict
from datetime import datetime
from pprint import pprint

import pytest
import tensorflow_datasets as tfds
import numpy as np

from housing_model.data.example import Features, Example
from housing_model.data.tf_housing import TfHousing
from housing_model.models.keras_model import ModelBuilder, ModelParams, KerasModel, TrainParams
import tensorflow as tf

In [8]:
data = tfds.load('tf_housing', split='train').take(1).cache()

def setup_data(tf_data: tf.data.Dataset, batch_size: int):
    input_features = set(tf_data.element_spec.keys())
    input_features.remove('metadata')
    input_features.remove('sold_price')

    return tf_data.map(
        lambda ex: (
            {f_name: ex[f_name] for f_name in input_features},
            ex['sold_price']
        )
    ).batch(batch_size)

train_ds = setup_data(data, batch_size=2)
test_ds = setup_data(data, batch_size=1).take(1).map(lambda x, y: x)

ex = next(iter(train_ds))

In [9]:
pprint(ex)

({'date_end': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([18079.], dtype=float32)>,
  'land/depth': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([200.9], dtype=float32)>,
  'land/front': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([93.41], dtype=float32)>,
  'map/lat': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([44.55023], dtype=float32)>,
  'map/lon': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-79.41331], dtype=float32)>},
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([870000.], dtype=float32)>)


In [54]:
feature_name = 'date_end'

def build_custom_model():
    input_feature = tf.keras.layers.Input(name=feature_name, shape=(), dtype='float32')
    date_end_reshaped = tf.keras.layers.Reshape((1,), name='to_vector')(input_feature)

    emb = []
    emb.append(tf.keras.layers.Dense(
        units=10, activation='sigmoid', name='emb-0',
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01)
    )(date_end_reshaped))

    for i in range(0):
        emb.append(tf.keras.layers.Dense(units=10, activation='sigmoid', name=f'emb-{i+1}')(emb[i]))
    
    sold_price = tf.keras.layers.Dense(units=1, name='sold')(emb[-1])

    model = tf.keras.Model(inputs=input_feature, outputs=sold_price)
    return model
    
def build_simple_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units=10, name="feature-dense", activation='sigmoid'),
        tf.keras.layers.Dense(units=1, name="output-dense", activation=None)
    ])
    return model


def create_debug_model(model):
    debug_outputs = {}

    for layer in model.layers:
        for (name, tensor) in [
            (f'{layer.name}-in', layer.input), 
            (f'{layer.name}-out', layer.output),
        ]:
            debug_outputs[name] = tensor


    debug_model = tf.keras.Model(inputs={feature_name: input_feature}, outputs=debug_outputs)
    return debug_model

In [55]:
model = build_simple_model()
model.build(input_shape=(1, 1))
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-1),
    loss=tf.keras.losses.MeanSquaredError()
)
print(model.summary())


Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
feature-dense (Dense)        multiple                  20        
_________________________________________________________________
output-dense (Dense)         multiple                  11        
Total params: 31
Trainable params: 31
Non-trainable params: 0
_________________________________________________________________
None


In [51]:
model.fit(train_ds.map(lambda x, y: (x[feature_name], y)), epochs=100)

TypeError: in converted code:

    <ipython-input-51-9e08e5f66923>:1 None  *
        model.fit(train_ds.map(lambda x, y: (x[feature_name], y)), epochs=100)
    /Users/majid/git/housing/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/array_ops.py:862 _slice_helper
        _check_index(s)
    /Users/majid/git/housing/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/array_ops.py:752 _check_index
        raise TypeError(_SLICE_TYPE_ERROR + ", got {!r}".format(idx))

    TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'date_end'


In [None]:
for ex in test_ds:
    pprint(model(ex))
    pprint(debug_model(ex))

In [None]:
model.get_layer('emb-0').weights

In [62]:
model = build_simple_model()

train_ds = tf.data.Dataset.from_tensor_slices([1, 2]).map(lambda x: (x, x)).batch(2)
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

iter_print = 10000
repeat_cnt = iter_print + 2

for idx, (x, y_true) in enumerate(train_ds.repeat(repeat_cnt)):
    y_true = tf.expand_dims(y_true, -1)
    x = tf.expand_dims(x, -1)

#     trainable_variables = [y_pred]
#     layer_names = ['y_pred']
#     for layer in model.layers:
#         trainable_variables.append(layer.trainable_variables)
#         layer_names.append(layer.name)

#    grads = tape.gradient(loss, trainable_variables)
    
#     layers_output = debug_model(x)
#     for layer_name, layer_grad in zip(layer_names, grads):
#         if layer_name != 'y_pred':
#             print(f'*** {layer_name} **')
#             print(f'weights:\n{model.get_layer(layer_name).weights}')
#             print(f'layer inputs:\n{layers_output.get(layer_name+"-in")}')
#             print(f'layer outputs:\n{layers_output.get(layer_name+"-out")}')
#         print(f'grad:\n{layer_grad}\n\n')
        
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True)
        loss = loss_fn(y_true, y_pred)
    grads = tape.gradient(loss, model.trainable_variables)

    if idx > iter_print or idx % 1000 == 0:
        print(f"----------- {idx} -----------")
        print(x)

        print(f'label: {y_true.numpy()}\npred: {y_pred.numpy()}')
        print(f'loss: {loss.numpy().mean()}')
        print(f'weights: {model.layers[0].get_weights()}')
        print(f'grad = {grads}')
    
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    
    if idx > iter_print:
        print(f'new weights: {model.layers[0].get_weights()}')

----------- 0 -----------
tf.Tensor(
[[1]
 [2]], shape=(2, 1), dtype=int32)
label: [[1]
 [2]]
pred: [[-0.7986613]
 [-0.8221377]]
loss: 5.599822044372559
weights: [array([[ 0.5064483 , -0.56430084, -0.25820133, -0.10090905, -0.5163412 ,
         0.3539031 , -0.04874986,  0.07362783,  0.08765167, -0.303747  ]],
      dtype=float32), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)]
grad = [<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[ 0.6526663 ,  0.7453051 ,  1.009214  ,  0.6809789 , -0.8829108 ,
         0.63363945, -0.31018203,  0.5218337 , -0.7712487 ,  0.29162368]],
      dtype=float32)>, <tf.Tensor: shape=(10,), dtype=float32, numpy=
array([ 0.41663367,  0.47889382,  0.6311691 ,  0.42324862, -0.5642173 ,
        0.3988238 , -0.192621  ,  0.32416445, -0.47922152,  0.18289483],
      dtype=float32)>, <tf.Tensor: shape=(10, 1), dtype=float32, numpy=
array([[-3.1925998],
       [-1.3418946],
       [-1.8384826],
       [-2.1231544],
       [-1.4131674],
      

----------- 6000 -----------
tf.Tensor(
[[1]
 [2]], shape=(2, 1), dtype=int32)
label: [[1]
 [2]]
pred: [[1.0009891]
 [1.9990877]]
loss: 9.052926657204807e-07
weights: [array([[ 0.6249065 , -0.929889  , -0.80805147, -0.49422652,  0.43854994,
         0.47065222,  0.5965599 ,  0.05023013,  0.8635803 , -0.5253631 ]],
      dtype=float32), array([-0.10266718,  0.34291065,  0.20752306,  0.03248665, -0.00191011,
       -0.06480761, -0.07222247, -0.01936799, -0.2823748 ,  0.05210562],
      dtype=float32)]
grad = [<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[-4.3776599e-05,  5.4770586e-05,  7.1453716e-05,  8.3032224e-05,
        -9.7737007e-05, -4.2413041e-05, -7.4618569e-05,  1.1839441e-05,
        -6.7303015e-05,  7.1560542e-05]], dtype=float32)>, <tf.Tensor: shape=(10,), dtype=float32, numpy=
array([ 2.74145641e-05, -1.20409153e-04, -9.50964750e-05, -3.25712754e-05,
        3.17253725e-05,  1.44190599e-05,  4.30428045e-05, -1.10938799e-06,
        1.07581145e-04, -3.11550102e-0